drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

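/*
 * Skip any VLAN/QinQ tags that follow the Ethernet header: returns the byte
 * offset of the encapsulated payload relative to the end of the Ethernet
 * header and updates *proto to the inner EtherType. At most two stacked
 * tags are parsed.
 */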
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}

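/*
 * Round-robin (mode 0) RX: poll each active slave in turn, appending to
 * *bufs, until nb_pkts packets have been gathered or every slave has been
 * polled once.
 */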
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;

        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;


        for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset into *bufs increases as packets are received
                 * from earlier slaves */
                num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
                if (num_rx_slave) {
                        num_rx_total += num_rx_slave;
                        nb_pkts -= num_rx_slave;
                }
        }

        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

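/*
 * A frame counts as LACP control traffic when it carries no VLAN tag, its
 * EtherType is the slow-protocols type (0x8809) and its subtype is either
 * LACP or marker.
 */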
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

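/*
 * Check on a slave port that the flow rule steering slow-protocol frames to
 * the dedicated RX queue would be accepted, and that the slave exposes
 * enough RX/TX queues to host the extra dedicated queue pair.
 */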
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities do not allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that every slave in the bonding device can accept the
         * dedicated-queue flow rule */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

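/*
 * Install on a slave port the flow rule that redirects slow-protocol
 * (LACP/marker) frames to the RX queue dedicated to mode 4 control traffic.
 */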
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

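/*
 * Mode 4 RX fast path, used when dedicated queues are enabled: slow-protocol
 * frames are steered to their own queue by the flow rules above, so the data
 * queues can be polled round-robin with no per-packet inspection.
 */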
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t i, idx;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0, idx = internals->active_slave;
                        i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
                idx = idx % slave_count;

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);
        }

        internals->active_slave = idx;

        return num_rx_total;
}

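/*
 * Mode 4 TX fast path, used when dedicated queues are enabled: spread
 * packets across the slaves in the DISTRIBUTING state using the configured
 * transmit hash. LACP control frames travel on the dedicated queue, so they
 * are not handled here.
 */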
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        /* 2-D array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves;
         * it holds one entry per packet, so it must be sized by nb_bufs
         * rather than RTE_MAX_ETHPORTS */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);


        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        /*
         * Select an output slave for each packet with a hash based on the
         * xmit policy, then sort the packets into the per-slave arrays
         */
        internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint8_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }


        /* Send packet burst on each slave device */
        for (i = 0; i < dist_slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}


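/*
 * Mode 4 RX slow path, used without dedicated queues: poll slaves
 * round-robin starting at the saved index, hand LACP/marker frames to the
 * mode 4 state machine, and drop frames that arrive on a slave that is not
 * COLLECTING or that are not addressed to us while promiscuous mode is off.
 */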
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;
        uint8_t subtype;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* A packet classified beyond plain L2 Ethernet cannot
                         * be a slow-protocol frame, so skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow
                         * packet, if the slave is not in collecting state, or
                         * if the bonding interface is not in promiscuous mode
                         * and the destination address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting || (!promisc &&
                                        !is_multicast_ether_addr(&hdr->d_addr) &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        internals->active_slave = idx;
        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Update the RX or TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update the
         * stats, unless the table is already full. */
        if (active_clients >= MAX_CLIENTS_NUMBER)
                return;
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;

}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
                src_ip,                                                 \
                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

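/*
 * Mode 6 (adaptive load balancing) RX: receive with the round-robin policy,
 * then hand any ARP frames to the ALB logic so the client table stays
 * current.
 */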
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        /* This is an RX queue, so cast to the RX queue structure */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

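/*
 * Round-robin (mode 0) TX: distribute the burst evenly across the active
 * slaves, starting from a rotating slave index so successive bursts begin
 * on different slaves.
 */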
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Distribute the packets round-robin into the per-slave arrays */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* increment current slave index so the next call to tx burst starts on the
         * next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                       &slave_bufs[i][num_tx_slave],
                                       tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

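/*
 * L2 hash: XOR the three 16-bit words of the source and destination MAC
 * addresses into a single value.
 */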
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}


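/*
 * The burst_xmit_*_hash callbacks below map each packet in a burst to a
 * slave index in [0, slave_count), folding progressively more header fields
 * (L2, L2+L3, L3+L4) into the hash depending on the configured transmit
 * policy.
 */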
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct udp_hdr *udp_hdr;
        struct tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

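/*
 * Per-slave bandwidth-gap bookkeeping used to order TLB slaves: the unused
 * share of the link is kept as an integer quotient plus remainder so slaves
 * can be compared without floating point.
 */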
struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

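/*
 * Compute a slave's remaining transmit budget for the current update
 * window: link_bwg is the link capacity in bytes, scaled by the window
 * length in ms, and load is the byte count sent since the last update;
 * the result is stored as quotient and remainder for bandwidth_cmp().
 */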
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint8_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint8_t i, slave_id;

        internals->slave_update_idx++;


        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

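/*
 * Mode 5 (TLB) TX: walk the slaves in bandwidth order, rewriting the source
 * MAC of outgoing frames from the primary's address to the transmitting
 * slave's address before each burst.
 */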
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);


        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

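/*
 * Mode 6 (ALB) TX: ARP packets are assigned to slaves by the ALB table so
 * each client is pinned to one slave; all other traffic falls back to the
 * TLB policy. Generated ARP update packets are sent alongside but are not
 * counted in the return value.
 */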
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_BOND_LOG(ERR,
                                                     "Failed to allocate ARP packet from pool");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        /* Move unsent packets to the end of bufs, reading
                         * them back from the tail of this slave's buffer;
                         * index nb_pkts - 1 - j could lie past the packets
                         * actually stored for this slave */
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                /* Move only the unsent packets to the end of bufs, reading
                 * from the tail of the TLB buffer */
                for (j = 0;
                     j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS]
                                        [slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

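/*
 * Mode 2 (balance) TX: hash each packet to one of the active slaves
 * according to the configured transmit policy and burst the per-slave
 * arrays; unsent packets are compacted to the tail of bufs.
 */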
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        /*
         * Select an output slave for each packet with a hash based on the
         * xmit policy, then sort the packets into the per-slave arrays
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint8_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

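/*
 * Mode 4 TX slow path, used without dedicated queues: hash data packets
 * across the DISTRIBUTING slaves, then drain each slave's LACP control
 * ring; control frames that fail to transmit are re-queued so they are
 * not lost.
 */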
1259 static uint16_t
1260 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1261                 uint16_t nb_bufs)
1262 {
1263         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1264         struct bond_dev_private *internals = bd_tx_q->dev_private;
1265
1266         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1267         uint16_t slave_count;
1268
1269         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1270         uint16_t dist_slave_count;
1271
1272         /* 2-D array to sort mbufs for transmission on each slave into */
1273         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1274         /* Number of mbufs for transmission on each slave */
1275         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1276         /* Mapping array generated by hash function to map mbufs to slaves */
1277         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1278
1279         uint16_t slave_tx_count;
1280         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1281
1282         uint16_t i;
1283
1284         if (unlikely(nb_bufs == 0))
1285                 return 0;
1286
1287         /* Copy slave list to protect against slave up/down changes during tx
1288          * bursting */
1289         slave_count = internals->active_slave_count;
1290         if (unlikely(slave_count < 1))
1291                 return 0;
1292
1293         memcpy(slave_port_ids, internals->active_slaves,
1294                         sizeof(slave_port_ids[0]) * slave_count);
1295
1296         dist_slave_count = 0;
1297         for (i = 0; i < slave_count; i++) {
1298                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1299
1300                 if (ACTOR_STATE(port, DISTRIBUTING))
1301                         dist_slave_port_ids[dist_slave_count++] =
1302                                         slave_port_ids[i];
1303         }
1304
1305         if (likely(dist_slave_count > 1)) {
1306
1307                 /*
1308                  * Populate slaves mbuf with the packets which are to be sent
1309                  * on it, selecting output slave using hash based on xmit policy
1310                  */
1311                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1312                                 bufs_slave_port_idxs);
1313
1314                 for (i = 0; i < nb_bufs; i++) {
1315                         /*
1316                          * Populate slave mbuf arrays with mbufs for that
1317                          * slave
1318                          */
1319                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1320
1321                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1322                                         bufs[i];
1323                 }
1324
1325
1326                 /* Send packet burst on each slave device */
1327                 for (i = 0; i < dist_slave_count; i++) {
1328                         if (slave_nb_bufs[i] == 0)
1329                                 continue;
1330
1331                         slave_tx_count = rte_eth_tx_burst(
1332                                         dist_slave_port_ids[i],
1333                                         bd_tx_q->queue_id, slave_bufs[i],
1334                                         slave_nb_bufs[i]);
1335
1336                         total_tx_count += slave_tx_count;
1337
1338                         /* If tx burst fails move packets to end of bufs */
1339                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1340                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1341                                                 slave_tx_count;
1342                                 total_tx_fail_count += slave_tx_fail_count;
1343
1344                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1345                                        &slave_bufs[i][slave_tx_count],
1346                                        slave_tx_fail_count * sizeof(bufs[0]));
1347                         }
1348                 }
1349         }
1350
1351         /* Check for LACP control packets and send if available */
1352         for (i = 0; i < slave_count; i++) {
1353                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1354                 struct rte_mbuf *ctrl_pkt = NULL;
1355
1356                 if (likely(rte_ring_empty(port->tx_ring)))
1357                         continue;
1358
1359                 if (rte_ring_dequeue(port->tx_ring,
1360                                      (void **)&ctrl_pkt) != -ENOENT) {
1361                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1362                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1363                         /*
1364                          * re-enqueue LAG control plane packets to buffering
1365                          * ring if transmission fails so the packet isn't lost.
1366                          */
1367                         if (slave_tx_count != 1)
1368                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1369                 }
1370         }
1371
1372         return total_tx_count;
1373 }
1374
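/*
 * Broadcast mode TX: every packet is transmitted on every active slave.
 * Each mbuf's reference count is bumped by (number of slaves - 1) so that a
 * single burst can be handed to all slaves; the extra references held for
 * the less successful slaves are released below when transmission on a
 * slave is only partial.
 */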
1375 static uint16_t
1376 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1377                 uint16_t nb_pkts)
1378 {
1379         struct bond_dev_private *internals;
1380         struct bond_tx_queue *bd_tx_q;
1381
1382         uint8_t tx_failed_flag = 0, num_of_slaves;
1383         uint16_t slaves[RTE_MAX_ETHPORTS];
1384
1385         uint16_t max_nb_of_tx_pkts = 0;
1386
1387         int slave_tx_total[RTE_MAX_ETHPORTS];
1388         int i, most_successful_tx_slave = -1;
1389
1390         bd_tx_q = (struct bond_tx_queue *)queue;
1391         internals = bd_tx_q->dev_private;
1392
1393         /* Copy slave list to protect against slave up/down changes during tx
1394          * bursting */
1395         num_of_slaves = internals->active_slave_count;
1396         memcpy(slaves, internals->active_slaves,
1397                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1398
1399         if (num_of_slaves < 1)
1400                 return 0;
1401
1402         /* Increment reference count on mbufs */
1403         for (i = 0; i < nb_pkts; i++)
1404                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1405
1406         /* Transmit burst on each active slave */
1407         for (i = 0; i < num_of_slaves; i++) {
1408                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1409                                         bufs, nb_pkts);
1410
1411                 if (unlikely(slave_tx_total[i] < nb_pkts))
1412                         tx_failed_flag = 1;
1413
                /* record the count and the index of the slave that transmits
                 * the most packets */
1416                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1417                         max_nb_of_tx_pkts = slave_tx_total[i];
1418                         most_successful_tx_slave = i;
1419                 }
1420         }
1421
1422         /* if slaves fail to transmit packets from burst, the calling application
1423          * is not expected to know about multiple references to packets so we must
1424          * handle failures of all packets except those of the most successful slave
1425          */
1426         if (unlikely(tx_failed_flag))
1427                 for (i = 0; i < num_of_slaves; i++)
1428                         if (i != most_successful_tx_slave)
1429                                 while (slave_tx_total[i] < nb_pkts)
1430                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1431
1432         return max_nb_of_tx_pkts;
1433 }
1434
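/*
 * Record the link properties (speed, duplex, autoneg) advertised by the
 * bonded device. In mode 4 the first slave's properties become the
 * reference that link_properties_valid() checks subsequent slaves against.
 */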
1435 void
1436 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1437 {
1438         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1439
1440         if (bond_ctx->mode == BONDING_MODE_8023AD) {
                /**
                 * In mode 4, save the link properties of the first slave;
                 * all subsequent slaves must match these properties
                 */
1445                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1446
1447                 bond_link->link_autoneg = slave_link->link_autoneg;
1448                 bond_link->link_duplex = slave_link->link_duplex;
1449                 bond_link->link_speed = slave_link->link_speed;
1450         } else {
1451                 /**
1452                  * In any other mode the link properties are set to default
1453                  * values of AUTONEG/DUPLEX
1454                  */
1455                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1456                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1457         }
1458 }
1459
1460 int
1461 link_properties_valid(struct rte_eth_dev *ethdev,
1462                 struct rte_eth_link *slave_link)
1463 {
1464         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1465
1466         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1467                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1468
1469                 if (bond_link->link_duplex != slave_link->link_duplex ||
1470                         bond_link->link_autoneg != slave_link->link_autoneg ||
1471                         bond_link->link_speed != slave_link->link_speed)
1472                         return -1;
1473         }
1474
1475         return 0;
1476 }
1477
1478 int
1479 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1480 {
1481         struct ether_addr *mac_addr;
1482
1483         if (eth_dev == NULL) {
1484                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1485                 return -1;
1486         }
1487
1488         if (dst_mac_addr == NULL) {
1489                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1490                 return -1;
1491         }
1492
1493         mac_addr = eth_dev->data->mac_addrs;
1494
1495         ether_addr_copy(mac_addr, dst_mac_addr);
1496         return 0;
1497 }
1498
1499 int
1500 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1501 {
1502         struct ether_addr *mac_addr;
1503
1504         if (eth_dev == NULL) {
1505                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1506                 return -1;
1507         }
1508
1509         if (new_mac_addr == NULL) {
1510                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1511                 return -1;
1512         }
1513
1514         mac_addr = eth_dev->data->mac_addrs;
1515
1516         /* If new MAC is different to current MAC then update */
1517         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1518                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1519
1520         return 0;
1521 }
1522
1523 static const struct ether_addr null_mac_addr;
1524
1525 /*
1526  * Add additional MAC addresses to the slave
1527  */
1528 int
1529 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1530                 uint16_t slave_port_id)
1531 {
1532         int i, ret;
1533         struct ether_addr *mac_addr;
1534
1535         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1536                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1537                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1538                         break;
1539
1540                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1541                 if (ret < 0) {
1542                         /* rollback */
1543                         for (i--; i > 0; i--)
1544                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1545                                         &bonded_eth_dev->data->mac_addrs[i]);
1546                         return ret;
1547                 }
1548         }
1549
1550         return 0;
1551 }
1552
1553 /*
1554  * Remove additional MAC addresses from the slave
1555  */
1556 int
1557 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1558                 uint16_t slave_port_id)
1559 {
1560         int i, rc, ret;
1561         struct ether_addr *mac_addr;
1562
1563         rc = 0;
1564         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1565                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1566                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1567                         break;
1568
1569                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1570                 /* save only the first error */
1571                 if (ret < 0 && rc == 0)
1572                         rc = ret;
1573         }
1574
1575         return rc;
1576 }
1577
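/*
 * Push MAC addresses down to the slaves according to the bonding mode:
 * round-robin, balance and broadcast program the bonded MAC on every
 * slave; mode 4 delegates to the 802.3ad state machine; the remaining
 * modes program the bonded MAC only on the current primary and restore
 * each other slave's persisted (original) MAC address.
 */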
1578 int
1579 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1580 {
1581         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1582         int i;
1583
1584         /* Update slave devices MAC addresses */
1585         if (internals->slave_count < 1)
1586                 return -1;
1587
1588         switch (internals->mode) {
1589         case BONDING_MODE_ROUND_ROBIN:
1590         case BONDING_MODE_BALANCE:
1591         case BONDING_MODE_BROADCAST:
1592                 for (i = 0; i < internals->slave_count; i++) {
1593                         if (rte_eth_dev_default_mac_addr_set(
1594                                         internals->slaves[i].port_id,
1595                                         bonded_eth_dev->data->mac_addrs)) {
1596                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1597                                                 internals->slaves[i].port_id);
1598                                 return -1;
1599                         }
1600                 }
1601                 break;
1602         case BONDING_MODE_8023AD:
1603                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1604                 break;
1605         case BONDING_MODE_ACTIVE_BACKUP:
1606         case BONDING_MODE_TLB:
1607         case BONDING_MODE_ALB:
1608         default:
1609                 for (i = 0; i < internals->slave_count; i++) {
1610                         if (internals->slaves[i].port_id ==
1611                                         internals->current_primary_port) {
1612                                 if (rte_eth_dev_default_mac_addr_set(
                                                internals->current_primary_port,
1614                                                 bonded_eth_dev->data->mac_addrs)) {
1615                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1616                                                         internals->current_primary_port);
1617                                         return -1;
1618                                 }
1619                         } else {
1620                                 if (rte_eth_dev_default_mac_addr_set(
1621                                                 internals->slaves[i].port_id,
1622                                                 &internals->slaves[i].persisted_mac_addr)) {
1623                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1624                                                         internals->slaves[i].port_id);
1625                                         return -1;
1626                                 }
1627                         }
1628                 }
1629         }
1630
1631         return 0;
1632 }
1633
1634 int
1635 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1636 {
1637         struct bond_dev_private *internals;
1638
1639         internals = eth_dev->data->dev_private;
1640
1641         switch (mode) {
1642         case BONDING_MODE_ROUND_ROBIN:
1643                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1644                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1645                 break;
1646         case BONDING_MODE_ACTIVE_BACKUP:
1647                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1648                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1649                 break;
1650         case BONDING_MODE_BALANCE:
1651                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1652                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1653                 break;
1654         case BONDING_MODE_BROADCAST:
1655                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1656                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1657                 break;
1658         case BONDING_MODE_8023AD:
1659                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1660                         return -1;
1661
1662                 if (internals->mode4.dedicated_queues.enabled == 0) {
1663                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1664                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1665                         RTE_BOND_LOG(WARNING,
1666                                 "Using mode 4, it is necessary to do TX burst "
1667                                 "and RX burst at least every 100ms.");
1668                 } else {
1669                         /* Use flow director's optimization */
1670                         eth_dev->rx_pkt_burst =
1671                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1672                         eth_dev->tx_pkt_burst =
1673                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1674                 }
1675                 break;
1676         case BONDING_MODE_TLB:
1677                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1678                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1679                 break;
1680         case BONDING_MODE_ALB:
1681                 if (bond_mode_alb_enable(eth_dev) != 0)
1682                         return -1;
1683
1684                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1685                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1686                 break;
1687         default:
1688                 return -1;
1689         }
1690
1691         internals->mode = mode;
1692
1693         return 0;
1694 }
1695
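/*
 * Illustrative application-side sketch (not part of this driver): when
 * mode 4 runs without dedicated queues, the 802.3ad state machine is
 * serviced from the regular burst functions, so an application must keep
 * calling rte_eth_rx_burst()/rte_eth_tx_burst() on the bonded port at
 * least every 100ms, even when it has little traffic of its own. Assuming
 * a started bonded port `bond_port` with a single queue:
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t nb_rx, nb_tx;
 *
 *	for (;;) {
 *		nb_rx = rte_eth_rx_burst(bond_port, 0, pkts, 32);
 *		... application processing ...
 *		nb_tx = rte_eth_tx_burst(bond_port, 0, pkts, nb_rx);
 *		while (nb_tx < nb_rx)
 *			rte_pktmbuf_free(pkts[nb_tx++]);
 *	}
 */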
1696
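/*
 * Create the per-slave mempool that backs LACP control traffic and, when
 * dedicated queues are enabled, set up the extra Rx/Tx queue pair that
 * carries it so LACPDUs stay off the data path queues.
 */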
1697 static int
1698 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1699                 struct rte_eth_dev *slave_eth_dev)
1700 {
1701         int errval = 0;
1702         struct bond_dev_private *internals = (struct bond_dev_private *)
1703                 bonded_eth_dev->data->dev_private;
1704         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1705
1706         if (port->slow_pool == NULL) {
1707                 char mem_name[256];
1708                 int slave_id = slave_eth_dev->data->port_id;
1709
1710                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1711                                 slave_id);
1712                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1713                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1714                         slave_eth_dev->data->numa_node);
1715
1716                 /* Any memory allocation failure in initialization is critical because
                 * resources can't be freed, so reinitialization is impossible. */
1718                 if (port->slow_pool == NULL) {
1719                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1720                                 slave_id, mem_name, rte_strerror(rte_errno));
1721                 }
1722         }
1723
1724         if (internals->mode4.dedicated_queues.enabled == 1) {
1725                 /* Configure slow Rx queue */
1726
1727                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1728                                 internals->mode4.dedicated_queues.rx_qid, 128,
1729                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1730                                 NULL, port->slow_pool);
1731                 if (errval != 0) {
1732                         RTE_BOND_LOG(ERR,
1733                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1734                                         slave_eth_dev->data->port_id,
1735                                         internals->mode4.dedicated_queues.rx_qid,
1736                                         errval);
1737                         return errval;
1738                 }
1739
1740                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1741                                 internals->mode4.dedicated_queues.tx_qid, 512,
1742                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1743                                 NULL);
1744                 if (errval != 0) {
1745                         RTE_BOND_LOG(ERR,
1746                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1747                                 slave_eth_dev->data->port_id,
1748                                 internals->mode4.dedicated_queues.tx_qid,
1749                                 errval);
1750                         return errval;
1751                 }
1752         }
1753         return 0;
1754 }
1755
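/*
 * Propagate the bonded device's configuration to a slave: stop it, inherit
 * the bond's RSS, VLAN filtering and MTU settings, mirror the bond's Rx/Tx
 * queue setup (plus the slow queue pair in mode 4 with dedicated queues),
 * then restart it and synchronize its RSS RETA and link status.
 */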
1756 int
1757 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1758                 struct rte_eth_dev *slave_eth_dev)
1759 {
1760         struct bond_rx_queue *bd_rx_q;
1761         struct bond_tx_queue *bd_tx_q;
1762         uint16_t nb_rx_queues;
1763         uint16_t nb_tx_queues;
1764
1765         int errval;
1766         uint16_t q_id;
1767         struct rte_flow_error flow_error;
1768
1769         struct bond_dev_private *internals = (struct bond_dev_private *)
1770                 bonded_eth_dev->data->dev_private;
1771
1772         /* Stop slave */
1773         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1774
1775         /* Enable interrupts on slave device if supported */
1776         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1777                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1778
1779         /* If RSS is enabled for bonding, try to enable it for slaves  */
1780         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1781                 if (internals->rss_key_len != 0) {
1782                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1783                                         internals->rss_key_len;
1784                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1785                                         internals->rss_key;
1786                 } else {
1787                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1788                 }
1789
1790                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1791                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1792                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1793                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1794         }
1795
1796         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1797                         DEV_RX_OFFLOAD_VLAN_FILTER)
1798                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1799                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1800         else
1801                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1802                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1803
1804         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1805         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1806
1807         if (internals->mode == BONDING_MODE_8023AD) {
1808                 if (internals->mode4.dedicated_queues.enabled == 1) {
1809                         nb_rx_queues++;
1810                         nb_tx_queues++;
1811                 }
1812         }
1813
1814         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1815                                      bonded_eth_dev->data->mtu);
1816         if (errval != 0 && errval != -ENOTSUP) {
1817                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1818                                 slave_eth_dev->data->port_id, errval);
1819                 return errval;
1820         }
1821
1822         /* Configure device */
1823         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1824                         nb_rx_queues, nb_tx_queues,
1825                         &(slave_eth_dev->data->dev_conf));
1826         if (errval != 0) {
1827                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1828                                 slave_eth_dev->data->port_id, errval);
1829                 return errval;
1830         }
1831
1832         /* Setup Rx Queues */
1833         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1834                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1835
1836                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1837                                 bd_rx_q->nb_rx_desc,
1838                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1839                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1840                 if (errval != 0) {
1841                         RTE_BOND_LOG(ERR,
1842                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1843                                         slave_eth_dev->data->port_id, q_id, errval);
1844                         return errval;
1845                 }
1846         }
1847
1848         /* Setup Tx Queues */
1849         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1850                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1851
1852                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1853                                 bd_tx_q->nb_tx_desc,
1854                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1855                                 &bd_tx_q->tx_conf);
1856                 if (errval != 0) {
1857                         RTE_BOND_LOG(ERR,
1858                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1859                                 slave_eth_dev->data->port_id, q_id, errval);
1860                         return errval;
1861                 }
1862         }
1863
1864         if (internals->mode == BONDING_MODE_8023AD &&
1865                         internals->mode4.dedicated_queues.enabled == 1) {
                errval = slave_configure_slow_queue(bonded_eth_dev,
                                slave_eth_dev);
                if (errval != 0)
                        return errval;
1869
                errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
                                slave_eth_dev->data->port_id);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                        return errval;
                }
1877
1878                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1879                         rte_flow_destroy(slave_eth_dev->data->port_id,
1880                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1881                                         &flow_error);
1882
                errval = bond_ethdev_8023ad_flow_set(bonded_eth_dev,
                                slave_eth_dev->data->port_id);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                "bond_ethdev_8023ad_flow_set: port=%d, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                        return errval;
                }
1885         }
1886
1887         /* Start device */
1888         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1889         if (errval != 0) {
1890                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1891                                 slave_eth_dev->data->port_id, errval);
1892                 return -1;
1893         }
1894
1895         /* If RSS is enabled for bonding, synchronize RETA */
1896         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1897                 int i;
1898                 struct bond_dev_private *internals;
1899
1900                 internals = bonded_eth_dev->data->dev_private;
1901
1902                 for (i = 0; i < internals->slave_count; i++) {
1903                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1904                                 errval = rte_eth_dev_rss_reta_update(
1905                                                 slave_eth_dev->data->port_id,
1906                                                 &internals->reta_conf[0],
1907                                                 internals->slaves[i].reta_size);
1908                                 if (errval != 0) {
1909                                         RTE_BOND_LOG(WARNING,
1910                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1911                                                      " RSS Configuration for bonding may be inconsistent.",
1912                                                      slave_eth_dev->data->port_id, errval);
1913                                 }
1914                                 break;
1915                         }
1916                 }
1917         }
1918
1919         /* If lsc interrupt is set, check initial slave's link status */
1920         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1921                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1922                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1923                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1924                         NULL);
1925         }
1926
1927         return 0;
1928 }
1929
1930 void
1931 slave_remove(struct bond_dev_private *internals,
1932                 struct rte_eth_dev *slave_eth_dev)
1933 {
1934         uint8_t i;
1935
1936         for (i = 0; i < internals->slave_count; i++)
1937                 if (internals->slaves[i].port_id ==
1938                                 slave_eth_dev->data->port_id)
1939                         break;
1940
1941         if (i < (internals->slave_count - 1)) {
1942                 struct rte_flow *flow;
1943
1944                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1945                                 sizeof(internals->slaves[0]) *
1946                                 (internals->slave_count - i - 1));
1947                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1948                         memmove(&flow->flows[i], &flow->flows[i + 1],
1949                                 sizeof(flow->flows[0]) *
1950                                 (internals->slave_count - i - 1));
1951                         flow->flows[internals->slave_count - 1] = NULL;
1952                 }
1953         }
1954
1955         internals->slave_count--;
1956
1957         /* force reconfiguration of slave interfaces */
1958         _rte_eth_dev_reset(slave_eth_dev);
1959 }
1960
1961 static void
1962 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1963
1964 void
1965 slave_add(struct bond_dev_private *internals,
1966                 struct rte_eth_dev *slave_eth_dev)
1967 {
1968         struct bond_slave_details *slave_details =
1969                         &internals->slaves[internals->slave_count];
1970
1971         slave_details->port_id = slave_eth_dev->data->port_id;
1972         slave_details->last_link_status = 0;
1973
1974         /* Mark slave devices that don't support interrupts so we can
1975          * compensate when we start the bond
1976          */
1977         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1978                 slave_details->link_status_poll_enabled = 1;
1979         }
1980
1981         slave_details->link_status_wait_to_complete = 0;
        /* Save the slave's current MAC address so it can be restored when the
         * slave is removed from the bonded device */
1983         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1984                         sizeof(struct ether_addr));
1985 }
1986
1987 void
1988 bond_ethdev_primary_set(struct bond_dev_private *internals,
1989                 uint16_t slave_port_id)
1990 {
1991         int i;
1992
1993         if (internals->active_slave_count < 1)
1994                 internals->current_primary_port = slave_port_id;
1995         else
1996                 /* Search bonded device slave ports for new proposed primary port */
1997                 for (i = 0; i < internals->active_slave_count; i++) {
1998                         if (internals->active_slaves[i] == slave_port_id)
1999                                 internals->current_primary_port = slave_port_id;
2000                 }
2001 }
2002
2003 static void
2004 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2005
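/*
 * Start the bonded device: derive the bond MAC from the primary slave when
 * the user has not supplied one, reconfigure and start every slave, arm
 * the link status polling alarm if any slave lacks LSC interrupt support,
 * and start the mode-specific machinery (802.3ad state machines, TLB/ALB).
 */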
2006 static int
2007 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2008 {
2009         struct bond_dev_private *internals;
2010         int i;
2011
2012         /* slave eth dev will be started by bonded device */
2013         if (check_for_bonded_ethdev(eth_dev)) {
2014                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2015                                 eth_dev->data->port_id);
2016                 return -1;
2017         }
2018
2019         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2020         eth_dev->data->dev_started = 1;
2021
2022         internals = eth_dev->data->dev_private;
2023
2024         if (internals->slave_count == 0) {
2025                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2026                 goto out_err;
2027         }
2028
2029         if (internals->user_defined_mac == 0) {
2030                 struct ether_addr *new_mac_addr = NULL;
2031
2032                 for (i = 0; i < internals->slave_count; i++)
2033                         if (internals->slaves[i].port_id == internals->primary_port)
2034                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2035
2036                 if (new_mac_addr == NULL)
2037                         goto out_err;
2038
2039                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2040                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2041                                         eth_dev->data->port_id);
2042                         goto out_err;
2043                 }
2044         }
2045
        /* If bonded device is configured in promiscuous mode then re-apply config */
2047         if (internals->promiscuous_en)
2048                 bond_ethdev_promiscuous_enable(eth_dev);
2049
2050         if (internals->mode == BONDING_MODE_8023AD) {
2051                 if (internals->mode4.dedicated_queues.enabled == 1) {
2052                         internals->mode4.dedicated_queues.rx_qid =
2053                                         eth_dev->data->nb_rx_queues;
2054                         internals->mode4.dedicated_queues.tx_qid =
2055                                         eth_dev->data->nb_tx_queues;
2056                 }
2057         }
2058
2059
2060         /* Reconfigure each slave device if starting bonded device */
2061         for (i = 0; i < internals->slave_count; i++) {
2062                 struct rte_eth_dev *slave_ethdev =
2063                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2064                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2065                         RTE_BOND_LOG(ERR,
2066                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2067                                 eth_dev->data->port_id,
2068                                 internals->slaves[i].port_id);
2069                         goto out_err;
2070                 }
2071                 /* We will need to poll for link status if any slave doesn't
2072                  * support interrupts
2073                  */
2074                 if (internals->slaves[i].link_status_poll_enabled)
2075                         internals->link_status_polling_enabled = 1;
2076         }
2077
2078         /* start polling if needed */
2079         if (internals->link_status_polling_enabled) {
2080                 rte_eal_alarm_set(
2081                         internals->link_status_polling_interval_ms * 1000,
2082                         bond_ethdev_slave_link_status_change_monitor,
2083                         (void *)&rte_eth_devices[internals->port_id]);
2084         }
2085
2086         /* Update all slave devices MACs*/
2087         if (mac_address_slaves_update(eth_dev) != 0)
2088                 goto out_err;
2089
2090         if (internals->user_defined_primary_port)
2091                 bond_ethdev_primary_set(internals, internals->primary_port);
2092
2093         if (internals->mode == BONDING_MODE_8023AD)
2094                 bond_mode_8023ad_start(eth_dev);
2095
2096         if (internals->mode == BONDING_MODE_TLB ||
2097                         internals->mode == BONDING_MODE_ALB)
2098                 bond_tlb_enable(internals);
2099
2100         return 0;
2101
2102 out_err:
2103         eth_dev->data->dev_started = 0;
2104         return -1;
2105 }
2106
2107 static void
2108 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2109 {
2110         uint8_t i;
2111
2112         if (dev->data->rx_queues != NULL) {
2113                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2114                         rte_free(dev->data->rx_queues[i]);
2115                         dev->data->rx_queues[i] = NULL;
2116                 }
2117                 dev->data->nb_rx_queues = 0;
2118         }
2119
2120         if (dev->data->tx_queues != NULL) {
2121                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2122                         rte_free(dev->data->tx_queues[i]);
2123                         dev->data->tx_queues[i] = NULL;
2124                 }
2125                 dev->data->nb_tx_queues = 0;
2126         }
2127 }
2128
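/*
 * Stop the bonded device: halt the mode 4 state machines and drain their
 * control packet rings, disable TLB/ALB bookkeeping, and stop link status
 * polling before marking the bonded link down.
 */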
2129 void
2130 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2131 {
2132         struct bond_dev_private *internals = eth_dev->data->dev_private;
2133         uint8_t i;
2134
2135         if (internals->mode == BONDING_MODE_8023AD) {
2136                 struct port *port;
2137                 void *pkt = NULL;
2138
2139                 bond_mode_8023ad_stop(eth_dev);
2140
2141                 /* Discard all messages to/from mode 4 state machines */
2142                 for (i = 0; i < internals->active_slave_count; i++) {
2143                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2144
2145                         RTE_ASSERT(port->rx_ring != NULL);
2146                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2147                                 rte_pktmbuf_free(pkt);
2148
2149                         RTE_ASSERT(port->tx_ring != NULL);
2150                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2151                                 rte_pktmbuf_free(pkt);
2152                 }
2153         }
2154
2155         if (internals->mode == BONDING_MODE_TLB ||
2156                         internals->mode == BONDING_MODE_ALB) {
2157                 bond_tlb_disable(internals);
2158                 for (i = 0; i < internals->active_slave_count; i++)
2159                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2160         }
2161
2162         internals->link_status_polling_enabled = 0;
2163         for (i = 0; i < internals->slave_count; i++)
2164                 internals->slaves[i].last_link_status = 0;
2165
2166         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2167         eth_dev->data->dev_started = 0;
2168 }
2169
2170 void
2171 bond_ethdev_close(struct rte_eth_dev *dev)
2172 {
2173         struct bond_dev_private *internals = dev->data->dev_private;
        uint16_t bond_port_id = internals->port_id;
2175         int skipped = 0;
2176         struct rte_flow_error ferror;
2177
2178         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2179         while (internals->slave_count != skipped) {
2180                 uint16_t port_id = internals->slaves[skipped].port_id;
2181
2182                 rte_eth_dev_stop(port_id);
2183
2184                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2185                         RTE_BOND_LOG(ERR,
2186                                      "Failed to remove port %d from bonded device %s",
2187                                      port_id, dev->device->name);
2188                         skipped++;
2189                 }
2190         }
2191         bond_flow_ops.flush(dev, &ferror);
2192         bond_ethdev_free_queues(dev);
2193         rte_bitmap_reset(internals->vlan_filter_bmp);
2194 }
2195
2196 /* forward declaration */
2197 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2198
2199 static void
2200 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2201 {
2202         struct bond_dev_private *internals = dev->data->dev_private;
2203
2204         uint16_t max_nb_rx_queues = UINT16_MAX;
2205         uint16_t max_nb_tx_queues = UINT16_MAX;
2206
2207         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2208
2209         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2210                         internals->candidate_max_rx_pktlen :
2211                         ETHER_MAX_JUMBO_FRAME_LEN;
2212
2213         /* Max number of tx/rx queues that the bonded device can support is the
2214          * minimum values of the bonded slaves, as all slaves must be capable
2215          * of supporting the same number of tx/rx queues.
2216          */
2217         if (internals->slave_count > 0) {
2218                 struct rte_eth_dev_info slave_info;
2219                 uint8_t idx;
2220
2221                 for (idx = 0; idx < internals->slave_count; idx++) {
2222                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2223                                         &slave_info);
2224
2225                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2226                                 max_nb_rx_queues = slave_info.max_rx_queues;
2227
2228                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2229                                 max_nb_tx_queues = slave_info.max_tx_queues;
2230                 }
2231         }
2232
2233         dev_info->max_rx_queues = max_nb_rx_queues;
2234         dev_info->max_tx_queues = max_nb_tx_queues;
2235
2236         /**
2237          * If dedicated hw queues enabled for link bonding device in LACP mode
2238          * then we need to reduce the maximum number of data path queues by 1.
2239          */
2240         if (internals->mode == BONDING_MODE_8023AD &&
2241                 internals->mode4.dedicated_queues.enabled == 1) {
2242                 dev_info->max_rx_queues--;
2243                 dev_info->max_tx_queues--;
2244         }
2245
2246         dev_info->min_rx_bufsize = 0;
2247
2248         dev_info->rx_offload_capa = internals->rx_offload_capa;
2249         dev_info->tx_offload_capa = internals->tx_offload_capa;
2250         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2251         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2252         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2253
2254         dev_info->reta_size = internals->reta_size;
2255 }
2256
2257 static int
2258 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2259 {
2260         int res;
2261         uint16_t i;
2262         struct bond_dev_private *internals = dev->data->dev_private;
2263
2264         /* don't do this while a slave is being added */
2265         rte_spinlock_lock(&internals->lock);
2266
2267         if (on)
2268                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2269         else
2270                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2271
2272         for (i = 0; i < internals->slave_count; i++) {
2273                 uint16_t port_id = internals->slaves[i].port_id;
2274
2275                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
                if (res == -ENOTSUP)
2277                         RTE_BOND_LOG(WARNING,
2278                                      "Setting VLAN filter on slave port %u not supported.",
2279                                      port_id);
2280         }
2281
2282         rte_spinlock_unlock(&internals->lock);
2283         return 0;
2284 }
2285
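/*
 * Queue setup on the bonded device only records the requested
 * configuration; the matching hardware queues are created per slave when
 * the bond starts and slave_configure() replays this configuration.
 * Illustrative usage (hypothetical port id and mempool names):
 *
 *	ret = rte_eth_rx_queue_setup(bond_port, 0, 512,
 *			rte_eth_dev_socket_id(bond_port), NULL, mbuf_pool);
 */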
2286 static int
2287 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2288                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2289                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2290 {
2291         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2292                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2293                                         0, dev->data->numa_node);
2294         if (bd_rx_q == NULL)
2295                 return -1;
2296
2297         bd_rx_q->queue_id = rx_queue_id;
2298         bd_rx_q->dev_private = dev->data->dev_private;
2299
2300         bd_rx_q->nb_rx_desc = nb_rx_desc;
2301
2302         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2303         bd_rx_q->mb_pool = mb_pool;
2304
2305         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2306
2307         return 0;
2308 }
2309
2310 static int
2311 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2312                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2313                 const struct rte_eth_txconf *tx_conf)
2314 {
2315         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2316                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2317                                         0, dev->data->numa_node);
2318
2319         if (bd_tx_q == NULL)
2320                 return -1;
2321
2322         bd_tx_q->queue_id = tx_queue_id;
2323         bd_tx_q->dev_private = dev->data->dev_private;
2324
2325         bd_tx_q->nb_tx_desc = nb_tx_desc;
2326         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2327
2328         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2329
2330         return 0;
2331 }
2332
2333 static void
2334 bond_ethdev_rx_queue_release(void *queue)
2335 {
2336         if (queue == NULL)
2337                 return;
2338
2339         rte_free(queue);
2340 }
2341
2342 static void
2343 bond_ethdev_tx_queue_release(void *queue)
2344 {
2345         if (queue == NULL)
2346                 return;
2347
2348         rte_free(queue);
2349 }
2350
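/*
 * Alarm callback that polls link status for slaves whose PMDs lack LSC
 * interrupt support. It re-arms itself while such slaves exist and uses a
 * trylock so that a concurrent reconfiguration simply defers the check to
 * the next polling period.
 */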
2351 static void
2352 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2353 {
2354         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2355         struct bond_dev_private *internals;
2356
2357         /* Default value for polling slave found is true as we don't want to
2358          * disable the polling thread if we cannot get the lock */
2359         int i, polling_slave_found = 1;
2360
2361         if (cb_arg == NULL)
2362                 return;
2363
2364         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2365         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2366
2367         if (!bonded_ethdev->data->dev_started ||
2368                 !internals->link_status_polling_enabled)
2369                 return;
2370
        /* If the device is currently being configured then don't check the
         * slaves' link status; wait until the next polling period */
2373         if (rte_spinlock_trylock(&internals->lock)) {
2374                 if (internals->slave_count > 0)
2375                         polling_slave_found = 0;
2376
2377                 for (i = 0; i < internals->slave_count; i++) {
2378                         if (!internals->slaves[i].link_status_poll_enabled)
2379                                 continue;
2380
2381                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2382                         polling_slave_found = 1;
2383
2384                         /* Update slave link status */
2385                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2386                                         internals->slaves[i].link_status_wait_to_complete);
2387
2388                         /* if link status has changed since last checked then call lsc
2389                          * event callback */
2390                         if (slave_ethdev->data->dev_link.link_status !=
2391                                         internals->slaves[i].last_link_status) {
2392                                 internals->slaves[i].last_link_status =
2393                                                 slave_ethdev->data->dev_link.link_status;
2394
2395                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2396                                                 RTE_ETH_EVENT_INTR_LSC,
2397                                                 &bonded_ethdev->data->port_id,
2398                                                 NULL);
2399                         }
2400                 }
2401                 rte_spinlock_unlock(&internals->lock);
2402         }
2403
2404         if (polling_slave_found)
2405                 /* Set alarm to continue monitoring link status of slave ethdev's */
2406                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2407                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2408 }
2409
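/*
 * Report the bonded device's link. The aggregate speed is mode dependent:
 * broadcast reports the minimum of the slave speeds, active-backup the
 * current primary's speed, and the remaining modes the sum of all active
 * slave speeds as the theoretical maximum.
 */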
2410 static int
2411 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2412 {
2413         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2414
2415         struct bond_dev_private *bond_ctx;
2416         struct rte_eth_link slave_link;
2417
2418         uint32_t idx;
2419
2420         bond_ctx = ethdev->data->dev_private;
2421
2422         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2423
2424         if (ethdev->data->dev_started == 0 ||
2425                         bond_ctx->active_slave_count == 0) {
2426                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2427                 return 0;
2428         }
2429
2430         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2431
2432         if (wait_to_complete)
2433                 link_update = rte_eth_link_get;
2434         else
2435                 link_update = rte_eth_link_get_nowait;
2436
2437         switch (bond_ctx->mode) {
2438         case BONDING_MODE_BROADCAST:
2439                 /**
2440                  * Setting link speed to UINT32_MAX to ensure we pick up the
2441                  * value of the first active slave
2442                  */
2443                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2444
                /**
                 * The bonded link speed is the minimum of all the slaves'
                 * link speeds, since transmitting at a rate greater than the
                 * slowest slave can sustain would cause packet loss on it
                 */
                for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
                        link_update(bond_ctx->active_slaves[idx], &slave_link);
2452
2453                         if (slave_link.link_speed <
2454                                         ethdev->data->dev_link.link_speed)
2455                                 ethdev->data->dev_link.link_speed =
2456                                                 slave_link.link_speed;
2457                 }
2458                 break;
2459         case BONDING_MODE_ACTIVE_BACKUP:
2460                 /* Current primary slave */
2461                 link_update(bond_ctx->current_primary_port, &slave_link);
2462
2463                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2464                 break;
2465         case BONDING_MODE_8023AD:
2466                 ethdev->data->dev_link.link_autoneg =
2467                                 bond_ctx->mode4.slave_link.link_autoneg;
2468                 ethdev->data->dev_link.link_duplex =
2469                                 bond_ctx->mode4.slave_link.link_duplex;
2470                 /* fall through to update link speed */
2471         case BONDING_MODE_ROUND_ROBIN:
2472         case BONDING_MODE_BALANCE:
2473         case BONDING_MODE_TLB:
2474         case BONDING_MODE_ALB:
2475         default:
                /**
                 * In these modes the maximum theoretical link speed is the
                 * sum of all the slaves' link speeds
                 */
2480                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2481
2482                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2483                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2484
2485                         ethdev->data->dev_link.link_speed +=
2486                                         slave_link.link_speed;
2487                 }
2488         }
2489
2490
2491         return 0;
2492 }
2493
2494
2495 static int
2496 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2497 {
2498         struct bond_dev_private *internals = dev->data->dev_private;
2499         struct rte_eth_stats slave_stats;
2500         int i, j;
2501
2502         for (i = 0; i < internals->slave_count; i++) {
2503                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2504
2505                 stats->ipackets += slave_stats.ipackets;
2506                 stats->opackets += slave_stats.opackets;
2507                 stats->ibytes += slave_stats.ibytes;
2508                 stats->obytes += slave_stats.obytes;
2509                 stats->imissed += slave_stats.imissed;
2510                 stats->ierrors += slave_stats.ierrors;
2511                 stats->oerrors += slave_stats.oerrors;
2512                 stats->rx_nombuf += slave_stats.rx_nombuf;
2513
2514                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2515                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2516                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2517                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2518                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2519                         stats->q_errors[j] += slave_stats.q_errors[j];
2520                 }
2521
2522         }
2523
2524         return 0;
2525 }
2526
2527 static void
2528 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2529 {
2530         struct bond_dev_private *internals = dev->data->dev_private;
2531         int i;
2532
2533         for (i = 0; i < internals->slave_count; i++)
2534                 rte_eth_stats_reset(internals->slaves[i].port_id);
2535 }
2536
2537 static void
2538 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2539 {
2540         struct bond_dev_private *internals = eth_dev->data->dev_private;
2541         int i;
2542
2543         internals->promiscuous_en = 1;
2544
2545         switch (internals->mode) {
2546         /* Promiscuous mode is propagated to all slaves */
2547         case BONDING_MODE_ROUND_ROBIN:
2548         case BONDING_MODE_BALANCE:
2549         case BONDING_MODE_BROADCAST:
2550                 for (i = 0; i < internals->slave_count; i++)
2551                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2552                 break;
        /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2554         case BONDING_MODE_8023AD:
2555                 break;
2556         /* Promiscuous mode is propagated only to primary slave */
2557         case BONDING_MODE_ACTIVE_BACKUP:
2558         case BONDING_MODE_TLB:
2559         case BONDING_MODE_ALB:
2560         default:
2561                 rte_eth_promiscuous_enable(internals->current_primary_port);
2562         }
2563 }
2564
2565 static void
2566 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2567 {
2568         struct bond_dev_private *internals = dev->data->dev_private;
2569         int i;
2570
2571         internals->promiscuous_en = 0;
2572
2573         switch (internals->mode) {
2574         /* Promiscuous mode is propagated to all slaves */
2575         case BONDING_MODE_ROUND_ROBIN:
2576         case BONDING_MODE_BALANCE:
2577         case BONDING_MODE_BROADCAST:
2578                 for (i = 0; i < internals->slave_count; i++)
2579                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2580                 break;
        /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2582         case BONDING_MODE_8023AD:
2583                 break;
2584         /* Promiscuous mode is propagated only to primary slave */
2585         case BONDING_MODE_ACTIVE_BACKUP:
2586         case BONDING_MODE_TLB:
2587         case BONDING_MODE_ALB:
2588         default:
2589                 rte_eth_promiscuous_disable(internals->current_primary_port);
2590         }
2591 }
2592
2593 static void
2594 bond_ethdev_delayed_lsc_propagation(void *arg)
2595 {
2596         if (arg == NULL)
2597                 return;
2598
2599         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2600                         RTE_ETH_EVENT_INTR_LSC, NULL);
2601 }
2602
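/*
 * LSC event handler invoked on slave link changes. Under lsc_lock it
 * activates or deactivates the slave, updates the primary port, bonded MAC
 * and link properties as needed, and propagates the event to applications,
 * optionally delayed by the configured link up/down delay.
 */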
2603 int
2604 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2605                 void *param, void *ret_param __rte_unused)
2606 {
2607         struct rte_eth_dev *bonded_eth_dev;
2608         struct bond_dev_private *internals;
2609         struct rte_eth_link link;
2610         int rc = -1;
2611
2612         int i, valid_slave = 0;
2613         uint8_t active_pos;
2614         uint8_t lsc_flag = 0;
2615
2616         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2617                 return rc;
2618
2619         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
2620
2621         if (check_for_bonded_ethdev(bonded_eth_dev))
2622                 return rc;
2623
2624         internals = bonded_eth_dev->data->dev_private;
2625
2626         /* If the device isn't started don't handle interrupts */
2627         if (!bonded_eth_dev->data->dev_started)
2628                 return rc;
2629
2630         /* verify that port_id is a valid slave of bonded port */
2631         for (i = 0; i < internals->slave_count; i++) {
2632                 if (internals->slaves[i].port_id == port_id) {
2633                         valid_slave = 1;
2634                         break;
2635                 }
2636         }
2637
2638         if (!valid_slave)
2639                 return rc;
2640
2641         /* Synchronize lsc callback parallel calls either by real link event
2642          * from the slaves PMDs or by the bonding PMD itself.
2643          */
2644         rte_spinlock_lock(&internals->lsc_lock);
2645
2646         /* Search for port in active port list */
2647         active_pos = find_slave_by_id(internals->active_slaves,
2648                         internals->active_slave_count, port_id);
2649
2650         rte_eth_link_get_nowait(port_id, &link);
2651         if (link.link_status) {
2652                 if (active_pos < internals->active_slave_count)
2653                         goto link_update;
2654
2655                 /* if no active slave ports then set this port to be primary port */
2656                 if (internals->active_slave_count < 1) {
2657                         /* If first active slave, then change link status */
2658                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2659                         internals->current_primary_port = port_id;
2660                         lsc_flag = 1;
2661
2662                         mac_address_slaves_update(bonded_eth_dev);
2663                 }
2664
                /* check link state properties if bonded link is up */
2666                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2667                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2668                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2669                                              "for slave %d in bonding mode %d",
2670                                              port_id, internals->mode);
2671                 } else {
2672                         /* inherit slave link properties */
2673                         link_properties_set(bonded_eth_dev, &link);
2674                 }
2675
2676                 activate_slave(bonded_eth_dev, port_id);
2677
2678                 /* If user has defined the primary port then default to using it */
2679                 if (internals->user_defined_primary_port &&
2680                                 internals->primary_port == port_id)
2681                         bond_ethdev_primary_set(internals, port_id);
2682         } else {
2683                 if (active_pos == internals->active_slave_count)
2684                         goto link_update;
2685
2686                 /* Remove from active slave list */
2687                 deactivate_slave(bonded_eth_dev, port_id);
2688
2689                 if (internals->active_slave_count < 1)
2690                         lsc_flag = 1;
2691
2692                 /* Update primary id: take the first active slave from the list
2693                  * or, if none is available, fall back to the configured primary port */
2694                 if (port_id == internals->current_primary_port) {
2695                         if (internals->active_slave_count > 0)
2696                                 bond_ethdev_primary_set(internals,
2697                                                 internals->active_slaves[0]);
2698                         else
2699                                 internals->current_primary_port = internals->primary_port;
2700                 }
2701         }
2702
2703 link_update:
2704         /**
2705          * Update bonded device link properties after any change to active
2706          * slaves
2707          */
2708         bond_ethdev_link_update(bonded_eth_dev, 0);
2709
2710         if (lsc_flag) {
2711                 /* Cancel any possible outstanding interrupts if delays are enabled */
2712                 if (internals->link_up_delay_ms > 0 ||
2713                         internals->link_down_delay_ms > 0)
2714                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2715                                         bonded_eth_dev);
2716
2717                 if (bonded_eth_dev->data->dev_link.link_status) {
2718                         if (internals->link_up_delay_ms > 0)
2719                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2720                                                 bond_ethdev_delayed_lsc_propagation,
2721                                                 (void *)bonded_eth_dev);
2722                         else
2723                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2724                                                 RTE_ETH_EVENT_INTR_LSC,
2725                                                 NULL);
2726
2727                 } else {
2728                         if (internals->link_down_delay_ms > 0)
2729                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2730                                                 bond_ethdev_delayed_lsc_propagation,
2731                                                 (void *)bonded_eth_dev);
2732                         else
2733                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2734                                                 RTE_ETH_EVENT_INTR_LSC,
2735                                                 NULL);
2736                 }
2737         }
2738
2739         rte_spinlock_unlock(&internals->lsc_lock);
2740
2741         return 0;
2742 }
2743
2744 static int
2745 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2746                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2747 {
2748         unsigned i, j;
2749         int result = 0;
2750         int slave_reta_size;
2751         unsigned reta_count;
2752         struct bond_dev_private *internals = dev->data->dev_private;
2753
2754         if (reta_size != internals->reta_size)
2755                 return -EINVAL;
2756
2757         /* Copy RETA table */
2758         reta_count = (reta_size + RTE_RETA_GROUP_SIZE - 1) / RTE_RETA_GROUP_SIZE;
2759
2760         for (i = 0; i < reta_count; i++) {
2761                 internals->reta_conf[i].mask = reta_conf[i].mask;
2762                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2763                         if ((reta_conf[i].mask >> j) & 0x01)
2764                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2765         }
2766
2767         /* Fill rest of array */
2768         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2769                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2770                                 sizeof(internals->reta_conf[0]) * reta_count);
2771
2772         /* Propagate RETA over slaves */
2773         for (i = 0; i < internals->slave_count; i++) {
2774                 slave_reta_size = internals->slaves[i].reta_size;
2775                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2776                                 &internals->reta_conf[0], slave_reta_size);
2777                 if (result < 0)
2778                         return result;
2779         }
2780
2781         return 0;
2782 }
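
/*
 * Illustrative application-side use of the reta_update op above (not part
 * of the driver): spread flows across the first two Rx queues of a bonded
 * port, assuming the port reports a RETA size of 128; bond_port_id is a
 * hypothetical variable.
 *
 *	struct rte_eth_rss_reta_entry64 conf[128 / RTE_RETA_GROUP_SIZE];
 *	unsigned int k, l;
 *
 *	for (k = 0; k < RTE_DIM(conf); k++) {
 *		conf[k].mask = ~0ULL;
 *		for (l = 0; l < RTE_RETA_GROUP_SIZE; l++)
 *			conf[k].reta[l] = (k * RTE_RETA_GROUP_SIZE + l) % 2;
 *	}
 *	rte_eth_dev_rss_reta_update(bond_port_id, conf, 128);
 */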
2783
2784 static int
2785 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2786                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2787 {
2788         int i, j;
2789         struct bond_dev_private *internals = dev->data->dev_private;
2790
2791         if (reta_size != internals->reta_size)
2792                 return -EINVAL;
2793
2794         /* Copy RETA table */
2795         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2796                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2797                         if ((reta_conf[i].mask >> j) & 0x01)
2798                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2799
2800         return 0;
2801 }
2802
2803 static int
2804 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2805                 struct rte_eth_rss_conf *rss_conf)
2806 {
2807         int i, result = 0;
2808         struct bond_dev_private *internals = dev->data->dev_private;
2809         struct rte_eth_rss_conf bond_rss_conf;
2810
2811         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2812
2813         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2814
2815         if (bond_rss_conf.rss_hf != 0)
2816                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2817
2818         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2819                         sizeof(internals->rss_key)) {
2820                 if (bond_rss_conf.rss_key_len == 0)
2821                         bond_rss_conf.rss_key_len = 40;
2822                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2823                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2824                                 internals->rss_key_len);
2825         }
2826
2827         for (i = 0; i < internals->slave_count; i++) {
2828                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2829                                 &bond_rss_conf);
2830                 if (result < 0)
2831                         return result;
2832         }
2833
2834         return 0;
2835 }
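
/*
 * Illustrative application-side use of the rss_hash_update op above (not
 * part of the driver): restrict the bonded port's RSS hash to IP and TCP
 * flow types while keeping the current key; bond_port_id is hypothetical.
 *
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = NULL,
 *		.rss_key_len = 0,
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *	};
 *
 *	rte_eth_dev_rss_hash_update(bond_port_id, &conf);
 */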
2836
2837 static int
2838 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2839                 struct rte_eth_rss_conf *rss_conf)
2840 {
2841         struct bond_dev_private *internals = dev->data->dev_private;
2842
2843         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2844         rss_conf->rss_key_len = internals->rss_key_len;
2845         if (rss_conf->rss_key)
2846                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2847
2848         return 0;
2849 }
2850
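/*
 * Set the MTU on the bonded device by applying it to every slave; the first
 * loop verifies that all slaves implement mtu_set so that -ENOTSUP can be
 * returned before any slave is modified.
 */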
2851 static int
2852 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2853 {
2854         struct rte_eth_dev *slave_eth_dev;
2855         struct bond_dev_private *internals = dev->data->dev_private;
2856         int ret, i;
2857
2858         rte_spinlock_lock(&internals->lock);
2859
2860         for (i = 0; i < internals->slave_count; i++) {
2861                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2862                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2863                         rte_spinlock_unlock(&internals->lock);
2864                         return -ENOTSUP;
2865                 }
2866         }
2867         for (i = 0; i < internals->slave_count; i++) {
2868                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2869                 if (ret < 0) {
2870                         rte_spinlock_unlock(&internals->lock);
2871                         return ret;
2872                 }
2873         }
2874
2875         rte_spinlock_unlock(&internals->lock);
2876         return 0;
2877 }
2878
2879 static int
2880 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2881 {
2882         if (mac_address_set(dev, addr)) {
2883                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2884                 return -EINVAL;
2885         }
2886
2887         return 0;
2888 }
2889
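/*
 * Only the generic (rte_flow) filter type is supported: the GET operation
 * hands back the bonding driver's flow ops, anything else is -ENOTSUP.
 */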
2890 static int
2891 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2892                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2893 {
2894         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2895                 *(const void **)arg = &bond_flow_ops;
2896                 return 0;
2897         }
2898         return -ENOTSUP;
2899 }
2900
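/*
 * Add a MAC address to every slave; if any slave rejects it, the address is
 * rolled back from the slaves that had already accepted it.
 */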
2901 static int
2902 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2903                                 __rte_unused uint32_t index, uint32_t vmdq)
2904 {
2905         struct rte_eth_dev *slave_eth_dev;
2906         struct bond_dev_private *internals = dev->data->dev_private;
2907         int ret, i;
2908
2909         rte_spinlock_lock(&internals->lock);
2910
2911         for (i = 0; i < internals->slave_count; i++) {
2912                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2913                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2914                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2915                         ret = -ENOTSUP;
2916                         goto end;
2917                 }
2918         }
2919
2920         for (i = 0; i < internals->slave_count; i++) {
2921                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2922                                 mac_addr, vmdq);
2923                 if (ret < 0) {
2924                         /* rollback */
2925                         for (i--; i >= 0; i--)
2926                                 rte_eth_dev_mac_addr_remove(
2927                                         internals->slaves[i].port_id, mac_addr);
2928                         goto end;
2929                 }
2930         }
2931
2932         ret = 0;
2933 end:
2934         rte_spinlock_unlock(&internals->lock);
2935         return ret;
2936 }
2937
2938 static void
2939 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2940 {
2941         struct rte_eth_dev *slave_eth_dev;
2942         struct bond_dev_private *internals = dev->data->dev_private;
2943         int i;
2944
2945         rte_spinlock_lock(&internals->lock);
2946
2947         for (i = 0; i < internals->slave_count; i++) {
2948                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2949                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2950                         goto end;
2951         }
2952
2953         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
2954
2955         for (i = 0; i < internals->slave_count; i++)
2956                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2957                                 mac_addr);
2958
2959 end:
2960         rte_spinlock_unlock(&internals->lock);
2961 }
2962
2963 const struct eth_dev_ops default_dev_ops = {
2964         .dev_start            = bond_ethdev_start,
2965         .dev_stop             = bond_ethdev_stop,
2966         .dev_close            = bond_ethdev_close,
2967         .dev_configure        = bond_ethdev_configure,
2968         .dev_infos_get        = bond_ethdev_info,
2969         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2970         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2971         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2972         .rx_queue_release     = bond_ethdev_rx_queue_release,
2973         .tx_queue_release     = bond_ethdev_tx_queue_release,
2974         .link_update          = bond_ethdev_link_update,
2975         .stats_get            = bond_ethdev_stats_get,
2976         .stats_reset          = bond_ethdev_stats_reset,
2977         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2978         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2979         .reta_update          = bond_ethdev_rss_reta_update,
2980         .reta_query           = bond_ethdev_rss_reta_query,
2981         .rss_hash_update      = bond_ethdev_rss_hash_update,
2982         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
2983         .mtu_set              = bond_ethdev_mtu_set,
2984         .mac_addr_set         = bond_ethdev_mac_address_set,
2985         .mac_addr_add         = bond_ethdev_mac_addr_add,
2986         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
2987         .filter_ctrl          = bond_filter_ctrl
2988 };
2989
2990 static int
2991 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2992 {
2993         const char *name = rte_vdev_device_name(dev);
2994         uint8_t socket_id = dev->device.numa_node;
2995         struct bond_dev_private *internals = NULL;
2996         struct rte_eth_dev *eth_dev = NULL;
2997         uint32_t vlan_filter_bmp_size;
2998
2999         /* now do all data allocation - for the eth_dev structure
3000          * and the internal (private) data
3001          */
3002
3003         /* reserve an ethdev entry */
3004         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3005         if (eth_dev == NULL) {
3006                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3007                 goto err;
3008         }
3009
3010         internals = eth_dev->data->dev_private;
3011         eth_dev->data->nb_rx_queues = (uint16_t)1;
3012         eth_dev->data->nb_tx_queues = (uint16_t)1;
3013
3014         /* Allocate memory for storing MAC addresses */
3015         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3016                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3017         if (eth_dev->data->mac_addrs == NULL) {
3018                 RTE_BOND_LOG(ERR,
3019                              "Failed to allocate %u bytes needed to store MAC addresses",
3020                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3021                 goto err;
3022         }
3023
3024         eth_dev->dev_ops = &default_dev_ops;
3025         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3026
3027         rte_spinlock_init(&internals->lock);
3028         rte_spinlock_init(&internals->lsc_lock);
3029
3030         internals->port_id = eth_dev->data->port_id;
3031         internals->mode = BONDING_MODE_INVALID;
3032         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3033         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3034         internals->burst_xmit_hash = burst_xmit_l2_hash;
3035         internals->user_defined_mac = 0;
3036
3037         internals->link_status_polling_enabled = 0;
3038
3039         internals->link_status_polling_interval_ms =
3040                 DEFAULT_POLLING_INTERVAL_10_MS;
3041         internals->link_down_delay_ms = 0;
3042         internals->link_up_delay_ms = 0;
3043
3044         internals->slave_count = 0;
3045         internals->active_slave_count = 0;
3046         internals->rx_offload_capa = 0;
3047         internals->tx_offload_capa = 0;
3048         internals->rx_queue_offload_capa = 0;
3049         internals->tx_queue_offload_capa = 0;
3050         internals->candidate_max_rx_pktlen = 0;
3051         internals->max_rx_pktlen = 0;
3052
3053         /* Initially allow to choose any offload type */
3054         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3055
3056         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3057         memset(internals->slaves, 0, sizeof(internals->slaves));
3058
3059         TAILQ_INIT(&internals->flow_list);
3060         internals->flow_isolated_valid = 0;
3061
3062         /* Set mode 4 default configuration */
3063         bond_mode_8023ad_setup(eth_dev, NULL);
3064         if (bond_ethdev_mode_set(eth_dev, mode)) {
3065                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3066                                  eth_dev->data->port_id, mode);
3067                 goto err;
3068         }
3069
3070         vlan_filter_bmp_size =
3071                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3072         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3073                                                    RTE_CACHE_LINE_SIZE);
3074         if (internals->vlan_filter_bmpmem == NULL) {
3075                 RTE_BOND_LOG(ERR,
3076                              "Failed to allocate vlan bitmap for bonded device %u",
3077                              eth_dev->data->port_id);
3078                 goto err;
3079         }
3080
3081         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3082                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3083         if (internals->vlan_filter_bmp == NULL) {
3084                 RTE_BOND_LOG(ERR,
3085                              "Failed to init vlan bitmap for bonded device %u",
3086                              eth_dev->data->port_id);
3087                 rte_free(internals->vlan_filter_bmpmem);
3088                 goto err;
3089         }
3090
3091         return eth_dev->data->port_id;
3092
3093 err:
3094         rte_free(internals);
3095         if (eth_dev != NULL) {
3096                 rte_free(eth_dev->data->mac_addrs);
3097                 rte_eth_dev_release_port(eth_dev);
3098         }
3099         return -1;
3100 }
3101
3102 static int
3103 bond_probe(struct rte_vdev_device *dev)
3104 {
3105         const char *name;
3106         struct bond_dev_private *internals;
3107         struct rte_kvargs *kvlist;
3108         uint8_t bonding_mode, socket_id;
3109         int arg_count, port_id;
3110         uint8_t agg_mode;
3111         struct rte_eth_dev *eth_dev;
3112
3113         if (!dev)
3114                 return -EINVAL;
3115
3116         name = rte_vdev_device_name(dev);
3117         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3118
3119         if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
3120             strlen(rte_vdev_device_args(dev)) == 0) {
3121                 eth_dev = rte_eth_dev_attach_secondary(name);
3122                 if (!eth_dev) {
3123                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3124                         return -1;
3125                 }
3126                 /* TODO: request info from primary to set up Rx and Tx */
3127                 eth_dev->dev_ops = &default_dev_ops;
3128                 eth_dev->device = &dev->device;
3129                 rte_eth_dev_probing_finish(eth_dev);
3130                 return 0;
3131         }
3132
3133         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3134                 pmd_bond_init_valid_arguments);
3135         if (kvlist == NULL)
3136                 return -1;
3137
3138         /* Parse link bonding mode */
3139         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3140                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3141                                 &bond_ethdev_parse_slave_mode_kvarg,
3142                                 &bonding_mode) != 0) {
3143                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3144                                         name);
3145                         goto parse_error;
3146                 }
3147         } else {
3148                 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3149                                 "device %s", name);
3150                 goto parse_error;
3151         }
3152
3153         /* Parse socket id to create bonding device on */
3154         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3155         if (arg_count == 1) {
3156                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3157                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3158                                 != 0) {
3159                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3160                                         "bonded device %s", name);
3161                         goto parse_error;
3162                 }
3163         } else if (arg_count > 1) {
3164                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3165                                 "bonded device %s", name);
3166                 goto parse_error;
3167         } else {
3168                 socket_id = rte_socket_id();
3169         }
3170
3171         dev->device.numa_node = socket_id;
3172
3173         /* Create link bonding eth device */
3174         port_id = bond_alloc(dev, bonding_mode);
3175         if (port_id < 0) {
3176                 RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on "
3177                                 "socket %u.",   name, bonding_mode, socket_id);
3178                 goto parse_error;
3179         }
3180         internals = rte_eth_devices[port_id].data->dev_private;
3181         internals->kvlist = kvlist;
3182
3183         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3184
3185         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3186                 if (rte_kvargs_process(kvlist,
3187                                 PMD_BOND_AGG_MODE_KVARG,
3188                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3189                                 &agg_mode) != 0) {
3190                         RTE_BOND_LOG(ERR,
3191                                         "Failed to parse agg selection mode for bonded device %s",
3192                                         name);
3193                         goto parse_error;
3194                 }
3195
3196                 if (internals->mode == BONDING_MODE_8023AD)
3197                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3198                                         agg_mode);
3199         } else {
3200                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3201         }
3202
3203         RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3204                         "socket %u.",   name, port_id, bonding_mode, socket_id);
3205         return 0;
3206
3207 parse_error:
3208         rte_kvargs_free(kvlist);
3209
3210         return -1;
3211 }
3212
3213 static int
3214 bond_remove(struct rte_vdev_device *dev)
3215 {
3216         struct rte_eth_dev *eth_dev;
3217         struct bond_dev_private *internals;
3218         const char *name;
3219
3220         if (!dev)
3221                 return -EINVAL;
3222
3223         name = rte_vdev_device_name(dev);
3224         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3225
3226         /* now free all data allocation - for the eth_dev structure
3227          * and the internal (private) data
3228          */
3229
3230         /* find an ethdev entry */
3231         eth_dev = rte_eth_dev_allocated(name);
3232         if (eth_dev == NULL)
3233                 return -ENODEV;
3234
3235         RTE_ASSERT(eth_dev->device == &dev->device);
3236
3237         internals = eth_dev->data->dev_private;
3238         if (internals->slave_count != 0)
3239                 return -EBUSY;
3240
3241         if (eth_dev->data->dev_started == 1) {
3242                 bond_ethdev_stop(eth_dev);
3243                 bond_ethdev_close(eth_dev);
3244         }
3245
3246         eth_dev->dev_ops = NULL;
3247         eth_dev->rx_pkt_burst = NULL;
3248         eth_dev->tx_pkt_burst = NULL;
3249
3250         internals = eth_dev->data->dev_private;
3251         /* Try to release the mempool used in mode 6. If the bonded
3252          * device is not in mode 6, freeing a NULL pointer is not a problem.
3253          */
3254         rte_mempool_free(internals->mode6.mempool);
3255         rte_bitmap_free(internals->vlan_filter_bmp);
3256         rte_free(internals->vlan_filter_bmpmem);
3257         rte_free(eth_dev->data->dev_private);
3258         rte_free(eth_dev->data->mac_addrs);
3259
3260         rte_eth_dev_release_port(eth_dev);
3261
3262         return 0;
3263 }
3264
3265 /* this part will resolve the slave port ids after all the other physical
3266  * and virtual devices have been allocated */
3267 static int
3268 bond_ethdev_configure(struct rte_eth_dev *dev)
3269 {
3270         const char *name = dev->device->name;
3271         struct bond_dev_private *internals = dev->data->dev_private;
3272         struct rte_kvargs *kvlist = internals->kvlist;
3273         int arg_count;
3274         uint16_t port_id = dev - rte_eth_devices;
3275         uint8_t agg_mode;
3276
3277         static const uint8_t default_rss_key[40] = {
3278                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3279                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3280                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3281                 0xBE, 0xAC, 0x01, 0xFA
3282         };
3283
3284         unsigned i, j;
3285
3286         /*
3287          * If RSS is enabled, fill the table with default values and
3288          * set the key to the value specified in the port RSS configuration.
3289          * Fall back to the default RSS key if no key is specified.
3290          */
3291         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3292                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3293                         internals->rss_key_len =
3294                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3295                         memcpy(internals->rss_key,
3296                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3297                                internals->rss_key_len);
3298                 } else {
3299                         internals->rss_key_len = sizeof(default_rss_key);
3300                         memcpy(internals->rss_key, default_rss_key,
3301                                internals->rss_key_len);
3302                 }
3303
3304                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3305                         internals->reta_conf[i].mask = ~0LL;
3306                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3307                                 internals->reta_conf[i].reta[j] =
3308                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3309                                                 dev->data->nb_rx_queues;
3310                 }
3311         }
3312
3313         /* set the max_rx_pktlen */
3314         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3315
3316         /*
3317          * If no kvlist, it means that this bonded device has been created
3318          * through the bonding API.
3319          */
3320         if (!kvlist)
3321                 return 0;
3322
3323         /* Parse MAC address for bonded device */
3324         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3325         if (arg_count == 1) {
3326                 struct ether_addr bond_mac;
3327
3328                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3329                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3330                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3331                                      name);
3332                         return -1;
3333                 }
3334
3335                 /* Set MAC address */
3336                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3337                         RTE_BOND_LOG(ERR,
3338                                      "Failed to set mac address on bonded device %s",
3339                                      name);
3340                         return -1;
3341                 }
3342         } else if (arg_count > 1) {
3343                 RTE_BOND_LOG(ERR,
3344                              "MAC address can be specified only once for bonded device %s",
3345                              name);
3346                 return -1;
3347         }
3348
3349         /* Parse/set balance mode transmit policy */
3350         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3351         if (arg_count == 1) {
3352                 uint8_t xmit_policy;
3353
3354                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3355                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3356                     0) {
3357                         RTE_BOND_LOG(INFO,
3358                                      "Invalid xmit policy specified for bonded device %s",
3359                                      name);
3360                         return -1;
3361                 }
3362
3363                 /* Set balance mode transmit policy */
3364                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3365                         RTE_BOND_LOG(ERR,
3366                                      "Failed to set balance xmit policy on bonded device %s",
3367                                      name);
3368                         return -1;
3369                 }
3370         } else if (arg_count > 1) {
3371                 RTE_BOND_LOG(ERR,
3372                              "Transmit policy can be specified only once for bonded device %s",
3373                              name);
3374                 return -1;
3375         }
3376
3377         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3378                 if (rte_kvargs_process(kvlist,
3379                                        PMD_BOND_AGG_MODE_KVARG,
3380                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3381                                        &agg_mode) != 0) {
3382                         RTE_BOND_LOG(ERR,
3383                                      "Failed to parse agg selection mode for bonded device %s",
3384                                      name);
3385                 }
3386                 if (internals->mode == BONDING_MODE_8023AD)
3387                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3388                                                               agg_mode);
3389         }
3390
3391         /* Parse/add slave ports to bonded device */
3392         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3393                 struct bond_ethdev_slave_ports slave_ports;
3394                 unsigned i;
3395
3396                 memset(&slave_ports, 0, sizeof(slave_ports));
3397
3398                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3399                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3400                         RTE_BOND_LOG(ERR,
3401                                      "Failed to parse slave ports for bonded device %s",
3402                                      name);
3403                         return -1;
3404                 }
3405
3406                 for (i = 0; i < slave_ports.slave_count; i++) {
3407                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3408                                 RTE_BOND_LOG(ERR,
3409                                              "Failed to add port %d as slave to bonded device %s",
3410                                              slave_ports.slaves[i], name);
3411                         }
3412                 }
3413
3414         } else {
3415                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3416                 return -1;
3417         }
3418
3419         /* Parse/set primary slave port id */
3420         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3421         if (arg_count == 1) {
3422                 uint16_t primary_slave_port_id;
3423
3424                 if (rte_kvargs_process(kvlist,
3425                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3426                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3427                                        &primary_slave_port_id) < 0) {
3428                         RTE_BOND_LOG(INFO,
3429                                      "Invalid primary slave port id specified for bonded device %s",
3430                                      name);
3431                         return -1;
3432                 }
3433
3434                 /* Set primary slave port id */
3435                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3436                     != 0) {
3437                         RTE_BOND_LOG(ERR,
3438                                      "Failed to set primary slave port %d on bonded device %s",
3439                                      primary_slave_port_id, name);
3440                         return -1;
3441                 }
3442         } else if (arg_count > 1) {
3443                 RTE_BOND_LOG(INFO,
3444                              "Primary slave can be specified only once for bonded device %s",
3445                              name);
3446                 return -1;
3447         }
3448
3449         /* Parse link status monitor polling interval */
3450         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3451         if (arg_count == 1) {
3452                 uint32_t lsc_poll_interval_ms;
3453
3454                 if (rte_kvargs_process(kvlist,
3455                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3456                                        &bond_ethdev_parse_time_ms_kvarg,
3457                                        &lsc_poll_interval_ms) < 0) {
3458                         RTE_BOND_LOG(INFO,
3459                                      "Invalid lsc polling interval value specified for bonded"
3460                                      " device %s", name);
3461                         return -1;
3462                 }
3463
3464                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3465                     != 0) {
3466                         RTE_BOND_LOG(ERR,
3467                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3468                                      lsc_poll_interval_ms, name);
3469                         return -1;
3470                 }
3471         } else if (arg_count > 1) {
3472                 RTE_BOND_LOG(INFO,
3473                              "LSC polling interval can be specified only once for bonded"
3474                              " device %s", name);
3475                 return -1;
3476         }
3477
3478         /* Parse link up interrupt propagation delay */
3479         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3480         if (arg_count == 1) {
3481                 uint32_t link_up_delay_ms;
3482
3483                 if (rte_kvargs_process(kvlist,
3484                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3485                                        &bond_ethdev_parse_time_ms_kvarg,
3486                                        &link_up_delay_ms) < 0) {
3487                         RTE_BOND_LOG(INFO,
3488                                      "Invalid link up propagation delay value specified for"
3489                                      " bonded device %s", name);
3490                         return -1;
3491                 }
3492
3493                 /* Set link up propagation delay */
3494                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3495                     != 0) {
3496                         RTE_BOND_LOG(ERR,
3497                                      "Failed to set link up propagation delay (%u ms) on bonded"
3498                                      " device %s", link_up_delay_ms, name);
3499                         return -1;
3500                 }
3501         } else if (arg_count > 1) {
3502                 RTE_BOND_LOG(INFO,
3503                              "Link up propagation delay can be specified only once for"
3504                              " bonded device %s", name);
3505                 return -1;
3506         }
3507
3508         /* Parse link down interrupt propagation delay */
3509         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3510         if (arg_count == 1) {
3511                 uint32_t link_down_delay_ms;
3512
3513                 if (rte_kvargs_process(kvlist,
3514                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3515                                        &bond_ethdev_parse_time_ms_kvarg,
3516                                        &link_down_delay_ms) < 0) {
3517                         RTE_BOND_LOG(INFO,
3518                                      "Invalid link down propagation delay value specified for"
3519                                      " bonded device %s", name);
3520                         return -1;
3521                 }
3522
3523                 /* Set link down propagation delay */
3524                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3525                     != 0) {
3526                         RTE_BOND_LOG(ERR,
3527                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3528                                      link_down_delay_ms, name);
3529                         return -1;
3530                 }
3531         } else if (arg_count > 1) {
3532                 RTE_BOND_LOG(INFO,
3533                              "Link down propagation delay can be specified only once for  bonded device %s",
3534                              name);
3535                 return -1;
3536         }
3537
3538         return 0;
3539 }
3540
3541 struct rte_vdev_driver pmd_bond_drv = {
3542         .probe = bond_probe,
3543         .remove = bond_remove,
3544 };
3545
3546 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3547 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3548
3549 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3550         "slave=<ifc> "
3551         "primary=<ifc> "
3552         "mode=[0-6] "
3553         "xmit_policy=[l2 | l23 | l34] "
3554         "agg_mode=[count | stable | bandwidth] "
3555         "socket_id=<int> "
3556         "mac=<mac addr> "
3557         "lsc_poll_period_ms=<int> "
3558         "up_delay=<int> "
3559         "down_delay=<int>");
3560
3561 int bond_logtype;
3562
3563 RTE_INIT(bond_init_log)
3564 {
3565         bond_logtype = rte_log_register("pmd.net.bond");
3566         if (bond_logtype >= 0)
3567                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3568 }