net/bonding: fix slave id types
drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

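/*
 * Return the length of any VLAN headers (zero, one or two struct vlan_hdr)
 * sitting between the Ethernet header and the L3 header, and advance *proto
 * past the tags to the encapsulated ethertype.
 */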
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
		rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}

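/*
 * Mode 0 (round-robin) RX: poll the active slaves in turn, starting from a
 * per-device rotating index so that successive bursts spread the polling
 * load evenly across slaves.
 */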
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_total = 0;
	uint16_t slave_count;
	uint16_t active_slave;
	int i;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	internals = bd_rx_q->dev_private;
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave =
			rte_eth_rx_burst(internals->active_slaves[active_slave],
					 bd_rx_q->queue_id,
					 bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;
	return num_rx_total;
}

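/*
 * Mode 1 (active-backup) RX: packets are only ever received on the current
 * primary slave; the backups stay idle.
 */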
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}

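/*
 * Return true for untagged slow-protocol frames, i.e. LACPDUs and marker
 * PDUs, which must be diverted to the mode 4 state machine instead of being
 * delivered to the application.
 */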
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};

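/*
 * Check that a flow rule matching the slow-protocol ethertype can be
 * installed on the slave and that the slave has enough RX/TX queues to host
 * the extra dedicated control-packet queue.
 */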
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities don't allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}

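/*
 * Check whether hardware filtering of slow packets into a dedicated queue
 * is possible on every slave, and record the queue ids the dedicated
 * queues would use.
 */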
int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify that all slaves in the bonding device support flow director */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}

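/*
 * Install the flow rule that steers slow-protocol frames received on the
 * slave into the dedicated mode 4 control queue.
 */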
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}

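/*
 * Mode 4 RX when dedicated hardware queues are enabled: slow-protocol
 * frames are steered by the flow rule to the control queue, so the data
 * queues can be polled round-robin without any software filtering.
 */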
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint16_t active_slave;
	uint16_t i;

	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Read packets from this slave */
		num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
						bd_rx_q->queue_id,
						bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;

		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}

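/*
 * Mode 4 TX when dedicated hardware queues are enabled: hash each packet to
 * one of the slaves that is currently in the DISTRIBUTING state and send
 * the per-slave bursts; LACP control traffic is handled elsewhere.
 */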
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	/* 2-D array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves;
	 * one entry per packet, so it must be sized by nb_bufs, not by the
	 * number of ports. */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);


	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (unlikely(dist_slave_count < 1))
		return 0;

	/*
	 * Populate each slave's mbuf array with the packets to be sent on it,
	 * selecting the output slave with a hash based on the xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}


	/* Send packet burst on each slave device */
	for (i = 0; i < dist_slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
			       &slave_bufs[i][slave_tx_count],
			       slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}


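/*
 * Mode 4 RX on the software data path: poll the active slaves round-robin,
 * then strip LACPDUs and marker PDUs out of the burst (handing them to the
 * mode 4 state machine) and drop frames that a non-collecting or
 * non-promiscuous port should not deliver.
 */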
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct rte_eth_dev *bonded_eth_dev =
					&rte_eth_devices[internals->port_id];
	struct ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;  /* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t subtype;
	uint16_t j, k;	/* packet indexes; sized like num_rx_total so a full burst fits */
	uint16_t i;

	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
					 COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {

			/* A packet with layers beyond pure L2 Ethernet is a
			 * known upper-layer packet, not a slow frame; skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove the packet from the array if it is a slow
			 * packet, if the slave is not in collecting state, or
			 * if the bonding interface is not in promiscuous mode
			 * and the destination address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
				!collecting ||
				(!promisc &&
				 !is_multicast_ether_addr(&hdr->d_addr) &&
				 !is_same_ether_addr(bond_mac,
						     &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
					    internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}
	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
	return;
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

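/*
 * Track per-client RX/TX packet counts for the ALB debug output, keyed by
 * (IPv4 address, port). TXorRXindicator points at burstnumberRX or
 * burstnumberTX and selects which counter to bump.
 */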
static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX packets number for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and increment its
	 * stats. Drop the update if the table is already full, rather than
	 * writing past the end of client_stats. */
	if (active_clients >= MAX_CLIENTS_NUMBER)
		return;
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;

}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
		info,							\
		port,							\
		eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
		src_ip,							\
		eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
		dst_ip,							\
		arp_op, ++burstnumber)
#endif

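/*
 * Log the IPv4/ARP addressing of a packet seen on the ALB path and update
 * the per-client statistics table.
 */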
static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	strlcpy(buf, info, 16);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif

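/*
 * Mode 6 (ALB) RX: receive with the round-robin policy, then pass any ARP
 * packets to the ALB logic so the client table stays in sync.
 */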
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	/* Cast to structure containing the bonded device's port id and queue
	 * id; this is an RX path, so the queue is a bond_rx_queue. */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}

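/*
 * Mode 0 (round-robin) TX: spread the burst packet-by-packet across the
 * active slaves, continuing from a persistent index so consecutive bursts
 * rotate across slaves.
 */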
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Distribute the packets round-robin across the slaves' buffers */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
				       &slave_bufs[i][num_tx_slave],
				       tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

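/*
 * Mode 1 (active-backup) TX: everything goes out on the current primary
 * slave.
 */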
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}

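/*
 * XOR-fold the interesting header fields into small hash values; these
 * helpers feed the burst_xmit_*_hash() transmit policies below.
 */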
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}


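/*
 * Layer 2 xmit policy: choose the output slave from a hash of the source
 * and destination MAC addresses, so each L2 flow sticks to one slave.
 */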
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint32_t hash;
	int i;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

		hash = ether_hash(eth_hdr);

		slaves[i] = (hash ^= hash >> 8) % slave_count;
	}
}

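/*
 * Layer 2+3 xmit policy: combine the MAC hash with an IPv4/IPv6 address
 * hash so flows are spread by L3 endpoints as well.
 */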
void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	uint16_t i;
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	uint32_t hash, l3hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		l3hash = 0;

		proto = eth_hdr->ether_type;
		hash = ether_hash(eth_hdr);

		vlan_offset = get_vlan_offset(eth_hdr, &proto);

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv4_hash(ipv4_hdr);

		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);
		}

		hash = hash ^ l3hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}

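/*
 * Layer 3+4 xmit policy: hash the IP addresses together with the TCP/UDP
 * ports, skipping the L4 part for fragmented IPv4 packets, which carry no
 * L4 header.
 */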
void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	int i;

	struct udp_hdr *udp_hdr;
	struct tcp_hdr *tcp_hdr;
	uint32_t hash, l3hash, l4hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		proto = eth_hdr->ether_type;
		vlan_offset = get_vlan_offset(eth_hdr, &proto);
		l3hash = 0;
		l4hash = 0;

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			size_t ip_hdr_offset;

			l3hash = ipv4_hash(ipv4_hdr);

			/* there is no L4 header in fragmented packet */
			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
								== 0)) {
				ip_hdr_offset = (ipv4_hdr->version_ihl
					& IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
					tcp_hdr = (struct tcp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					l4hash = HASH_L4_PORTS(tcp_hdr);
				} else if (ipv4_hdr->next_proto_id ==
								IPPROTO_UDP) {
					udp_hdr = (struct udp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					l4hash = HASH_L4_PORTS(udp_hdr);
				}
			}
		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);

			if (ipv6_hdr->proto == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}

		hash = l3hash ^ l4hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}

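/* Remaining bandwidth (integer and remainder parts) of a slave, used to
 * order slaves in TLB mode. */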
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}

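/* qsort comparator: order slaves by descending remaining bandwidth. */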
static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}

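/*
 * Estimate the bandwidth still available on a slave: convert the link speed
 * to bytes per reorder window, subtract the observed TX load, and keep the
 * quotient and remainder of the division for the comparator above.
 */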
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

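/*
 * Alarm callback for TLB mode: sample each active slave's TX byte counter,
 * compute its remaining bandwidth, and re-sort tlb_slaves_order so the
 * least-loaded slave is tried first. Re-arms itself every REORDER_PERIOD_MS.
 */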
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint16_t slave_id;
	uint16_t i;

	internals->slave_update_idx++;


	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}

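/*
 * Mode 5 (TLB) TX: walk the slaves in bandwidth order, rewriting the source
 * MAC of packets that carry the primary's address to the transmitting
 * slave's address before each burst.
 */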
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);


	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}

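/*
 * Mode 6 (ALB) TX: ARP packets are assigned to slaves by the ALB client
 * table (with their source MAC rewritten), ARP update packets generated for
 * known clients go out on their assigned slaves, and all other traffic
 * falls back to the TLB policy.
 */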
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_BOND_LOG(ERR,
						     "Failed to allocate ARP packet from pool");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}

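/*
 * Mode 2 (balance) TX: hash each packet to an active slave according to the
 * configured xmit policy and send the per-slave bursts; unsent packets are
 * moved to the tail of bufs so the caller can retry them.
 */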
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	/* Array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	/*
	 * Populate each slave's mbuf array with the packets to be sent on it,
	 * selecting the output slave with a hash based on the xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
			       &slave_bufs[i][slave_tx_count],
			       slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}

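/*
 * Mode 4 (802.3ad) TX on the software data path: first send any pending
 * LACP control packet from each slave's tx_ring, then hash the data packets
 * across the slaves that are currently DISTRIBUTING.
 */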
1278 static uint16_t
1279 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1280                 uint16_t nb_bufs)
1281 {
1282         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1283         struct bond_dev_private *internals = bd_tx_q->dev_private;
1284
1285         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1286         uint16_t slave_count;
1287
1288         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1289         uint16_t dist_slave_count;
1290
1291         /* 2-D array to sort mbufs for transmission on each slave into */
1292         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1293         /* Number of mbufs for transmission on each slave */
1294         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1295         /* Mapping array generated by hash function to map mbufs to slaves */
1296         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1297
1298         uint16_t slave_tx_count;
1299         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1300
1301         uint16_t i;
1302
1303         /* Copy slave list to protect against slave up/down changes during tx
1304          * bursting */
1305         slave_count = internals->active_slave_count;
1306         if (unlikely(slave_count < 1))
1307                 return 0;
1308
1309         memcpy(slave_port_ids, internals->active_slaves,
1310                         sizeof(slave_port_ids[0]) * slave_count);
1311
1312         /* Check for LACP control packets and send if available */
1313         for (i = 0; i < slave_count; i++) {
1314                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1315                 struct rte_mbuf *ctrl_pkt = NULL;
1316
1317                 if (likely(rte_ring_empty(port->tx_ring)))
1318                         continue;
1319
1320                 if (rte_ring_dequeue(port->tx_ring,
1321                                      (void **)&ctrl_pkt) != -ENOENT) {
1322                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1323                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1324                         /*
1325                          * re-enqueue LAG control plane packets to buffering
1326                          * ring if transmission fails so the packet isn't lost.
1327                          */
1328                         if (slave_tx_count != 1)
1329                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1330                 }
1331         }
1332
1333         if (unlikely(nb_bufs == 0))
1334                 return 0;
1335
1336         dist_slave_count = 0;
1337         for (i = 0; i < slave_count; i++) {
1338                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1339
1340                 if (ACTOR_STATE(port, DISTRIBUTING))
1341                         dist_slave_port_ids[dist_slave_count++] =
1342                                         slave_port_ids[i];
1343         }
1344
1345         if (likely(dist_slave_count > 0)) {
1346
1347                 /*
1348                  * Populate slaves mbuf with the packets which are to be sent
1349                  * on it, selecting output slave using hash based on xmit policy
1350                  */
1351                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1352                                 bufs_slave_port_idxs);
1353
1354                 for (i = 0; i < nb_bufs; i++) {
1355                         /*
1356                          * Populate slave mbuf arrays with mbufs for that
1357                          * slave
1358                          */
1359                         uint16_t slave_idx = bufs_slave_port_idxs[i];
1360
1361                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1362                                         bufs[i];
1363                 }
1364
1366                 /* Send packet burst on each slave device */
1367                 for (i = 0; i < dist_slave_count; i++) {
1368                         if (slave_nb_bufs[i] == 0)
1369                                 continue;
1370
1371                         slave_tx_count = rte_eth_tx_burst(
1372                                         dist_slave_port_ids[i],
1373                                         bd_tx_q->queue_id, slave_bufs[i],
1374                                         slave_nb_bufs[i]);
1375
1376                         total_tx_count += slave_tx_count;
1377
1378                         /* If tx burst fails, move unsent packets to end of bufs for possible retry */
1379                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1380                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1381                                                 slave_tx_count;
1382                                 total_tx_fail_count += slave_tx_fail_count;
1383
1384                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1385                                        &slave_bufs[i][slave_tx_count],
1386                                        slave_tx_fail_count * sizeof(bufs[0]));
1387                         }
1388                 }
1389         }
1390
1391         return total_tx_count;
1392 }
1393
1394 static uint16_t
1395 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1396                 uint16_t nb_pkts)
1397 {
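        /*
         * Broadcast mode clones each mbuf to every active slave by bumping
         * its reference count by (number of slaves - 1) before transmitting.
         * The return value is the count from the most successful slave;
         * unsent duplicates on the other slaves are freed below.
         */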
1398         struct bond_dev_private *internals;
1399         struct bond_tx_queue *bd_tx_q;
1400
1401         uint16_t slaves[RTE_MAX_ETHPORTS];
1402         uint8_t tx_failed_flag = 0;
1403         uint16_t num_of_slaves;
1404
1405         uint16_t max_nb_of_tx_pkts = 0;
1406
1407         int slave_tx_total[RTE_MAX_ETHPORTS];
1408         int i, most_successful_tx_slave = -1;
1409
1410         bd_tx_q = (struct bond_tx_queue *)queue;
1411         internals = bd_tx_q->dev_private;
1412
1413         /* Copy slave list to protect against slave up/down changes during tx
1414          * bursting */
1415         num_of_slaves = internals->active_slave_count;
1416         memcpy(slaves, internals->active_slaves,
1417                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1418
1419         if (num_of_slaves < 1)
1420                 return 0;
1421
1422         /* Increment reference count on mbufs */
1423         for (i = 0; i < nb_pkts; i++)
1424                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1425
1426         /* Transmit burst on each active slave */
1427         for (i = 0; i < num_of_slaves; i++) {
1428                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1429                                         bufs, nb_pkts);
1430
1431                 if (unlikely(slave_tx_total[i] < nb_pkts))
1432                         tx_failed_flag = 1;
1433
1434                 /* record the value and slave index for the slave which transmits the
1435                  * maximum number of packets */
1436                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1437                         max_nb_of_tx_pkts = slave_tx_total[i];
1438                         most_successful_tx_slave = i;
1439                 }
1440         }
1441
1442         /* If slaves fail to transmit packets from the burst, the calling
1443          * application is not aware of the extra mbuf references, so we must free
1444          * the unsent packets on every slave except the most successful one
1445          */
1446         if (unlikely(tx_failed_flag))
1447                 for (i = 0; i < num_of_slaves; i++)
1448                         if (i != most_successful_tx_slave)
1449                                 while (slave_tx_total[i] < nb_pkts)
1450                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1451
1452         return max_nb_of_tx_pkts;
1453 }
1454
1455 static void
1456 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1457 {
1458         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1459
1460         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1461                 /**
1462                  * If in mode 4 then save the link properties of the first
1463                  * slave; all subsequent slaves must match these properties
1464                  */
1465                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1466
1467                 bond_link->link_autoneg = slave_link->link_autoneg;
1468                 bond_link->link_duplex = slave_link->link_duplex;
1469                 bond_link->link_speed = slave_link->link_speed;
1470         } else {
1471                 /**
1472                  * In any other mode the link properties are set to default
1473                  * values of AUTONEG/DUPLEX
1474                  */
1475                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1476                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1477         }
1478 }
1479
1480 static int
1481 link_properties_valid(struct rte_eth_dev *ethdev,
1482                 struct rte_eth_link *slave_link)
1483 {
1484         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1485
1486         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1487                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1488
1489                 if (bond_link->link_duplex != slave_link->link_duplex ||
1490                         bond_link->link_autoneg != slave_link->link_autoneg ||
1491                         bond_link->link_speed != slave_link->link_speed)
1492                         return -1;
1493         }
1494
1495         return 0;
1496 }
1497
1498 int
1499 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1500 {
1501         struct ether_addr *mac_addr;
1502
1503         if (eth_dev == NULL) {
1504                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1505                 return -1;
1506         }
1507
1508         if (dst_mac_addr == NULL) {
1509                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1510                 return -1;
1511         }
1512
1513         mac_addr = eth_dev->data->mac_addrs;
1514
1515         ether_addr_copy(mac_addr, dst_mac_addr);
1516         return 0;
1517 }
1518
1519 int
1520 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1521 {
1522         struct ether_addr *mac_addr;
1523
1524         if (eth_dev == NULL) {
1525                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1526                 return -1;
1527         }
1528
1529         if (new_mac_addr == NULL) {
1530                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1531                 return -1;
1532         }
1533
1534         mac_addr = eth_dev->data->mac_addrs;
1535
1536         /* If the new MAC is different from the current MAC then update */
1537         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1538                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1539
1540         return 0;
1541 }
1542
1543 static const struct ether_addr null_mac_addr;
1544
1545 /*
1546  * Add additional MAC addresses to the slave
1547  */
1548 int
1549 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1550                 uint16_t slave_port_id)
1551 {
1552         int i, ret;
1553         struct ether_addr *mac_addr;
1554
1555         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1556                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1557                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1558                         break;
1559
1560                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1561                 if (ret < 0) {
1562                         /* rollback */
1563                         for (i--; i > 0; i--)
1564                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1565                                         &bonded_eth_dev->data->mac_addrs[i]);
1566                         return ret;
1567                 }
1568         }
1569
1570         return 0;
1571 }
1572
1573 /*
1574  * Remove additional MAC addresses from the slave
1575  */
1576 int
1577 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1578                 uint16_t slave_port_id)
1579 {
1580         int i, rc, ret;
1581         struct ether_addr *mac_addr;
1582
1583         rc = 0;
1584         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1585                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1586                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1587                         break;
1588
1589                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1590                 /* save only the first error */
1591                 if (ret < 0 && rc == 0)
1592                         rc = ret;
1593         }
1594
1595         return rc;
1596 }
1597
1598 int
1599 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1600 {
1601         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1602         int i;
1603
1604         /* Update slave devices MAC addresses */
1605         if (internals->slave_count < 1)
1606                 return -1;
1607
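        /*
         * Round robin, balance and broadcast modes put the bond's MAC on
         * every slave; mode 4 delegates the update to the 802.3ad state
         * machine; the remaining (primary-based) modes give the bond's MAC
         * only to the current primary and restore each other slave's own
         * persisted MAC.
         */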
1608         switch (internals->mode) {
1609         case BONDING_MODE_ROUND_ROBIN:
1610         case BONDING_MODE_BALANCE:
1611         case BONDING_MODE_BROADCAST:
1612                 for (i = 0; i < internals->slave_count; i++) {
1613                         if (rte_eth_dev_default_mac_addr_set(
1614                                         internals->slaves[i].port_id,
1615                                         bonded_eth_dev->data->mac_addrs)) {
1616                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1617                                                 internals->slaves[i].port_id);
1618                                 return -1;
1619                         }
1620                 }
1621                 break;
1622         case BONDING_MODE_8023AD:
1623                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1624                 break;
1625         case BONDING_MODE_ACTIVE_BACKUP:
1626         case BONDING_MODE_TLB:
1627         case BONDING_MODE_ALB:
1628         default:
1629                 for (i = 0; i < internals->slave_count; i++) {
1630                         if (internals->slaves[i].port_id ==
1631                                         internals->current_primary_port) {
1632                                 if (rte_eth_dev_default_mac_addr_set(
1633                                                 internals->current_primary_port,
1634                                                 bonded_eth_dev->data->mac_addrs)) {
1635                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1636                                                         internals->current_primary_port);
1637                                         return -1;
1638                                 }
1639                         } else {
1640                                 if (rte_eth_dev_default_mac_addr_set(
1641                                                 internals->slaves[i].port_id,
1642                                                 &internals->slaves[i].persisted_mac_addr)) {
1643                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1644                                                         internals->slaves[i].port_id);
1645                                         return -1;
1646                                 }
1647                         }
1648                 }
1649         }
1650
1651         return 0;
1652 }
1653
1654 int
1655 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1656 {
1657         struct bond_dev_private *internals;
1658
1659         internals = eth_dev->data->dev_private;
1660
1661         switch (mode) {
1662         case BONDING_MODE_ROUND_ROBIN:
1663                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1664                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1665                 break;
1666         case BONDING_MODE_ACTIVE_BACKUP:
1667                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1668                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1669                 break;
1670         case BONDING_MODE_BALANCE:
1671                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1672                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1673                 break;
1674         case BONDING_MODE_BROADCAST:
1675                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1676                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1677                 break;
1678         case BONDING_MODE_8023AD:
1679                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1680                         return -1;
1681
1682                 if (internals->mode4.dedicated_queues.enabled == 0) {
1683                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1684                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1685                         RTE_BOND_LOG(WARNING,
1686                                 "Using mode 4, the application must invoke the "
1687                                 "Rx and Tx burst functions at least once every 100ms.");
1688                 } else {
1689                         /* Use flow director's optimization */
1690                         eth_dev->rx_pkt_burst =
1691                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1692                         eth_dev->tx_pkt_burst =
1693                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1694                 }
1695                 break;
1696         case BONDING_MODE_TLB:
1697                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1698                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1699                 break;
1700         case BONDING_MODE_ALB:
1701                 if (bond_mode_alb_enable(eth_dev) != 0)
1702                         return -1;
1703
1704                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1705                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1706                 break;
1707         default:
1708                 return -1;
1709         }
1710
1711         internals->mode = mode;
1712
1713         return 0;
1714 }
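/*
 * For reference, a minimal application-side sketch (device name and slave
 * port id below are hypothetical) that exercises this mode selection through
 * the public API from rte_eth_bond.h:
 *
 *     int bond_port = rte_eth_bond_create("net_bonding0",
 *                     BONDING_MODE_8023AD, rte_socket_id());
 *     if (bond_port >= 0)
 *             rte_eth_bond_slave_add(bond_port, slave_port_id);
 *
 * Creating the device ends up invoking bond_ethdev_mode_set() above, which
 * binds the matching rx/tx burst handlers.
 */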
1715
1717 static int
1718 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1719                 struct rte_eth_dev *slave_eth_dev)
1720 {
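        /*
         * With dedicated queues enabled, each slave carries one extra rx/tx
         * queue pair, placed after the data queues, that is reserved for LACP
         * control traffic; bond_ethdev_8023ad_flow_set() later installs the
         * flow rule that steers slow frames into the dedicated rx queue.
         */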
1721         int errval = 0;
1722         struct bond_dev_private *internals = (struct bond_dev_private *)
1723                 bonded_eth_dev->data->dev_private;
1724         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1725
1726         if (port->slow_pool == NULL) {
1727                 char mem_name[256];
1728                 uint16_t slave_id = slave_eth_dev->data->port_id;
1729
1730                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1731                                 slave_id);
1732                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1733                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1734                         slave_eth_dev->data->numa_node);
1735
1736                 /* Any memory allocation failure in initialization is critical because
1737                  * resources can't be freed, so reinitialization is impossible. */
1738                 if (port->slow_pool == NULL) {
1739                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1740                                 slave_id, mem_name, rte_strerror(rte_errno));
1741                 }
1742         }
1743
1744         if (internals->mode4.dedicated_queues.enabled == 1) {
1745                 /* Configure slow Rx queue */
1746
1747                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1748                                 internals->mode4.dedicated_queues.rx_qid, 128,
1749                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1750                                 NULL, port->slow_pool);
1751                 if (errval != 0) {
1752                         RTE_BOND_LOG(ERR,
1753                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1754                                         slave_eth_dev->data->port_id,
1755                                         internals->mode4.dedicated_queues.rx_qid,
1756                                         errval);
1757                         return errval;
1758                 }
1759
1760                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1761                                 internals->mode4.dedicated_queues.tx_qid, 512,
1762                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1763                                 NULL);
1764                 if (errval != 0) {
1765                         RTE_BOND_LOG(ERR,
1766                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1767                                 slave_eth_dev->data->port_id,
1768                                 internals->mode4.dedicated_queues.tx_qid,
1769                                 errval);
1770                         return errval;
1771                 }
1772         }
1773         return 0;
1774 }
1775
1776 int
1777 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1778                 struct rte_eth_dev *slave_eth_dev)
1779 {
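        /*
         * Bring the slave in line with the bonded device: stop it, mirror
         * the bond's RSS, VLAN-filter and MTU settings, recreate its rx/tx
         * queues (plus the mode 4 slow queues when dedicated queues are
         * enabled), restart it and resync the RETA and initial link status.
         */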
1780         struct bond_rx_queue *bd_rx_q;
1781         struct bond_tx_queue *bd_tx_q;
1782         uint16_t nb_rx_queues;
1783         uint16_t nb_tx_queues;
1784
1785         int errval;
1786         uint16_t q_id;
1787         struct rte_flow_error flow_error;
1788
1789         struct bond_dev_private *internals = (struct bond_dev_private *)
1790                 bonded_eth_dev->data->dev_private;
1791
1792         /* Stop slave */
1793         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1794
1795         /* Enable interrupts on slave device if supported */
1796         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1797                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1798
1799         /* If RSS is enabled for bonding, try to enable it for slaves  */
1800         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1801                 if (internals->rss_key_len != 0) {
1802                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1803                                         internals->rss_key_len;
1804                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1805                                         internals->rss_key;
1806                 } else {
1807                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1808                 }
1809
1810                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1811                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1812                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1813                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1814         }
1815
1816         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1817                         DEV_RX_OFFLOAD_VLAN_FILTER)
1818                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1819                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1820         else
1821                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1822                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1823
1824         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1825         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1826
1827         if (internals->mode == BONDING_MODE_8023AD) {
1828                 if (internals->mode4.dedicated_queues.enabled == 1) {
1829                         nb_rx_queues++;
1830                         nb_tx_queues++;
1831                 }
1832         }
1833
1834         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1835                                      bonded_eth_dev->data->mtu);
1836         if (errval != 0 && errval != -ENOTSUP) {
1837                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1838                                 slave_eth_dev->data->port_id, errval);
1839                 return errval;
1840         }
1841
1842         /* Configure device */
1843         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1844                         nb_rx_queues, nb_tx_queues,
1845                         &(slave_eth_dev->data->dev_conf));
1846         if (errval != 0) {
1847                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1848                                 slave_eth_dev->data->port_id, errval);
1849                 return errval;
1850         }
1851
1852         /* Setup Rx Queues */
1853         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1854                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1855
1856                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1857                                 bd_rx_q->nb_rx_desc,
1858                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1859                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1860                 if (errval != 0) {
1861                         RTE_BOND_LOG(ERR,
1862                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1863                                         slave_eth_dev->data->port_id, q_id, errval);
1864                         return errval;
1865                 }
1866         }
1867
1868         /* Setup Tx Queues */
1869         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1870                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1871
1872                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1873                                 bd_tx_q->nb_tx_desc,
1874                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1875                                 &bd_tx_q->tx_conf);
1876                 if (errval != 0) {
1877                         RTE_BOND_LOG(ERR,
1878                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1879                                 slave_eth_dev->data->port_id, q_id, errval);
1880                         return errval;
1881                 }
1882         }
1883
1884         if (internals->mode == BONDING_MODE_8023AD &&
1885                         internals->mode4.dedicated_queues.enabled == 1) {
1886                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1887                 if (errval != 0)
1888                         return errval;
1889
1890                 errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1891                                 slave_eth_dev->data->port_id);
1892                 if (errval != 0) {
1893                         RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
1894                                 slave_eth_dev->data->port_id, errval);
1895                         return errval;
1896                 }
1897
1898                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1899                         rte_flow_destroy(slave_eth_dev->data->port_id,
1900                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1901                                         &flow_error);
1902
1903                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1904                                 slave_eth_dev->data->port_id);
1905         }
1906
1907         /* Start device */
1908         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1909         if (errval != 0) {
1910                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1911                                 slave_eth_dev->data->port_id, errval);
1912                 return -1;
1913         }
1914
1915         /* If RSS is enabled for bonding, synchronize RETA */
1916         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1917                 int i;
1918                 struct bond_dev_private *internals;
1919
1920                 internals = bonded_eth_dev->data->dev_private;
1921
1922                 for (i = 0; i < internals->slave_count; i++) {
1923                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1924                                 errval = rte_eth_dev_rss_reta_update(
1925                                                 slave_eth_dev->data->port_id,
1926                                                 &internals->reta_conf[0],
1927                                                 internals->slaves[i].reta_size);
1928                                 if (errval != 0) {
1929                                         RTE_BOND_LOG(WARNING,
1930                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1931                                                      " RSS Configuration for bonding may be inconsistent.",
1932                                                      slave_eth_dev->data->port_id, errval);
1933                                 }
1934                                 break;
1935                         }
1936                 }
1937         }
1938
1939         /* If lsc interrupt is set, check initial slave's link status */
1940         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1941                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1942                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1943                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1944                         NULL);
1945         }
1946
1947         return 0;
1948 }
1949
1950 void
1951 slave_remove(struct bond_dev_private *internals,
1952                 struct rte_eth_dev *slave_eth_dev)
1953 {
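        /*
         * Close the gap left by the departing slave in both the slave array
         * and every rte_flow's per-slave flow array so the two stay index
         * aligned.
         */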
1954         uint16_t i;
1955
1956         for (i = 0; i < internals->slave_count; i++)
1957                 if (internals->slaves[i].port_id ==
1958                                 slave_eth_dev->data->port_id)
1959                         break;
1960
1961         if (i < (internals->slave_count - 1)) {
1962                 struct rte_flow *flow;
1963
1964                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1965                                 sizeof(internals->slaves[0]) *
1966                                 (internals->slave_count - i - 1));
1967                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1968                         memmove(&flow->flows[i], &flow->flows[i + 1],
1969                                 sizeof(flow->flows[0]) *
1970                                 (internals->slave_count - i - 1));
1971                         flow->flows[internals->slave_count - 1] = NULL;
1972                 }
1973         }
1974
1975         internals->slave_count--;
1976
1977         /* force reconfiguration of slave interfaces */
1978         _rte_eth_dev_reset(slave_eth_dev);
1979 }
1980
1981 static void
1982 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1983
1984 void
1985 slave_add(struct bond_dev_private *internals,
1986                 struct rte_eth_dev *slave_eth_dev)
1987 {
1988         struct bond_slave_details *slave_details =
1989                         &internals->slaves[internals->slave_count];
1990
1991         slave_details->port_id = slave_eth_dev->data->port_id;
1992         slave_details->last_link_status = 0;
1993
1994         /* Mark slave devices that don't support interrupts so we can
1995          * compensate when we start the bond
1996          */
1997         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1998                 slave_details->link_status_poll_enabled = 1;
1999         }
2000
2001         slave_details->link_status_wait_to_complete = 0;
2002         /* save the slave's current MAC so it can be restored later */
2003         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2004                         sizeof(struct ether_addr));
2005 }
2006
2007 void
2008 bond_ethdev_primary_set(struct bond_dev_private *internals,
2009                 uint16_t slave_port_id)
2010 {
2011         int i;
2012
2013         if (internals->active_slave_count < 1)
2014                 internals->current_primary_port = slave_port_id;
2015         else
2016                 /* Search bonded device slave ports for new proposed primary port */
2017                 for (i = 0; i < internals->active_slave_count; i++) {
2018                         if (internals->active_slaves[i] == slave_port_id)
2019                                 internals->current_primary_port = slave_port_id;
2020                 }
2021 }
2022
2023 static void
2024 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2025
2026 static int
2027 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2028 {
2029         struct bond_dev_private *internals;
2030         int i;
2031
2032         /* slave eth dev will be started by bonded device */
2033         if (check_for_bonded_ethdev(eth_dev)) {
2034                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2035                                 eth_dev->data->port_id);
2036                 return -1;
2037         }
2038
2039         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2040         eth_dev->data->dev_started = 1;
2041
2042         internals = eth_dev->data->dev_private;
2043
2044         if (internals->slave_count == 0) {
2045                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2046                 goto out_err;
2047         }
2048
2049         if (internals->user_defined_mac == 0) {
2050                 struct ether_addr *new_mac_addr = NULL;
2051
2052                 for (i = 0; i < internals->slave_count; i++)
2053                         if (internals->slaves[i].port_id == internals->primary_port)
2054                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2055
2056                 if (new_mac_addr == NULL)
2057                         goto out_err;
2058
2059                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2060                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2061                                         eth_dev->data->port_id);
2062                         goto out_err;
2063                 }
2064         }
2065
2066         /* If bonded device is configured in promiscuous mode then re-apply config */
2067         if (internals->promiscuous_en)
2068                 bond_ethdev_promiscuous_enable(eth_dev);
2069
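        /*
         * With dedicated queues, the mode 4 slow-path queue ids sit directly
         * after the application's data queues; slave_configure() sets up the
         * extra queue pair on each slave to match.
         */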
2070         if (internals->mode == BONDING_MODE_8023AD) {
2071                 if (internals->mode4.dedicated_queues.enabled == 1) {
2072                         internals->mode4.dedicated_queues.rx_qid =
2073                                         eth_dev->data->nb_rx_queues;
2074                         internals->mode4.dedicated_queues.tx_qid =
2075                                         eth_dev->data->nb_tx_queues;
2076                 }
2077         }
2078
2079
2080         /* Reconfigure each slave device if starting bonded device */
2081         for (i = 0; i < internals->slave_count; i++) {
2082                 struct rte_eth_dev *slave_ethdev =
2083                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2084                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2085                         RTE_BOND_LOG(ERR,
2086                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2087                                 eth_dev->data->port_id,
2088                                 internals->slaves[i].port_id);
2089                         goto out_err;
2090                 }
2091                 /* We will need to poll for link status if any slave doesn't
2092                  * support interrupts
2093                  */
2094                 if (internals->slaves[i].link_status_poll_enabled)
2095                         internals->link_status_polling_enabled = 1;
2096         }
2097
2098         /* start polling if needed */
2099         if (internals->link_status_polling_enabled) {
2100                 rte_eal_alarm_set(
2101                         internals->link_status_polling_interval_ms * 1000,
2102                         bond_ethdev_slave_link_status_change_monitor,
2103                         (void *)&rte_eth_devices[internals->port_id]);
2104         }
2105
2106         /* Update all slave devices' MACs */
2107         if (mac_address_slaves_update(eth_dev) != 0)
2108                 goto out_err;
2109
2110         if (internals->user_defined_primary_port)
2111                 bond_ethdev_primary_set(internals, internals->primary_port);
2112
2113         if (internals->mode == BONDING_MODE_8023AD)
2114                 bond_mode_8023ad_start(eth_dev);
2115
2116         if (internals->mode == BONDING_MODE_TLB ||
2117                         internals->mode == BONDING_MODE_ALB)
2118                 bond_tlb_enable(internals);
2119
2120         return 0;
2121
2122 out_err:
2123         eth_dev->data->dev_started = 0;
2124         return -1;
2125 }
2126
2127 static void
2128 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2129 {
2130         uint16_t i;
2131
2132         if (dev->data->rx_queues != NULL) {
2133                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2134                         rte_free(dev->data->rx_queues[i]);
2135                         dev->data->rx_queues[i] = NULL;
2136                 }
2137                 dev->data->nb_rx_queues = 0;
2138         }
2139
2140         if (dev->data->tx_queues != NULL) {
2141                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2142                         rte_free(dev->data->tx_queues[i]);
2143                         dev->data->tx_queues[i] = NULL;
2144                 }
2145                 dev->data->nb_tx_queues = 0;
2146         }
2147 }
2148
2149 void
2150 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2151 {
2152         struct bond_dev_private *internals = eth_dev->data->dev_private;
2153         uint16_t i;
2154
2155         if (internals->mode == BONDING_MODE_8023AD) {
2156                 struct port *port;
2157                 void *pkt = NULL;
2158
2159                 bond_mode_8023ad_stop(eth_dev);
2160
2161                 /* Discard all messages to/from mode 4 state machines */
2162                 for (i = 0; i < internals->active_slave_count; i++) {
2163                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2164
2165                         RTE_ASSERT(port->rx_ring != NULL);
2166                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2167                                 rte_pktmbuf_free(pkt);
2168
2169                         RTE_ASSERT(port->tx_ring != NULL);
2170                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2171                                 rte_pktmbuf_free(pkt);
2172                 }
2173         }
2174
2175         if (internals->mode == BONDING_MODE_TLB ||
2176                         internals->mode == BONDING_MODE_ALB) {
2177                 bond_tlb_disable(internals);
2178                 for (i = 0; i < internals->active_slave_count; i++)
2179                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2180         }
2181
2182         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2183         eth_dev->data->dev_started = 0;
2184
2185         internals->link_status_polling_enabled = 0;
2186         for (i = 0; i < internals->slave_count; i++) {
2187                 uint16_t slave_id = internals->slaves[i].port_id;
2188                 if (find_slave_by_id(internals->active_slaves,
2189                                 internals->active_slave_count, slave_id) !=
2190                                                 internals->active_slave_count) {
2191                         internals->slaves[i].last_link_status = 0;
2192                         rte_eth_dev_stop(slave_id);
2193                         deactivate_slave(eth_dev, slave_id);
2194                 }
2195         }
2196 }
2197
2198 void
2199 bond_ethdev_close(struct rte_eth_dev *dev)
2200 {
2201         struct bond_dev_private *internals = dev->data->dev_private;
2202         uint16_t bond_port_id = internals->port_id;
2203         int skipped = 0;
2204         struct rte_flow_error ferror;
2205
2206         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2207         while (internals->slave_count != skipped) {
2208                 uint16_t port_id = internals->slaves[skipped].port_id;
2209
2210                 rte_eth_dev_stop(port_id);
2211
2212                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2213                         RTE_BOND_LOG(ERR,
2214                                      "Failed to remove port %d from bonded device %s",
2215                                      port_id, dev->device->name);
2216                         skipped++;
2217                 }
2218         }
2219         bond_flow_ops.flush(dev, &ferror);
2220         bond_ethdev_free_queues(dev);
2221         rte_bitmap_reset(internals->vlan_filter_bmp);
2222 }
2223
2224 /* forward declaration */
2225 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2226
2227 static void
2228 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2229 {
2230         struct bond_dev_private *internals = dev->data->dev_private;
2231
2232         uint16_t max_nb_rx_queues = UINT16_MAX;
2233         uint16_t max_nb_tx_queues = UINT16_MAX;
2234         uint16_t max_rx_desc_lim = UINT16_MAX;
2235         uint16_t max_tx_desc_lim = UINT16_MAX;
2236
2237         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2238
2239         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2240                         internals->candidate_max_rx_pktlen :
2241                         ETHER_MAX_JUMBO_FRAME_LEN;
2242
2243         /* Max number of tx/rx queues that the bonded device can support is the
2244          * minimum of the values from the bonded slaves, as all slaves must be capable
2245          * of supporting the same number of tx/rx queues.
2246          */
2247         if (internals->slave_count > 0) {
2248                 struct rte_eth_dev_info slave_info;
2249                 uint16_t idx;
2250
2251                 for (idx = 0; idx < internals->slave_count; idx++) {
2252                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2253                                         &slave_info);
2254
2255                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2256                                 max_nb_rx_queues = slave_info.max_rx_queues;
2257
2258                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2259                                 max_nb_tx_queues = slave_info.max_tx_queues;
2260
2261                         if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2262                                 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2263
2264                         if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2265                                 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2266                 }
2267         }
2268
2269         dev_info->max_rx_queues = max_nb_rx_queues;
2270         dev_info->max_tx_queues = max_nb_tx_queues;
2271
2272         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2273                sizeof(dev_info->default_rxconf));
2274         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2275                sizeof(dev_info->default_txconf));
2276
2277         dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2278         dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2279
2280         /**
2281          * If dedicated hw queues enabled for link bonding device in LACP mode
2282          * then we need to reduce the maximum number of data path queues by 1.
2283          */
2284         if (internals->mode == BONDING_MODE_8023AD &&
2285                 internals->mode4.dedicated_queues.enabled == 1) {
2286                 dev_info->max_rx_queues--;
2287                 dev_info->max_tx_queues--;
2288         }
2289
2290         dev_info->min_rx_bufsize = 0;
2291
2292         dev_info->rx_offload_capa = internals->rx_offload_capa;
2293         dev_info->tx_offload_capa = internals->tx_offload_capa;
2294         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2295         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2296         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2297
2298         dev_info->reta_size = internals->reta_size;
2299 }
2300
2301 static int
2302 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2303 {
2304         int res;
2305         uint16_t i;
2306         struct bond_dev_private *internals = dev->data->dev_private;
2307
2308         /* don't do this while a slave is being added */
2309         rte_spinlock_lock(&internals->lock);
2310
2311         if (on)
2312                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2313         else
2314                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2315
2316         for (i = 0; i < internals->slave_count; i++) {
2317                 uint16_t port_id = internals->slaves[i].port_id;
2318
2319                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2320                 if (res == -ENOTSUP)
2321                         RTE_BOND_LOG(WARNING,
2322                                      "Setting VLAN filter on slave port %u not supported.",
2323                                      port_id);
2324         }
2325
2326         rte_spinlock_unlock(&internals->lock);
2327         return 0;
2328 }
2329
2330 static int
2331 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2332                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2333                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2334 {
2335         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2336                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2337                                         0, dev->data->numa_node);
2338         if (bd_rx_q == NULL)
2339                 return -1;
2340
2341         bd_rx_q->queue_id = rx_queue_id;
2342         bd_rx_q->dev_private = dev->data->dev_private;
2343
2344         bd_rx_q->nb_rx_desc = nb_rx_desc;
2345
2346         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2347         bd_rx_q->mb_pool = mb_pool;
2348
2349         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2350
2351         return 0;
2352 }
2353
2354 static int
2355 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2356                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2357                 const struct rte_eth_txconf *tx_conf)
2358 {
2359         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2360                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2361                                         0, dev->data->numa_node);
2362
2363         if (bd_tx_q == NULL)
2364                 return -1;
2365
2366         bd_tx_q->queue_id = tx_queue_id;
2367         bd_tx_q->dev_private = dev->data->dev_private;
2368
2369         bd_tx_q->nb_tx_desc = nb_tx_desc;
2370         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2371
2372         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2373
2374         return 0;
2375 }
2376
2377 static void
2378 bond_ethdev_rx_queue_release(void *queue)
2379 {
2380         if (queue == NULL)
2381                 return;
2382
2383         rte_free(queue);
2384 }
2385
2386 static void
2387 bond_ethdev_tx_queue_release(void *queue)
2388 {
2389         if (queue == NULL)
2390                 return;
2391
2392         rte_free(queue);
2393 }
2394
2395 static void
2396 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2397 {
2398         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2399         struct bond_dev_private *internals;
2400
2401         /* Default value for polling slave found is true as we don't want to
2402          * disable the polling thread if we cannot get the lock */
2403         int i, polling_slave_found = 1;
2404
2405         if (cb_arg == NULL)
2406                 return;
2407
2408         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2409         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2410
2411         if (!bonded_ethdev->data->dev_started ||
2412                 !internals->link_status_polling_enabled)
2413                 return;
2414
2415         /* If device is currently being configured then don't check slaves link
2416          * status, wait until next period */
2417         if (rte_spinlock_trylock(&internals->lock)) {
2418                 if (internals->slave_count > 0)
2419                         polling_slave_found = 0;
2420
2421                 for (i = 0; i < internals->slave_count; i++) {
2422                         if (!internals->slaves[i].link_status_poll_enabled)
2423                                 continue;
2424
2425                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2426                         polling_slave_found = 1;
2427
2428                         /* Update slave link status */
2429                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2430                                         internals->slaves[i].link_status_wait_to_complete);
2431
2432                         /* if link status has changed since last checked then call lsc
2433                          * event callback */
2434                         if (slave_ethdev->data->dev_link.link_status !=
2435                                         internals->slaves[i].last_link_status) {
2436                                 internals->slaves[i].last_link_status =
2437                                                 slave_ethdev->data->dev_link.link_status;
2438
2439                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2440                                                 RTE_ETH_EVENT_INTR_LSC,
2441                                                 &bonded_ethdev->data->port_id,
2442                                                 NULL);
2443                         }
2444                 }
2445                 rte_spinlock_unlock(&internals->lock);
2446         }
2447
2448         if (polling_slave_found)
2449                 /* Set alarm to continue monitoring link status of slave ethdev's */
2450                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2451                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2452 }
2453
2454 static int
2455 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2456 {
2457         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2458
2459         struct bond_dev_private *bond_ctx;
2460         struct rte_eth_link slave_link;
2461
2462         uint32_t idx;
2463
2464         bond_ctx = ethdev->data->dev_private;
2465
2466         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2467
2468         if (ethdev->data->dev_started == 0 ||
2469                         bond_ctx->active_slave_count == 0) {
2470                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2471                 return 0;
2472         }
2473
2474         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2475
2476         if (wait_to_complete)
2477                 link_update = rte_eth_link_get;
2478         else
2479                 link_update = rte_eth_link_get_nowait;
2480
2481         switch (bond_ctx->mode) {
2482         case BONDING_MODE_BROADCAST:
2483                 /**
2484                  * Setting link speed to UINT32_MAX to ensure we pick up the
2485                  * value of the first active slave
2486                  */
2487                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2488
2489                 /**
2490                  * link speed is minimum value of all the slaves link speed as
2491                  * packet loss will occur on this slave if transmission at rates
2492                  * greater than this are attempted
2493                  */
2494                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2495                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2496
2497                         if (slave_link.link_speed <
2498                                         ethdev->data->dev_link.link_speed)
2499                                 ethdev->data->dev_link.link_speed =
2500                                                 slave_link.link_speed;
2501                 }
2502                 break;
2503         case BONDING_MODE_ACTIVE_BACKUP:
2504                 /* Current primary slave */
2505                 link_update(bond_ctx->current_primary_port, &slave_link);
2506
2507                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2508                 break;
2509         case BONDING_MODE_8023AD:
2510                 ethdev->data->dev_link.link_autoneg =
2511                                 bond_ctx->mode4.slave_link.link_autoneg;
2512                 ethdev->data->dev_link.link_duplex =
2513                                 bond_ctx->mode4.slave_link.link_duplex;
2514                 /* fall through to update link speed */
2515         case BONDING_MODE_ROUND_ROBIN:
2516         case BONDING_MODE_BALANCE:
2517         case BONDING_MODE_TLB:
2518         case BONDING_MODE_ALB:
2519         default:
2520                 /**
2521                  * In these modes the maximum theoretical link speed is the sum
2522                  * of all the slaves' link speeds
2523                  */
2524                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2525
2526                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2527                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2528
2529                         ethdev->data->dev_link.link_speed +=
2530                                         slave_link.link_speed;
2531                 }
2532         }
2533
2535         return 0;
2536 }
2537
2539 static int
2540 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2541 {
2542         struct bond_dev_private *internals = dev->data->dev_private;
2543         struct rte_eth_stats slave_stats;
2544         int i, j;
2545
2546         for (i = 0; i < internals->slave_count; i++) {
2547                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2548
2549                 stats->ipackets += slave_stats.ipackets;
2550                 stats->opackets += slave_stats.opackets;
2551                 stats->ibytes += slave_stats.ibytes;
2552                 stats->obytes += slave_stats.obytes;
2553                 stats->imissed += slave_stats.imissed;
2554                 stats->ierrors += slave_stats.ierrors;
2555                 stats->oerrors += slave_stats.oerrors;
2556                 stats->rx_nombuf += slave_stats.rx_nombuf;
2557
2558                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2559                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2560                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2561                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2562                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2563                         stats->q_errors[j] += slave_stats.q_errors[j];
2564                 }
2565
2566         }
2567
2568         return 0;
2569 }
2570
2571 static void
2572 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2573 {
2574         struct bond_dev_private *internals = dev->data->dev_private;
2575         int i;
2576
2577         for (i = 0; i < internals->slave_count; i++)
2578                 rte_eth_stats_reset(internals->slaves[i].port_id);
2579 }
2580
2581 static void
2582 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2583 {
2584         struct bond_dev_private *internals = eth_dev->data->dev_private;
2585         int i;
2586
2587         internals->promiscuous_en = 1;
2588
2589         switch (internals->mode) {
2590         /* Promiscuous mode is propagated to all slaves */
2591         case BONDING_MODE_ROUND_ROBIN:
2592         case BONDING_MODE_BALANCE:
2593         case BONDING_MODE_BROADCAST:
2594                 for (i = 0; i < internals->slave_count; i++)
2595                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2596                 break;
2597         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2598         case BONDING_MODE_8023AD:
2599                 break;
2600         /* Promiscuous mode is propagated only to primary slave */
2601         case BONDING_MODE_ACTIVE_BACKUP:
2602         case BONDING_MODE_TLB:
2603         case BONDING_MODE_ALB:
2604         default:
2605                 /* Do not touch promisc when there cannot be primary ports */
2606                 if (internals->slave_count == 0)
2607                         break;
2608                 rte_eth_promiscuous_enable(internals->current_primary_port);
2609         }
2610 }
2611
2612 static void
2613 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2614 {
2615         struct bond_dev_private *internals = dev->data->dev_private;
2616         int i;
2617
2618         internals->promiscuous_en = 0;
2619
2620         switch (internals->mode) {
2621         /* Promiscuous mode is propagated to all slaves */
2622         case BONDING_MODE_ROUND_ROBIN:
2623         case BONDING_MODE_BALANCE:
2624         case BONDING_MODE_BROADCAST:
2625                 for (i = 0; i < internals->slave_count; i++)
2626                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2627                 break;
2628         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2629         case BONDING_MODE_8023AD:
2630                 break;
2631         /* Promiscuous mode is propagated only to primary slave */
2632         case BONDING_MODE_ACTIVE_BACKUP:
2633         case BONDING_MODE_TLB:
2634         case BONDING_MODE_ALB:
2635         default:
2636                 /* Do not touch promisc when there cannot be primary ports */
2637                 if (internals->slave_count == 0)
2638                         break;
2639                 rte_eth_promiscuous_disable(internals->current_primary_port);
2640         }
2641 }
2642
2643 static void
2644 bond_ethdev_delayed_lsc_propagation(void *arg)
2645 {
2646         if (arg == NULL)
2647                 return;
2648
2649         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2650                         RTE_ETH_EVENT_INTR_LSC, NULL);
2651 }
2652
2653 int
2654 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2655                 void *param, void *ret_param __rte_unused)
2656 {
2657         struct rte_eth_dev *bonded_eth_dev;
2658         struct bond_dev_private *internals;
2659         struct rte_eth_link link;
2660         int rc = -1;
2661
2662         uint8_t lsc_flag = 0;
2663         int valid_slave = 0;
2664         uint16_t active_pos;
2665         uint16_t i;
2666
2667         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2668                 return rc;
2669
2670         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2671
2672         if (check_for_bonded_ethdev(bonded_eth_dev))
2673                 return rc;
2674
2675         internals = bonded_eth_dev->data->dev_private;
2676
2677         /* If the device isn't started don't handle interrupts */
2678         if (!bonded_eth_dev->data->dev_started)
2679                 return rc;
2680
2681         /* verify that port_id is a valid slave of bonded port */
2682         for (i = 0; i < internals->slave_count; i++) {
2683                 if (internals->slaves[i].port_id == port_id) {
2684                         valid_slave = 1;
2685                         break;
2686                 }
2687         }
2688
2689         if (!valid_slave)
2690                 return rc;
2691
2692         /* Synchronize parallel lsc callback invocations, triggered either by a
2693          * real link event from the slave PMDs or by the bonding PMD itself.
2694          */
2695         rte_spinlock_lock(&internals->lsc_lock);
2696
2697         /* Search for port in active port list */
2698         active_pos = find_slave_by_id(internals->active_slaves,
2699                         internals->active_slave_count, port_id);
2700
2701         rte_eth_link_get_nowait(port_id, &link);
2702         if (link.link_status) {
2703                 if (active_pos < internals->active_slave_count)
2704                         goto link_update;
2705
2706                 /* check link state properties if the bonded link is up */
2707                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2708                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2709                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2710                                              "for slave %d in bonding mode %d",
2711                                              port_id, internals->mode);
2712                 } else {
2713                         /* inherit slave link properties */
2714                         link_properties_set(bonded_eth_dev, &link);
2715                 }
2716
2717                 /* If no active slave ports then set this port to be
2718                  * the primary port.
2719                  */
2720                 if (internals->active_slave_count < 1) {
2721                         /* If first active slave, then change link status */
2722                         bonded_eth_dev->data->dev_link.link_status =
2723                                                                 ETH_LINK_UP;
2724                         internals->current_primary_port = port_id;
2725                         lsc_flag = 1;
2726
2727                         mac_address_slaves_update(bonded_eth_dev);
2728                 }
2729
2730                 activate_slave(bonded_eth_dev, port_id);
2731
2732                 /* If the user has defined the primary port then default to
2733                  * using it.
2734                  */
2735                 if (internals->user_defined_primary_port &&
2736                                 internals->primary_port == port_id)
2737                         bond_ethdev_primary_set(internals, port_id);
2738         } else {
2739                 if (active_pos == internals->active_slave_count)
2740                         goto link_update;
2741
2742                 /* Remove from active slave list */
2743                 deactivate_slave(bonded_eth_dev, port_id);
2744
2745                 if (internals->active_slave_count < 1)
2746                         lsc_flag = 1;
2747
2748                 /* Update primary id: take the first active slave from the list,
2749                  * or fall back to the configured primary port if none is active */
2750                 if (port_id == internals->current_primary_port) {
2751                         if (internals->active_slave_count > 0)
2752                                 bond_ethdev_primary_set(internals,
2753                                                 internals->active_slaves[0]);
2754                         else
2755                                 internals->current_primary_port = internals->primary_port;
2756                 }
2757         }
2758
2759 link_update:
2760         /**
2761          * Update bonded device link properties after any change to active
2762          * slaves
2763          */
2764         bond_ethdev_link_update(bonded_eth_dev, 0);
2765
2766         if (lsc_flag) {
2767                 /* Cancel any possible outstanding interrupts if delays are enabled */
2768                 if (internals->link_up_delay_ms > 0 ||
2769                         internals->link_down_delay_ms > 0)
2770                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2771                                         bonded_eth_dev);
2772
2773                 if (bonded_eth_dev->data->dev_link.link_status) {
2774                         if (internals->link_up_delay_ms > 0)
2775                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2776                                                 bond_ethdev_delayed_lsc_propagation,
2777                                                 (void *)bonded_eth_dev);
2778                         else
2779                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2780                                                 RTE_ETH_EVENT_INTR_LSC,
2781                                                 NULL);
2782
2783                 } else {
2784                         if (internals->link_down_delay_ms > 0)
2785                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2786                                                 bond_ethdev_delayed_lsc_propagation,
2787                                                 (void *)bonded_eth_dev);
2788                         else
2789                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2790                                                 RTE_ETH_EVENT_INTR_LSC,
2791                                                 NULL);
2792                 }
2793         }
2794
2795         rte_spinlock_unlock(&internals->lsc_lock);
2796
2797         return rc;
2798 }
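/*
 * Editor's note: a hedged sketch (application code, not part of this file)
 * of arming the delayed LSC propagation handled above through the public
 * bonding API; 'bond_port_id' and the delay values are placeholders.
 *
 *     rte_eth_bond_link_up_prop_delay_set(bond_port_id, 500);    // ms
 *     rte_eth_bond_link_down_prop_delay_set(bond_port_id, 100);  // ms
 */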
2799
2800 static int
2801 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2802                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2803 {
2804         unsigned i, j;
2805         int result = 0;
2806         int slave_reta_size;
2807         unsigned reta_count;
2808         struct bond_dev_private *internals = dev->data->dev_private;
2809
2810         if (reta_size != internals->reta_size)
2811                 return -EINVAL;
2812
2813         /* Copy RETA table */
2814         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2815
2816         for (i = 0; i < reta_count; i++) {
2817                 internals->reta_conf[i].mask = reta_conf[i].mask;
2818                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2819                         if ((reta_conf[i].mask >> j) & 0x01)
2820                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2821         }
2822
2823         /* Fill rest of array */
2824         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2825                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2826                                 sizeof(internals->reta_conf[0]) * reta_count);
2827
2828         /* Propagate RETA over slaves */
2829         for (i = 0; i < internals->slave_count; i++) {
2830                 slave_reta_size = internals->slaves[i].reta_size;
2831                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2832                                 &internals->reta_conf[0], slave_reta_size);
2833                 if (result < 0)
2834                         return result;
2835         }
2836
2837         return 0;
2838 }
2839
2840 static int
2841 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2842                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2843 {
2844         int i, j;
2845         struct bond_dev_private *internals = dev->data->dev_private;
2846
2847         if (reta_size != internals->reta_size)
2848                 return -EINVAL;
2849
2850         /* Copy RETA table */
2851         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2852                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2853                         if ((reta_conf[i].mask >> j) & 0x01)
2854                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2855
2856         return 0;
2857 }
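/*
 * Editor's note: a minimal sketch (hypothetical application code) driving
 * the reta_update callback above through the generic ethdev API; it assumes
 * a bonded port 'bond_port_id' with a 128-entry RETA and two Rx queues.
 *
 *     struct rte_eth_rss_reta_entry64 reta[128 / RTE_RETA_GROUP_SIZE];
 *     unsigned int k;
 *
 *     memset(reta, 0, sizeof(reta));
 *     for (k = 0; k < 128; k++) {
 *             reta[k / RTE_RETA_GROUP_SIZE].mask |=
 *                     1ULL << (k % RTE_RETA_GROUP_SIZE);
 *             reta[k / RTE_RETA_GROUP_SIZE].reta[k % RTE_RETA_GROUP_SIZE] =
 *                     k % 2;  // spread entries over queues 0 and 1
 *     }
 *     rte_eth_dev_rss_reta_update(bond_port_id, reta, 128);
 */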
2858
2859 static int
2860 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2861                 struct rte_eth_rss_conf *rss_conf)
2862 {
2863         int i, result = 0;
2864         struct bond_dev_private *internals = dev->data->dev_private;
2865         struct rte_eth_rss_conf bond_rss_conf;
2866
2867         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2868
2869         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2870
2871         if (bond_rss_conf.rss_hf != 0)
2872                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2873
2874         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2875                         sizeof(internals->rss_key)) {
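                /* Editor's note (added comment): a caller-supplied length of
                 * zero is taken to mean the standard 40-byte Toeplitz key. */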
2876                 if (bond_rss_conf.rss_key_len == 0)
2877                         bond_rss_conf.rss_key_len = 40;
2878                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2879                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2880                                 internals->rss_key_len);
2881         }
2882
2883         for (i = 0; i < internals->slave_count; i++) {
2884                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2885                                 &bond_rss_conf);
2886                 if (result < 0)
2887                         return result;
2888         }
2889
2890         return 0;
2891 }
2892
2893 static int
2894 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2895                 struct rte_eth_rss_conf *rss_conf)
2896 {
2897         struct bond_dev_private *internals = dev->data->dev_private;
2898
2899         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2900         rss_conf->rss_key_len = internals->rss_key_len;
2901         if (rss_conf->rss_key)
2902                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2903
2904         return 0;
2905 }
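/*
 * Editor's note: a hedged sketch of exercising rss_hash_update above from
 * application code; the zeroed key is illustrative only -- any 40-byte
 * Toeplitz key works, and 'bond_port_id' is a placeholder.
 *
 *     static uint8_t key[40];  // fill with the desired Toeplitz key
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = key,
 *             .rss_key_len = sizeof(key),
 *             .rss_hf = ETH_RSS_IP,
 *     };
 *     rte_eth_dev_rss_hash_update(bond_port_id, &conf);
 */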
2906
2907 static int
2908 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2909 {
2910         struct rte_eth_dev *slave_eth_dev;
2911         struct bond_dev_private *internals = dev->data->dev_private;
2912         int ret, i;
2913
2914         rte_spinlock_lock(&internals->lock);
2915
2916         for (i = 0; i < internals->slave_count; i++) {
2917                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2918                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2919                         rte_spinlock_unlock(&internals->lock);
2920                         return -ENOTSUP;
2921                 }
2922         }
2923         for (i = 0; i < internals->slave_count; i++) {
2924                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2925                 if (ret < 0) {
2926                         rte_spinlock_unlock(&internals->lock);
2927                         return ret;
2928                 }
2929         }
2930
2931         rte_spinlock_unlock(&internals->lock);
2932         return 0;
2933 }
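/*
 * Editor's note: usage sketch for the mtu_set callback above. The two loops
 * mean the call fails up front with -ENOTSUP if any slave lacks mtu_set, but
 * a mid-loop failure leaves earlier slaves with the new MTU; there is no
 * rollback. 'bond_port_id' is a placeholder.
 *
 *     if (rte_eth_dev_set_mtu(bond_port_id, 9000) < 0)
 *             ; // handle failure: slaves may now have mixed MTUs
 */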
2934
2935 static int
2936 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2937 {
2938         if (mac_address_set(dev, addr)) {
2939                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2940                 return -EINVAL;
2941         }
2942
2943         return 0;
2944 }
2945
2946 static int
2947 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2948                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2949 {
2950         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2951                 *(const void **)arg = &bond_flow_ops;
2952                 return 0;
2953         }
2954         return -ENOTSUP;
2955 }
2956
2957 static int
2958 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2959                                 __rte_unused uint32_t index, uint32_t vmdq)
2960 {
2961         struct rte_eth_dev *slave_eth_dev;
2962         struct bond_dev_private *internals = dev->data->dev_private;
2963         int ret, i;
2964
2965         rte_spinlock_lock(&internals->lock);
2966
2967         for (i = 0; i < internals->slave_count; i++) {
2968                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2969                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2970                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2971                         ret = -ENOTSUP;
2972                         goto end;
2973                 }
2974         }
2975
2976         for (i = 0; i < internals->slave_count; i++) {
2977                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2978                                 mac_addr, vmdq);
2979                 if (ret < 0) {
2980                         /* rollback */
2981                         for (i--; i >= 0; i--)
2982                                 rte_eth_dev_mac_addr_remove(
2983                                         internals->slaves[i].port_id, mac_addr);
2984                         goto end;
2985                 }
2986         }
2987
2988         ret = 0;
2989 end:
2990         rte_spinlock_unlock(&internals->lock);
2991         return ret;
2992 }
2993
2994 static void
2995 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2996 {
2997         struct rte_eth_dev *slave_eth_dev;
2998         struct bond_dev_private *internals = dev->data->dev_private;
2999         int i;
3000
3001         rte_spinlock_lock(&internals->lock);
3002
3003         for (i = 0; i < internals->slave_count; i++) {
3004                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3005                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3006                         goto end;
3007         }
3008
3009         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
3010
3011         for (i = 0; i < internals->slave_count; i++)
3012                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3013                                 mac_addr);
3014
3015 end:
3016         rte_spinlock_unlock(&internals->lock);
3017 }
3018
3019 const struct eth_dev_ops default_dev_ops = {
3020         .dev_start            = bond_ethdev_start,
3021         .dev_stop             = bond_ethdev_stop,
3022         .dev_close            = bond_ethdev_close,
3023         .dev_configure        = bond_ethdev_configure,
3024         .dev_infos_get        = bond_ethdev_info,
3025         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3026         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3027         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3028         .rx_queue_release     = bond_ethdev_rx_queue_release,
3029         .tx_queue_release     = bond_ethdev_tx_queue_release,
3030         .link_update          = bond_ethdev_link_update,
3031         .stats_get            = bond_ethdev_stats_get,
3032         .stats_reset          = bond_ethdev_stats_reset,
3033         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3034         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3035         .reta_update          = bond_ethdev_rss_reta_update,
3036         .reta_query           = bond_ethdev_rss_reta_query,
3037         .rss_hash_update      = bond_ethdev_rss_hash_update,
3038         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3039         .mtu_set              = bond_ethdev_mtu_set,
3040         .mac_addr_set         = bond_ethdev_mac_address_set,
3041         .mac_addr_add         = bond_ethdev_mac_addr_add,
3042         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3043         .filter_ctrl          = bond_filter_ctrl
3044 };
3045
3046 static int
3047 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3048 {
3049         const char *name = rte_vdev_device_name(dev);
3050         uint8_t socket_id = dev->device.numa_node;
3051         struct bond_dev_private *internals = NULL;
3052         struct rte_eth_dev *eth_dev = NULL;
3053         uint32_t vlan_filter_bmp_size;
3054
3055         /* now do all data allocation - for eth_dev structure, dummy pci driver
3056          * and internal (private) data
3057          */
3058
3059         /* reserve an ethdev entry */
3060         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3061         if (eth_dev == NULL) {
3062                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3063                 goto err;
3064         }
3065
3066         internals = eth_dev->data->dev_private;
3067         eth_dev->data->nb_rx_queues = (uint16_t)1;
3068         eth_dev->data->nb_tx_queues = (uint16_t)1;
3069
3070         /* Allocate memory for storing MAC addresses */
3071         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3072                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3073         if (eth_dev->data->mac_addrs == NULL) {
3074                 RTE_BOND_LOG(ERR,
3075                              "Failed to allocate %u bytes needed to store MAC addresses",
3076                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3077                 goto err;
3078         }
3079
3080         eth_dev->dev_ops = &default_dev_ops;
3081         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3082
3083         rte_spinlock_init(&internals->lock);
3084         rte_spinlock_init(&internals->lsc_lock);
3085
3086         internals->port_id = eth_dev->data->port_id;
3087         internals->mode = BONDING_MODE_INVALID;
3088         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3089         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3090         internals->burst_xmit_hash = burst_xmit_l2_hash;
3091         internals->user_defined_mac = 0;
3092
3093         internals->link_status_polling_enabled = 0;
3094
3095         internals->link_status_polling_interval_ms =
3096                 DEFAULT_POLLING_INTERVAL_10_MS;
3097         internals->link_down_delay_ms = 0;
3098         internals->link_up_delay_ms = 0;
3099
3100         internals->slave_count = 0;
3101         internals->active_slave_count = 0;
3102         internals->rx_offload_capa = 0;
3103         internals->tx_offload_capa = 0;
3104         internals->rx_queue_offload_capa = 0;
3105         internals->tx_queue_offload_capa = 0;
3106         internals->candidate_max_rx_pktlen = 0;
3107         internals->max_rx_pktlen = 0;
3108
3109         /* Initially allow any RSS offload type to be chosen */
3110         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3111
3112         memset(&internals->default_rxconf, 0,
3113                sizeof(internals->default_rxconf));
3114         memset(&internals->default_txconf, 0,
3115                sizeof(internals->default_txconf));
3116
3117         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3118         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3119
3120         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3121         memset(internals->slaves, 0, sizeof(internals->slaves));
3122
3123         TAILQ_INIT(&internals->flow_list);
3124         internals->flow_isolated_valid = 0;
3125
3126         /* Set mode 4 default configuration */
3127         bond_mode_8023ad_setup(eth_dev, NULL);
3128         if (bond_ethdev_mode_set(eth_dev, mode)) {
3129                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3130                                  eth_dev->data->port_id, mode);
3131                 goto err;
3132         }
3133
3134         vlan_filter_bmp_size =
3135                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3136         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3137                                                    RTE_CACHE_LINE_SIZE);
3138         if (internals->vlan_filter_bmpmem == NULL) {
3139                 RTE_BOND_LOG(ERR,
3140                              "Failed to allocate vlan bitmap for bonded device %u",
3141                              eth_dev->data->port_id);
3142                 goto err;
3143         }
3144
3145         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3146                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3147         if (internals->vlan_filter_bmp == NULL) {
3148                 RTE_BOND_LOG(ERR,
3149                              "Failed to init vlan bitmap for bonded device %u",
3150                              eth_dev->data->port_id);
3151                 rte_free(internals->vlan_filter_bmpmem);
3152                 goto err;
3153         }
3154
3155         return eth_dev->data->port_id;
3156
3157 err:
3158         rte_free(internals);
3159         if (eth_dev != NULL)
3160                 eth_dev->data->dev_private = NULL;
3161         rte_eth_dev_release_port(eth_dev);
3162         return -1;
3163 }
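/*
 * Editor's note: a hedged sketch of reaching bond_alloc() through the public
 * bonding API instead of --vdev; rte_eth_bond_create() goes through
 * rte_vdev_init() and bond_probe(). 'slave_port_id' is a placeholder.
 *
 *     int bond_port_id = rte_eth_bond_create("net_bonding0",
 *                                            BONDING_MODE_BALANCE,
 *                                            rte_socket_id());
 *     if (bond_port_id >= 0)
 *             rte_eth_bond_slave_add(bond_port_id, slave_port_id);
 */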
3164
3165 static int
3166 bond_probe(struct rte_vdev_device *dev)
3167 {
3168         const char *name;
3169         struct bond_dev_private *internals;
3170         struct rte_kvargs *kvlist;
3171         uint8_t bonding_mode, socket_id;
3172         int  arg_count, port_id;
3173         uint8_t agg_mode;
3174         struct rte_eth_dev *eth_dev;
3175
3176         if (!dev)
3177                 return -EINVAL;
3178
3179         name = rte_vdev_device_name(dev);
3180         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3181
3182         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3183                 eth_dev = rte_eth_dev_attach_secondary(name);
3184                 if (!eth_dev) {
3185                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3186                         return -1;
3187                 }
3188                 /* TODO: request info from primary to set up Rx and Tx */
3189                 eth_dev->dev_ops = &default_dev_ops;
3190                 eth_dev->device = &dev->device;
3191                 rte_eth_dev_probing_finish(eth_dev);
3192                 return 0;
3193         }
3194
3195         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3196                 pmd_bond_init_valid_arguments);
3197         if (kvlist == NULL)
3198                 return -1;
3199
3200         /* Parse link bonding mode */
3201         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3202                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3203                                 &bond_ethdev_parse_slave_mode_kvarg,
3204                                 &bonding_mode) != 0) {
3205                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3206                                         name);
3207                         goto parse_error;
3208                 }
3209         } else {
3210                 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3211                                 "device %s", name);
3212                 goto parse_error;
3213         }
3214
3215         /* Parse socket id to create bonding device on */
3216         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3217         if (arg_count == 1) {
3218                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3219                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3220                                 != 0) {
3221                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3222                                         "bonded device %s", name);
3223                         goto parse_error;
3224                 }
3225         } else if (arg_count > 1) {
3226                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3227                                 "bonded device %s", name);
3228                 goto parse_error;
3229         } else {
3230                 socket_id = rte_socket_id();
3231         }
3232
3233         dev->device.numa_node = socket_id;
3234
3235         /* Create link bonding eth device */
3236         port_id = bond_alloc(dev, bonding_mode);
3237         if (port_id < 0) {
3238                 RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on "
3239                                 "socket %u.",   name, bonding_mode, socket_id);
3240                 goto parse_error;
3241         }
3242         internals = rte_eth_devices[port_id].data->dev_private;
3243         internals->kvlist = kvlist;
3244
3245         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3246                 if (rte_kvargs_process(kvlist,
3247                                 PMD_BOND_AGG_MODE_KVARG,
3248                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3249                                 &agg_mode) != 0) {
3250                         RTE_BOND_LOG(ERR,
3251                                         "Failed to parse agg selection mode for bonded device %s",
3252                                         name);
3253                         goto parse_error;
3254                 }
3255
3256                 if (internals->mode == BONDING_MODE_8023AD)
3257                         internals->mode4.agg_selection = agg_mode;
3258         } else {
3259                 internals->mode4.agg_selection = AGG_STABLE;
3260         }
3261
3262         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3263         RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3264                         "socket %u.",   name, port_id, bonding_mode, socket_id);
3265         return 0;
3266
3267 parse_error:
3268         rte_kvargs_free(kvlist);
3269
3270         return -1;
3271 }
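/*
 * Editor's note: a minimal sketch of triggering bond_probe() at runtime
 * rather than from the EAL command line; the kvargs string mirrors what is
 * parsed above and the PCI addresses are placeholders.
 *
 *     rte_vdev_init("net_bonding0",
 *                   "mode=2,slave=0000:00:08.0,slave=0000:00:09.0");
 */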
3272
3273 static int
3274 bond_remove(struct rte_vdev_device *dev)
3275 {
3276         struct rte_eth_dev *eth_dev;
3277         struct bond_dev_private *internals;
3278         const char *name;
3279
3280         if (!dev)
3281                 return -EINVAL;
3282
3283         name = rte_vdev_device_name(dev);
3284         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3285
3286         /* now free all allocated data - the eth_dev structure,
3287          * dummy pci driver and internal (private) data
3288          */
3289
3290         /* find an ethdev entry */
3291         eth_dev = rte_eth_dev_allocated(name);
3292         if (eth_dev == NULL)
3293                 return -ENODEV;
3294
3295         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3296                 return rte_eth_dev_release_port(eth_dev);
3297
3298         RTE_ASSERT(eth_dev->device == &dev->device);
3299
3300         internals = eth_dev->data->dev_private;
3301         if (internals->slave_count != 0)
3302                 return -EBUSY;
3303
3304         if (eth_dev->data->dev_started == 1) {
3305                 bond_ethdev_stop(eth_dev);
3306                 bond_ethdev_close(eth_dev);
3307         }
3308
3309         eth_dev->dev_ops = NULL;
3310         eth_dev->rx_pkt_burst = NULL;
3311         eth_dev->tx_pkt_burst = NULL;
3312
3313         internals = eth_dev->data->dev_private;
3314         /* Try to release the mempool used in mode 6. If the bonded
3315          * device is not in mode 6, freeing a NULL pointer is harmless.
3316          */
3317         rte_mempool_free(internals->mode6.mempool);
3318         rte_bitmap_free(internals->vlan_filter_bmp);
3319         rte_free(internals->vlan_filter_bmpmem);
3320
3321         rte_eth_dev_release_port(eth_dev);
3322
3323         return 0;
3324 }
3325
3326 /* This part resolves the slave port ids after all the other pdevs and vdevs
3327  * have been allocated */
3328 static int
3329 bond_ethdev_configure(struct rte_eth_dev *dev)
3330 {
3331         const char *name = dev->device->name;
3332         struct bond_dev_private *internals = dev->data->dev_private;
3333         struct rte_kvargs *kvlist = internals->kvlist;
3334         int arg_count;
3335         uint16_t port_id = dev - rte_eth_devices;
3336         uint8_t agg_mode;
3337
3338         static const uint8_t default_rss_key[40] = {
3339                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3340                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3341                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3342                 0xBE, 0xAC, 0x01, 0xFA
3343         };
3344
3345         unsigned i, j;
3346
3347         /*
3348          * If RSS is enabled, fill table with default values and
3349          * set the key to the value specified in the port RSS configuration.
3350          * Fall back to default RSS key if the key is not specified
3351          */
3352         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3353                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3354                         internals->rss_key_len =
3355                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3356                         memcpy(internals->rss_key,
3357                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3358                                internals->rss_key_len);
3359                 } else {
3360                         internals->rss_key_len = sizeof(default_rss_key);
3361                         memcpy(internals->rss_key, default_rss_key,
3362                                internals->rss_key_len);
3363                 }
3364
3365                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3366                         internals->reta_conf[i].mask = ~0LL;
3367                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3368                                 internals->reta_conf[i].reta[j] =
3369                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3370                                                 dev->data->nb_rx_queues;
3371                 }
3372         }
3373
3374         /* set the max_rx_pktlen */
3375         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3376
3377         /*
3378          * if no kvlist, it means that this bonded device has been created
3379          * through the bonding api.
3380          */
3381         if (!kvlist)
3382                 return 0;
3383
3384         /* Parse MAC address for bonded device */
3385         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3386         if (arg_count == 1) {
3387                 struct ether_addr bond_mac;
3388
3389                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3390                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3391                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3392                                      name);
3393                         return -1;
3394                 }
3395
3396                 /* Set MAC address */
3397                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3398                         RTE_BOND_LOG(ERR,
3399                                      "Failed to set mac address on bonded device %s",
3400                                      name);
3401                         return -1;
3402                 }
3403         } else if (arg_count > 1) {
3404                 RTE_BOND_LOG(ERR,
3405                              "MAC address can be specified only once for bonded device %s",
3406                              name);
3407                 return -1;
3408         }
3409
3410         /* Parse/set balance mode transmit policy */
3411         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3412         if (arg_count == 1) {
3413                 uint8_t xmit_policy;
3414
3415                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3416                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3417                     0) {
3418                         RTE_BOND_LOG(INFO,
3419                                      "Invalid xmit policy specified for bonded device %s",
3420                                      name);
3421                         return -1;
3422                 }
3423
3424                 /* Set balance mode transmit policy */
3425                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3426                         RTE_BOND_LOG(ERR,
3427                                      "Failed to set balance xmit policy on bonded device %s",
3428                                      name);
3429                         return -1;
3430                 }
3431         } else if (arg_count > 1) {
3432                 RTE_BOND_LOG(ERR,
3433                              "Transmit policy can be specified only once for bonded device %s",
3434                              name);
3435                 return -1;
3436         }
3437
3438         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3439                 if (rte_kvargs_process(kvlist,
3440                                        PMD_BOND_AGG_MODE_KVARG,
3441                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3442                                        &agg_mode) != 0) {
3443                         RTE_BOND_LOG(ERR,
3444                                      "Failed to parse agg selection mode for bonded device %s",
3445                                      name);
3446                 }
3447                 if (internals->mode == BONDING_MODE_8023AD) {
3448                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3449                                         agg_mode);
3450                         if (ret < 0) {
3451                                 RTE_BOND_LOG(ERR,
3452                                         "Invalid args for agg selection set for bonded device %s",
3453                                         name);
3454                                 return -1;
3455                         }
3456                 }
3457         }
3458
3459         /* Parse/add slave ports to bonded device */
3460         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3461                 struct bond_ethdev_slave_ports slave_ports;
3462                 unsigned i;
3463
3464                 memset(&slave_ports, 0, sizeof(slave_ports));
3465
3466                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3467                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3468                         RTE_BOND_LOG(ERR,
3469                                      "Failed to parse slave ports for bonded device %s",
3470                                      name);
3471                         return -1;
3472                 }
3473
3474                 for (i = 0; i < slave_ports.slave_count; i++) {
3475                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3476                                 RTE_BOND_LOG(ERR,
3477                                              "Failed to add port %d as slave to bonded device %s",
3478                                              slave_ports.slaves[i], name);
3479                         }
3480                 }
3481
3482         } else {
3483                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3484                 return -1;
3485         }
3486
3487         /* Parse/set primary slave port id*/
3488         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3489         if (arg_count == 1) {
3490                 uint16_t primary_slave_port_id;
3491
3492                 if (rte_kvargs_process(kvlist,
3493                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3494                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3495                                        &primary_slave_port_id) < 0) {
3496                         RTE_BOND_LOG(INFO,
3497                                      "Invalid primary slave port id specified for bonded device %s",
3498                                      name);
3499                         return -1;
3500                 }
3501
3502                 /* Set primary slave port id */
3503                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3504                     != 0) {
3505                         RTE_BOND_LOG(ERR,
3506                                      "Failed to set primary slave port %d on bonded device %s",
3507                                      primary_slave_port_id, name);
3508                         return -1;
3509                 }
3510         } else if (arg_count > 1) {
3511                 RTE_BOND_LOG(INFO,
3512                              "Primary slave can be specified only once for bonded device %s",
3513                              name);
3514                 return -1;
3515         }
3516
3517         /* Parse link status monitor polling interval */
3518         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3519         if (arg_count == 1) {
3520                 uint32_t lsc_poll_interval_ms;
3521
3522                 if (rte_kvargs_process(kvlist,
3523                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3524                                        &bond_ethdev_parse_time_ms_kvarg,
3525                                        &lsc_poll_interval_ms) < 0) {
3526                         RTE_BOND_LOG(INFO,
3527                                      "Invalid lsc polling interval value specified for bonded"
3528                                      " device %s", name);
3529                         return -1;
3530                 }
3531
3532                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3533                     != 0) {
3534                         RTE_BOND_LOG(ERR,
3535                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3536                                      lsc_poll_interval_ms, name);
3537                         return -1;
3538                 }
3539         } else if (arg_count > 1) {
3540                 RTE_BOND_LOG(INFO,
3541                              "LSC polling interval can be specified only once for bonded"
3542                              " device %s", name);
3543                 return -1;
3544         }
3545
3546         /* Parse link up interrupt propagation delay */
3547         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3548         if (arg_count == 1) {
3549                 uint32_t link_up_delay_ms;
3550
3551                 if (rte_kvargs_process(kvlist,
3552                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3553                                        &bond_ethdev_parse_time_ms_kvarg,
3554                                        &link_up_delay_ms) < 0) {
3555                         RTE_BOND_LOG(INFO,
3556                                      "Invalid link up propagation delay value specified for"
3557                                      " bonded device %s", name);
3558                         return -1;
3559                 }
3560
3561                 /* Set link up propagation delay */
3562                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3563                     != 0) {
3564                         RTE_BOND_LOG(ERR,
3565                                      "Failed to set link up propagation delay (%u ms) on bonded"
3566                                      " device %s", link_up_delay_ms, name);
3567                         return -1;
3568                 }
3569         } else if (arg_count > 1) {
3570                 RTE_BOND_LOG(INFO,
3571                              "Link up propagation delay can be specified only once for"
3572                              " bonded device %s", name);
3573                 return -1;
3574         }
3575
3576         /* Parse link down interrupt propagation delay */
3577         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3578         if (arg_count == 1) {
3579                 uint32_t link_down_delay_ms;
3580
3581                 if (rte_kvargs_process(kvlist,
3582                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3583                                        &bond_ethdev_parse_time_ms_kvarg,
3584                                        &link_down_delay_ms) < 0) {
3585                         RTE_BOND_LOG(INFO,
3586                                      "Invalid link down propagation delay value specified for"
3587                                      " bonded device %s", name);
3588                         return -1;
3589                 }
3590
3591                 /* Set link down propagation delay */
3592                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3593                     != 0) {
3594                         RTE_BOND_LOG(ERR,
3595                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3596                                      link_down_delay_ms, name);
3597                         return -1;
3598                 }
3599         } else if (arg_count > 1) {
3600                 RTE_BOND_LOG(INFO,
3601                              "Link down propagation delay can be specified only once for  bonded device %s",
3602                              name);
3603                 return -1;
3604         }
3605
3606         return 0;
3607 }
3608
3609 struct rte_vdev_driver pmd_bond_drv = {
3610         .probe = bond_probe,
3611         .remove = bond_remove,
3612 };
3613
3614 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3615 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3616
3617 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3618         "slave=<ifc> "
3619         "primary=<ifc> "
3620         "mode=[0-6] "
3621         "xmit_policy=[l2 | l23 | l34] "
3622         "agg_mode=[count | stable | bandwidth] "
3623         "socket_id=<int> "
3624         "mac=<mac addr> "
3625         "lsc_poll_period_ms=<int> "
3626         "up_delay=<int> "
3627         "down_delay=<int>");
3628
3629 int bond_logtype;
3630
3631 RTE_INIT(bond_init_log)
3632 {
3633         bond_logtype = rte_log_register("pmd.net.bond");
3634         if (bond_logtype >= 0)
3635                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3636 }