net/bonding: inherit descriptor limits from slaves
[dpdk.git] / drivers / net / bonding / rte_eth_bond_pmd.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <netinet/in.h>
6
7 #include <rte_mbuf.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_ethdev_vdev.h>
11 #include <rte_tcp.h>
12 #include <rte_udp.h>
13 #include <rte_ip.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
20 #include <rte_string_fns.h>
21
22 #include "rte_eth_bond.h"
23 #include "rte_eth_bond_private.h"
24 #include "rte_eth_bond_8023ad_private.h"
25
26 #define REORDER_PERIOD_MS 10
27 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
28 #define BOND_MAX_MAC_ADDRS 16
29
30 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
31
32 /* Table for statistics in mode 5 TLB */
33 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
34
35 static inline size_t
36 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
37 {
38         size_t vlan_offset = 0;
39
40         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
41                 rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
42                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
43
44                 vlan_offset = sizeof(struct vlan_hdr);
45                 *proto = vlan_hdr->eth_proto;
46
47                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
48                         vlan_hdr = vlan_hdr + 1;
49                         *proto = vlan_hdr->eth_proto;
50                         vlan_offset += sizeof(struct vlan_hdr);
51                 }
52         }
53         return vlan_offset;
54 }
55
56 static uint16_t
57 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
58 {
59         struct bond_dev_private *internals;
60
61         uint16_t num_rx_total = 0;
62         uint16_t slave_count;
63         uint16_t active_slave;
64         int i;
65
66         /* Cast to structure, containing bonded device's port id and queue id */
67         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
68         internals = bd_rx_q->dev_private;
69         slave_count = internals->active_slave_count;
70         active_slave = internals->active_slave;
71
72         for (i = 0; i < slave_count && nb_pkts; i++) {
73                 uint16_t num_rx_slave;
74
75                 /* Offset of pointer to *bufs increases as packets are received
76                  * from other slaves */
77                 num_rx_slave =
78                         rte_eth_rx_burst(internals->active_slaves[active_slave],
79                                          bd_rx_q->queue_id,
80                                          bufs + num_rx_total, nb_pkts);
81                 num_rx_total += num_rx_slave;
82                 nb_pkts -= num_rx_slave;
83                 if (++active_slave == slave_count)
84                         active_slave = 0;
85         }
86
87         if (++internals->active_slave == slave_count)
88                 internals->active_slave = 0;
89         return num_rx_total;
90 }
91
92 static uint16_t
93 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
94                 uint16_t nb_pkts)
95 {
96         struct bond_dev_private *internals;
97
98         /* Cast to structure, containing bonded device's port id and queue id */
99         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
100
101         internals = bd_rx_q->dev_private;
102
103         return rte_eth_rx_burst(internals->current_primary_port,
104                         bd_rx_q->queue_id, bufs, nb_pkts);
105 }
106
107 static inline uint8_t
108 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
109 {
110         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
111
112         return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
113                 (ethertype == ether_type_slow_be &&
114                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
115 }
116
117 /*****************************************************************************
118  * Flow director's setup for mode 4 optimization
119  */
120
/* Ethernet item spec: EtherType set to the slow-protocols type, addresses
 * left as zero. */
static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),
};

/* Ethernet item mask: all 16 EtherType bits set, address masks all zero. */
static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

/* Pattern passed to rte_flow_validate()/rte_flow_create() below: a single
 * ETH item (spec + mask above) followed by the END terminator. */
static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

/* Flow attributes used with the pattern above: ingress only, group 0,
 * priority 0. */
const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};
155
156 int
157 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
158                 uint16_t slave_port) {
159         struct rte_eth_dev_info slave_info;
160         struct rte_flow_error error;
161         struct bond_dev_private *internals = (struct bond_dev_private *)
162                         (bond_dev->data->dev_private);
163
164         const struct rte_flow_action_queue lacp_queue_conf = {
165                 .index = 0,
166         };
167
168         const struct rte_flow_action actions[] = {
169                 {
170                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
171                         .conf = &lacp_queue_conf
172                 },
173                 {
174                         .type = RTE_FLOW_ACTION_TYPE_END,
175                 }
176         };
177
178         int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
179                         flow_item_8023ad, actions, &error);
180         if (ret < 0) {
181                 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
182                                 __func__, error.message, slave_port,
183                                 internals->mode4.dedicated_queues.rx_qid);
184                 return -1;
185         }
186
187         rte_eth_dev_info_get(slave_port, &slave_info);
188         if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
189                         slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
190                 RTE_BOND_LOG(ERR,
191                         "%s: Slave %d capabilities doesn't allow to allocate additional queues",
192                         __func__, slave_port);
193                 return -1;
194         }
195
196         return 0;
197 }
198
199 int
200 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
201         struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
202         struct bond_dev_private *internals = (struct bond_dev_private *)
203                         (bond_dev->data->dev_private);
204         struct rte_eth_dev_info bond_info;
205         uint16_t idx;
206
207         /* Verify if all slaves in bonding supports flow director and */
208         if (internals->slave_count > 0) {
209                 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
210
211                 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
212                 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
213
214                 for (idx = 0; idx < internals->slave_count; idx++) {
215                         if (bond_ethdev_8023ad_flow_verify(bond_dev,
216                                         internals->slaves[idx].port_id) != 0)
217                                 return -1;
218                 }
219         }
220
221         return 0;
222 }
223
224 int
225 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
226
227         struct rte_flow_error error;
228         struct bond_dev_private *internals = (struct bond_dev_private *)
229                         (bond_dev->data->dev_private);
230
231         struct rte_flow_action_queue lacp_queue_conf = {
232                 .index = internals->mode4.dedicated_queues.rx_qid,
233         };
234
235         const struct rte_flow_action actions[] = {
236                 {
237                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
238                         .conf = &lacp_queue_conf
239                 },
240                 {
241                         .type = RTE_FLOW_ACTION_TYPE_END,
242                 }
243         };
244
245         internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
246                         &flow_attr_8023ad, flow_item_8023ad, actions, &error);
247         if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
248                 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
249                                 "(slave_port=%d queue_id=%d)",
250                                 error.message, slave_port,
251                                 internals->mode4.dedicated_queues.rx_qid);
252                 return -1;
253         }
254
255         return 0;
256 }
257
258 static uint16_t
259 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
260                 uint16_t nb_pkts)
261 {
262         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
263         struct bond_dev_private *internals = bd_rx_q->dev_private;
264         uint16_t num_rx_total = 0;      /* Total number of received packets */
265         uint16_t slaves[RTE_MAX_ETHPORTS];
266         uint16_t slave_count;
267         uint16_t active_slave;
268         uint16_t i;
269
270         /* Copy slave list to protect against slave up/down changes during tx
271          * bursting */
272         slave_count = internals->active_slave_count;
273         active_slave = internals->active_slave;
274         memcpy(slaves, internals->active_slaves,
275                         sizeof(internals->active_slaves[0]) * slave_count);
276
277         for (i = 0; i < slave_count && nb_pkts; i++) {
278                 uint16_t num_rx_slave;
279
280                 /* Read packets from this slave */
281                 num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
282                                                 bd_rx_q->queue_id,
283                                                 bufs + num_rx_total, nb_pkts);
284                 num_rx_total += num_rx_slave;
285                 nb_pkts -= num_rx_slave;
286
287                 if (++active_slave == slave_count)
288                         active_slave = 0;
289         }
290
291         if (++internals->active_slave == slave_count)
292                 internals->active_slave = 0;
293
294         return num_rx_total;
295 }
296
297 static uint16_t
298 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
299                 uint16_t nb_bufs)
300 {
301         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
302         struct bond_dev_private *internals = bd_tx_q->dev_private;
303
304         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
305         uint16_t slave_count;
306
307         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
308         uint16_t dist_slave_count;
309
310         /* 2-D array to sort mbufs for transmission on each slave into */
311         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
312         /* Number of mbufs for transmission on each slave */
313         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
314         /* Mapping array generated by hash function to map mbufs to slaves */
315         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
316
317         uint16_t slave_tx_count;
318         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
319
320         uint16_t i;
321
322         if (unlikely(nb_bufs == 0))
323                 return 0;
324
325         /* Copy slave list to protect against slave up/down changes during tx
326          * bursting */
327         slave_count = internals->active_slave_count;
328         if (unlikely(slave_count < 1))
329                 return 0;
330
331         memcpy(slave_port_ids, internals->active_slaves,
332                         sizeof(slave_port_ids[0]) * slave_count);
333
334
335         dist_slave_count = 0;
336         for (i = 0; i < slave_count; i++) {
337                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
338
339                 if (ACTOR_STATE(port, DISTRIBUTING))
340                         dist_slave_port_ids[dist_slave_count++] =
341                                         slave_port_ids[i];
342         }
343
344         if (unlikely(dist_slave_count < 1))
345                 return 0;
346
347         /*
348          * Populate slaves mbuf with the packets which are to be sent on it
349          * selecting output slave using hash based on xmit policy
350          */
351         internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
352                         bufs_slave_port_idxs);
353
354         for (i = 0; i < nb_bufs; i++) {
355                 /* Populate slave mbuf arrays with mbufs for that slave. */
356                 uint8_t slave_idx = bufs_slave_port_idxs[i];
357
358                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
359         }
360
361
362         /* Send packet burst on each slave device */
363         for (i = 0; i < dist_slave_count; i++) {
364                 if (slave_nb_bufs[i] == 0)
365                         continue;
366
367                 slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
368                                 bd_tx_q->queue_id, slave_bufs[i],
369                                 slave_nb_bufs[i]);
370
371                 total_tx_count += slave_tx_count;
372
373                 /* If tx burst fails move packets to end of bufs */
374                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
375                         int slave_tx_fail_count = slave_nb_bufs[i] -
376                                         slave_tx_count;
377                         total_tx_fail_count += slave_tx_fail_count;
378                         memcpy(&bufs[nb_bufs - total_tx_fail_count],
379                                &slave_bufs[i][slave_tx_count],
380                                slave_tx_fail_count * sizeof(bufs[0]));
381                 }
382         }
383
384         return total_tx_count;
385 }
386
387
388 static uint16_t
389 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
390                 uint16_t nb_pkts)
391 {
392         /* Cast to structure, containing bonded device's port id and queue id */
393         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
394         struct bond_dev_private *internals = bd_rx_q->dev_private;
395         struct ether_addr bond_mac;
396
397         struct ether_hdr *hdr;
398
399         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
400         uint16_t num_rx_total = 0;      /* Total number of received packets */
401         uint16_t slaves[RTE_MAX_ETHPORTS];
402         uint16_t slave_count, idx;
403
404         uint8_t collecting;  /* current slave collecting status */
405         const uint8_t promisc = internals->promiscuous_en;
406         uint8_t i, j, k;
407         uint8_t subtype;
408
409         rte_eth_macaddr_get(internals->port_id, &bond_mac);
410         /* Copy slave list to protect against slave up/down changes during tx
411          * bursting */
412         slave_count = internals->active_slave_count;
413         memcpy(slaves, internals->active_slaves,
414                         sizeof(internals->active_slaves[0]) * slave_count);
415
416         idx = internals->active_slave;
417         if (idx >= slave_count) {
418                 internals->active_slave = 0;
419                 idx = 0;
420         }
421         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
422                 j = num_rx_total;
423                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
424                                          COLLECTING);
425
426                 /* Read packets from this slave */
427                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
428                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
429
430                 for (k = j; k < 2 && k < num_rx_total; k++)
431                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
432
433                 /* Handle slow protocol packets. */
434                 while (j < num_rx_total) {
435
436                         /* If packet is not pure L2 and is known, skip it */
437                         if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
438                                 j++;
439                                 continue;
440                         }
441
442                         if (j + 3 < num_rx_total)
443                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
444
445                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
446                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
447
448                         /* Remove packet from array if it is slow packet or slave is not
449                          * in collecting state or bonding interface is not in promiscuous
450                          * mode and packet address does not match. */
451                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
452                                 !collecting || (!promisc &&
453                                         !is_multicast_ether_addr(&hdr->d_addr) &&
454                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
455
456                                 if (hdr->ether_type == ether_type_slow_be) {
457                                         bond_mode_8023ad_handle_slow_pkt(
458                                             internals, slaves[idx], bufs[j]);
459                                 } else
460                                         rte_pktmbuf_free(bufs[j]);
461
462                                 /* Packet is managed by mode 4 or dropped, shift the array */
463                                 num_rx_total--;
464                                 if (j < num_rx_total) {
465                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
466                                                 (num_rx_total - j));
467                                 }
468                         } else
469                                 j++;
470                 }
471                 if (unlikely(++idx == slave_count))
472                         idx = 0;
473         }
474
475         if (++internals->active_slave == slave_count)
476                 internals->active_slave = 0;
477
478         return num_rx_total;
479 }
480
481 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
482 uint32_t burstnumberRX;
483 uint32_t burstnumberTX;
484
485 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
486
487 static void
488 arp_op_name(uint16_t arp_op, char *buf)
489 {
490         switch (arp_op) {
491         case ARP_OP_REQUEST:
492                 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
493                 return;
494         case ARP_OP_REPLY:
495                 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
496                 return;
497         case ARP_OP_REVREQUEST:
498                 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
499                                 "Reverse ARP Request");
500                 return;
501         case ARP_OP_REVREPLY:
502                 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
503                                 "Reverse ARP Reply");
504                 return;
505         case ARP_OP_INVREQUEST:
506                 snprintf(buf, sizeof("Peer Identify Request"), "%s",
507                                 "Peer Identify Request");
508                 return;
509         case ARP_OP_INVREPLY:
510                 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
511                                 "Peer Identify Reply");
512                 return;
513         default:
514                 break;
515         }
516         snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
517         return;
518 }
519 #endif
520 #define MaxIPv4String   16
/*
 * Format a big-endian IPv4 address as dotted-decimal text.
 *
 * @param be_ipv4_addr  IPv4 address in network byte order.
 * @param buf           Destination buffer.
 * @param buf_size      Size of buf in bytes; output is truncated to fit.
 */
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;
        unsigned int octets[4];

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);

        /* Split into octets, most significant first */
        octets[0] = (ipv4_addr >> 24) & 0xFF;
        octets[1] = (ipv4_addr >> 16) & 0xFF;
        octets[2] = (ipv4_addr >> 8) & 0xFF;
        octets[3] = ipv4_addr & 0xFF;

        /* The arguments are unsigned, so use %u rather than %d */
        snprintf(buf, buf_size, "%u.%u.%u.%u", octets[0], octets[1],
                octets[2], octets[3]);
}
531
532 #define MAX_CLIENTS_NUMBER      128
533 uint8_t active_clients;
534 struct client_stats_t {
535         uint16_t port;
536         uint32_t ipv4_addr;
537         uint32_t ipv4_rx_packets;
538         uint32_t ipv4_tx_packets;
539 };
540 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
541
542 static void
543 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
544 {
545         int i = 0;
546
547         for (; i < MAX_CLIENTS_NUMBER; i++)     {
548                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
549                         /* Just update RX packets number for this client */
550                         if (TXorRXindicator == &burstnumberRX)
551                                 client_stats[i].ipv4_rx_packets++;
552                         else
553                                 client_stats[i].ipv4_tx_packets++;
554                         return;
555                 }
556         }
557         /* We have a new client. Insert him to the table, and increment stats */
558         if (TXorRXindicator == &burstnumberRX)
559                 client_stats[active_clients].ipv4_rx_packets++;
560         else
561                 client_stats[active_clients].ipv4_tx_packets++;
562         client_stats[active_clients].ipv4_addr = addr;
563         client_stats[active_clients].port = port;
564         active_clients++;
565
566 }
567
568 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
/*
 * Emit one DEBUG-level log line describing a frame: the info tag, the port,
 * source MAC and IP, destination MAC and IP, the ARP operation text and a
 * counter. burstnumber is pre-incremented by the macro, so it must be an
 * lvalue. src_ip, dst_ip and arp_op are printed with %s.
 */
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
                src_ip,                                                 \
                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
584 #endif
585
/*
 * Debug helper for mode 6 (ALB): account for, and optionally log, one frame.
 *
 * For IPv4 frames the per-client statistics are updated with the source
 * address. When RTE_LIBRTE_BOND_DEBUG_ALB is defined, IPv4 and ARP frames
 * are additionally logged through MODE6_DEBUG.
 *
 * @param info         Short tag printed in front of the log line.
 * @param eth_h        Ethernet header of the frame.
 * @param port         Port id associated with the frame.
 * @param burstnumber  Counter passed on to update_client_stats() and
 *                     incremented by MODE6_DEBUG.
 */
static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        /* Resolve the EtherType behind any VLAN tags and the tag length */
        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        /* Local, size-bounded copy of the tag for the log macro */
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                /* The IPv4 header follows the Ethernet header and VLAN tags */
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                /* ARP frames are only logged, not counted per client */
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
625 #endif
626
627 static uint16_t
628 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
629 {
630         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
631         struct bond_dev_private *internals = bd_tx_q->dev_private;
632         struct ether_hdr *eth_h;
633         uint16_t ether_type, offset;
634         uint16_t nb_recv_pkts;
635         int i;
636
637         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
638
639         for (i = 0; i < nb_recv_pkts; i++) {
640                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
641                 ether_type = eth_h->ether_type;
642                 offset = get_vlan_offset(eth_h, &ether_type);
643
644                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
645 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
646                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
647 #endif
648                         bond_mode_alb_arp_recv(eth_h, offset, internals);
649                 }
650 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
651                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
652                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
653 #endif
654         }
655
656         return nb_recv_pkts;
657 }
658
659 static uint16_t
660 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
661                 uint16_t nb_pkts)
662 {
663         struct bond_dev_private *internals;
664         struct bond_tx_queue *bd_tx_q;
665
666         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
667         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
668
669         uint16_t num_of_slaves;
670         uint16_t slaves[RTE_MAX_ETHPORTS];
671
672         uint16_t num_tx_total = 0, num_tx_slave;
673
674         static int slave_idx = 0;
675         int i, cslave_idx = 0, tx_fail_total = 0;
676
677         bd_tx_q = (struct bond_tx_queue *)queue;
678         internals = bd_tx_q->dev_private;
679
680         /* Copy slave list to protect against slave up/down changes during tx
681          * bursting */
682         num_of_slaves = internals->active_slave_count;
683         memcpy(slaves, internals->active_slaves,
684                         sizeof(internals->active_slaves[0]) * num_of_slaves);
685
686         if (num_of_slaves < 1)
687                 return num_tx_total;
688
689         /* Populate slaves mbuf with which packets are to be sent on it  */
690         for (i = 0; i < nb_pkts; i++) {
691                 cslave_idx = (slave_idx + i) % num_of_slaves;
692                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
693         }
694
695         /* increment current slave index so the next call to tx burst starts on the
696          * next slave */
697         slave_idx = ++cslave_idx;
698
699         /* Send packet burst on each slave device */
700         for (i = 0; i < num_of_slaves; i++) {
701                 if (slave_nb_pkts[i] > 0) {
702                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
703                                         slave_bufs[i], slave_nb_pkts[i]);
704
705                         /* if tx burst fails move packets to end of bufs */
706                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
707                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
708
709                                 tx_fail_total += tx_fail_slave;
710
711                                 memcpy(&bufs[nb_pkts - tx_fail_total],
712                                        &slave_bufs[i][num_tx_slave],
713                                        tx_fail_slave * sizeof(bufs[0]));
714                         }
715                         num_tx_total += num_tx_slave;
716                 }
717         }
718
719         return num_tx_total;
720 }
721
722 static uint16_t
723 bond_ethdev_tx_burst_active_backup(void *queue,
724                 struct rte_mbuf **bufs, uint16_t nb_pkts)
725 {
726         struct bond_dev_private *internals;
727         struct bond_tx_queue *bd_tx_q;
728
729         bd_tx_q = (struct bond_tx_queue *)queue;
730         internals = bd_tx_q->dev_private;
731
732         if (internals->active_slave_count < 1)
733                 return 0;
734
735         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
736                         bufs, nb_pkts);
737 }
738
739 static inline uint16_t
740 ether_hash(struct ether_hdr *eth_hdr)
741 {
742         unaligned_uint16_t *word_src_addr =
743                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
744         unaligned_uint16_t *word_dst_addr =
745                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
746
747         return (word_src_addr[0] ^ word_dst_addr[0]) ^
748                         (word_src_addr[1] ^ word_dst_addr[1]) ^
749                         (word_src_addr[2] ^ word_dst_addr[2]);
750 }
751
752 static inline uint32_t
753 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
754 {
755         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
756 }
757
758 static inline uint32_t
759 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
760 {
761         unaligned_uint32_t *word_src_addr =
762                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
763         unaligned_uint32_t *word_dst_addr =
764                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
765
766         return (word_src_addr[0] ^ word_dst_addr[0]) ^
767                         (word_src_addr[1] ^ word_dst_addr[1]) ^
768                         (word_src_addr[2] ^ word_dst_addr[2]) ^
769                         (word_src_addr[3] ^ word_dst_addr[3]);
770 }
771
772
773 void
774 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
775                 uint8_t slave_count, uint16_t *slaves)
776 {
777         struct ether_hdr *eth_hdr;
778         uint32_t hash;
779         int i;
780
781         for (i = 0; i < nb_pkts; i++) {
782                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
783
784                 hash = ether_hash(eth_hdr);
785
786                 slaves[i] = (hash ^= hash >> 8) % slave_count;
787         }
788 }
789
790 void
791 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
792                 uint8_t slave_count, uint16_t *slaves)
793 {
794         uint16_t i;
795         struct ether_hdr *eth_hdr;
796         uint16_t proto;
797         size_t vlan_offset;
798         uint32_t hash, l3hash;
799
800         for (i = 0; i < nb_pkts; i++) {
801                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
802                 l3hash = 0;
803
804                 proto = eth_hdr->ether_type;
805                 hash = ether_hash(eth_hdr);
806
807                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
808
809                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
810                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
811                                         ((char *)(eth_hdr + 1) + vlan_offset);
812                         l3hash = ipv4_hash(ipv4_hdr);
813
814                 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
815                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
816                                         ((char *)(eth_hdr + 1) + vlan_offset);
817                         l3hash = ipv6_hash(ipv6_hdr);
818                 }
819
820                 hash = hash ^ l3hash;
821                 hash ^= hash >> 16;
822                 hash ^= hash >> 8;
823
824                 slaves[i] = hash % slave_count;
825         }
826 }
827
828 void
829 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
830                 uint8_t slave_count, uint16_t *slaves)
831 {
832         struct ether_hdr *eth_hdr;
833         uint16_t proto;
834         size_t vlan_offset;
835         int i;
836
837         struct udp_hdr *udp_hdr;
838         struct tcp_hdr *tcp_hdr;
839         uint32_t hash, l3hash, l4hash;
840
841         for (i = 0; i < nb_pkts; i++) {
842                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
843                 proto = eth_hdr->ether_type;
844                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
845                 l3hash = 0;
846                 l4hash = 0;
847
848                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
849                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
850                                         ((char *)(eth_hdr + 1) + vlan_offset);
851                         size_t ip_hdr_offset;
852
853                         l3hash = ipv4_hash(ipv4_hdr);
854
855                         /* there is no L4 header in fragmented packet */
856                         if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
857                                                                 == 0)) {
858                                 ip_hdr_offset = (ipv4_hdr->version_ihl
859                                         & IPV4_HDR_IHL_MASK) *
860                                         IPV4_IHL_MULTIPLIER;
861
862                                 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
863                                         tcp_hdr = (struct tcp_hdr *)
864                                                 ((char *)ipv4_hdr +
865                                                         ip_hdr_offset);
866                                         l4hash = HASH_L4_PORTS(tcp_hdr);
867                                 } else if (ipv4_hdr->next_proto_id ==
868                                                                 IPPROTO_UDP) {
869                                         udp_hdr = (struct udp_hdr *)
870                                                 ((char *)ipv4_hdr +
871                                                         ip_hdr_offset);
872                                         l4hash = HASH_L4_PORTS(udp_hdr);
873                                 }
874                         }
875                 } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
876                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
877                                         ((char *)(eth_hdr + 1) + vlan_offset);
878                         l3hash = ipv6_hash(ipv6_hdr);
879
880                         if (ipv6_hdr->proto == IPPROTO_TCP) {
881                                 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
882                                 l4hash = HASH_L4_PORTS(tcp_hdr);
883                         } else if (ipv6_hdr->proto == IPPROTO_UDP) {
884                                 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
885                                 l4hash = HASH_L4_PORTS(udp_hdr);
886                         }
887                 }
888
889                 hash = l3hash ^ l4hash;
890                 hash ^= hash >> 16;
891                 hash ^= hash >> 8;
892
893                 slaves[i] = hash % slave_count;
894         }
895 }
896
/*
 * Remaining-bandwidth record for one slave, used to sort slaves for TLB.
 * The two bwg_left_* fields hold the quotient and remainder computed by
 * bandwidth_left(); slave is the port id of the slave they describe.
 */
struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        /* Port ids are 16 bits wide; a narrower field would truncate them. */
        uint16_t slave;
};
902
903 void
904 bond_tlb_activate_slave(struct bond_dev_private *internals) {
905         int i;
906
907         for (i = 0; i < internals->active_slave_count; i++) {
908                 tlb_last_obytets[internals->active_slaves[i]] = 0;
909         }
910 }
911
912 static int
913 bandwidth_cmp(const void *a, const void *b)
914 {
915         const struct bwg_slave *bwg_a = a;
916         const struct bwg_slave *bwg_b = b;
917         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
918         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
919                         (int64_t)bwg_a->bwg_left_remainder;
920         if (diff > 0)
921                 return 1;
922         else if (diff < 0)
923                 return -1;
924         else if (diff2 > 0)
925                 return 1;
926         else if (diff2 < 0)
927                 return -1;
928         else
929                 return 0;
930 }
931
932 static void
933 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
934                 struct bwg_slave *bwg_slave)
935 {
936         struct rte_eth_link link_status;
937
938         rte_eth_link_get_nowait(port_id, &link_status);
939         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
940         if (link_bwg == 0)
941                 return;
942         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
943         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
944         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
945 }
946
947 static void
948 bond_ethdev_update_tlb_slave_cb(void *arg)
949 {
950         struct bond_dev_private *internals = arg;
951         struct rte_eth_stats slave_stats;
952         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
953         uint8_t slave_count;
954         uint64_t tx_bytes;
955
956         uint8_t update_stats = 0;
957         uint8_t i, slave_id;
958
959         internals->slave_update_idx++;
960
961
962         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
963                 update_stats = 1;
964
965         for (i = 0; i < internals->active_slave_count; i++) {
966                 slave_id = internals->active_slaves[i];
967                 rte_eth_stats_get(slave_id, &slave_stats);
968                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
969                 bandwidth_left(slave_id, tx_bytes,
970                                 internals->slave_update_idx, &bwg_array[i]);
971                 bwg_array[i].slave = slave_id;
972
973                 if (update_stats) {
974                         tlb_last_obytets[slave_id] = slave_stats.obytes;
975                 }
976         }
977
978         if (update_stats == 1)
979                 internals->slave_update_idx = 0;
980
981         slave_count = i;
982         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
983         for (i = 0; i < slave_count; i++)
984                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
985
986         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
987                         (struct bond_dev_private *)internals);
988 }
989
990 static uint16_t
991 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
992 {
993         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
994         struct bond_dev_private *internals = bd_tx_q->dev_private;
995
996         struct rte_eth_dev *primary_port =
997                         &rte_eth_devices[internals->primary_port];
998         uint16_t num_tx_total = 0;
999         uint16_t i, j;
1000
1001         uint16_t num_of_slaves = internals->active_slave_count;
1002         uint16_t slaves[RTE_MAX_ETHPORTS];
1003
1004         struct ether_hdr *ether_hdr;
1005         struct ether_addr primary_slave_addr;
1006         struct ether_addr active_slave_addr;
1007
1008         if (num_of_slaves < 1)
1009                 return num_tx_total;
1010
1011         memcpy(slaves, internals->tlb_slaves_order,
1012                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
1013
1014
1015         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
1016
1017         if (nb_pkts > 3) {
1018                 for (i = 0; i < 3; i++)
1019                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
1020         }
1021
1022         for (i = 0; i < num_of_slaves; i++) {
1023                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
1024                 for (j = num_tx_total; j < nb_pkts; j++) {
1025                         if (j + 3 < nb_pkts)
1026                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1027
1028                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1029                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
1030                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
1031 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1032                                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1033 #endif
1034                 }
1035
1036                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1037                                 bufs + num_tx_total, nb_pkts - num_tx_total);
1038
1039                 if (num_tx_total == nb_pkts)
1040                         break;
1041         }
1042
1043         return num_tx_total;
1044 }
1045
1046 void
1047 bond_tlb_disable(struct bond_dev_private *internals)
1048 {
1049         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1050 }
1051
/*
 * Start TLB slave reordering by running the update callback once; the
 * callback re-arms itself through an alarm on every invocation.
 */
void
bond_tlb_enable(struct bond_dev_private *internals)
{
        void *cb_arg = internals;

        bond_ethdev_update_tlb_slave_cb(cb_arg);
}
1057
1058 static uint16_t
1059 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1060 {
1061         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1062         struct bond_dev_private *internals = bd_tx_q->dev_private;
1063
1064         struct ether_hdr *eth_h;
1065         uint16_t ether_type, offset;
1066
1067         struct client_data *client_info;
1068
1069         /*
1070          * We create transmit buffers for every slave and one additional to send
1071          * through tlb. In worst case every packet will be send on one port.
1072          */
1073         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1074         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1075
1076         /*
1077          * We create separate transmit buffers for update packets as they won't
1078          * be counted in num_tx_total.
1079          */
1080         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1081         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1082
1083         struct rte_mbuf *upd_pkt;
1084         size_t pkt_size;
1085
1086         uint16_t num_send, num_not_send = 0;
1087         uint16_t num_tx_total = 0;
1088         uint16_t slave_idx;
1089
1090         int i, j;
1091
1092         /* Search tx buffer for ARP packets and forward them to alb */
1093         for (i = 0; i < nb_pkts; i++) {
1094                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1095                 ether_type = eth_h->ether_type;
1096                 offset = get_vlan_offset(eth_h, &ether_type);
1097
1098                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1099                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1100
1101                         /* Change src mac in eth header */
1102                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1103
1104                         /* Add packet to slave tx buffer */
1105                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1106                         slave_bufs_pkts[slave_idx]++;
1107                 } else {
1108                         /* If packet is not ARP, send it with TLB policy */
1109                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1110                                         bufs[i];
1111                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1112                 }
1113         }
1114
1115         /* Update connected client ARP tables */
1116         if (internals->mode6.ntt) {
1117                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1118                         client_info = &internals->mode6.client_table[i];
1119
1120                         if (client_info->in_use) {
1121                                 /* Allocate new packet to send ARP update on current slave */
1122                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1123                                 if (upd_pkt == NULL) {
1124                                         RTE_BOND_LOG(ERR,
1125                                                      "Failed to allocate ARP packet from pool");
1126                                         continue;
1127                                 }
1128                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1129                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
1130                                 upd_pkt->data_len = pkt_size;
1131                                 upd_pkt->pkt_len = pkt_size;
1132
1133                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1134                                                 internals);
1135
1136                                 /* Add packet to update tx buffer */
1137                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1138                                 update_bufs_pkts[slave_idx]++;
1139                         }
1140                 }
1141                 internals->mode6.ntt = 0;
1142         }
1143
1144         /* Send ARP packets on proper slaves */
1145         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1146                 if (slave_bufs_pkts[i] > 0) {
1147                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1148                                         slave_bufs[i], slave_bufs_pkts[i]);
1149                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1150                                 bufs[nb_pkts - 1 - num_not_send - j] =
1151                                                 slave_bufs[i][nb_pkts - 1 - j];
1152                         }
1153
1154                         num_tx_total += num_send;
1155                         num_not_send += slave_bufs_pkts[i] - num_send;
1156
1157 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1158         /* Print TX stats including update packets */
1159                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
1160                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1161                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1162                         }
1163 #endif
1164                 }
1165         }
1166
1167         /* Send update packets on proper slaves */
1168         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1169                 if (update_bufs_pkts[i] > 0) {
1170                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1171                                         update_bufs_pkts[i]);
1172                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
1173                                 rte_pktmbuf_free(update_bufs[i][j]);
1174                         }
1175 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1176                         for (j = 0; j < update_bufs_pkts[i]; j++) {
1177                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1178                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1179                         }
1180 #endif
1181                 }
1182         }
1183
1184         /* Send non-ARP packets using tlb policy */
1185         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1186                 num_send = bond_ethdev_tx_burst_tlb(queue,
1187                                 slave_bufs[RTE_MAX_ETHPORTS],
1188                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1189
1190                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1191                         bufs[nb_pkts - 1 - num_not_send - j] =
1192                                         slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1193                 }
1194
1195                 num_tx_total += num_send;
1196         }
1197
1198         return num_tx_total;
1199 }
1200
1201 static uint16_t
1202 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1203                 uint16_t nb_bufs)
1204 {
1205         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1206         struct bond_dev_private *internals = bd_tx_q->dev_private;
1207
1208         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1209         uint16_t slave_count;
1210
1211         /* Array to sort mbufs for transmission on each slave into */
1212         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1213         /* Number of mbufs for transmission on each slave */
1214         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1215         /* Mapping array generated by hash function to map mbufs to slaves */
1216         uint16_t bufs_slave_port_idxs[nb_bufs];
1217
1218         uint16_t slave_tx_count;
1219         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1220
1221         uint16_t i;
1222
1223         if (unlikely(nb_bufs == 0))
1224                 return 0;
1225
1226         /* Copy slave list to protect against slave up/down changes during tx
1227          * bursting */
1228         slave_count = internals->active_slave_count;
1229         if (unlikely(slave_count < 1))
1230                 return 0;
1231
1232         memcpy(slave_port_ids, internals->active_slaves,
1233                         sizeof(slave_port_ids[0]) * slave_count);
1234
1235         /*
1236          * Populate slaves mbuf with the packets which are to be sent on it
1237          * selecting output slave using hash based on xmit policy
1238          */
1239         internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1240                         bufs_slave_port_idxs);
1241
1242         for (i = 0; i < nb_bufs; i++) {
1243                 /* Populate slave mbuf arrays with mbufs for that slave. */
1244                 uint8_t slave_idx = bufs_slave_port_idxs[i];
1245
1246                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1247         }
1248
1249         /* Send packet burst on each slave device */
1250         for (i = 0; i < slave_count; i++) {
1251                 if (slave_nb_bufs[i] == 0)
1252                         continue;
1253
1254                 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1255                                 bd_tx_q->queue_id, slave_bufs[i],
1256                                 slave_nb_bufs[i]);
1257
1258                 total_tx_count += slave_tx_count;
1259
1260                 /* If tx burst fails move packets to end of bufs */
1261                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1262                         int slave_tx_fail_count = slave_nb_bufs[i] -
1263                                         slave_tx_count;
1264                         total_tx_fail_count += slave_tx_fail_count;
1265                         memcpy(&bufs[nb_bufs - total_tx_fail_count],
1266                                &slave_bufs[i][slave_tx_count],
1267                                slave_tx_fail_count * sizeof(bufs[0]));
1268                 }
1269         }
1270
1271         return total_tx_count;
1272 }
1273
1274 static uint16_t
1275 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1276                 uint16_t nb_bufs)
1277 {
1278         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1279         struct bond_dev_private *internals = bd_tx_q->dev_private;
1280
1281         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1282         uint16_t slave_count;
1283
1284         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1285         uint16_t dist_slave_count;
1286
1287         /* 2-D array to sort mbufs for transmission on each slave into */
1288         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1289         /* Number of mbufs for transmission on each slave */
1290         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1291         /* Mapping array generated by hash function to map mbufs to slaves */
1292         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1293
1294         uint16_t slave_tx_count;
1295         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1296
1297         uint16_t i;
1298
1299         if (unlikely(nb_bufs == 0))
1300                 return 0;
1301
1302         /* Copy slave list to protect against slave up/down changes during tx
1303          * bursting */
1304         slave_count = internals->active_slave_count;
1305         if (unlikely(slave_count < 1))
1306                 return 0;
1307
1308         memcpy(slave_port_ids, internals->active_slaves,
1309                         sizeof(slave_port_ids[0]) * slave_count);
1310
1311         dist_slave_count = 0;
1312         for (i = 0; i < slave_count; i++) {
1313                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1314
1315                 if (ACTOR_STATE(port, DISTRIBUTING))
1316                         dist_slave_port_ids[dist_slave_count++] =
1317                                         slave_port_ids[i];
1318         }
1319
1320         if (likely(dist_slave_count > 1)) {
1321
1322                 /*
1323                  * Populate slaves mbuf with the packets which are to be sent
1324                  * on it, selecting output slave using hash based on xmit policy
1325                  */
1326                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1327                                 bufs_slave_port_idxs);
1328
1329                 for (i = 0; i < nb_bufs; i++) {
1330                         /*
1331                          * Populate slave mbuf arrays with mbufs for that
1332                          * slave
1333                          */
1334                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1335
1336                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1337                                         bufs[i];
1338                 }
1339
1340
1341                 /* Send packet burst on each slave device */
1342                 for (i = 0; i < dist_slave_count; i++) {
1343                         if (slave_nb_bufs[i] == 0)
1344                                 continue;
1345
1346                         slave_tx_count = rte_eth_tx_burst(
1347                                         dist_slave_port_ids[i],
1348                                         bd_tx_q->queue_id, slave_bufs[i],
1349                                         slave_nb_bufs[i]);
1350
1351                         total_tx_count += slave_tx_count;
1352
1353                         /* If tx burst fails move packets to end of bufs */
1354                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1355                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1356                                                 slave_tx_count;
1357                                 total_tx_fail_count += slave_tx_fail_count;
1358
1359                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1360                                        &slave_bufs[i][slave_tx_count],
1361                                        slave_tx_fail_count * sizeof(bufs[0]));
1362                         }
1363                 }
1364         }
1365
1366         /* Check for LACP control packets and send if available */
1367         for (i = 0; i < slave_count; i++) {
1368                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1369                 struct rte_mbuf *ctrl_pkt = NULL;
1370
1371                 if (likely(rte_ring_empty(port->tx_ring)))
1372                         continue;
1373
1374                 if (rte_ring_dequeue(port->tx_ring,
1375                                      (void **)&ctrl_pkt) != -ENOENT) {
1376                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1377                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1378                         /*
1379                          * re-enqueue LAG control plane packets to buffering
1380                          * ring if transmission fails so the packet isn't lost.
1381                          */
1382                         if (slave_tx_count != 1)
1383                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1384                 }
1385         }
1386
1387         return total_tx_count;
1388 }
1389
1390 static uint16_t
1391 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1392                 uint16_t nb_pkts)
1393 {
1394         struct bond_dev_private *internals;
1395         struct bond_tx_queue *bd_tx_q;
1396
1397         uint8_t tx_failed_flag = 0, num_of_slaves;
1398         uint16_t slaves[RTE_MAX_ETHPORTS];
1399
1400         uint16_t max_nb_of_tx_pkts = 0;
1401
1402         int slave_tx_total[RTE_MAX_ETHPORTS];
1403         int i, most_successful_tx_slave = -1;
1404
1405         bd_tx_q = (struct bond_tx_queue *)queue;
1406         internals = bd_tx_q->dev_private;
1407
1408         /* Copy slave list to protect against slave up/down changes during tx
1409          * bursting */
1410         num_of_slaves = internals->active_slave_count;
1411         memcpy(slaves, internals->active_slaves,
1412                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1413
1414         if (num_of_slaves < 1)
1415                 return 0;
1416
1417         /* Increment reference count on mbufs */
1418         for (i = 0; i < nb_pkts; i++)
1419                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1420
1421         /* Transmit burst on each active slave */
1422         for (i = 0; i < num_of_slaves; i++) {
1423                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1424                                         bufs, nb_pkts);
1425
1426                 if (unlikely(slave_tx_total[i] < nb_pkts))
1427                         tx_failed_flag = 1;
1428
1429                 /* record the value and slave index for the slave which transmits the
1430                  * maximum number of packets */
1431                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1432                         max_nb_of_tx_pkts = slave_tx_total[i];
1433                         most_successful_tx_slave = i;
1434                 }
1435         }
1436
1437         /* if slaves fail to transmit packets from burst, the calling application
1438          * is not expected to know about multiple references to packets so we must
1439          * handle failures of all packets except those of the most successful slave
1440          */
1441         if (unlikely(tx_failed_flag))
1442                 for (i = 0; i < num_of_slaves; i++)
1443                         if (i != most_successful_tx_slave)
1444                                 while (slave_tx_total[i] < nb_pkts)
1445                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1446
1447         return max_nb_of_tx_pkts;
1448 }
1449
1450 void
1451 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1452 {
1453         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1454
1455         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1456                 /**
1457                  * If in mode 4 then save the link properties of the first
1458                  * slave, all subsequent slaves must match these properties
1459                  */
1460                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1461
1462                 bond_link->link_autoneg = slave_link->link_autoneg;
1463                 bond_link->link_duplex = slave_link->link_duplex;
1464                 bond_link->link_speed = slave_link->link_speed;
1465         } else {
1466                 /**
1467                  * In any other mode the link properties are set to default
1468                  * values of AUTONEG/DUPLEX
1469                  */
1470                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1471                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1472         }
1473 }
1474
1475 int
1476 link_properties_valid(struct rte_eth_dev *ethdev,
1477                 struct rte_eth_link *slave_link)
1478 {
1479         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1480
1481         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1482                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1483
1484                 if (bond_link->link_duplex != slave_link->link_duplex ||
1485                         bond_link->link_autoneg != slave_link->link_autoneg ||
1486                         bond_link->link_speed != slave_link->link_speed)
1487                         return -1;
1488         }
1489
1490         return 0;
1491 }
1492
1493 int
1494 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1495 {
1496         struct ether_addr *mac_addr;
1497
1498         if (eth_dev == NULL) {
1499                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1500                 return -1;
1501         }
1502
1503         if (dst_mac_addr == NULL) {
1504                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1505                 return -1;
1506         }
1507
1508         mac_addr = eth_dev->data->mac_addrs;
1509
1510         ether_addr_copy(mac_addr, dst_mac_addr);
1511         return 0;
1512 }
1513
1514 int
1515 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1516 {
1517         struct ether_addr *mac_addr;
1518
1519         if (eth_dev == NULL) {
1520                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1521                 return -1;
1522         }
1523
1524         if (new_mac_addr == NULL) {
1525                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1526                 return -1;
1527         }
1528
1529         mac_addr = eth_dev->data->mac_addrs;
1530
1531         /* If new MAC is different to current MAC then update */
1532         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1533                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1534
1535         return 0;
1536 }
1537
1538 static const struct ether_addr null_mac_addr;
1539
1540 /*
1541  * Add additional MAC addresses to the slave
1542  */
1543 int
1544 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1545                 uint16_t slave_port_id)
1546 {
1547         int i, ret;
1548         struct ether_addr *mac_addr;
1549
1550         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1551                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1552                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1553                         break;
1554
1555                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1556                 if (ret < 0) {
1557                         /* rollback */
1558                         for (i--; i > 0; i--)
1559                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1560                                         &bonded_eth_dev->data->mac_addrs[i]);
1561                         return ret;
1562                 }
1563         }
1564
1565         return 0;
1566 }
1567
1568 /*
1569  * Remove additional MAC addresses from the slave
1570  */
1571 int
1572 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1573                 uint16_t slave_port_id)
1574 {
1575         int i, rc, ret;
1576         struct ether_addr *mac_addr;
1577
1578         rc = 0;
1579         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1580                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1581                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1582                         break;
1583
1584                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1585                 /* save only the first error */
1586                 if (ret < 0 && rc == 0)
1587                         rc = ret;
1588         }
1589
1590         return rc;
1591 }
1592
1593 int
1594 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1595 {
1596         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1597         int i;
1598
1599         /* Update slave devices MAC addresses */
1600         if (internals->slave_count < 1)
1601                 return -1;
1602
1603         switch (internals->mode) {
1604         case BONDING_MODE_ROUND_ROBIN:
1605         case BONDING_MODE_BALANCE:
1606         case BONDING_MODE_BROADCAST:
1607                 for (i = 0; i < internals->slave_count; i++) {
1608                         if (rte_eth_dev_default_mac_addr_set(
1609                                         internals->slaves[i].port_id,
1610                                         bonded_eth_dev->data->mac_addrs)) {
1611                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1612                                                 internals->slaves[i].port_id);
1613                                 return -1;
1614                         }
1615                 }
1616                 break;
1617         case BONDING_MODE_8023AD:
1618                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1619                 break;
1620         case BONDING_MODE_ACTIVE_BACKUP:
1621         case BONDING_MODE_TLB:
1622         case BONDING_MODE_ALB:
1623         default:
1624                 for (i = 0; i < internals->slave_count; i++) {
1625                         if (internals->slaves[i].port_id ==
1626                                         internals->current_primary_port) {
1627                                 if (rte_eth_dev_default_mac_addr_set(
1628                                                 internals->primary_port,
1629                                                 bonded_eth_dev->data->mac_addrs)) {
1630                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1631                                                         internals->current_primary_port);
1632                                         return -1;
1633                                 }
1634                         } else {
1635                                 if (rte_eth_dev_default_mac_addr_set(
1636                                                 internals->slaves[i].port_id,
1637                                                 &internals->slaves[i].persisted_mac_addr)) {
1638                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1639                                                         internals->slaves[i].port_id);
1640                                         return -1;
1641                                 }
1642                         }
1643                 }
1644         }
1645
1646         return 0;
1647 }
1648
1649 int
1650 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1651 {
1652         struct bond_dev_private *internals;
1653
1654         internals = eth_dev->data->dev_private;
1655
1656         switch (mode) {
1657         case BONDING_MODE_ROUND_ROBIN:
1658                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1659                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1660                 break;
1661         case BONDING_MODE_ACTIVE_BACKUP:
1662                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1663                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1664                 break;
1665         case BONDING_MODE_BALANCE:
1666                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1667                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1668                 break;
1669         case BONDING_MODE_BROADCAST:
1670                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1671                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1672                 break;
1673         case BONDING_MODE_8023AD:
1674                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1675                         return -1;
1676
1677                 if (internals->mode4.dedicated_queues.enabled == 0) {
1678                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1679                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1680                         RTE_BOND_LOG(WARNING,
1681                                 "Using mode 4, it is necessary to do TX burst "
1682                                 "and RX burst at least every 100ms.");
1683                 } else {
1684                         /* Use flow director's optimization */
1685                         eth_dev->rx_pkt_burst =
1686                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1687                         eth_dev->tx_pkt_burst =
1688                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1689                 }
1690                 break;
1691         case BONDING_MODE_TLB:
1692                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1693                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1694                 break;
1695         case BONDING_MODE_ALB:
1696                 if (bond_mode_alb_enable(eth_dev) != 0)
1697                         return -1;
1698
1699                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1700                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1701                 break;
1702         default:
1703                 return -1;
1704         }
1705
1706         internals->mode = mode;
1707
1708         return 0;
1709 }
1710
1711
1712 static int
1713 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1714                 struct rte_eth_dev *slave_eth_dev)
1715 {
1716         int errval = 0;
1717         struct bond_dev_private *internals = (struct bond_dev_private *)
1718                 bonded_eth_dev->data->dev_private;
1719         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1720
1721         if (port->slow_pool == NULL) {
1722                 char mem_name[256];
1723                 int slave_id = slave_eth_dev->data->port_id;
1724
1725                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1726                                 slave_id);
1727                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1728                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1729                         slave_eth_dev->data->numa_node);
1730
1731                 /* Any memory allocation failure in initialization is critical because
1732                  * resources can't be free, so reinitialization is impossible. */
1733                 if (port->slow_pool == NULL) {
1734                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1735                                 slave_id, mem_name, rte_strerror(rte_errno));
1736                 }
1737         }
1738
1739         if (internals->mode4.dedicated_queues.enabled == 1) {
1740                 /* Configure slow Rx queue */
1741
1742                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1743                                 internals->mode4.dedicated_queues.rx_qid, 128,
1744                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1745                                 NULL, port->slow_pool);
1746                 if (errval != 0) {
1747                         RTE_BOND_LOG(ERR,
1748                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1749                                         slave_eth_dev->data->port_id,
1750                                         internals->mode4.dedicated_queues.rx_qid,
1751                                         errval);
1752                         return errval;
1753                 }
1754
1755                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1756                                 internals->mode4.dedicated_queues.tx_qid, 512,
1757                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1758                                 NULL);
1759                 if (errval != 0) {
1760                         RTE_BOND_LOG(ERR,
1761                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1762                                 slave_eth_dev->data->port_id,
1763                                 internals->mode4.dedicated_queues.tx_qid,
1764                                 errval);
1765                         return errval;
1766                 }
1767         }
1768         return 0;
1769 }
1770
1771 int
1772 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1773                 struct rte_eth_dev *slave_eth_dev)
1774 {
1775         struct bond_rx_queue *bd_rx_q;
1776         struct bond_tx_queue *bd_tx_q;
1777         uint16_t nb_rx_queues;
1778         uint16_t nb_tx_queues;
1779
1780         int errval;
1781         uint16_t q_id;
1782         struct rte_flow_error flow_error;
1783
1784         struct bond_dev_private *internals = (struct bond_dev_private *)
1785                 bonded_eth_dev->data->dev_private;
1786
1787         /* Stop slave */
1788         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1789
1790         /* Enable interrupts on slave device if supported */
1791         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1792                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1793
1794         /* If RSS is enabled for bonding, try to enable it for slaves  */
1795         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1796                 if (internals->rss_key_len != 0) {
1797                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1798                                         internals->rss_key_len;
1799                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1800                                         internals->rss_key;
1801                 } else {
1802                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1803                 }
1804
1805                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1806                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1807                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1808                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1809         }
1810
1811         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1812                         DEV_RX_OFFLOAD_VLAN_FILTER)
1813                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1814                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1815         else
1816                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1817                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1818
1819         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1820         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1821
1822         if (internals->mode == BONDING_MODE_8023AD) {
1823                 if (internals->mode4.dedicated_queues.enabled == 1) {
1824                         nb_rx_queues++;
1825                         nb_tx_queues++;
1826                 }
1827         }
1828
1829         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1830                                      bonded_eth_dev->data->mtu);
1831         if (errval != 0 && errval != -ENOTSUP) {
1832                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1833                                 slave_eth_dev->data->port_id, errval);
1834                 return errval;
1835         }
1836
1837         /* Configure device */
1838         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1839                         nb_rx_queues, nb_tx_queues,
1840                         &(slave_eth_dev->data->dev_conf));
1841         if (errval != 0) {
1842                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1843                                 slave_eth_dev->data->port_id, errval);
1844                 return errval;
1845         }
1846
1847         /* Setup Rx Queues */
1848         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1849                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1850
1851                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1852                                 bd_rx_q->nb_rx_desc,
1853                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1854                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1855                 if (errval != 0) {
1856                         RTE_BOND_LOG(ERR,
1857                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1858                                         slave_eth_dev->data->port_id, q_id, errval);
1859                         return errval;
1860                 }
1861         }
1862
1863         /* Setup Tx Queues */
1864         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1865                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1866
1867                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1868                                 bd_tx_q->nb_tx_desc,
1869                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1870                                 &bd_tx_q->tx_conf);
1871                 if (errval != 0) {
1872                         RTE_BOND_LOG(ERR,
1873                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1874                                 slave_eth_dev->data->port_id, q_id, errval);
1875                         return errval;
1876                 }
1877         }
1878
1879         if (internals->mode == BONDING_MODE_8023AD &&
1880                         internals->mode4.dedicated_queues.enabled == 1) {
1881                 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
1882                                 != 0)
1883                         return errval;
1884
1885                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1886                                 slave_eth_dev->data->port_id) != 0) {
1887                         RTE_BOND_LOG(ERR,
1888                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1889                                 slave_eth_dev->data->port_id, q_id, errval);
1890                         return -1;
1891                 }
1892
1893                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1894                         rte_flow_destroy(slave_eth_dev->data->port_id,
1895                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1896                                         &flow_error);
1897
1898                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1899                                 slave_eth_dev->data->port_id);
1900         }
1901
1902         /* Start device */
1903         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1904         if (errval != 0) {
1905                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1906                                 slave_eth_dev->data->port_id, errval);
1907                 return -1;
1908         }
1909
1910         /* If RSS is enabled for bonding, synchronize RETA */
1911         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1912                 int i;
1913                 struct bond_dev_private *internals;
1914
1915                 internals = bonded_eth_dev->data->dev_private;
1916
1917                 for (i = 0; i < internals->slave_count; i++) {
1918                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1919                                 errval = rte_eth_dev_rss_reta_update(
1920                                                 slave_eth_dev->data->port_id,
1921                                                 &internals->reta_conf[0],
1922                                                 internals->slaves[i].reta_size);
1923                                 if (errval != 0) {
1924                                         RTE_BOND_LOG(WARNING,
1925                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1926                                                      " RSS Configuration for bonding may be inconsistent.",
1927                                                      slave_eth_dev->data->port_id, errval);
1928                                 }
1929                                 break;
1930                         }
1931                 }
1932         }
1933
1934         /* If lsc interrupt is set, check initial slave's link status */
1935         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1936                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1937                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1938                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1939                         NULL);
1940         }
1941
1942         return 0;
1943 }
1944
1945 void
1946 slave_remove(struct bond_dev_private *internals,
1947                 struct rte_eth_dev *slave_eth_dev)
1948 {
1949         uint8_t i;
1950
1951         for (i = 0; i < internals->slave_count; i++)
1952                 if (internals->slaves[i].port_id ==
1953                                 slave_eth_dev->data->port_id)
1954                         break;
1955
1956         if (i < (internals->slave_count - 1)) {
1957                 struct rte_flow *flow;
1958
1959                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1960                                 sizeof(internals->slaves[0]) *
1961                                 (internals->slave_count - i - 1));
1962                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1963                         memmove(&flow->flows[i], &flow->flows[i + 1],
1964                                 sizeof(flow->flows[0]) *
1965                                 (internals->slave_count - i - 1));
1966                         flow->flows[internals->slave_count - 1] = NULL;
1967                 }
1968         }
1969
1970         internals->slave_count--;
1971
1972         /* force reconfiguration of slave interfaces */
1973         _rte_eth_dev_reset(slave_eth_dev);
1974 }
1975
1976 static void
1977 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1978
1979 void
1980 slave_add(struct bond_dev_private *internals,
1981                 struct rte_eth_dev *slave_eth_dev)
1982 {
1983         struct bond_slave_details *slave_details =
1984                         &internals->slaves[internals->slave_count];
1985
1986         slave_details->port_id = slave_eth_dev->data->port_id;
1987         slave_details->last_link_status = 0;
1988
1989         /* Mark slave devices that don't support interrupts so we can
1990          * compensate when we start the bond
1991          */
1992         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1993                 slave_details->link_status_poll_enabled = 1;
1994         }
1995
1996         slave_details->link_status_wait_to_complete = 0;
1997         /* clean tlb_last_obytes when adding port for bonding device */
1998         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1999                         sizeof(struct ether_addr));
2000 }
2001
2002 void
2003 bond_ethdev_primary_set(struct bond_dev_private *internals,
2004                 uint16_t slave_port_id)
2005 {
2006         int i;
2007
2008         if (internals->active_slave_count < 1)
2009                 internals->current_primary_port = slave_port_id;
2010         else
2011                 /* Search bonded device slave ports for new proposed primary port */
2012                 for (i = 0; i < internals->active_slave_count; i++) {
2013                         if (internals->active_slaves[i] == slave_port_id)
2014                                 internals->current_primary_port = slave_port_id;
2015                 }
2016 }
2017
2018 static void
2019 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2020
2021 static int
2022 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2023 {
2024         struct bond_dev_private *internals;
2025         int i;
2026
2027         /* slave eth dev will be started by bonded device */
2028         if (check_for_bonded_ethdev(eth_dev)) {
2029                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2030                                 eth_dev->data->port_id);
2031                 return -1;
2032         }
2033
2034         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2035         eth_dev->data->dev_started = 1;
2036
2037         internals = eth_dev->data->dev_private;
2038
2039         if (internals->slave_count == 0) {
2040                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2041                 goto out_err;
2042         }
2043
2044         if (internals->user_defined_mac == 0) {
2045                 struct ether_addr *new_mac_addr = NULL;
2046
2047                 for (i = 0; i < internals->slave_count; i++)
2048                         if (internals->slaves[i].port_id == internals->primary_port)
2049                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2050
2051                 if (new_mac_addr == NULL)
2052                         goto out_err;
2053
2054                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2055                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2056                                         eth_dev->data->port_id);
2057                         goto out_err;
2058                 }
2059         }
2060
2061         /* If bonded device is configure in promiscuous mode then re-apply config */
2062         if (internals->promiscuous_en)
2063                 bond_ethdev_promiscuous_enable(eth_dev);
2064
2065         if (internals->mode == BONDING_MODE_8023AD) {
2066                 if (internals->mode4.dedicated_queues.enabled == 1) {
2067                         internals->mode4.dedicated_queues.rx_qid =
2068                                         eth_dev->data->nb_rx_queues;
2069                         internals->mode4.dedicated_queues.tx_qid =
2070                                         eth_dev->data->nb_tx_queues;
2071                 }
2072         }
2073
2074
2075         /* Reconfigure each slave device if starting bonded device */
2076         for (i = 0; i < internals->slave_count; i++) {
2077                 struct rte_eth_dev *slave_ethdev =
2078                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2079                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2080                         RTE_BOND_LOG(ERR,
2081                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2082                                 eth_dev->data->port_id,
2083                                 internals->slaves[i].port_id);
2084                         goto out_err;
2085                 }
2086                 /* We will need to poll for link status if any slave doesn't
2087                  * support interrupts
2088                  */
2089                 if (internals->slaves[i].link_status_poll_enabled)
2090                         internals->link_status_polling_enabled = 1;
2091         }
2092
2093         /* start polling if needed */
2094         if (internals->link_status_polling_enabled) {
2095                 rte_eal_alarm_set(
2096                         internals->link_status_polling_interval_ms * 1000,
2097                         bond_ethdev_slave_link_status_change_monitor,
2098                         (void *)&rte_eth_devices[internals->port_id]);
2099         }
2100
2101         /* Update all slave devices MACs*/
2102         if (mac_address_slaves_update(eth_dev) != 0)
2103                 goto out_err;
2104
2105         if (internals->user_defined_primary_port)
2106                 bond_ethdev_primary_set(internals, internals->primary_port);
2107
2108         if (internals->mode == BONDING_MODE_8023AD)
2109                 bond_mode_8023ad_start(eth_dev);
2110
2111         if (internals->mode == BONDING_MODE_TLB ||
2112                         internals->mode == BONDING_MODE_ALB)
2113                 bond_tlb_enable(internals);
2114
2115         return 0;
2116
2117 out_err:
2118         eth_dev->data->dev_started = 0;
2119         return -1;
2120 }
2121
2122 static void
2123 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2124 {
2125         uint8_t i;
2126
2127         if (dev->data->rx_queues != NULL) {
2128                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2129                         rte_free(dev->data->rx_queues[i]);
2130                         dev->data->rx_queues[i] = NULL;
2131                 }
2132                 dev->data->nb_rx_queues = 0;
2133         }
2134
2135         if (dev->data->tx_queues != NULL) {
2136                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2137                         rte_free(dev->data->tx_queues[i]);
2138                         dev->data->tx_queues[i] = NULL;
2139                 }
2140                 dev->data->nb_tx_queues = 0;
2141         }
2142 }
2143
2144 void
2145 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2146 {
2147         struct bond_dev_private *internals = eth_dev->data->dev_private;
2148         uint8_t i;
2149
2150         if (internals->mode == BONDING_MODE_8023AD) {
2151                 struct port *port;
2152                 void *pkt = NULL;
2153
2154                 bond_mode_8023ad_stop(eth_dev);
2155
2156                 /* Discard all messages to/from mode 4 state machines */
2157                 for (i = 0; i < internals->active_slave_count; i++) {
2158                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2159
2160                         RTE_ASSERT(port->rx_ring != NULL);
2161                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2162                                 rte_pktmbuf_free(pkt);
2163
2164                         RTE_ASSERT(port->tx_ring != NULL);
2165                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2166                                 rte_pktmbuf_free(pkt);
2167                 }
2168         }
2169
2170         if (internals->mode == BONDING_MODE_TLB ||
2171                         internals->mode == BONDING_MODE_ALB) {
2172                 bond_tlb_disable(internals);
2173                 for (i = 0; i < internals->active_slave_count; i++)
2174                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2175         }
2176
2177         internals->link_status_polling_enabled = 0;
2178         for (i = 0; i < internals->slave_count; i++)
2179                 internals->slaves[i].last_link_status = 0;
2180
2181         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2182         eth_dev->data->dev_started = 0;
2183 }
2184
2185 void
2186 bond_ethdev_close(struct rte_eth_dev *dev)
2187 {
2188         struct bond_dev_private *internals = dev->data->dev_private;
2189         uint8_t bond_port_id = internals->port_id;
2190         int skipped = 0;
2191         struct rte_flow_error ferror;
2192
2193         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2194         while (internals->slave_count != skipped) {
2195                 uint16_t port_id = internals->slaves[skipped].port_id;
2196
2197                 rte_eth_dev_stop(port_id);
2198
2199                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2200                         RTE_BOND_LOG(ERR,
2201                                      "Failed to remove port %d from bonded device %s",
2202                                      port_id, dev->device->name);
2203                         skipped++;
2204                 }
2205         }
2206         bond_flow_ops.flush(dev, &ferror);
2207         bond_ethdev_free_queues(dev);
2208         rte_bitmap_reset(internals->vlan_filter_bmp);
2209 }
2210
2211 /* forward declaration */
2212 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2213
2214 static void
2215 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2216 {
2217         struct bond_dev_private *internals = dev->data->dev_private;
2218
2219         uint16_t max_nb_rx_queues = UINT16_MAX;
2220         uint16_t max_nb_tx_queues = UINT16_MAX;
2221
2222         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2223
2224         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2225                         internals->candidate_max_rx_pktlen :
2226                         ETHER_MAX_JUMBO_FRAME_LEN;
2227
2228         /* Max number of tx/rx queues that the bonded device can support is the
2229          * minimum values of the bonded slaves, as all slaves must be capable
2230          * of supporting the same number of tx/rx queues.
2231          */
2232         if (internals->slave_count > 0) {
2233                 struct rte_eth_dev_info slave_info;
2234                 uint8_t idx;
2235
2236                 for (idx = 0; idx < internals->slave_count; idx++) {
2237                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2238                                         &slave_info);
2239
2240                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2241                                 max_nb_rx_queues = slave_info.max_rx_queues;
2242
2243                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2244                                 max_nb_tx_queues = slave_info.max_tx_queues;
2245                 }
2246         }
2247
2248         dev_info->max_rx_queues = max_nb_rx_queues;
2249         dev_info->max_tx_queues = max_nb_tx_queues;
2250
2251         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2252                sizeof(dev_info->default_rxconf));
2253         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2254                sizeof(dev_info->default_txconf));
2255
2256         memcpy(&dev_info->rx_desc_lim, &internals->rx_desc_lim,
2257                sizeof(dev_info->rx_desc_lim));
2258         memcpy(&dev_info->tx_desc_lim, &internals->tx_desc_lim,
2259                sizeof(dev_info->tx_desc_lim));
2260
2261         /**
2262          * If dedicated hw queues enabled for link bonding device in LACP mode
2263          * then we need to reduce the maximum number of data path queues by 1.
2264          */
2265         if (internals->mode == BONDING_MODE_8023AD &&
2266                 internals->mode4.dedicated_queues.enabled == 1) {
2267                 dev_info->max_rx_queues--;
2268                 dev_info->max_tx_queues--;
2269         }
2270
2271         dev_info->min_rx_bufsize = 0;
2272
2273         dev_info->rx_offload_capa = internals->rx_offload_capa;
2274         dev_info->tx_offload_capa = internals->tx_offload_capa;
2275         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2276         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2277         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2278
2279         dev_info->reta_size = internals->reta_size;
2280 }
2281
2282 static int
2283 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2284 {
2285         int res;
2286         uint16_t i;
2287         struct bond_dev_private *internals = dev->data->dev_private;
2288
2289         /* don't do this while a slave is being added */
2290         rte_spinlock_lock(&internals->lock);
2291
2292         if (on)
2293                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2294         else
2295                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2296
2297         for (i = 0; i < internals->slave_count; i++) {
2298                 uint16_t port_id = internals->slaves[i].port_id;
2299
2300                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2301                 if (res == ENOTSUP)
2302                         RTE_BOND_LOG(WARNING,
2303                                      "Setting VLAN filter on slave port %u not supported.",
2304                                      port_id);
2305         }
2306
2307         rte_spinlock_unlock(&internals->lock);
2308         return 0;
2309 }
2310
2311 static int
2312 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2313                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2314                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2315 {
2316         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2317                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2318                                         0, dev->data->numa_node);
2319         if (bd_rx_q == NULL)
2320                 return -1;
2321
2322         bd_rx_q->queue_id = rx_queue_id;
2323         bd_rx_q->dev_private = dev->data->dev_private;
2324
2325         bd_rx_q->nb_rx_desc = nb_rx_desc;
2326
2327         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2328         bd_rx_q->mb_pool = mb_pool;
2329
2330         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2331
2332         return 0;
2333 }
2334
2335 static int
2336 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2337                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2338                 const struct rte_eth_txconf *tx_conf)
2339 {
2340         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2341                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2342                                         0, dev->data->numa_node);
2343
2344         if (bd_tx_q == NULL)
2345                 return -1;
2346
2347         bd_tx_q->queue_id = tx_queue_id;
2348         bd_tx_q->dev_private = dev->data->dev_private;
2349
2350         bd_tx_q->nb_tx_desc = nb_tx_desc;
2351         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2352
2353         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2354
2355         return 0;
2356 }
2357
/** Free an Rx queue structure; a NULL queue is ignored. */
static void
bond_ethdev_rx_queue_release(void *queue)
{
        if (queue != NULL)
                rte_free(queue);
}
2366
/** Free a Tx queue structure; a NULL queue is ignored. */
static void
bond_ethdev_tx_queue_release(void *queue)
{
        if (queue != NULL)
                rte_free(queue);
}
2375
2376 static void
2377 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2378 {
2379         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2380         struct bond_dev_private *internals;
2381
2382         /* Default value for polling slave found is true as we don't want to
2383          * disable the polling thread if we cannot get the lock */
2384         int i, polling_slave_found = 1;
2385
2386         if (cb_arg == NULL)
2387                 return;
2388
2389         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2390         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2391
2392         if (!bonded_ethdev->data->dev_started ||
2393                 !internals->link_status_polling_enabled)
2394                 return;
2395
2396         /* If device is currently being configured then don't check slaves link
2397          * status, wait until next period */
2398         if (rte_spinlock_trylock(&internals->lock)) {
2399                 if (internals->slave_count > 0)
2400                         polling_slave_found = 0;
2401
2402                 for (i = 0; i < internals->slave_count; i++) {
2403                         if (!internals->slaves[i].link_status_poll_enabled)
2404                                 continue;
2405
2406                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2407                         polling_slave_found = 1;
2408
2409                         /* Update slave link status */
2410                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2411                                         internals->slaves[i].link_status_wait_to_complete);
2412
2413                         /* if link status has changed since last checked then call lsc
2414                          * event callback */
2415                         if (slave_ethdev->data->dev_link.link_status !=
2416                                         internals->slaves[i].last_link_status) {
2417                                 internals->slaves[i].last_link_status =
2418                                                 slave_ethdev->data->dev_link.link_status;
2419
2420                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2421                                                 RTE_ETH_EVENT_INTR_LSC,
2422                                                 &bonded_ethdev->data->port_id,
2423                                                 NULL);
2424                         }
2425                 }
2426                 rte_spinlock_unlock(&internals->lock);
2427         }
2428
2429         if (polling_slave_found)
2430                 /* Set alarm to continue monitoring link status of slave ethdev's */
2431                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2432                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2433 }
2434
2435 static int
2436 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2437 {
2438         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2439
2440         struct bond_dev_private *bond_ctx;
2441         struct rte_eth_link slave_link;
2442
2443         uint32_t idx;
2444
2445         bond_ctx = ethdev->data->dev_private;
2446
2447         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2448
2449         if (ethdev->data->dev_started == 0 ||
2450                         bond_ctx->active_slave_count == 0) {
2451                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2452                 return 0;
2453         }
2454
2455         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2456
2457         if (wait_to_complete)
2458                 link_update = rte_eth_link_get;
2459         else
2460                 link_update = rte_eth_link_get_nowait;
2461
2462         switch (bond_ctx->mode) {
2463         case BONDING_MODE_BROADCAST:
2464                 /**
2465                  * Setting link speed to UINT32_MAX to ensure we pick up the
2466                  * value of the first active slave
2467                  */
2468                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2469
2470                 /**
2471                  * link speed is minimum value of all the slaves link speed as
2472                  * packet loss will occur on this slave if transmission at rates
2473                  * greater than this are attempted
2474                  */
2475                 for (idx = 1; idx < bond_ctx->active_slave_count; idx++) {
2476                         link_update(bond_ctx->active_slaves[0], &slave_link);
2477
2478                         if (slave_link.link_speed <
2479                                         ethdev->data->dev_link.link_speed)
2480                                 ethdev->data->dev_link.link_speed =
2481                                                 slave_link.link_speed;
2482                 }
2483                 break;
2484         case BONDING_MODE_ACTIVE_BACKUP:
2485                 /* Current primary slave */
2486                 link_update(bond_ctx->current_primary_port, &slave_link);
2487
2488                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2489                 break;
2490         case BONDING_MODE_8023AD:
2491                 ethdev->data->dev_link.link_autoneg =
2492                                 bond_ctx->mode4.slave_link.link_autoneg;
2493                 ethdev->data->dev_link.link_duplex =
2494                                 bond_ctx->mode4.slave_link.link_duplex;
2495                 /* fall through to update link speed */
2496         case BONDING_MODE_ROUND_ROBIN:
2497         case BONDING_MODE_BALANCE:
2498         case BONDING_MODE_TLB:
2499         case BONDING_MODE_ALB:
2500         default:
2501                 /**
2502                  * In theses mode the maximum theoretical link speed is the sum
2503                  * of all the slaves
2504                  */
2505                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2506
2507                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2508                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2509
2510                         ethdev->data->dev_link.link_speed +=
2511                                         slave_link.link_speed;
2512                 }
2513         }
2514
2515
2516         return 0;
2517 }
2518
2519
2520 static int
2521 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2522 {
2523         struct bond_dev_private *internals = dev->data->dev_private;
2524         struct rte_eth_stats slave_stats;
2525         int i, j;
2526
2527         for (i = 0; i < internals->slave_count; i++) {
2528                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2529
2530                 stats->ipackets += slave_stats.ipackets;
2531                 stats->opackets += slave_stats.opackets;
2532                 stats->ibytes += slave_stats.ibytes;
2533                 stats->obytes += slave_stats.obytes;
2534                 stats->imissed += slave_stats.imissed;
2535                 stats->ierrors += slave_stats.ierrors;
2536                 stats->oerrors += slave_stats.oerrors;
2537                 stats->rx_nombuf += slave_stats.rx_nombuf;
2538
2539                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2540                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2541                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2542                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2543                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2544                         stats->q_errors[j] += slave_stats.q_errors[j];
2545                 }
2546
2547         }
2548
2549         return 0;
2550 }
2551
2552 static void
2553 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2554 {
2555         struct bond_dev_private *internals = dev->data->dev_private;
2556         int i;
2557
2558         for (i = 0; i < internals->slave_count; i++)
2559                 rte_eth_stats_reset(internals->slaves[i].port_id);
2560 }
2561
2562 static void
2563 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2564 {
2565         struct bond_dev_private *internals = eth_dev->data->dev_private;
2566         int i;
2567
2568         internals->promiscuous_en = 1;
2569
2570         switch (internals->mode) {
2571         /* Promiscuous mode is propagated to all slaves */
2572         case BONDING_MODE_ROUND_ROBIN:
2573         case BONDING_MODE_BALANCE:
2574         case BONDING_MODE_BROADCAST:
2575                 for (i = 0; i < internals->slave_count; i++)
2576                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2577                 break;
2578         /* In mode4 promiscus mode is managed when slave is added/removed */
2579         case BONDING_MODE_8023AD:
2580                 break;
2581         /* Promiscuous mode is propagated only to primary slave */
2582         case BONDING_MODE_ACTIVE_BACKUP:
2583         case BONDING_MODE_TLB:
2584         case BONDING_MODE_ALB:
2585         default:
2586                 rte_eth_promiscuous_enable(internals->current_primary_port);
2587         }
2588 }
2589
2590 static void
2591 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2592 {
2593         struct bond_dev_private *internals = dev->data->dev_private;
2594         int i;
2595
2596         internals->promiscuous_en = 0;
2597
2598         switch (internals->mode) {
2599         /* Promiscuous mode is propagated to all slaves */
2600         case BONDING_MODE_ROUND_ROBIN:
2601         case BONDING_MODE_BALANCE:
2602         case BONDING_MODE_BROADCAST:
2603                 for (i = 0; i < internals->slave_count; i++)
2604                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2605                 break;
2606         /* In mode4 promiscus mode is set managed when slave is added/removed */
2607         case BONDING_MODE_8023AD:
2608                 break;
2609         /* Promiscuous mode is propagated only to primary slave */
2610         case BONDING_MODE_ACTIVE_BACKUP:
2611         case BONDING_MODE_TLB:
2612         case BONDING_MODE_ALB:
2613         default:
2614                 rte_eth_promiscuous_disable(internals->current_primary_port);
2615         }
2616 }
2617
2618 static void
2619 bond_ethdev_delayed_lsc_propagation(void *arg)
2620 {
2621         if (arg == NULL)
2622                 return;
2623
2624         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2625                         RTE_ETH_EVENT_INTR_LSC, NULL);
2626 }
2627
2628 int
2629 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2630                 void *param, void *ret_param __rte_unused)
2631 {
2632         struct rte_eth_dev *bonded_eth_dev;
2633         struct bond_dev_private *internals;
2634         struct rte_eth_link link;
2635         int rc = -1;
2636
2637         int i, valid_slave = 0;
2638         uint8_t active_pos;
2639         uint8_t lsc_flag = 0;
2640
2641         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2642                 return rc;
2643
2644         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
2645
2646         if (check_for_bonded_ethdev(bonded_eth_dev))
2647                 return rc;
2648
2649         internals = bonded_eth_dev->data->dev_private;
2650
2651         /* If the device isn't started don't handle interrupts */
2652         if (!bonded_eth_dev->data->dev_started)
2653                 return rc;
2654
2655         /* verify that port_id is a valid slave of bonded port */
2656         for (i = 0; i < internals->slave_count; i++) {
2657                 if (internals->slaves[i].port_id == port_id) {
2658                         valid_slave = 1;
2659                         break;
2660                 }
2661         }
2662
2663         if (!valid_slave)
2664                 return rc;
2665
2666         /* Synchronize lsc callback parallel calls either by real link event
2667          * from the slaves PMDs or by the bonding PMD itself.
2668          */
2669         rte_spinlock_lock(&internals->lsc_lock);
2670
2671         /* Search for port in active port list */
2672         active_pos = find_slave_by_id(internals->active_slaves,
2673                         internals->active_slave_count, port_id);
2674
2675         rte_eth_link_get_nowait(port_id, &link);
2676         if (link.link_status) {
2677                 if (active_pos < internals->active_slave_count)
2678                         goto link_update;
2679
2680                 /* if no active slave ports then set this port to be primary port */
2681                 if (internals->active_slave_count < 1) {
2682                         /* If first active slave, then change link status */
2683                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2684                         internals->current_primary_port = port_id;
2685                         lsc_flag = 1;
2686
2687                         mac_address_slaves_update(bonded_eth_dev);
2688                 }
2689
2690                 /* check link state properties if bonded link is up*/
2691                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2692                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2693                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2694                                              "for slave %d in bonding mode %d",
2695                                              port_id, internals->mode);
2696                 } else {
2697                         /* inherit slave link properties */
2698                         link_properties_set(bonded_eth_dev, &link);
2699                 }
2700
2701                 activate_slave(bonded_eth_dev, port_id);
2702
2703                 /* If user has defined the primary port then default to using it */
2704                 if (internals->user_defined_primary_port &&
2705                                 internals->primary_port == port_id)
2706                         bond_ethdev_primary_set(internals, port_id);
2707         } else {
2708                 if (active_pos == internals->active_slave_count)
2709                         goto link_update;
2710
2711                 /* Remove from active slave list */
2712                 deactivate_slave(bonded_eth_dev, port_id);
2713
2714                 if (internals->active_slave_count < 1)
2715                         lsc_flag = 1;
2716
2717                 /* Update primary id, take first active slave from list or if none
2718                  * available set to -1 */
2719                 if (port_id == internals->current_primary_port) {
2720                         if (internals->active_slave_count > 0)
2721                                 bond_ethdev_primary_set(internals,
2722                                                 internals->active_slaves[0]);
2723                         else
2724                                 internals->current_primary_port = internals->primary_port;
2725                 }
2726         }
2727
2728 link_update:
2729         /**
2730          * Update bonded device link properties after any change to active
2731          * slaves
2732          */
2733         bond_ethdev_link_update(bonded_eth_dev, 0);
2734
2735         if (lsc_flag) {
2736                 /* Cancel any possible outstanding interrupts if delays are enabled */
2737                 if (internals->link_up_delay_ms > 0 ||
2738                         internals->link_down_delay_ms > 0)
2739                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2740                                         bonded_eth_dev);
2741
2742                 if (bonded_eth_dev->data->dev_link.link_status) {
2743                         if (internals->link_up_delay_ms > 0)
2744                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2745                                                 bond_ethdev_delayed_lsc_propagation,
2746                                                 (void *)bonded_eth_dev);
2747                         else
2748                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2749                                                 RTE_ETH_EVENT_INTR_LSC,
2750                                                 NULL);
2751
2752                 } else {
2753                         if (internals->link_down_delay_ms > 0)
2754                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2755                                                 bond_ethdev_delayed_lsc_propagation,
2756                                                 (void *)bonded_eth_dev);
2757                         else
2758                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2759                                                 RTE_ETH_EVENT_INTR_LSC,
2760                                                 NULL);
2761                 }
2762         }
2763
2764         rte_spinlock_unlock(&internals->lsc_lock);
2765
2766         return rc;
2767 }
2768
2769 static int
2770 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2771                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2772 {
2773         unsigned i, j;
2774         int result = 0;
2775         int slave_reta_size;
2776         unsigned reta_count;
2777         struct bond_dev_private *internals = dev->data->dev_private;
2778
2779         if (reta_size != internals->reta_size)
2780                 return -EINVAL;
2781
2782          /* Copy RETA table */
2783         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2784
2785         for (i = 0; i < reta_count; i++) {
2786                 internals->reta_conf[i].mask = reta_conf[i].mask;
2787                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2788                         if ((reta_conf[i].mask >> j) & 0x01)
2789                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2790         }
2791
2792         /* Fill rest of array */
2793         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2794                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2795                                 sizeof(internals->reta_conf[0]) * reta_count);
2796
2797         /* Propagate RETA over slaves */
2798         for (i = 0; i < internals->slave_count; i++) {
2799                 slave_reta_size = internals->slaves[i].reta_size;
2800                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2801                                 &internals->reta_conf[0], slave_reta_size);
2802                 if (result < 0)
2803                         return result;
2804         }
2805
2806         return 0;
2807 }
2808
2809 static int
2810 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2811                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2812 {
2813         int i, j;
2814         struct bond_dev_private *internals = dev->data->dev_private;
2815
2816         if (reta_size != internals->reta_size)
2817                 return -EINVAL;
2818
2819          /* Copy RETA table */
2820         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2821                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2822                         if ((reta_conf[i].mask >> j) & 0x01)
2823                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2824
2825         return 0;
2826 }
2827
2828 static int
2829 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2830                 struct rte_eth_rss_conf *rss_conf)
2831 {
2832         int i, result = 0;
2833         struct bond_dev_private *internals = dev->data->dev_private;
2834         struct rte_eth_rss_conf bond_rss_conf;
2835
2836         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2837
2838         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2839
2840         if (bond_rss_conf.rss_hf != 0)
2841                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2842
2843         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2844                         sizeof(internals->rss_key)) {
2845                 if (bond_rss_conf.rss_key_len == 0)
2846                         bond_rss_conf.rss_key_len = 40;
2847                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2848                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2849                                 internals->rss_key_len);
2850         }
2851
2852         for (i = 0; i < internals->slave_count; i++) {
2853                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2854                                 &bond_rss_conf);
2855                 if (result < 0)
2856                         return result;
2857         }
2858
2859         return 0;
2860 }
2861
2862 static int
2863 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2864                 struct rte_eth_rss_conf *rss_conf)
2865 {
2866         struct bond_dev_private *internals = dev->data->dev_private;
2867
2868         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2869         rss_conf->rss_key_len = internals->rss_key_len;
2870         if (rss_conf->rss_key)
2871                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2872
2873         return 0;
2874 }
2875
2876 static int
2877 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2878 {
2879         struct rte_eth_dev *slave_eth_dev;
2880         struct bond_dev_private *internals = dev->data->dev_private;
2881         int ret, i;
2882
2883         rte_spinlock_lock(&internals->lock);
2884
2885         for (i = 0; i < internals->slave_count; i++) {
2886                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2887                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2888                         rte_spinlock_unlock(&internals->lock);
2889                         return -ENOTSUP;
2890                 }
2891         }
2892         for (i = 0; i < internals->slave_count; i++) {
2893                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2894                 if (ret < 0) {
2895                         rte_spinlock_unlock(&internals->lock);
2896                         return ret;
2897                 }
2898         }
2899
2900         rte_spinlock_unlock(&internals->lock);
2901         return 0;
2902 }
2903
2904 static int
2905 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2906 {
2907         if (mac_address_set(dev, addr)) {
2908                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2909                 return -EINVAL;
2910         }
2911
2912         return 0;
2913 }
2914
2915 static int
2916 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2917                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2918 {
2919         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2920                 *(const void **)arg = &bond_flow_ops;
2921                 return 0;
2922         }
2923         return -ENOTSUP;
2924 }
2925
2926 static int
2927 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2928                                 __rte_unused uint32_t index, uint32_t vmdq)
2929 {
2930         struct rte_eth_dev *slave_eth_dev;
2931         struct bond_dev_private *internals = dev->data->dev_private;
2932         int ret, i;
2933
2934         rte_spinlock_lock(&internals->lock);
2935
2936         for (i = 0; i < internals->slave_count; i++) {
2937                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2938                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2939                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2940                         ret = -ENOTSUP;
2941                         goto end;
2942                 }
2943         }
2944
2945         for (i = 0; i < internals->slave_count; i++) {
2946                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2947                                 mac_addr, vmdq);
2948                 if (ret < 0) {
2949                         /* rollback */
2950                         for (i--; i >= 0; i--)
2951                                 rte_eth_dev_mac_addr_remove(
2952                                         internals->slaves[i].port_id, mac_addr);
2953                         goto end;
2954                 }
2955         }
2956
2957         ret = 0;
2958 end:
2959         rte_spinlock_unlock(&internals->lock);
2960         return ret;
2961 }
2962
2963 static void
2964 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2965 {
2966         struct rte_eth_dev *slave_eth_dev;
2967         struct bond_dev_private *internals = dev->data->dev_private;
2968         int i;
2969
2970         rte_spinlock_lock(&internals->lock);
2971
2972         for (i = 0; i < internals->slave_count; i++) {
2973                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2974                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2975                         goto end;
2976         }
2977
2978         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
2979
2980         for (i = 0; i < internals->slave_count; i++)
2981                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2982                                 mac_addr);
2983
2984 end:
2985         rte_spinlock_unlock(&internals->lock);
2986 }
2987
2988 const struct eth_dev_ops default_dev_ops = {
2989         .dev_start            = bond_ethdev_start,
2990         .dev_stop             = bond_ethdev_stop,
2991         .dev_close            = bond_ethdev_close,
2992         .dev_configure        = bond_ethdev_configure,
2993         .dev_infos_get        = bond_ethdev_info,
2994         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2995         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2996         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2997         .rx_queue_release     = bond_ethdev_rx_queue_release,
2998         .tx_queue_release     = bond_ethdev_tx_queue_release,
2999         .link_update          = bond_ethdev_link_update,
3000         .stats_get            = bond_ethdev_stats_get,
3001         .stats_reset          = bond_ethdev_stats_reset,
3002         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3003         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3004         .reta_update          = bond_ethdev_rss_reta_update,
3005         .reta_query           = bond_ethdev_rss_reta_query,
3006         .rss_hash_update      = bond_ethdev_rss_hash_update,
3007         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3008         .mtu_set              = bond_ethdev_mtu_set,
3009         .mac_addr_set         = bond_ethdev_mac_address_set,
3010         .mac_addr_add         = bond_ethdev_mac_addr_add,
3011         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3012         .filter_ctrl          = bond_filter_ctrl
3013 };
3014
3015 static int
3016 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3017 {
3018         const char *name = rte_vdev_device_name(dev);
3019         uint8_t socket_id = dev->device.numa_node;
3020         struct bond_dev_private *internals = NULL;
3021         struct rte_eth_dev *eth_dev = NULL;
3022         uint32_t vlan_filter_bmp_size;
3023
3024         /* now do all data allocation - for eth_dev structure, dummy pci driver
3025          * and internal (private) data
3026          */
3027
3028         /* reserve an ethdev entry */
3029         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3030         if (eth_dev == NULL) {
3031                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3032                 goto err;
3033         }
3034
3035         internals = eth_dev->data->dev_private;
3036         eth_dev->data->nb_rx_queues = (uint16_t)1;
3037         eth_dev->data->nb_tx_queues = (uint16_t)1;
3038
3039         /* Allocate memory for storing MAC addresses */
3040         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3041                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3042         if (eth_dev->data->mac_addrs == NULL) {
3043                 RTE_BOND_LOG(ERR,
3044                              "Failed to allocate %u bytes needed to store MAC addresses",
3045                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3046                 goto err;
3047         }
3048
3049         eth_dev->dev_ops = &default_dev_ops;
3050         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3051
3052         rte_spinlock_init(&internals->lock);
3053         rte_spinlock_init(&internals->lsc_lock);
3054
3055         internals->port_id = eth_dev->data->port_id;
3056         internals->mode = BONDING_MODE_INVALID;
3057         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3058         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3059         internals->burst_xmit_hash = burst_xmit_l2_hash;
3060         internals->user_defined_mac = 0;
3061
3062         internals->link_status_polling_enabled = 0;
3063
3064         internals->link_status_polling_interval_ms =
3065                 DEFAULT_POLLING_INTERVAL_10_MS;
3066         internals->link_down_delay_ms = 0;
3067         internals->link_up_delay_ms = 0;
3068
3069         internals->slave_count = 0;
3070         internals->active_slave_count = 0;
3071         internals->rx_offload_capa = 0;
3072         internals->tx_offload_capa = 0;
3073         internals->rx_queue_offload_capa = 0;
3074         internals->tx_queue_offload_capa = 0;
3075         internals->candidate_max_rx_pktlen = 0;
3076         internals->max_rx_pktlen = 0;
3077
3078         /* Initially allow to choose any offload type */
3079         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3080
3081         memset(&internals->default_rxconf, 0,
3082                sizeof(internals->default_rxconf));
3083         memset(&internals->default_txconf, 0,
3084                sizeof(internals->default_txconf));
3085
3086         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3087         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3088
3089         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3090         memset(internals->slaves, 0, sizeof(internals->slaves));
3091
3092         TAILQ_INIT(&internals->flow_list);
3093         internals->flow_isolated_valid = 0;
3094
3095         /* Set mode 4 default configuration */
3096         bond_mode_8023ad_setup(eth_dev, NULL);
3097         if (bond_ethdev_mode_set(eth_dev, mode)) {
3098                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3099                                  eth_dev->data->port_id, mode);
3100                 goto err;
3101         }
3102
3103         vlan_filter_bmp_size =
3104                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3105         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3106                                                    RTE_CACHE_LINE_SIZE);
3107         if (internals->vlan_filter_bmpmem == NULL) {
3108                 RTE_BOND_LOG(ERR,
3109                              "Failed to allocate vlan bitmap for bonded device %u",
3110                              eth_dev->data->port_id);
3111                 goto err;
3112         }
3113
3114         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3115                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3116         if (internals->vlan_filter_bmp == NULL) {
3117                 RTE_BOND_LOG(ERR,
3118                              "Failed to init vlan bitmap for bonded device %u",
3119                              eth_dev->data->port_id);
3120                 rte_free(internals->vlan_filter_bmpmem);
3121                 goto err;
3122         }
3123
3124         return eth_dev->data->port_id;
3125
3126 err:
3127         rte_free(internals);
3128         if (eth_dev != NULL) {
3129                 rte_free(eth_dev->data->mac_addrs);
3130                 rte_eth_dev_release_port(eth_dev);
3131         }
3132         return -1;
3133 }
3134
3135 static int
3136 bond_probe(struct rte_vdev_device *dev)
3137 {
3138         const char *name;
3139         struct bond_dev_private *internals;
3140         struct rte_kvargs *kvlist;
3141         uint8_t bonding_mode, socket_id/*, agg_mode*/;
3142         int  arg_count, port_id;
3143         uint8_t agg_mode;
3144         struct rte_eth_dev *eth_dev;
3145
3146         if (!dev)
3147                 return -EINVAL;
3148
3149         name = rte_vdev_device_name(dev);
3150         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3151
3152         if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
3153             strlen(rte_vdev_device_args(dev)) == 0) {
3154                 eth_dev = rte_eth_dev_attach_secondary(name);
3155                 if (!eth_dev) {
3156                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3157                         return -1;
3158                 }
3159                 /* TODO: request info from primary to set up Rx and Tx */
3160                 eth_dev->dev_ops = &default_dev_ops;
3161                 eth_dev->device = &dev->device;
3162                 rte_eth_dev_probing_finish(eth_dev);
3163                 return 0;
3164         }
3165
3166         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3167                 pmd_bond_init_valid_arguments);
3168         if (kvlist == NULL)
3169                 return -1;
3170
3171         /* Parse link bonding mode */
3172         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3173                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3174                                 &bond_ethdev_parse_slave_mode_kvarg,
3175                                 &bonding_mode) != 0) {
3176                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3177                                         name);
3178                         goto parse_error;
3179                 }
3180         } else {
3181                 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3182                                 "device %s", name);
3183                 goto parse_error;
3184         }
3185
3186         /* Parse socket id to create bonding device on */
3187         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3188         if (arg_count == 1) {
3189                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3190                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3191                                 != 0) {
3192                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3193                                         "bonded device %s", name);
3194                         goto parse_error;
3195                 }
3196         } else if (arg_count > 1) {
3197                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3198                                 "bonded device %s", name);
3199                 goto parse_error;
3200         } else {
3201                 socket_id = rte_socket_id();
3202         }
3203
3204         dev->device.numa_node = socket_id;
3205
3206         /* Create link bonding eth device */
3207         port_id = bond_alloc(dev, bonding_mode);
3208         if (port_id < 0) {
3209                 RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on "
3210                                 "socket %u.",   name, bonding_mode, socket_id);
3211                 goto parse_error;
3212         }
3213         internals = rte_eth_devices[port_id].data->dev_private;
3214         internals->kvlist = kvlist;
3215
3216         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3217
3218         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3219                 if (rte_kvargs_process(kvlist,
3220                                 PMD_BOND_AGG_MODE_KVARG,
3221                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3222                                 &agg_mode) != 0) {
3223                         RTE_BOND_LOG(ERR,
3224                                         "Failed to parse agg selection mode for bonded device %s",
3225                                         name);
3226                         goto parse_error;
3227                 }
3228
3229                 if (internals->mode == BONDING_MODE_8023AD)
3230                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3231                                         agg_mode);
3232         } else {
3233                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3234         }
3235
3236         RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3237                         "socket %u.",   name, port_id, bonding_mode, socket_id);
3238         return 0;
3239
3240 parse_error:
3241         rte_kvargs_free(kvlist);
3242
3243         return -1;
3244 }
3245
3246 static int
3247 bond_remove(struct rte_vdev_device *dev)
3248 {
3249         struct rte_eth_dev *eth_dev;
3250         struct bond_dev_private *internals;
3251         const char *name;
3252
3253         if (!dev)
3254                 return -EINVAL;
3255
3256         name = rte_vdev_device_name(dev);
3257         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3258
3259         /* now free all data allocation - for eth_dev structure,
3260          * dummy pci driver and internal (private) data
3261          */
3262
3263         /* find an ethdev entry */
3264         eth_dev = rte_eth_dev_allocated(name);
3265         if (eth_dev == NULL)
3266                 return -ENODEV;
3267
3268         RTE_ASSERT(eth_dev->device == &dev->device);
3269
3270         internals = eth_dev->data->dev_private;
3271         if (internals->slave_count != 0)
3272                 return -EBUSY;
3273
3274         if (eth_dev->data->dev_started == 1) {
3275                 bond_ethdev_stop(eth_dev);
3276                 bond_ethdev_close(eth_dev);
3277         }
3278
3279         eth_dev->dev_ops = NULL;
3280         eth_dev->rx_pkt_burst = NULL;
3281         eth_dev->tx_pkt_burst = NULL;
3282
3283         internals = eth_dev->data->dev_private;
3284         /* Try to release mempool used in mode6. If the bond
3285          * device is not mode6, free the NULL is not problem.
3286          */
3287         rte_mempool_free(internals->mode6.mempool);
3288         rte_bitmap_free(internals->vlan_filter_bmp);
3289         rte_free(internals->vlan_filter_bmpmem);
3290         rte_free(eth_dev->data->dev_private);
3291         rte_free(eth_dev->data->mac_addrs);
3292
3293         rte_eth_dev_release_port(eth_dev);
3294
3295         return 0;
3296 }
3297
3298 /* this part will resolve the slave portids after all the other pdev and vdev
3299  * have been allocated */
3300 static int
3301 bond_ethdev_configure(struct rte_eth_dev *dev)
3302 {
3303         const char *name = dev->device->name;
3304         struct bond_dev_private *internals = dev->data->dev_private;
3305         struct rte_kvargs *kvlist = internals->kvlist;
3306         int arg_count;
3307         uint16_t port_id = dev - rte_eth_devices;
3308         uint8_t agg_mode;
3309
3310         static const uint8_t default_rss_key[40] = {
3311                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3312                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3313                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3314                 0xBE, 0xAC, 0x01, 0xFA
3315         };
3316
3317         unsigned i, j;
3318
3319         /*
3320          * If RSS is enabled, fill table with default values and
3321          * set key to the the value specified in port RSS configuration.
3322          * Fall back to default RSS key if the key is not specified
3323          */
3324         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3325                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3326                         internals->rss_key_len =
3327                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3328                         memcpy(internals->rss_key,
3329                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3330                                internals->rss_key_len);
3331                 } else {
3332                         internals->rss_key_len = sizeof(default_rss_key);
3333                         memcpy(internals->rss_key, default_rss_key,
3334                                internals->rss_key_len);
3335                 }
3336
3337                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3338                         internals->reta_conf[i].mask = ~0LL;
3339                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3340                                 internals->reta_conf[i].reta[j] =
3341                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3342                                                 dev->data->nb_rx_queues;
3343                 }
3344         }
3345
3346         /* set the max_rx_pktlen */
3347         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3348
3349         /*
3350          * if no kvlist, it means that this bonded device has been created
3351          * through the bonding api.
3352          */
3353         if (!kvlist)
3354                 return 0;
3355
3356         /* Parse MAC address for bonded device */
3357         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3358         if (arg_count == 1) {
3359                 struct ether_addr bond_mac;
3360
3361                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3362                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3363                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3364                                      name);
3365                         return -1;
3366                 }
3367
3368                 /* Set MAC address */
3369                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3370                         RTE_BOND_LOG(ERR,
3371                                      "Failed to set mac address on bonded device %s",
3372                                      name);
3373                         return -1;
3374                 }
3375         } else if (arg_count > 1) {
3376                 RTE_BOND_LOG(ERR,
3377                              "MAC address can be specified only once for bonded device %s",
3378                              name);
3379                 return -1;
3380         }
3381
3382         /* Parse/set balance mode transmit policy */
3383         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3384         if (arg_count == 1) {
3385                 uint8_t xmit_policy;
3386
3387                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3388                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3389                     0) {
3390                         RTE_BOND_LOG(INFO,
3391                                      "Invalid xmit policy specified for bonded device %s",
3392                                      name);
3393                         return -1;
3394                 }
3395
3396                 /* Set balance mode transmit policy*/
3397                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3398                         RTE_BOND_LOG(ERR,
3399                                      "Failed to set balance xmit policy on bonded device %s",
3400                                      name);
3401                         return -1;
3402                 }
3403         } else if (arg_count > 1) {
3404                 RTE_BOND_LOG(ERR,
3405                              "Transmit policy can be specified only once for bonded device %s",
3406                              name);
3407                 return -1;
3408         }
3409
3410         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3411                 if (rte_kvargs_process(kvlist,
3412                                        PMD_BOND_AGG_MODE_KVARG,
3413                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3414                                        &agg_mode) != 0) {
3415                         RTE_BOND_LOG(ERR,
3416                                      "Failed to parse agg selection mode for bonded device %s",
3417                                      name);
3418                 }
3419                 if (internals->mode == BONDING_MODE_8023AD)
3420                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3421                                                               agg_mode);
3422         }
3423
3424         /* Parse/add slave ports to bonded device */
3425         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3426                 struct bond_ethdev_slave_ports slave_ports;
3427                 unsigned i;
3428
3429                 memset(&slave_ports, 0, sizeof(slave_ports));
3430
3431                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3432                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3433                         RTE_BOND_LOG(ERR,
3434                                      "Failed to parse slave ports for bonded device %s",
3435                                      name);
3436                         return -1;
3437                 }
3438
3439                 for (i = 0; i < slave_ports.slave_count; i++) {
3440                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3441                                 RTE_BOND_LOG(ERR,
3442                                              "Failed to add port %d as slave to bonded device %s",
3443                                              slave_ports.slaves[i], name);
3444                         }
3445                 }
3446
3447         } else {
3448                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3449                 return -1;
3450         }
3451
3452         /* Parse/set primary slave port id*/
3453         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3454         if (arg_count == 1) {
3455                 uint16_t primary_slave_port_id;
3456
3457                 if (rte_kvargs_process(kvlist,
3458                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3459                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3460                                        &primary_slave_port_id) < 0) {
3461                         RTE_BOND_LOG(INFO,
3462                                      "Invalid primary slave port id specified for bonded device %s",
3463                                      name);
3464                         return -1;
3465                 }
3466
3467                 /* Set balance mode transmit policy*/
3468                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3469                     != 0) {
3470                         RTE_BOND_LOG(ERR,
3471                                      "Failed to set primary slave port %d on bonded device %s",
3472                                      primary_slave_port_id, name);
3473                         return -1;
3474                 }
3475         } else if (arg_count > 1) {
3476                 RTE_BOND_LOG(INFO,
3477                              "Primary slave can be specified only once for bonded device %s",
3478                              name);
3479                 return -1;
3480         }
3481
3482         /* Parse link status monitor polling interval */
3483         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3484         if (arg_count == 1) {
3485                 uint32_t lsc_poll_interval_ms;
3486
3487                 if (rte_kvargs_process(kvlist,
3488                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3489                                        &bond_ethdev_parse_time_ms_kvarg,
3490                                        &lsc_poll_interval_ms) < 0) {
3491                         RTE_BOND_LOG(INFO,
3492                                      "Invalid lsc polling interval value specified for bonded"
3493                                      " device %s", name);
3494                         return -1;
3495                 }
3496
3497                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3498                     != 0) {
3499                         RTE_BOND_LOG(ERR,
3500                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3501                                      lsc_poll_interval_ms, name);
3502                         return -1;
3503                 }
3504         } else if (arg_count > 1) {
3505                 RTE_BOND_LOG(INFO,
3506                              "LSC polling interval can be specified only once for bonded"
3507                              " device %s", name);
3508                 return -1;
3509         }
3510
3511         /* Parse link up interrupt propagation delay */
3512         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3513         if (arg_count == 1) {
3514                 uint32_t link_up_delay_ms;
3515
3516                 if (rte_kvargs_process(kvlist,
3517                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3518                                        &bond_ethdev_parse_time_ms_kvarg,
3519                                        &link_up_delay_ms) < 0) {
3520                         RTE_BOND_LOG(INFO,
3521                                      "Invalid link up propagation delay value specified for"
3522                                      " bonded device %s", name);
3523                         return -1;
3524                 }
3525
3526                 /* Set balance mode transmit policy*/
3527                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3528                     != 0) {
3529                         RTE_BOND_LOG(ERR,
3530                                      "Failed to set link up propagation delay (%u ms) on bonded"
3531                                      " device %s", link_up_delay_ms, name);
3532                         return -1;
3533                 }
3534         } else if (arg_count > 1) {
3535                 RTE_BOND_LOG(INFO,
3536                              "Link up propagation delay can be specified only once for"
3537                              " bonded device %s", name);
3538                 return -1;
3539         }
3540
3541         /* Parse link down interrupt propagation delay */
3542         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3543         if (arg_count == 1) {
3544                 uint32_t link_down_delay_ms;
3545
3546                 if (rte_kvargs_process(kvlist,
3547                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3548                                        &bond_ethdev_parse_time_ms_kvarg,
3549                                        &link_down_delay_ms) < 0) {
3550                         RTE_BOND_LOG(INFO,
3551                                      "Invalid link down propagation delay value specified for"
3552                                      " bonded device %s", name);
3553                         return -1;
3554                 }
3555
3556                 /* Set balance mode transmit policy*/
3557                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3558                     != 0) {
3559                         RTE_BOND_LOG(ERR,
3560                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3561                                      link_down_delay_ms, name);
3562                         return -1;
3563                 }
3564         } else if (arg_count > 1) {
3565                 RTE_BOND_LOG(INFO,
3566                              "Link down propagation delay can be specified only once for  bonded device %s",
3567                              name);
3568                 return -1;
3569         }
3570
3571         return 0;
3572 }
3573
3574 struct rte_vdev_driver pmd_bond_drv = {
3575         .probe = bond_probe,
3576         .remove = bond_remove,
3577 };
3578
3579 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3580 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3581
3582 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3583         "slave=<ifc> "
3584         "primary=<ifc> "
3585         "mode=[0-6] "
3586         "xmit_policy=[l2 | l23 | l34] "
3587         "agg_mode=[count | stable | bandwidth] "
3588         "socket_id=<int> "
3589         "mac=<mac addr> "
3590         "lsc_poll_period_ms=<int> "
3591         "up_delay=<int> "
3592         "down_delay=<int>");
3593
3594 int bond_logtype;
3595
3596 RTE_INIT(bond_init_log)
3597 {
3598         bond_logtype = rte_log_register("pmd.net.bon");
3599         if (bond_logtype >= 0)
3600                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3601 }