net/bonding: provide default Rx/Tx configuration
[dpdk.git] / drivers / net / bonding / rte_eth_bond_pmd.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <netinet/in.h>
6
7 #include <rte_mbuf.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_ethdev_vdev.h>
11 #include <rte_tcp.h>
12 #include <rte_udp.h>
13 #include <rte_ip.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
20 #include <rte_string_fns.h>
21
22 #include "rte_eth_bond.h"
23 #include "rte_eth_bond_private.h"
24 #include "rte_eth_bond_8023ad_private.h"
25
26 #define REORDER_PERIOD_MS 10
27 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
28 #define BOND_MAX_MAC_ADDRS 16
29
30 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
31
32 /* Table for statistics in mode 5 TLB */
33 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
34
35 static inline size_t
36 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
37 {
38         size_t vlan_offset = 0;
39
40         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
41                 rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
42                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
43
44                 vlan_offset = sizeof(struct vlan_hdr);
45                 *proto = vlan_hdr->eth_proto;
46
47                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
48                         vlan_hdr = vlan_hdr + 1;
49                         *proto = vlan_hdr->eth_proto;
50                         vlan_offset += sizeof(struct vlan_hdr);
51                 }
52         }
53         return vlan_offset;
54 }
55
56 static uint16_t
57 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
58 {
59         struct bond_dev_private *internals;
60
61         uint16_t num_rx_total = 0;
62         uint16_t slave_count;
63         uint16_t active_slave;
64         int i;
65
66         /* Cast to structure, containing bonded device's port id and queue id */
67         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
68         internals = bd_rx_q->dev_private;
69         slave_count = internals->active_slave_count;
70         active_slave = internals->active_slave;
71
72         for (i = 0; i < slave_count && nb_pkts; i++) {
73                 uint16_t num_rx_slave;
74
75                 /* Offset of pointer to *bufs increases as packets are received
76                  * from other slaves */
77                 num_rx_slave =
78                         rte_eth_rx_burst(internals->active_slaves[active_slave],
79                                          bd_rx_q->queue_id,
80                                          bufs + num_rx_total, nb_pkts);
81                 num_rx_total += num_rx_slave;
82                 nb_pkts -= num_rx_slave;
83                 if (++active_slave == slave_count)
84                         active_slave = 0;
85         }
86
87         if (++internals->active_slave == slave_count)
88                 internals->active_slave = 0;
89         return num_rx_total;
90 }
91
92 static uint16_t
93 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
94                 uint16_t nb_pkts)
95 {
96         struct bond_dev_private *internals;
97
98         /* Cast to structure, containing bonded device's port id and queue id */
99         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
100
101         internals = bd_rx_q->dev_private;
102
103         return rte_eth_rx_burst(internals->current_primary_port,
104                         bd_rx_q->queue_id, bufs, nb_pkts);
105 }
106
107 static inline uint8_t
108 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
109 {
110         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
111
112         return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
113                 (ethertype == ether_type_slow_be &&
114                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
115 }
116
117 /*****************************************************************************
118  * Flow director's setup for mode 4 optimization
119  */
120
121 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
122         .dst.addr_bytes = { 0 },
123         .src.addr_bytes = { 0 },
124         .type = RTE_BE16(ETHER_TYPE_SLOW),
125 };
126
127 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
128         .dst.addr_bytes = { 0 },
129         .src.addr_bytes = { 0 },
130         .type = 0xFFFF,
131 };
132
133 static struct rte_flow_item flow_item_8023ad[] = {
134         {
135                 .type = RTE_FLOW_ITEM_TYPE_ETH,
136                 .spec = &flow_item_eth_type_8023ad,
137                 .last = NULL,
138                 .mask = &flow_item_eth_mask_type_8023ad,
139         },
140         {
141                 .type = RTE_FLOW_ITEM_TYPE_END,
142                 .spec = NULL,
143                 .last = NULL,
144                 .mask = NULL,
145         }
146 };
147
148 const struct rte_flow_attr flow_attr_8023ad = {
149         .group = 0,
150         .priority = 0,
151         .ingress = 1,
152         .egress = 0,
153         .reserved = 0,
154 };
155
156 int
157 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
158                 uint16_t slave_port) {
159         struct rte_eth_dev_info slave_info;
160         struct rte_flow_error error;
161         struct bond_dev_private *internals = (struct bond_dev_private *)
162                         (bond_dev->data->dev_private);
163
164         const struct rte_flow_action_queue lacp_queue_conf = {
165                 .index = 0,
166         };
167
168         const struct rte_flow_action actions[] = {
169                 {
170                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
171                         .conf = &lacp_queue_conf
172                 },
173                 {
174                         .type = RTE_FLOW_ACTION_TYPE_END,
175                 }
176         };
177
178         int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
179                         flow_item_8023ad, actions, &error);
180         if (ret < 0) {
181                 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
182                                 __func__, error.message, slave_port,
183                                 internals->mode4.dedicated_queues.rx_qid);
184                 return -1;
185         }
186
187         rte_eth_dev_info_get(slave_port, &slave_info);
188         if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
189                         slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
190                 RTE_BOND_LOG(ERR,
191                         "%s: Slave %d capabilities doesn't allow to allocate additional queues",
192                         __func__, slave_port);
193                 return -1;
194         }
195
196         return 0;
197 }
198
199 int
200 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
201         struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
202         struct bond_dev_private *internals = (struct bond_dev_private *)
203                         (bond_dev->data->dev_private);
204         struct rte_eth_dev_info bond_info;
205         uint16_t idx;
206
207         /* Verify if all slaves in bonding supports flow director and */
208         if (internals->slave_count > 0) {
209                 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
210
211                 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
212                 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
213
214                 for (idx = 0; idx < internals->slave_count; idx++) {
215                         if (bond_ethdev_8023ad_flow_verify(bond_dev,
216                                         internals->slaves[idx].port_id) != 0)
217                                 return -1;
218                 }
219         }
220
221         return 0;
222 }
223
224 int
225 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
226
227         struct rte_flow_error error;
228         struct bond_dev_private *internals = (struct bond_dev_private *)
229                         (bond_dev->data->dev_private);
230
231         struct rte_flow_action_queue lacp_queue_conf = {
232                 .index = internals->mode4.dedicated_queues.rx_qid,
233         };
234
235         const struct rte_flow_action actions[] = {
236                 {
237                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
238                         .conf = &lacp_queue_conf
239                 },
240                 {
241                         .type = RTE_FLOW_ACTION_TYPE_END,
242                 }
243         };
244
245         internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
246                         &flow_attr_8023ad, flow_item_8023ad, actions, &error);
247         if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
248                 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
249                                 "(slave_port=%d queue_id=%d)",
250                                 error.message, slave_port,
251                                 internals->mode4.dedicated_queues.rx_qid);
252                 return -1;
253         }
254
255         return 0;
256 }
257
258 static uint16_t
259 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
260                 uint16_t nb_pkts)
261 {
262         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
263         struct bond_dev_private *internals = bd_rx_q->dev_private;
264         uint16_t num_rx_total = 0;      /* Total number of received packets */
265         uint16_t slaves[RTE_MAX_ETHPORTS];
266         uint16_t slave_count;
267         uint16_t active_slave;
268         uint16_t i;
269
270         /* Copy slave list to protect against slave up/down changes during tx
271          * bursting */
272         slave_count = internals->active_slave_count;
273         active_slave = internals->active_slave;
274         memcpy(slaves, internals->active_slaves,
275                         sizeof(internals->active_slaves[0]) * slave_count);
276
277         for (i = 0; i < slave_count && nb_pkts; i++) {
278                 uint16_t num_rx_slave;
279
280                 /* Read packets from this slave */
281                 num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
282                                                 bd_rx_q->queue_id,
283                                                 bufs + num_rx_total, nb_pkts);
284                 num_rx_total += num_rx_slave;
285                 nb_pkts -= num_rx_slave;
286
287                 if (++active_slave == slave_count)
288                         active_slave = 0;
289         }
290
291         if (++internals->active_slave == slave_count)
292                 internals->active_slave = 0;
293
294         return num_rx_total;
295 }
296
297 static uint16_t
298 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
299                 uint16_t nb_bufs)
300 {
301         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
302         struct bond_dev_private *internals = bd_tx_q->dev_private;
303
304         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
305         uint16_t slave_count;
306
307         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
308         uint16_t dist_slave_count;
309
310         /* 2-D array to sort mbufs for transmission on each slave into */
311         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
312         /* Number of mbufs for transmission on each slave */
313         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
314         /* Mapping array generated by hash function to map mbufs to slaves */
315         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
316
317         uint16_t slave_tx_count;
318         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
319
320         uint16_t i;
321
322         if (unlikely(nb_bufs == 0))
323                 return 0;
324
325         /* Copy slave list to protect against slave up/down changes during tx
326          * bursting */
327         slave_count = internals->active_slave_count;
328         if (unlikely(slave_count < 1))
329                 return 0;
330
331         memcpy(slave_port_ids, internals->active_slaves,
332                         sizeof(slave_port_ids[0]) * slave_count);
333
334
335         dist_slave_count = 0;
336         for (i = 0; i < slave_count; i++) {
337                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
338
339                 if (ACTOR_STATE(port, DISTRIBUTING))
340                         dist_slave_port_ids[dist_slave_count++] =
341                                         slave_port_ids[i];
342         }
343
344         if (unlikely(dist_slave_count < 1))
345                 return 0;
346
347         /*
348          * Populate slaves mbuf with the packets which are to be sent on it
349          * selecting output slave using hash based on xmit policy
350          */
351         internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
352                         bufs_slave_port_idxs);
353
354         for (i = 0; i < nb_bufs; i++) {
355                 /* Populate slave mbuf arrays with mbufs for that slave. */
356                 uint8_t slave_idx = bufs_slave_port_idxs[i];
357
358                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
359         }
360
361
362         /* Send packet burst on each slave device */
363         for (i = 0; i < dist_slave_count; i++) {
364                 if (slave_nb_bufs[i] == 0)
365                         continue;
366
367                 slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
368                                 bd_tx_q->queue_id, slave_bufs[i],
369                                 slave_nb_bufs[i]);
370
371                 total_tx_count += slave_tx_count;
372
373                 /* If tx burst fails move packets to end of bufs */
374                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
375                         int slave_tx_fail_count = slave_nb_bufs[i] -
376                                         slave_tx_count;
377                         total_tx_fail_count += slave_tx_fail_count;
378                         memcpy(&bufs[nb_bufs - total_tx_fail_count],
379                                &slave_bufs[i][slave_tx_count],
380                                slave_tx_fail_count * sizeof(bufs[0]));
381                 }
382         }
383
384         return total_tx_count;
385 }
386
387
388 static uint16_t
389 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
390                 uint16_t nb_pkts)
391 {
392         /* Cast to structure, containing bonded device's port id and queue id */
393         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
394         struct bond_dev_private *internals = bd_rx_q->dev_private;
395         struct ether_addr bond_mac;
396
397         struct ether_hdr *hdr;
398
399         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
400         uint16_t num_rx_total = 0;      /* Total number of received packets */
401         uint16_t slaves[RTE_MAX_ETHPORTS];
402         uint16_t slave_count, idx;
403
404         uint8_t collecting;  /* current slave collecting status */
405         const uint8_t promisc = internals->promiscuous_en;
406         uint8_t i, j, k;
407         uint8_t subtype;
408
409         rte_eth_macaddr_get(internals->port_id, &bond_mac);
410         /* Copy slave list to protect against slave up/down changes during tx
411          * bursting */
412         slave_count = internals->active_slave_count;
413         memcpy(slaves, internals->active_slaves,
414                         sizeof(internals->active_slaves[0]) * slave_count);
415
416         idx = internals->active_slave;
417         if (idx >= slave_count) {
418                 internals->active_slave = 0;
419                 idx = 0;
420         }
421         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
422                 j = num_rx_total;
423                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
424                                          COLLECTING);
425
426                 /* Read packets from this slave */
427                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
428                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
429
430                 for (k = j; k < 2 && k < num_rx_total; k++)
431                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
432
433                 /* Handle slow protocol packets. */
434                 while (j < num_rx_total) {
435
436                         /* If packet is not pure L2 and is known, skip it */
437                         if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
438                                 j++;
439                                 continue;
440                         }
441
442                         if (j + 3 < num_rx_total)
443                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
444
445                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
446                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
447
448                         /* Remove packet from array if it is slow packet or slave is not
449                          * in collecting state or bonding interface is not in promiscuous
450                          * mode and packet address does not match. */
451                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
452                                 !collecting || (!promisc &&
453                                         !is_multicast_ether_addr(&hdr->d_addr) &&
454                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
455
456                                 if (hdr->ether_type == ether_type_slow_be) {
457                                         bond_mode_8023ad_handle_slow_pkt(
458                                             internals, slaves[idx], bufs[j]);
459                                 } else
460                                         rte_pktmbuf_free(bufs[j]);
461
462                                 /* Packet is managed by mode 4 or dropped, shift the array */
463                                 num_rx_total--;
464                                 if (j < num_rx_total) {
465                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
466                                                 (num_rx_total - j));
467                                 }
468                         } else
469                                 j++;
470                 }
471                 if (unlikely(++idx == slave_count))
472                         idx = 0;
473         }
474
475         if (++internals->active_slave == slave_count)
476                 internals->active_slave = 0;
477
478         return num_rx_total;
479 }
480
481 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
482 uint32_t burstnumberRX;
483 uint32_t burstnumberTX;
484
485 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
486
487 static void
488 arp_op_name(uint16_t arp_op, char *buf)
489 {
490         switch (arp_op) {
491         case ARP_OP_REQUEST:
492                 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
493                 return;
494         case ARP_OP_REPLY:
495                 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
496                 return;
497         case ARP_OP_REVREQUEST:
498                 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
499                                 "Reverse ARP Request");
500                 return;
501         case ARP_OP_REVREPLY:
502                 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
503                                 "Reverse ARP Reply");
504                 return;
505         case ARP_OP_INVREQUEST:
506                 snprintf(buf, sizeof("Peer Identify Request"), "%s",
507                                 "Peer Identify Request");
508                 return;
509         case ARP_OP_INVREPLY:
510                 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
511                                 "Peer Identify Reply");
512                 return;
513         default:
514                 break;
515         }
516         snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
517         return;
518 }
519 #endif
/* Size of a buffer able to hold "255.255.255.255" plus the NUL. */
#define MaxIPv4String	16
/*
 * Format a big-endian IPv4 address as dotted decimal text.
 *
 * be_ipv4_addr - address in network byte order.
 * buf          - destination buffer.
 * buf_size     - size of buf in bytes; output is truncated to fit.
 */
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	const uint32_t host_addr = rte_be_to_cpu_32(be_ipv4_addr);
	/* Most significant byte first. */
	const uint8_t octets[4] = {
		(uint8_t)((host_addr >> 24) & 0xFF),
		(uint8_t)((host_addr >> 16) & 0xFF),
		(uint8_t)((host_addr >> 8) & 0xFF),
		(uint8_t)(host_addr & 0xFF),
	};

	snprintf(buf, buf_size, "%d.%d.%d.%d",
		octets[0], octets[1], octets[2], octets[3]);
}
531
532 #define MAX_CLIENTS_NUMBER      128
533 uint8_t active_clients;
534 struct client_stats_t {
535         uint16_t port;
536         uint32_t ipv4_addr;
537         uint32_t ipv4_rx_packets;
538         uint32_t ipv4_tx_packets;
539 };
540 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
541
542 static void
543 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
544 {
545         int i = 0;
546
547         for (; i < MAX_CLIENTS_NUMBER; i++)     {
548                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
549                         /* Just update RX packets number for this client */
550                         if (TXorRXindicator == &burstnumberRX)
551                                 client_stats[i].ipv4_rx_packets++;
552                         else
553                                 client_stats[i].ipv4_tx_packets++;
554                         return;
555                 }
556         }
557         /* We have a new client. Insert him to the table, and increment stats */
558         if (TXorRXindicator == &burstnumberRX)
559                 client_stats[active_clients].ipv4_rx_packets++;
560         else
561                 client_stats[active_clients].ipv4_tx_packets++;
562         client_stats[active_clients].ipv4_addr = addr;
563         client_stats[active_clients].port = port;
564         active_clients++;
565
566 }
567
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
/*
 * Log one mode 6 debug line: tag, port, source/destination MAC and IP
 * strings, an ARP operation name and the incremented burst counter.
 *
 * Every argument is parenthesised in the expansion so that expressions
 * (e.g. "*burstnumber", which is incremented in place) expand safely.
 * eth_h is evaluated several times and must be free of side effects.
 */
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
		(info),							\
		(port),							\
		(eth_h)->s_addr.addr_bytes[0], (eth_h)->s_addr.addr_bytes[1], \
		(eth_h)->s_addr.addr_bytes[2], (eth_h)->s_addr.addr_bytes[3], \
		(eth_h)->s_addr.addr_bytes[4], (eth_h)->s_addr.addr_bytes[5], \
		(src_ip),						\
		(eth_h)->d_addr.addr_bytes[0], (eth_h)->d_addr.addr_bytes[1], \
		(eth_h)->d_addr.addr_bytes[2], (eth_h)->d_addr.addr_bytes[3], \
		(eth_h)->d_addr.addr_bytes[4], (eth_h)->d_addr.addr_bytes[5], \
		(dst_ip),						\
		(arp_op), ++(burstnumber))
#endif
585
586 static void
587 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
588                 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
589 {
590         struct ipv4_hdr *ipv4_h;
591 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
592         struct arp_hdr *arp_h;
593         char dst_ip[16];
594         char ArpOp[24];
595         char buf[16];
596 #endif
597         char src_ip[16];
598
599         uint16_t ether_type = eth_h->ether_type;
600         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
601
602 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
603         strlcpy(buf, info, 16);
604 #endif
605
606         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
607                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
608                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
609 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
610                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
611                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
612 #endif
613                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
614         }
615 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
616         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
617                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
618                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
619                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
620                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
621                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
622         }
623 #endif
624 }
625 #endif
626
627 static uint16_t
628 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
629 {
630         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
631         struct bond_dev_private *internals = bd_tx_q->dev_private;
632         struct ether_hdr *eth_h;
633         uint16_t ether_type, offset;
634         uint16_t nb_recv_pkts;
635         int i;
636
637         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
638
639         for (i = 0; i < nb_recv_pkts; i++) {
640                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
641                 ether_type = eth_h->ether_type;
642                 offset = get_vlan_offset(eth_h, &ether_type);
643
644                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
645 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
646                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
647 #endif
648                         bond_mode_alb_arp_recv(eth_h, offset, internals);
649                 }
650 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
651                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
652                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
653 #endif
654         }
655
656         return nb_recv_pkts;
657 }
658
659 static uint16_t
660 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
661                 uint16_t nb_pkts)
662 {
663         struct bond_dev_private *internals;
664         struct bond_tx_queue *bd_tx_q;
665
666         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
667         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
668
669         uint16_t num_of_slaves;
670         uint16_t slaves[RTE_MAX_ETHPORTS];
671
672         uint16_t num_tx_total = 0, num_tx_slave;
673
674         static int slave_idx = 0;
675         int i, cslave_idx = 0, tx_fail_total = 0;
676
677         bd_tx_q = (struct bond_tx_queue *)queue;
678         internals = bd_tx_q->dev_private;
679
680         /* Copy slave list to protect against slave up/down changes during tx
681          * bursting */
682         num_of_slaves = internals->active_slave_count;
683         memcpy(slaves, internals->active_slaves,
684                         sizeof(internals->active_slaves[0]) * num_of_slaves);
685
686         if (num_of_slaves < 1)
687                 return num_tx_total;
688
689         /* Populate slaves mbuf with which packets are to be sent on it  */
690         for (i = 0; i < nb_pkts; i++) {
691                 cslave_idx = (slave_idx + i) % num_of_slaves;
692                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
693         }
694
695         /* increment current slave index so the next call to tx burst starts on the
696          * next slave */
697         slave_idx = ++cslave_idx;
698
699         /* Send packet burst on each slave device */
700         for (i = 0; i < num_of_slaves; i++) {
701                 if (slave_nb_pkts[i] > 0) {
702                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
703                                         slave_bufs[i], slave_nb_pkts[i]);
704
705                         /* if tx burst fails move packets to end of bufs */
706                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
707                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
708
709                                 tx_fail_total += tx_fail_slave;
710
711                                 memcpy(&bufs[nb_pkts - tx_fail_total],
712                                        &slave_bufs[i][num_tx_slave],
713                                        tx_fail_slave * sizeof(bufs[0]));
714                         }
715                         num_tx_total += num_tx_slave;
716                 }
717         }
718
719         return num_tx_total;
720 }
721
722 static uint16_t
723 bond_ethdev_tx_burst_active_backup(void *queue,
724                 struct rte_mbuf **bufs, uint16_t nb_pkts)
725 {
726         struct bond_dev_private *internals;
727         struct bond_tx_queue *bd_tx_q;
728
729         bd_tx_q = (struct bond_tx_queue *)queue;
730         internals = bd_tx_q->dev_private;
731
732         if (internals->active_slave_count < 1)
733                 return 0;
734
735         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
736                         bufs, nb_pkts);
737 }
738
739 static inline uint16_t
740 ether_hash(struct ether_hdr *eth_hdr)
741 {
742         unaligned_uint16_t *word_src_addr =
743                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
744         unaligned_uint16_t *word_dst_addr =
745                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
746
747         return (word_src_addr[0] ^ word_dst_addr[0]) ^
748                         (word_src_addr[1] ^ word_dst_addr[1]) ^
749                         (word_src_addr[2] ^ word_dst_addr[2]);
750 }
751
752 static inline uint32_t
753 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
754 {
755         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
756 }
757
758 static inline uint32_t
759 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
760 {
761         unaligned_uint32_t *word_src_addr =
762                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
763         unaligned_uint32_t *word_dst_addr =
764                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
765
766         return (word_src_addr[0] ^ word_dst_addr[0]) ^
767                         (word_src_addr[1] ^ word_dst_addr[1]) ^
768                         (word_src_addr[2] ^ word_dst_addr[2]) ^
769                         (word_src_addr[3] ^ word_dst_addr[3]);
770 }
771
772
773 void
774 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
775                 uint8_t slave_count, uint16_t *slaves)
776 {
777         struct ether_hdr *eth_hdr;
778         uint32_t hash;
779         int i;
780
781         for (i = 0; i < nb_pkts; i++) {
782                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
783
784                 hash = ether_hash(eth_hdr);
785
786                 slaves[i] = (hash ^= hash >> 8) % slave_count;
787         }
788 }
789
790 void
791 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
792                 uint8_t slave_count, uint16_t *slaves)
793 {
794         uint16_t i;
795         struct ether_hdr *eth_hdr;
796         uint16_t proto;
797         size_t vlan_offset;
798         uint32_t hash, l3hash;
799
800         for (i = 0; i < nb_pkts; i++) {
801                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
802                 l3hash = 0;
803
804                 proto = eth_hdr->ether_type;
805                 hash = ether_hash(eth_hdr);
806
807                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
808
809                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
810                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
811                                         ((char *)(eth_hdr + 1) + vlan_offset);
812                         l3hash = ipv4_hash(ipv4_hdr);
813
814                 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
815                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
816                                         ((char *)(eth_hdr + 1) + vlan_offset);
817                         l3hash = ipv6_hash(ipv6_hdr);
818                 }
819
820                 hash = hash ^ l3hash;
821                 hash ^= hash >> 16;
822                 hash ^= hash >> 8;
823
824                 slaves[i] = hash % slave_count;
825         }
826 }
827
828 void
829 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
830                 uint8_t slave_count, uint16_t *slaves)
831 {
832         struct ether_hdr *eth_hdr;
833         uint16_t proto;
834         size_t vlan_offset;
835         int i;
836
837         struct udp_hdr *udp_hdr;
838         struct tcp_hdr *tcp_hdr;
839         uint32_t hash, l3hash, l4hash;
840
841         for (i = 0; i < nb_pkts; i++) {
842                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
843                 proto = eth_hdr->ether_type;
844                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
845                 l3hash = 0;
846                 l4hash = 0;
847
848                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
849                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
850                                         ((char *)(eth_hdr + 1) + vlan_offset);
851                         size_t ip_hdr_offset;
852
853                         l3hash = ipv4_hash(ipv4_hdr);
854
855                         /* there is no L4 header in fragmented packet */
856                         if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
857                                                                 == 0)) {
858                                 ip_hdr_offset = (ipv4_hdr->version_ihl
859                                         & IPV4_HDR_IHL_MASK) *
860                                         IPV4_IHL_MULTIPLIER;
861
862                                 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
863                                         tcp_hdr = (struct tcp_hdr *)
864                                                 ((char *)ipv4_hdr +
865                                                         ip_hdr_offset);
866                                         l4hash = HASH_L4_PORTS(tcp_hdr);
867                                 } else if (ipv4_hdr->next_proto_id ==
868                                                                 IPPROTO_UDP) {
869                                         udp_hdr = (struct udp_hdr *)
870                                                 ((char *)ipv4_hdr +
871                                                         ip_hdr_offset);
872                                         l4hash = HASH_L4_PORTS(udp_hdr);
873                                 }
874                         }
875                 } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
876                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
877                                         ((char *)(eth_hdr + 1) + vlan_offset);
878                         l3hash = ipv6_hash(ipv6_hdr);
879
880                         if (ipv6_hdr->proto == IPPROTO_TCP) {
881                                 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
882                                 l4hash = HASH_L4_PORTS(tcp_hdr);
883                         } else if (ipv6_hdr->proto == IPPROTO_UDP) {
884                                 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
885                                 l4hash = HASH_L4_PORTS(udp_hdr);
886                         }
887                 }
888
889                 hash = l3hash ^ l4hash;
890                 hash ^= hash >> 16;
891                 hash ^= hash >> 8;
892
893                 slaves[i] = hash % slave_count;
894         }
895 }
896
/*
 * Remaining-bandwidth record for one slave, used to order slaves in
 * TLB mode. The two bwg_left_* fields hold the quotient and remainder
 * of the bandwidth-left computation; slave is the port id.
 */
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	/* Port ids are 16 bits wide; an 8-bit field would truncate them. */
	uint16_t slave;
};
902
903 void
904 bond_tlb_activate_slave(struct bond_dev_private *internals) {
905         int i;
906
907         for (i = 0; i < internals->active_slave_count; i++) {
908                 tlb_last_obytets[internals->active_slaves[i]] = 0;
909         }
910 }
911
912 static int
913 bandwidth_cmp(const void *a, const void *b)
914 {
915         const struct bwg_slave *bwg_a = a;
916         const struct bwg_slave *bwg_b = b;
917         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
918         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
919                         (int64_t)bwg_a->bwg_left_remainder;
920         if (diff > 0)
921                 return 1;
922         else if (diff < 0)
923                 return -1;
924         else if (diff2 > 0)
925                 return 1;
926         else if (diff2 < 0)
927                 return -1;
928         else
929                 return 0;
930 }
931
932 static void
933 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
934                 struct bwg_slave *bwg_slave)
935 {
936         struct rte_eth_link link_status;
937
938         rte_eth_link_get_nowait(port_id, &link_status);
939         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
940         if (link_bwg == 0)
941                 return;
942         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
943         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
944         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
945 }
946
947 static void
948 bond_ethdev_update_tlb_slave_cb(void *arg)
949 {
950         struct bond_dev_private *internals = arg;
951         struct rte_eth_stats slave_stats;
952         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
953         uint8_t slave_count;
954         uint64_t tx_bytes;
955
956         uint8_t update_stats = 0;
957         uint8_t i, slave_id;
958
959         internals->slave_update_idx++;
960
961
962         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
963                 update_stats = 1;
964
965         for (i = 0; i < internals->active_slave_count; i++) {
966                 slave_id = internals->active_slaves[i];
967                 rte_eth_stats_get(slave_id, &slave_stats);
968                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
969                 bandwidth_left(slave_id, tx_bytes,
970                                 internals->slave_update_idx, &bwg_array[i]);
971                 bwg_array[i].slave = slave_id;
972
973                 if (update_stats) {
974                         tlb_last_obytets[slave_id] = slave_stats.obytes;
975                 }
976         }
977
978         if (update_stats == 1)
979                 internals->slave_update_idx = 0;
980
981         slave_count = i;
982         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
983         for (i = 0; i < slave_count; i++)
984                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
985
986         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
987                         (struct bond_dev_private *)internals);
988 }
989
990 static uint16_t
991 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
992 {
993         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
994         struct bond_dev_private *internals = bd_tx_q->dev_private;
995
996         struct rte_eth_dev *primary_port =
997                         &rte_eth_devices[internals->primary_port];
998         uint16_t num_tx_total = 0;
999         uint16_t i, j;
1000
1001         uint16_t num_of_slaves = internals->active_slave_count;
1002         uint16_t slaves[RTE_MAX_ETHPORTS];
1003
1004         struct ether_hdr *ether_hdr;
1005         struct ether_addr primary_slave_addr;
1006         struct ether_addr active_slave_addr;
1007
1008         if (num_of_slaves < 1)
1009                 return num_tx_total;
1010
1011         memcpy(slaves, internals->tlb_slaves_order,
1012                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
1013
1014
1015         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
1016
1017         if (nb_pkts > 3) {
1018                 for (i = 0; i < 3; i++)
1019                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
1020         }
1021
1022         for (i = 0; i < num_of_slaves; i++) {
1023                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
1024                 for (j = num_tx_total; j < nb_pkts; j++) {
1025                         if (j + 3 < nb_pkts)
1026                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1027
1028                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1029                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
1030                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
1031 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1032                                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1033 #endif
1034                 }
1035
1036                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1037                                 bufs + num_tx_total, nb_pkts - num_tx_total);
1038
1039                 if (num_tx_total == nb_pkts)
1040                         break;
1041         }
1042
1043         return num_tx_total;
1044 }
1045
1046 void
1047 bond_tlb_disable(struct bond_dev_private *internals)
1048 {
1049         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1050 }
1051
/*
 * Start TLB mode bookkeeping by running the slave-reordering callback
 * once; the callback re-arms itself as an alarm.
 */
void
bond_tlb_enable(struct bond_dev_private *internals)
{
	void *cb_arg = internals;

	bond_ethdev_update_tlb_slave_cb(cb_arg);
}
1057
1058 static uint16_t
1059 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1060 {
1061         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1062         struct bond_dev_private *internals = bd_tx_q->dev_private;
1063
1064         struct ether_hdr *eth_h;
1065         uint16_t ether_type, offset;
1066
1067         struct client_data *client_info;
1068
1069         /*
1070          * We create transmit buffers for every slave and one additional to send
1071          * through tlb. In worst case every packet will be send on one port.
1072          */
1073         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1074         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1075
1076         /*
1077          * We create separate transmit buffers for update packets as they won't
1078          * be counted in num_tx_total.
1079          */
1080         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1081         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1082
1083         struct rte_mbuf *upd_pkt;
1084         size_t pkt_size;
1085
1086         uint16_t num_send, num_not_send = 0;
1087         uint16_t num_tx_total = 0;
1088         uint16_t slave_idx;
1089
1090         int i, j;
1091
1092         /* Search tx buffer for ARP packets and forward them to alb */
1093         for (i = 0; i < nb_pkts; i++) {
1094                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1095                 ether_type = eth_h->ether_type;
1096                 offset = get_vlan_offset(eth_h, &ether_type);
1097
1098                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1099                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1100
1101                         /* Change src mac in eth header */
1102                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1103
1104                         /* Add packet to slave tx buffer */
1105                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1106                         slave_bufs_pkts[slave_idx]++;
1107                 } else {
1108                         /* If packet is not ARP, send it with TLB policy */
1109                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1110                                         bufs[i];
1111                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1112                 }
1113         }
1114
1115         /* Update connected client ARP tables */
1116         if (internals->mode6.ntt) {
1117                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1118                         client_info = &internals->mode6.client_table[i];
1119
1120                         if (client_info->in_use) {
1121                                 /* Allocate new packet to send ARP update on current slave */
1122                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1123                                 if (upd_pkt == NULL) {
1124                                         RTE_BOND_LOG(ERR,
1125                                                      "Failed to allocate ARP packet from pool");
1126                                         continue;
1127                                 }
1128                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1129                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
1130                                 upd_pkt->data_len = pkt_size;
1131                                 upd_pkt->pkt_len = pkt_size;
1132
1133                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1134                                                 internals);
1135
1136                                 /* Add packet to update tx buffer */
1137                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1138                                 update_bufs_pkts[slave_idx]++;
1139                         }
1140                 }
1141                 internals->mode6.ntt = 0;
1142         }
1143
1144         /* Send ARP packets on proper slaves */
1145         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1146                 if (slave_bufs_pkts[i] > 0) {
1147                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1148                                         slave_bufs[i], slave_bufs_pkts[i]);
1149                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1150                                 bufs[nb_pkts - 1 - num_not_send - j] =
1151                                                 slave_bufs[i][nb_pkts - 1 - j];
1152                         }
1153
1154                         num_tx_total += num_send;
1155                         num_not_send += slave_bufs_pkts[i] - num_send;
1156
1157 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1158         /* Print TX stats including update packets */
1159                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
1160                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1161                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1162                         }
1163 #endif
1164                 }
1165         }
1166
1167         /* Send update packets on proper slaves */
1168         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1169                 if (update_bufs_pkts[i] > 0) {
1170                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1171                                         update_bufs_pkts[i]);
1172                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
1173                                 rte_pktmbuf_free(update_bufs[i][j]);
1174                         }
1175 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1176                         for (j = 0; j < update_bufs_pkts[i]; j++) {
1177                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1178                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1179                         }
1180 #endif
1181                 }
1182         }
1183
1184         /* Send non-ARP packets using tlb policy */
1185         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1186                 num_send = bond_ethdev_tx_burst_tlb(queue,
1187                                 slave_bufs[RTE_MAX_ETHPORTS],
1188                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1189
1190                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1191                         bufs[nb_pkts - 1 - num_not_send - j] =
1192                                         slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1193                 }
1194
1195                 num_tx_total += num_send;
1196         }
1197
1198         return num_tx_total;
1199 }
1200
1201 static uint16_t
1202 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1203                 uint16_t nb_bufs)
1204 {
1205         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1206         struct bond_dev_private *internals = bd_tx_q->dev_private;
1207
1208         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1209         uint16_t slave_count;
1210
1211         /* Array to sort mbufs for transmission on each slave into */
1212         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1213         /* Number of mbufs for transmission on each slave */
1214         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1215         /* Mapping array generated by hash function to map mbufs to slaves */
1216         uint16_t bufs_slave_port_idxs[nb_bufs];
1217
1218         uint16_t slave_tx_count;
1219         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1220
1221         uint16_t i;
1222
1223         if (unlikely(nb_bufs == 0))
1224                 return 0;
1225
1226         /* Copy slave list to protect against slave up/down changes during tx
1227          * bursting */
1228         slave_count = internals->active_slave_count;
1229         if (unlikely(slave_count < 1))
1230                 return 0;
1231
1232         memcpy(slave_port_ids, internals->active_slaves,
1233                         sizeof(slave_port_ids[0]) * slave_count);
1234
1235         /*
1236          * Populate slaves mbuf with the packets which are to be sent on it
1237          * selecting output slave using hash based on xmit policy
1238          */
1239         internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1240                         bufs_slave_port_idxs);
1241
1242         for (i = 0; i < nb_bufs; i++) {
1243                 /* Populate slave mbuf arrays with mbufs for that slave. */
1244                 uint8_t slave_idx = bufs_slave_port_idxs[i];
1245
1246                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1247         }
1248
1249         /* Send packet burst on each slave device */
1250         for (i = 0; i < slave_count; i++) {
1251                 if (slave_nb_bufs[i] == 0)
1252                         continue;
1253
1254                 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1255                                 bd_tx_q->queue_id, slave_bufs[i],
1256                                 slave_nb_bufs[i]);
1257
1258                 total_tx_count += slave_tx_count;
1259
1260                 /* If tx burst fails move packets to end of bufs */
1261                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1262                         int slave_tx_fail_count = slave_nb_bufs[i] -
1263                                         slave_tx_count;
1264                         total_tx_fail_count += slave_tx_fail_count;
1265                         memcpy(&bufs[nb_bufs - total_tx_fail_count],
1266                                &slave_bufs[i][slave_tx_count],
1267                                slave_tx_fail_count * sizeof(bufs[0]));
1268                 }
1269         }
1270
1271         return total_tx_count;
1272 }
1273
1274 static uint16_t
1275 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1276                 uint16_t nb_bufs)
1277 {
1278         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1279         struct bond_dev_private *internals = bd_tx_q->dev_private;
1280
1281         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1282         uint16_t slave_count;
1283
1284         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1285         uint16_t dist_slave_count;
1286
1287         /* 2-D array to sort mbufs for transmission on each slave into */
1288         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1289         /* Number of mbufs for transmission on each slave */
1290         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1291         /* Mapping array generated by hash function to map mbufs to slaves */
1292         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1293
1294         uint16_t slave_tx_count;
1295         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1296
1297         uint16_t i;
1298
1299         if (unlikely(nb_bufs == 0))
1300                 return 0;
1301
1302         /* Copy slave list to protect against slave up/down changes during tx
1303          * bursting */
1304         slave_count = internals->active_slave_count;
1305         if (unlikely(slave_count < 1))
1306                 return 0;
1307
1308         memcpy(slave_port_ids, internals->active_slaves,
1309                         sizeof(slave_port_ids[0]) * slave_count);
1310
1311         dist_slave_count = 0;
1312         for (i = 0; i < slave_count; i++) {
1313                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1314
1315                 if (ACTOR_STATE(port, DISTRIBUTING))
1316                         dist_slave_port_ids[dist_slave_count++] =
1317                                         slave_port_ids[i];
1318         }
1319
1320         if (likely(dist_slave_count > 1)) {
1321
1322                 /*
1323                  * Populate slaves mbuf with the packets which are to be sent
1324                  * on it, selecting output slave using hash based on xmit policy
1325                  */
1326                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1327                                 bufs_slave_port_idxs);
1328
1329                 for (i = 0; i < nb_bufs; i++) {
1330                         /*
1331                          * Populate slave mbuf arrays with mbufs for that
1332                          * slave
1333                          */
1334                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1335
1336                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1337                                         bufs[i];
1338                 }
1339
1340
1341                 /* Send packet burst on each slave device */
1342                 for (i = 0; i < dist_slave_count; i++) {
1343                         if (slave_nb_bufs[i] == 0)
1344                                 continue;
1345
1346                         slave_tx_count = rte_eth_tx_burst(
1347                                         dist_slave_port_ids[i],
1348                                         bd_tx_q->queue_id, slave_bufs[i],
1349                                         slave_nb_bufs[i]);
1350
1351                         total_tx_count += slave_tx_count;
1352
1353                         /* If tx burst fails move packets to end of bufs */
1354                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1355                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1356                                                 slave_tx_count;
1357                                 total_tx_fail_count += slave_tx_fail_count;
1358
1359                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1360                                        &slave_bufs[i][slave_tx_count],
1361                                        slave_tx_fail_count * sizeof(bufs[0]));
1362                         }
1363                 }
1364         }
1365
1366         /* Check for LACP control packets and send if available */
1367         for (i = 0; i < slave_count; i++) {
1368                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1369                 struct rte_mbuf *ctrl_pkt = NULL;
1370
1371                 if (likely(rte_ring_empty(port->tx_ring)))
1372                         continue;
1373
1374                 if (rte_ring_dequeue(port->tx_ring,
1375                                      (void **)&ctrl_pkt) != -ENOENT) {
1376                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1377                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1378                         /*
1379                          * re-enqueue LAG control plane packets to buffering
1380                          * ring if transmission fails so the packet isn't lost.
1381                          */
1382                         if (slave_tx_count != 1)
1383                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1384                 }
1385         }
1386
1387         return total_tx_count;
1388 }
1389
1390 static uint16_t
1391 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1392                 uint16_t nb_pkts)
1393 {
1394         struct bond_dev_private *internals;
1395         struct bond_tx_queue *bd_tx_q;
1396
1397         uint8_t tx_failed_flag = 0, num_of_slaves;
1398         uint16_t slaves[RTE_MAX_ETHPORTS];
1399
1400         uint16_t max_nb_of_tx_pkts = 0;
1401
1402         int slave_tx_total[RTE_MAX_ETHPORTS];
1403         int i, most_successful_tx_slave = -1;
1404
1405         bd_tx_q = (struct bond_tx_queue *)queue;
1406         internals = bd_tx_q->dev_private;
1407
1408         /* Copy slave list to protect against slave up/down changes during tx
1409          * bursting */
1410         num_of_slaves = internals->active_slave_count;
1411         memcpy(slaves, internals->active_slaves,
1412                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1413
1414         if (num_of_slaves < 1)
1415                 return 0;
1416
1417         /* Increment reference count on mbufs */
1418         for (i = 0; i < nb_pkts; i++)
1419                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1420
1421         /* Transmit burst on each active slave */
1422         for (i = 0; i < num_of_slaves; i++) {
1423                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1424                                         bufs, nb_pkts);
1425
1426                 if (unlikely(slave_tx_total[i] < nb_pkts))
1427                         tx_failed_flag = 1;
1428
1429                 /* record the value and slave index for the slave which transmits the
1430                  * maximum number of packets */
1431                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1432                         max_nb_of_tx_pkts = slave_tx_total[i];
1433                         most_successful_tx_slave = i;
1434                 }
1435         }
1436
1437         /* if slaves fail to transmit packets from burst, the calling application
1438          * is not expected to know about multiple references to packets so we must
1439          * handle failures of all packets except those of the most successful slave
1440          */
1441         if (unlikely(tx_failed_flag))
1442                 for (i = 0; i < num_of_slaves; i++)
1443                         if (i != most_successful_tx_slave)
1444                                 while (slave_tx_total[i] < nb_pkts)
1445                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1446
1447         return max_nb_of_tx_pkts;
1448 }
1449
1450 void
1451 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1452 {
1453         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1454
1455         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1456                 /**
1457                  * If in mode 4 then save the link properties of the first
1458                  * slave, all subsequent slaves must match these properties
1459                  */
1460                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1461
1462                 bond_link->link_autoneg = slave_link->link_autoneg;
1463                 bond_link->link_duplex = slave_link->link_duplex;
1464                 bond_link->link_speed = slave_link->link_speed;
1465         } else {
1466                 /**
1467                  * In any other mode the link properties are set to default
1468                  * values of AUTONEG/DUPLEX
1469                  */
1470                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1471                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1472         }
1473 }
1474
1475 int
1476 link_properties_valid(struct rte_eth_dev *ethdev,
1477                 struct rte_eth_link *slave_link)
1478 {
1479         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1480
1481         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1482                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1483
1484                 if (bond_link->link_duplex != slave_link->link_duplex ||
1485                         bond_link->link_autoneg != slave_link->link_autoneg ||
1486                         bond_link->link_speed != slave_link->link_speed)
1487                         return -1;
1488         }
1489
1490         return 0;
1491 }
1492
1493 int
1494 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1495 {
1496         struct ether_addr *mac_addr;
1497
1498         if (eth_dev == NULL) {
1499                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1500                 return -1;
1501         }
1502
1503         if (dst_mac_addr == NULL) {
1504                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1505                 return -1;
1506         }
1507
1508         mac_addr = eth_dev->data->mac_addrs;
1509
1510         ether_addr_copy(mac_addr, dst_mac_addr);
1511         return 0;
1512 }
1513
1514 int
1515 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1516 {
1517         struct ether_addr *mac_addr;
1518
1519         if (eth_dev == NULL) {
1520                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1521                 return -1;
1522         }
1523
1524         if (new_mac_addr == NULL) {
1525                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1526                 return -1;
1527         }
1528
1529         mac_addr = eth_dev->data->mac_addrs;
1530
1531         /* If new MAC is different to current MAC then update */
1532         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1533                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1534
1535         return 0;
1536 }
1537
/* All-zero MAC address (static storage, so zero-initialized). A table entry
 * equal to it ends the MAC table scans in slave_add_mac_addresses() and
 * slave_remove_mac_addresses(). */
static const struct ether_addr null_mac_addr;
1539
1540 /*
1541  * Add additional MAC addresses to the slave
1542  */
1543 int
1544 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1545                 uint16_t slave_port_id)
1546 {
1547         int i, ret;
1548         struct ether_addr *mac_addr;
1549
1550         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1551                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1552                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1553                         break;
1554
1555                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1556                 if (ret < 0) {
1557                         /* rollback */
1558                         for (i--; i > 0; i--)
1559                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1560                                         &bonded_eth_dev->data->mac_addrs[i]);
1561                         return ret;
1562                 }
1563         }
1564
1565         return 0;
1566 }
1567
1568 /*
1569  * Remove additional MAC addresses from the slave
1570  */
1571 int
1572 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1573                 uint16_t slave_port_id)
1574 {
1575         int i, rc, ret;
1576         struct ether_addr *mac_addr;
1577
1578         rc = 0;
1579         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1580                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1581                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1582                         break;
1583
1584                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1585                 /* save only the first error */
1586                 if (ret < 0 && rc == 0)
1587                         rc = ret;
1588         }
1589
1590         return rc;
1591 }
1592
1593 int
1594 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1595 {
1596         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1597         int i;
1598
1599         /* Update slave devices MAC addresses */
1600         if (internals->slave_count < 1)
1601                 return -1;
1602
1603         switch (internals->mode) {
1604         case BONDING_MODE_ROUND_ROBIN:
1605         case BONDING_MODE_BALANCE:
1606         case BONDING_MODE_BROADCAST:
1607                 for (i = 0; i < internals->slave_count; i++) {
1608                         if (rte_eth_dev_default_mac_addr_set(
1609                                         internals->slaves[i].port_id,
1610                                         bonded_eth_dev->data->mac_addrs)) {
1611                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1612                                                 internals->slaves[i].port_id);
1613                                 return -1;
1614                         }
1615                 }
1616                 break;
1617         case BONDING_MODE_8023AD:
1618                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1619                 break;
1620         case BONDING_MODE_ACTIVE_BACKUP:
1621         case BONDING_MODE_TLB:
1622         case BONDING_MODE_ALB:
1623         default:
1624                 for (i = 0; i < internals->slave_count; i++) {
1625                         if (internals->slaves[i].port_id ==
1626                                         internals->current_primary_port) {
1627                                 if (rte_eth_dev_default_mac_addr_set(
1628                                                 internals->primary_port,
1629                                                 bonded_eth_dev->data->mac_addrs)) {
1630                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1631                                                         internals->current_primary_port);
1632                                         return -1;
1633                                 }
1634                         } else {
1635                                 if (rte_eth_dev_default_mac_addr_set(
1636                                                 internals->slaves[i].port_id,
1637                                                 &internals->slaves[i].persisted_mac_addr)) {
1638                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1639                                                         internals->slaves[i].port_id);
1640                                         return -1;
1641                                 }
1642                         }
1643                 }
1644         }
1645
1646         return 0;
1647 }
1648
1649 int
1650 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1651 {
1652         struct bond_dev_private *internals;
1653
1654         internals = eth_dev->data->dev_private;
1655
1656         switch (mode) {
1657         case BONDING_MODE_ROUND_ROBIN:
1658                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1659                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1660                 break;
1661         case BONDING_MODE_ACTIVE_BACKUP:
1662                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1663                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1664                 break;
1665         case BONDING_MODE_BALANCE:
1666                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1667                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1668                 break;
1669         case BONDING_MODE_BROADCAST:
1670                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1671                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1672                 break;
1673         case BONDING_MODE_8023AD:
1674                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1675                         return -1;
1676
1677                 if (internals->mode4.dedicated_queues.enabled == 0) {
1678                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1679                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1680                         RTE_BOND_LOG(WARNING,
1681                                 "Using mode 4, it is necessary to do TX burst "
1682                                 "and RX burst at least every 100ms.");
1683                 } else {
1684                         /* Use flow director's optimization */
1685                         eth_dev->rx_pkt_burst =
1686                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1687                         eth_dev->tx_pkt_burst =
1688                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1689                 }
1690                 break;
1691         case BONDING_MODE_TLB:
1692                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1693                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1694                 break;
1695         case BONDING_MODE_ALB:
1696                 if (bond_mode_alb_enable(eth_dev) != 0)
1697                         return -1;
1698
1699                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1700                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1701                 break;
1702         default:
1703                 return -1;
1704         }
1705
1706         internals->mode = mode;
1707
1708         return 0;
1709 }
1710
1711
1712 static int
1713 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1714                 struct rte_eth_dev *slave_eth_dev)
1715 {
1716         int errval = 0;
1717         struct bond_dev_private *internals = (struct bond_dev_private *)
1718                 bonded_eth_dev->data->dev_private;
1719         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1720
1721         if (port->slow_pool == NULL) {
1722                 char mem_name[256];
1723                 int slave_id = slave_eth_dev->data->port_id;
1724
1725                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1726                                 slave_id);
1727                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1728                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1729                         slave_eth_dev->data->numa_node);
1730
1731                 /* Any memory allocation failure in initialization is critical because
1732                  * resources can't be free, so reinitialization is impossible. */
1733                 if (port->slow_pool == NULL) {
1734                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1735                                 slave_id, mem_name, rte_strerror(rte_errno));
1736                 }
1737         }
1738
1739         if (internals->mode4.dedicated_queues.enabled == 1) {
1740                 /* Configure slow Rx queue */
1741
1742                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1743                                 internals->mode4.dedicated_queues.rx_qid, 128,
1744                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1745                                 NULL, port->slow_pool);
1746                 if (errval != 0) {
1747                         RTE_BOND_LOG(ERR,
1748                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1749                                         slave_eth_dev->data->port_id,
1750                                         internals->mode4.dedicated_queues.rx_qid,
1751                                         errval);
1752                         return errval;
1753                 }
1754
1755                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1756                                 internals->mode4.dedicated_queues.tx_qid, 512,
1757                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1758                                 NULL);
1759                 if (errval != 0) {
1760                         RTE_BOND_LOG(ERR,
1761                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1762                                 slave_eth_dev->data->port_id,
1763                                 internals->mode4.dedicated_queues.tx_qid,
1764                                 errval);
1765                         return errval;
1766                 }
1767         }
1768         return 0;
1769 }
1770
1771 int
1772 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1773                 struct rte_eth_dev *slave_eth_dev)
1774 {
1775         struct bond_rx_queue *bd_rx_q;
1776         struct bond_tx_queue *bd_tx_q;
1777         uint16_t nb_rx_queues;
1778         uint16_t nb_tx_queues;
1779
1780         int errval;
1781         uint16_t q_id;
1782         struct rte_flow_error flow_error;
1783
1784         struct bond_dev_private *internals = (struct bond_dev_private *)
1785                 bonded_eth_dev->data->dev_private;
1786
1787         /* Stop slave */
1788         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1789
1790         /* Enable interrupts on slave device if supported */
1791         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1792                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1793
1794         /* If RSS is enabled for bonding, try to enable it for slaves  */
1795         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1796                 if (internals->rss_key_len != 0) {
1797                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1798                                         internals->rss_key_len;
1799                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1800                                         internals->rss_key;
1801                 } else {
1802                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1803                 }
1804
1805                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1806                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1807                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1808                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1809         }
1810
1811         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1812                         DEV_RX_OFFLOAD_VLAN_FILTER)
1813                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1814                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1815         else
1816                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1817                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1818
1819         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1820         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1821
1822         if (internals->mode == BONDING_MODE_8023AD) {
1823                 if (internals->mode4.dedicated_queues.enabled == 1) {
1824                         nb_rx_queues++;
1825                         nb_tx_queues++;
1826                 }
1827         }
1828
1829         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1830                                      bonded_eth_dev->data->mtu);
1831         if (errval != 0 && errval != -ENOTSUP) {
1832                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1833                                 slave_eth_dev->data->port_id, errval);
1834                 return errval;
1835         }
1836
1837         /* Configure device */
1838         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1839                         nb_rx_queues, nb_tx_queues,
1840                         &(slave_eth_dev->data->dev_conf));
1841         if (errval != 0) {
1842                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1843                                 slave_eth_dev->data->port_id, errval);
1844                 return errval;
1845         }
1846
1847         /* Setup Rx Queues */
1848         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1849                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1850
1851                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1852                                 bd_rx_q->nb_rx_desc,
1853                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1854                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1855                 if (errval != 0) {
1856                         RTE_BOND_LOG(ERR,
1857                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1858                                         slave_eth_dev->data->port_id, q_id, errval);
1859                         return errval;
1860                 }
1861         }
1862
1863         /* Setup Tx Queues */
1864         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1865                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1866
1867                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1868                                 bd_tx_q->nb_tx_desc,
1869                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1870                                 &bd_tx_q->tx_conf);
1871                 if (errval != 0) {
1872                         RTE_BOND_LOG(ERR,
1873                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1874                                 slave_eth_dev->data->port_id, q_id, errval);
1875                         return errval;
1876                 }
1877         }
1878
1879         if (internals->mode == BONDING_MODE_8023AD &&
1880                         internals->mode4.dedicated_queues.enabled == 1) {
1881                 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
1882                                 != 0)
1883                         return errval;
1884
1885                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1886                                 slave_eth_dev->data->port_id) != 0) {
1887                         RTE_BOND_LOG(ERR,
1888                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1889                                 slave_eth_dev->data->port_id, q_id, errval);
1890                         return -1;
1891                 }
1892
1893                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1894                         rte_flow_destroy(slave_eth_dev->data->port_id,
1895                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1896                                         &flow_error);
1897
1898                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1899                                 slave_eth_dev->data->port_id);
1900         }
1901
1902         /* Start device */
1903         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1904         if (errval != 0) {
1905                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1906                                 slave_eth_dev->data->port_id, errval);
1907                 return -1;
1908         }
1909
1910         /* If RSS is enabled for bonding, synchronize RETA */
1911         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1912                 int i;
1913                 struct bond_dev_private *internals;
1914
1915                 internals = bonded_eth_dev->data->dev_private;
1916
1917                 for (i = 0; i < internals->slave_count; i++) {
1918                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1919                                 errval = rte_eth_dev_rss_reta_update(
1920                                                 slave_eth_dev->data->port_id,
1921                                                 &internals->reta_conf[0],
1922                                                 internals->slaves[i].reta_size);
1923                                 if (errval != 0) {
1924                                         RTE_BOND_LOG(WARNING,
1925                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1926                                                      " RSS Configuration for bonding may be inconsistent.",
1927                                                      slave_eth_dev->data->port_id, errval);
1928                                 }
1929                                 break;
1930                         }
1931                 }
1932         }
1933
1934         /* If lsc interrupt is set, check initial slave's link status */
1935         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1936                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1937                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1938                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1939                         NULL);
1940         }
1941
1942         return 0;
1943 }
1944
1945 void
1946 slave_remove(struct bond_dev_private *internals,
1947                 struct rte_eth_dev *slave_eth_dev)
1948 {
1949         uint8_t i;
1950
1951         for (i = 0; i < internals->slave_count; i++)
1952                 if (internals->slaves[i].port_id ==
1953                                 slave_eth_dev->data->port_id)
1954                         break;
1955
1956         if (i < (internals->slave_count - 1)) {
1957                 struct rte_flow *flow;
1958
1959                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1960                                 sizeof(internals->slaves[0]) *
1961                                 (internals->slave_count - i - 1));
1962                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1963                         memmove(&flow->flows[i], &flow->flows[i + 1],
1964                                 sizeof(flow->flows[0]) *
1965                                 (internals->slave_count - i - 1));
1966                         flow->flows[internals->slave_count - 1] = NULL;
1967                 }
1968         }
1969
1970         internals->slave_count--;
1971
1972         /* force reconfiguration of slave interfaces */
1973         _rte_eth_dev_reset(slave_eth_dev);
1974 }
1975
1976 static void
1977 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1978
1979 void
1980 slave_add(struct bond_dev_private *internals,
1981                 struct rte_eth_dev *slave_eth_dev)
1982 {
1983         struct bond_slave_details *slave_details =
1984                         &internals->slaves[internals->slave_count];
1985
1986         slave_details->port_id = slave_eth_dev->data->port_id;
1987         slave_details->last_link_status = 0;
1988
1989         /* Mark slave devices that don't support interrupts so we can
1990          * compensate when we start the bond
1991          */
1992         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1993                 slave_details->link_status_poll_enabled = 1;
1994         }
1995
1996         slave_details->link_status_wait_to_complete = 0;
1997         /* clean tlb_last_obytes when adding port for bonding device */
1998         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1999                         sizeof(struct ether_addr));
2000 }
2001
2002 void
2003 bond_ethdev_primary_set(struct bond_dev_private *internals,
2004                 uint16_t slave_port_id)
2005 {
2006         int i;
2007
2008         if (internals->active_slave_count < 1)
2009                 internals->current_primary_port = slave_port_id;
2010         else
2011                 /* Search bonded device slave ports for new proposed primary port */
2012                 for (i = 0; i < internals->active_slave_count; i++) {
2013                         if (internals->active_slaves[i] == slave_port_id)
2014                                 internals->current_primary_port = slave_port_id;
2015                 }
2016 }
2017
2018 static void
2019 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2020
2021 static int
2022 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2023 {
2024         struct bond_dev_private *internals;
2025         int i;
2026
2027         /* slave eth dev will be started by bonded device */
2028         if (check_for_bonded_ethdev(eth_dev)) {
2029                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2030                                 eth_dev->data->port_id);
2031                 return -1;
2032         }
2033
2034         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2035         eth_dev->data->dev_started = 1;
2036
2037         internals = eth_dev->data->dev_private;
2038
2039         if (internals->slave_count == 0) {
2040                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2041                 goto out_err;
2042         }
2043
2044         if (internals->user_defined_mac == 0) {
2045                 struct ether_addr *new_mac_addr = NULL;
2046
2047                 for (i = 0; i < internals->slave_count; i++)
2048                         if (internals->slaves[i].port_id == internals->primary_port)
2049                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2050
2051                 if (new_mac_addr == NULL)
2052                         goto out_err;
2053
2054                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2055                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2056                                         eth_dev->data->port_id);
2057                         goto out_err;
2058                 }
2059         }
2060
2061         /* If bonded device is configure in promiscuous mode then re-apply config */
2062         if (internals->promiscuous_en)
2063                 bond_ethdev_promiscuous_enable(eth_dev);
2064
2065         if (internals->mode == BONDING_MODE_8023AD) {
2066                 if (internals->mode4.dedicated_queues.enabled == 1) {
2067                         internals->mode4.dedicated_queues.rx_qid =
2068                                         eth_dev->data->nb_rx_queues;
2069                         internals->mode4.dedicated_queues.tx_qid =
2070                                         eth_dev->data->nb_tx_queues;
2071                 }
2072         }
2073
2074
2075         /* Reconfigure each slave device if starting bonded device */
2076         for (i = 0; i < internals->slave_count; i++) {
2077                 struct rte_eth_dev *slave_ethdev =
2078                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2079                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2080                         RTE_BOND_LOG(ERR,
2081                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2082                                 eth_dev->data->port_id,
2083                                 internals->slaves[i].port_id);
2084                         goto out_err;
2085                 }
2086                 /* We will need to poll for link status if any slave doesn't
2087                  * support interrupts
2088                  */
2089                 if (internals->slaves[i].link_status_poll_enabled)
2090                         internals->link_status_polling_enabled = 1;
2091         }
2092
2093         /* start polling if needed */
2094         if (internals->link_status_polling_enabled) {
2095                 rte_eal_alarm_set(
2096                         internals->link_status_polling_interval_ms * 1000,
2097                         bond_ethdev_slave_link_status_change_monitor,
2098                         (void *)&rte_eth_devices[internals->port_id]);
2099         }
2100
2101         /* Update all slave devices MACs*/
2102         if (mac_address_slaves_update(eth_dev) != 0)
2103                 goto out_err;
2104
2105         if (internals->user_defined_primary_port)
2106                 bond_ethdev_primary_set(internals, internals->primary_port);
2107
2108         if (internals->mode == BONDING_MODE_8023AD)
2109                 bond_mode_8023ad_start(eth_dev);
2110
2111         if (internals->mode == BONDING_MODE_TLB ||
2112                         internals->mode == BONDING_MODE_ALB)
2113                 bond_tlb_enable(internals);
2114
2115         return 0;
2116
2117 out_err:
2118         eth_dev->data->dev_started = 0;
2119         return -1;
2120 }
2121
2122 static void
2123 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2124 {
2125         uint8_t i;
2126
2127         if (dev->data->rx_queues != NULL) {
2128                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2129                         rte_free(dev->data->rx_queues[i]);
2130                         dev->data->rx_queues[i] = NULL;
2131                 }
2132                 dev->data->nb_rx_queues = 0;
2133         }
2134
2135         if (dev->data->tx_queues != NULL) {
2136                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2137                         rte_free(dev->data->tx_queues[i]);
2138                         dev->data->tx_queues[i] = NULL;
2139                 }
2140                 dev->data->nb_tx_queues = 0;
2141         }
2142 }
2143
2144 void
2145 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2146 {
2147         struct bond_dev_private *internals = eth_dev->data->dev_private;
2148         uint8_t i;
2149
2150         if (internals->mode == BONDING_MODE_8023AD) {
2151                 struct port *port;
2152                 void *pkt = NULL;
2153
2154                 bond_mode_8023ad_stop(eth_dev);
2155
2156                 /* Discard all messages to/from mode 4 state machines */
2157                 for (i = 0; i < internals->active_slave_count; i++) {
2158                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2159
2160                         RTE_ASSERT(port->rx_ring != NULL);
2161                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2162                                 rte_pktmbuf_free(pkt);
2163
2164                         RTE_ASSERT(port->tx_ring != NULL);
2165                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2166                                 rte_pktmbuf_free(pkt);
2167                 }
2168         }
2169
2170         if (internals->mode == BONDING_MODE_TLB ||
2171                         internals->mode == BONDING_MODE_ALB) {
2172                 bond_tlb_disable(internals);
2173                 for (i = 0; i < internals->active_slave_count; i++)
2174                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2175         }
2176
2177         internals->link_status_polling_enabled = 0;
2178         for (i = 0; i < internals->slave_count; i++)
2179                 internals->slaves[i].last_link_status = 0;
2180
2181         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2182         eth_dev->data->dev_started = 0;
2183 }
2184
2185 void
2186 bond_ethdev_close(struct rte_eth_dev *dev)
2187 {
2188         struct bond_dev_private *internals = dev->data->dev_private;
2189         uint8_t bond_port_id = internals->port_id;
2190         int skipped = 0;
2191         struct rte_flow_error ferror;
2192
2193         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2194         while (internals->slave_count != skipped) {
2195                 uint16_t port_id = internals->slaves[skipped].port_id;
2196
2197                 rte_eth_dev_stop(port_id);
2198
2199                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2200                         RTE_BOND_LOG(ERR,
2201                                      "Failed to remove port %d from bonded device %s",
2202                                      port_id, dev->device->name);
2203                         skipped++;
2204                 }
2205         }
2206         bond_flow_ops.flush(dev, &ferror);
2207         bond_ethdev_free_queues(dev);
2208         rte_bitmap_reset(internals->vlan_filter_bmp);
2209 }
2210
2211 /* forward declaration */
2212 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2213
2214 static void
2215 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2216 {
2217         struct bond_dev_private *internals = dev->data->dev_private;
2218
2219         uint16_t max_nb_rx_queues = UINT16_MAX;
2220         uint16_t max_nb_tx_queues = UINT16_MAX;
2221
2222         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2223
2224         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2225                         internals->candidate_max_rx_pktlen :
2226                         ETHER_MAX_JUMBO_FRAME_LEN;
2227
2228         /* Max number of tx/rx queues that the bonded device can support is the
2229          * minimum values of the bonded slaves, as all slaves must be capable
2230          * of supporting the same number of tx/rx queues.
2231          */
2232         if (internals->slave_count > 0) {
2233                 struct rte_eth_dev_info slave_info;
2234                 uint8_t idx;
2235
2236                 for (idx = 0; idx < internals->slave_count; idx++) {
2237                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2238                                         &slave_info);
2239
2240                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2241                                 max_nb_rx_queues = slave_info.max_rx_queues;
2242
2243                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2244                                 max_nb_tx_queues = slave_info.max_tx_queues;
2245                 }
2246         }
2247
2248         dev_info->max_rx_queues = max_nb_rx_queues;
2249         dev_info->max_tx_queues = max_nb_tx_queues;
2250
2251         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2252                sizeof(dev_info->default_rxconf));
2253         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2254                sizeof(dev_info->default_txconf));
2255
2256         /**
2257          * If dedicated hw queues enabled for link bonding device in LACP mode
2258          * then we need to reduce the maximum number of data path queues by 1.
2259          */
2260         if (internals->mode == BONDING_MODE_8023AD &&
2261                 internals->mode4.dedicated_queues.enabled == 1) {
2262                 dev_info->max_rx_queues--;
2263                 dev_info->max_tx_queues--;
2264         }
2265
2266         dev_info->min_rx_bufsize = 0;
2267
2268         dev_info->rx_offload_capa = internals->rx_offload_capa;
2269         dev_info->tx_offload_capa = internals->tx_offload_capa;
2270         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2271         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2272         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2273
2274         dev_info->reta_size = internals->reta_size;
2275 }
2276
2277 static int
2278 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2279 {
2280         int res;
2281         uint16_t i;
2282         struct bond_dev_private *internals = dev->data->dev_private;
2283
2284         /* don't do this while a slave is being added */
2285         rte_spinlock_lock(&internals->lock);
2286
2287         if (on)
2288                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2289         else
2290                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2291
2292         for (i = 0; i < internals->slave_count; i++) {
2293                 uint16_t port_id = internals->slaves[i].port_id;
2294
2295                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2296                 if (res == ENOTSUP)
2297                         RTE_BOND_LOG(WARNING,
2298                                      "Setting VLAN filter on slave port %u not supported.",
2299                                      port_id);
2300         }
2301
2302         rte_spinlock_unlock(&internals->lock);
2303         return 0;
2304 }
2305
2306 static int
2307 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2308                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2309                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2310 {
2311         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2312                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2313                                         0, dev->data->numa_node);
2314         if (bd_rx_q == NULL)
2315                 return -1;
2316
2317         bd_rx_q->queue_id = rx_queue_id;
2318         bd_rx_q->dev_private = dev->data->dev_private;
2319
2320         bd_rx_q->nb_rx_desc = nb_rx_desc;
2321
2322         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2323         bd_rx_q->mb_pool = mb_pool;
2324
2325         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2326
2327         return 0;
2328 }
2329
2330 static int
2331 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2332                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2333                 const struct rte_eth_txconf *tx_conf)
2334 {
2335         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2336                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2337                                         0, dev->data->numa_node);
2338
2339         if (bd_tx_q == NULL)
2340                 return -1;
2341
2342         bd_tx_q->queue_id = tx_queue_id;
2343         bd_tx_q->dev_private = dev->data->dev_private;
2344
2345         bd_tx_q->nb_tx_desc = nb_tx_desc;
2346         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2347
2348         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2349
2350         return 0;
2351 }
2352
/* Free an Rx queue structure; a NULL queue is ignored. */
static void
bond_ethdev_rx_queue_release(void *queue)
{
	if (queue != NULL)
		rte_free(queue);
}
2361
/* Free a Tx queue structure; a NULL queue is ignored. */
static void
bond_ethdev_tx_queue_release(void *queue)
{
	if (queue != NULL)
		rte_free(queue);
}
2370
2371 static void
2372 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2373 {
2374         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2375         struct bond_dev_private *internals;
2376
2377         /* Default value for polling slave found is true as we don't want to
2378          * disable the polling thread if we cannot get the lock */
2379         int i, polling_slave_found = 1;
2380
2381         if (cb_arg == NULL)
2382                 return;
2383
2384         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2385         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2386
2387         if (!bonded_ethdev->data->dev_started ||
2388                 !internals->link_status_polling_enabled)
2389                 return;
2390
2391         /* If device is currently being configured then don't check slaves link
2392          * status, wait until next period */
2393         if (rte_spinlock_trylock(&internals->lock)) {
2394                 if (internals->slave_count > 0)
2395                         polling_slave_found = 0;
2396
2397                 for (i = 0; i < internals->slave_count; i++) {
2398                         if (!internals->slaves[i].link_status_poll_enabled)
2399                                 continue;
2400
2401                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2402                         polling_slave_found = 1;
2403
2404                         /* Update slave link status */
2405                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2406                                         internals->slaves[i].link_status_wait_to_complete);
2407
2408                         /* if link status has changed since last checked then call lsc
2409                          * event callback */
2410                         if (slave_ethdev->data->dev_link.link_status !=
2411                                         internals->slaves[i].last_link_status) {
2412                                 internals->slaves[i].last_link_status =
2413                                                 slave_ethdev->data->dev_link.link_status;
2414
2415                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2416                                                 RTE_ETH_EVENT_INTR_LSC,
2417                                                 &bonded_ethdev->data->port_id,
2418                                                 NULL);
2419                         }
2420                 }
2421                 rte_spinlock_unlock(&internals->lock);
2422         }
2423
2424         if (polling_slave_found)
2425                 /* Set alarm to continue monitoring link status of slave ethdev's */
2426                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2427                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2428 }
2429
2430 static int
2431 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2432 {
2433         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2434
2435         struct bond_dev_private *bond_ctx;
2436         struct rte_eth_link slave_link;
2437
2438         uint32_t idx;
2439
2440         bond_ctx = ethdev->data->dev_private;
2441
2442         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2443
2444         if (ethdev->data->dev_started == 0 ||
2445                         bond_ctx->active_slave_count == 0) {
2446                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2447                 return 0;
2448         }
2449
2450         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2451
2452         if (wait_to_complete)
2453                 link_update = rte_eth_link_get;
2454         else
2455                 link_update = rte_eth_link_get_nowait;
2456
2457         switch (bond_ctx->mode) {
2458         case BONDING_MODE_BROADCAST:
2459                 /**
2460                  * Setting link speed to UINT32_MAX to ensure we pick up the
2461                  * value of the first active slave
2462                  */
2463                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2464
2465                 /**
2466                  * link speed is minimum value of all the slaves link speed as
2467                  * packet loss will occur on this slave if transmission at rates
2468                  * greater than this are attempted
2469                  */
2470                 for (idx = 1; idx < bond_ctx->active_slave_count; idx++) {
2471                         link_update(bond_ctx->active_slaves[0], &slave_link);
2472
2473                         if (slave_link.link_speed <
2474                                         ethdev->data->dev_link.link_speed)
2475                                 ethdev->data->dev_link.link_speed =
2476                                                 slave_link.link_speed;
2477                 }
2478                 break;
2479         case BONDING_MODE_ACTIVE_BACKUP:
2480                 /* Current primary slave */
2481                 link_update(bond_ctx->current_primary_port, &slave_link);
2482
2483                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2484                 break;
2485         case BONDING_MODE_8023AD:
2486                 ethdev->data->dev_link.link_autoneg =
2487                                 bond_ctx->mode4.slave_link.link_autoneg;
2488                 ethdev->data->dev_link.link_duplex =
2489                                 bond_ctx->mode4.slave_link.link_duplex;
2490                 /* fall through to update link speed */
2491         case BONDING_MODE_ROUND_ROBIN:
2492         case BONDING_MODE_BALANCE:
2493         case BONDING_MODE_TLB:
2494         case BONDING_MODE_ALB:
2495         default:
2496                 /**
2497                  * In theses mode the maximum theoretical link speed is the sum
2498                  * of all the slaves
2499                  */
2500                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2501
2502                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2503                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2504
2505                         ethdev->data->dev_link.link_speed +=
2506                                         slave_link.link_speed;
2507                 }
2508         }
2509
2510
2511         return 0;
2512 }
2513
2514
2515 static int
2516 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2517 {
2518         struct bond_dev_private *internals = dev->data->dev_private;
2519         struct rte_eth_stats slave_stats;
2520         int i, j;
2521
2522         for (i = 0; i < internals->slave_count; i++) {
2523                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2524
2525                 stats->ipackets += slave_stats.ipackets;
2526                 stats->opackets += slave_stats.opackets;
2527                 stats->ibytes += slave_stats.ibytes;
2528                 stats->obytes += slave_stats.obytes;
2529                 stats->imissed += slave_stats.imissed;
2530                 stats->ierrors += slave_stats.ierrors;
2531                 stats->oerrors += slave_stats.oerrors;
2532                 stats->rx_nombuf += slave_stats.rx_nombuf;
2533
2534                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2535                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2536                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2537                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2538                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2539                         stats->q_errors[j] += slave_stats.q_errors[j];
2540                 }
2541
2542         }
2543
2544         return 0;
2545 }
2546
2547 static void
2548 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2549 {
2550         struct bond_dev_private *internals = dev->data->dev_private;
2551         int i;
2552
2553         for (i = 0; i < internals->slave_count; i++)
2554                 rte_eth_stats_reset(internals->slaves[i].port_id);
2555 }
2556
2557 static void
2558 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2559 {
2560         struct bond_dev_private *internals = eth_dev->data->dev_private;
2561         int i;
2562
2563         internals->promiscuous_en = 1;
2564
2565         switch (internals->mode) {
2566         /* Promiscuous mode is propagated to all slaves */
2567         case BONDING_MODE_ROUND_ROBIN:
2568         case BONDING_MODE_BALANCE:
2569         case BONDING_MODE_BROADCAST:
2570                 for (i = 0; i < internals->slave_count; i++)
2571                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2572                 break;
2573         /* In mode4 promiscus mode is managed when slave is added/removed */
2574         case BONDING_MODE_8023AD:
2575                 break;
2576         /* Promiscuous mode is propagated only to primary slave */
2577         case BONDING_MODE_ACTIVE_BACKUP:
2578         case BONDING_MODE_TLB:
2579         case BONDING_MODE_ALB:
2580         default:
2581                 rte_eth_promiscuous_enable(internals->current_primary_port);
2582         }
2583 }
2584
2585 static void
2586 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2587 {
2588         struct bond_dev_private *internals = dev->data->dev_private;
2589         int i;
2590
2591         internals->promiscuous_en = 0;
2592
2593         switch (internals->mode) {
2594         /* Promiscuous mode is propagated to all slaves */
2595         case BONDING_MODE_ROUND_ROBIN:
2596         case BONDING_MODE_BALANCE:
2597         case BONDING_MODE_BROADCAST:
2598                 for (i = 0; i < internals->slave_count; i++)
2599                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2600                 break;
2601         /* In mode4 promiscus mode is set managed when slave is added/removed */
2602         case BONDING_MODE_8023AD:
2603                 break;
2604         /* Promiscuous mode is propagated only to primary slave */
2605         case BONDING_MODE_ACTIVE_BACKUP:
2606         case BONDING_MODE_TLB:
2607         case BONDING_MODE_ALB:
2608         default:
2609                 rte_eth_promiscuous_disable(internals->current_primary_port);
2610         }
2611 }
2612
2613 static void
2614 bond_ethdev_delayed_lsc_propagation(void *arg)
2615 {
2616         if (arg == NULL)
2617                 return;
2618
2619         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2620                         RTE_ETH_EVENT_INTR_LSC, NULL);
2621 }
2622
2623 int
2624 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2625                 void *param, void *ret_param __rte_unused)
2626 {
2627         struct rte_eth_dev *bonded_eth_dev;
2628         struct bond_dev_private *internals;
2629         struct rte_eth_link link;
2630         int rc = -1;
2631
2632         int i, valid_slave = 0;
2633         uint8_t active_pos;
2634         uint8_t lsc_flag = 0;
2635
2636         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2637                 return rc;
2638
2639         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
2640
2641         if (check_for_bonded_ethdev(bonded_eth_dev))
2642                 return rc;
2643
2644         internals = bonded_eth_dev->data->dev_private;
2645
2646         /* If the device isn't started don't handle interrupts */
2647         if (!bonded_eth_dev->data->dev_started)
2648                 return rc;
2649
2650         /* verify that port_id is a valid slave of bonded port */
2651         for (i = 0; i < internals->slave_count; i++) {
2652                 if (internals->slaves[i].port_id == port_id) {
2653                         valid_slave = 1;
2654                         break;
2655                 }
2656         }
2657
2658         if (!valid_slave)
2659                 return rc;
2660
2661         /* Synchronize lsc callback parallel calls either by real link event
2662          * from the slaves PMDs or by the bonding PMD itself.
2663          */
2664         rte_spinlock_lock(&internals->lsc_lock);
2665
2666         /* Search for port in active port list */
2667         active_pos = find_slave_by_id(internals->active_slaves,
2668                         internals->active_slave_count, port_id);
2669
2670         rte_eth_link_get_nowait(port_id, &link);
2671         if (link.link_status) {
2672                 if (active_pos < internals->active_slave_count)
2673                         goto link_update;
2674
2675                 /* if no active slave ports then set this port to be primary port */
2676                 if (internals->active_slave_count < 1) {
2677                         /* If first active slave, then change link status */
2678                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2679                         internals->current_primary_port = port_id;
2680                         lsc_flag = 1;
2681
2682                         mac_address_slaves_update(bonded_eth_dev);
2683                 }
2684
2685                 /* check link state properties if bonded link is up*/
2686                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2687                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2688                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2689                                              "for slave %d in bonding mode %d",
2690                                              port_id, internals->mode);
2691                 } else {
2692                         /* inherit slave link properties */
2693                         link_properties_set(bonded_eth_dev, &link);
2694                 }
2695
2696                 activate_slave(bonded_eth_dev, port_id);
2697
2698                 /* If user has defined the primary port then default to using it */
2699                 if (internals->user_defined_primary_port &&
2700                                 internals->primary_port == port_id)
2701                         bond_ethdev_primary_set(internals, port_id);
2702         } else {
2703                 if (active_pos == internals->active_slave_count)
2704                         goto link_update;
2705
2706                 /* Remove from active slave list */
2707                 deactivate_slave(bonded_eth_dev, port_id);
2708
2709                 if (internals->active_slave_count < 1)
2710                         lsc_flag = 1;
2711
2712                 /* Update primary id, take first active slave from list or if none
2713                  * available set to -1 */
2714                 if (port_id == internals->current_primary_port) {
2715                         if (internals->active_slave_count > 0)
2716                                 bond_ethdev_primary_set(internals,
2717                                                 internals->active_slaves[0]);
2718                         else
2719                                 internals->current_primary_port = internals->primary_port;
2720                 }
2721         }
2722
2723 link_update:
2724         /**
2725          * Update bonded device link properties after any change to active
2726          * slaves
2727          */
2728         bond_ethdev_link_update(bonded_eth_dev, 0);
2729
2730         if (lsc_flag) {
2731                 /* Cancel any possible outstanding interrupts if delays are enabled */
2732                 if (internals->link_up_delay_ms > 0 ||
2733                         internals->link_down_delay_ms > 0)
2734                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2735                                         bonded_eth_dev);
2736
2737                 if (bonded_eth_dev->data->dev_link.link_status) {
2738                         if (internals->link_up_delay_ms > 0)
2739                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2740                                                 bond_ethdev_delayed_lsc_propagation,
2741                                                 (void *)bonded_eth_dev);
2742                         else
2743                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2744                                                 RTE_ETH_EVENT_INTR_LSC,
2745                                                 NULL);
2746
2747                 } else {
2748                         if (internals->link_down_delay_ms > 0)
2749                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2750                                                 bond_ethdev_delayed_lsc_propagation,
2751                                                 (void *)bonded_eth_dev);
2752                         else
2753                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2754                                                 RTE_ETH_EVENT_INTR_LSC,
2755                                                 NULL);
2756                 }
2757         }
2758
2759         rte_spinlock_unlock(&internals->lsc_lock);
2760
2761         return rc;
2762 }
2763
2764 static int
2765 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2766                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2767 {
2768         unsigned i, j;
2769         int result = 0;
2770         int slave_reta_size;
2771         unsigned reta_count;
2772         struct bond_dev_private *internals = dev->data->dev_private;
2773
2774         if (reta_size != internals->reta_size)
2775                 return -EINVAL;
2776
2777          /* Copy RETA table */
2778         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2779
2780         for (i = 0; i < reta_count; i++) {
2781                 internals->reta_conf[i].mask = reta_conf[i].mask;
2782                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2783                         if ((reta_conf[i].mask >> j) & 0x01)
2784                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2785         }
2786
2787         /* Fill rest of array */
2788         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2789                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2790                                 sizeof(internals->reta_conf[0]) * reta_count);
2791
2792         /* Propagate RETA over slaves */
2793         for (i = 0; i < internals->slave_count; i++) {
2794                 slave_reta_size = internals->slaves[i].reta_size;
2795                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2796                                 &internals->reta_conf[0], slave_reta_size);
2797                 if (result < 0)
2798                         return result;
2799         }
2800
2801         return 0;
2802 }
2803
2804 static int
2805 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2806                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2807 {
2808         int i, j;
2809         struct bond_dev_private *internals = dev->data->dev_private;
2810
2811         if (reta_size != internals->reta_size)
2812                 return -EINVAL;
2813
2814          /* Copy RETA table */
2815         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2816                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2817                         if ((reta_conf[i].mask >> j) & 0x01)
2818                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2819
2820         return 0;
2821 }
2822
2823 static int
2824 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2825                 struct rte_eth_rss_conf *rss_conf)
2826 {
2827         int i, result = 0;
2828         struct bond_dev_private *internals = dev->data->dev_private;
2829         struct rte_eth_rss_conf bond_rss_conf;
2830
2831         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2832
2833         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2834
2835         if (bond_rss_conf.rss_hf != 0)
2836                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2837
2838         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2839                         sizeof(internals->rss_key)) {
2840                 if (bond_rss_conf.rss_key_len == 0)
2841                         bond_rss_conf.rss_key_len = 40;
2842                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2843                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2844                                 internals->rss_key_len);
2845         }
2846
2847         for (i = 0; i < internals->slave_count; i++) {
2848                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2849                                 &bond_rss_conf);
2850                 if (result < 0)
2851                         return result;
2852         }
2853
2854         return 0;
2855 }
2856
2857 static int
2858 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2859                 struct rte_eth_rss_conf *rss_conf)
2860 {
2861         struct bond_dev_private *internals = dev->data->dev_private;
2862
2863         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2864         rss_conf->rss_key_len = internals->rss_key_len;
2865         if (rss_conf->rss_key)
2866                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2867
2868         return 0;
2869 }
2870
2871 static int
2872 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2873 {
2874         struct rte_eth_dev *slave_eth_dev;
2875         struct bond_dev_private *internals = dev->data->dev_private;
2876         int ret, i;
2877
2878         rte_spinlock_lock(&internals->lock);
2879
2880         for (i = 0; i < internals->slave_count; i++) {
2881                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2882                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2883                         rte_spinlock_unlock(&internals->lock);
2884                         return -ENOTSUP;
2885                 }
2886         }
2887         for (i = 0; i < internals->slave_count; i++) {
2888                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2889                 if (ret < 0) {
2890                         rte_spinlock_unlock(&internals->lock);
2891                         return ret;
2892                 }
2893         }
2894
2895         rte_spinlock_unlock(&internals->lock);
2896         return 0;
2897 }
2898
2899 static int
2900 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2901 {
2902         if (mac_address_set(dev, addr)) {
2903                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2904                 return -EINVAL;
2905         }
2906
2907         return 0;
2908 }
2909
2910 static int
2911 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2912                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2913 {
2914         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2915                 *(const void **)arg = &bond_flow_ops;
2916                 return 0;
2917         }
2918         return -ENOTSUP;
2919 }
2920
2921 static int
2922 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2923                                 __rte_unused uint32_t index, uint32_t vmdq)
2924 {
2925         struct rte_eth_dev *slave_eth_dev;
2926         struct bond_dev_private *internals = dev->data->dev_private;
2927         int ret, i;
2928
2929         rte_spinlock_lock(&internals->lock);
2930
2931         for (i = 0; i < internals->slave_count; i++) {
2932                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2933                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2934                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2935                         ret = -ENOTSUP;
2936                         goto end;
2937                 }
2938         }
2939
2940         for (i = 0; i < internals->slave_count; i++) {
2941                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2942                                 mac_addr, vmdq);
2943                 if (ret < 0) {
2944                         /* rollback */
2945                         for (i--; i >= 0; i--)
2946                                 rte_eth_dev_mac_addr_remove(
2947                                         internals->slaves[i].port_id, mac_addr);
2948                         goto end;
2949                 }
2950         }
2951
2952         ret = 0;
2953 end:
2954         rte_spinlock_unlock(&internals->lock);
2955         return ret;
2956 }
2957
2958 static void
2959 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2960 {
2961         struct rte_eth_dev *slave_eth_dev;
2962         struct bond_dev_private *internals = dev->data->dev_private;
2963         int i;
2964
2965         rte_spinlock_lock(&internals->lock);
2966
2967         for (i = 0; i < internals->slave_count; i++) {
2968                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2969                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2970                         goto end;
2971         }
2972
2973         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
2974
2975         for (i = 0; i < internals->slave_count; i++)
2976                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2977                                 mac_addr);
2978
2979 end:
2980         rte_spinlock_unlock(&internals->lock);
2981 }
2982
2983 const struct eth_dev_ops default_dev_ops = {
2984         .dev_start            = bond_ethdev_start,
2985         .dev_stop             = bond_ethdev_stop,
2986         .dev_close            = bond_ethdev_close,
2987         .dev_configure        = bond_ethdev_configure,
2988         .dev_infos_get        = bond_ethdev_info,
2989         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2990         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2991         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2992         .rx_queue_release     = bond_ethdev_rx_queue_release,
2993         .tx_queue_release     = bond_ethdev_tx_queue_release,
2994         .link_update          = bond_ethdev_link_update,
2995         .stats_get            = bond_ethdev_stats_get,
2996         .stats_reset          = bond_ethdev_stats_reset,
2997         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2998         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2999         .reta_update          = bond_ethdev_rss_reta_update,
3000         .reta_query           = bond_ethdev_rss_reta_query,
3001         .rss_hash_update      = bond_ethdev_rss_hash_update,
3002         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3003         .mtu_set              = bond_ethdev_mtu_set,
3004         .mac_addr_set         = bond_ethdev_mac_address_set,
3005         .mac_addr_add         = bond_ethdev_mac_addr_add,
3006         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3007         .filter_ctrl          = bond_filter_ctrl
3008 };
3009
3010 static int
3011 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3012 {
3013         const char *name = rte_vdev_device_name(dev);
3014         uint8_t socket_id = dev->device.numa_node;
3015         struct bond_dev_private *internals = NULL;
3016         struct rte_eth_dev *eth_dev = NULL;
3017         uint32_t vlan_filter_bmp_size;
3018
3019         /* now do all data allocation - for eth_dev structure, dummy pci driver
3020          * and internal (private) data
3021          */
3022
3023         /* reserve an ethdev entry */
3024         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3025         if (eth_dev == NULL) {
3026                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3027                 goto err;
3028         }
3029
3030         internals = eth_dev->data->dev_private;
3031         eth_dev->data->nb_rx_queues = (uint16_t)1;
3032         eth_dev->data->nb_tx_queues = (uint16_t)1;
3033
3034         /* Allocate memory for storing MAC addresses */
3035         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3036                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3037         if (eth_dev->data->mac_addrs == NULL) {
3038                 RTE_BOND_LOG(ERR,
3039                              "Failed to allocate %u bytes needed to store MAC addresses",
3040                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3041                 goto err;
3042         }
3043
3044         eth_dev->dev_ops = &default_dev_ops;
3045         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3046
3047         rte_spinlock_init(&internals->lock);
3048         rte_spinlock_init(&internals->lsc_lock);
3049
3050         internals->port_id = eth_dev->data->port_id;
3051         internals->mode = BONDING_MODE_INVALID;
3052         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3053         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3054         internals->burst_xmit_hash = burst_xmit_l2_hash;
3055         internals->user_defined_mac = 0;
3056
3057         internals->link_status_polling_enabled = 0;
3058
3059         internals->link_status_polling_interval_ms =
3060                 DEFAULT_POLLING_INTERVAL_10_MS;
3061         internals->link_down_delay_ms = 0;
3062         internals->link_up_delay_ms = 0;
3063
3064         internals->slave_count = 0;
3065         internals->active_slave_count = 0;
3066         internals->rx_offload_capa = 0;
3067         internals->tx_offload_capa = 0;
3068         internals->rx_queue_offload_capa = 0;
3069         internals->tx_queue_offload_capa = 0;
3070         internals->candidate_max_rx_pktlen = 0;
3071         internals->max_rx_pktlen = 0;
3072
3073         /* Initially allow to choose any offload type */
3074         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3075
3076         memset(&internals->default_rxconf, 0,
3077                sizeof(internals->default_rxconf));
3078         memset(&internals->default_txconf, 0,
3079                sizeof(internals->default_txconf));
3080
3081         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3082         memset(internals->slaves, 0, sizeof(internals->slaves));
3083
3084         TAILQ_INIT(&internals->flow_list);
3085         internals->flow_isolated_valid = 0;
3086
3087         /* Set mode 4 default configuration */
3088         bond_mode_8023ad_setup(eth_dev, NULL);
3089         if (bond_ethdev_mode_set(eth_dev, mode)) {
3090                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3091                                  eth_dev->data->port_id, mode);
3092                 goto err;
3093         }
3094
3095         vlan_filter_bmp_size =
3096                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3097         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3098                                                    RTE_CACHE_LINE_SIZE);
3099         if (internals->vlan_filter_bmpmem == NULL) {
3100                 RTE_BOND_LOG(ERR,
3101                              "Failed to allocate vlan bitmap for bonded device %u",
3102                              eth_dev->data->port_id);
3103                 goto err;
3104         }
3105
3106         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3107                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3108         if (internals->vlan_filter_bmp == NULL) {
3109                 RTE_BOND_LOG(ERR,
3110                              "Failed to init vlan bitmap for bonded device %u",
3111                              eth_dev->data->port_id);
3112                 rte_free(internals->vlan_filter_bmpmem);
3113                 goto err;
3114         }
3115
3116         return eth_dev->data->port_id;
3117
3118 err:
3119         rte_free(internals);
3120         if (eth_dev != NULL) {
3121                 rte_free(eth_dev->data->mac_addrs);
3122                 rte_eth_dev_release_port(eth_dev);
3123         }
3124         return -1;
3125 }
3126
3127 static int
3128 bond_probe(struct rte_vdev_device *dev)
3129 {
3130         const char *name;
3131         struct bond_dev_private *internals;
3132         struct rte_kvargs *kvlist;
3133         uint8_t bonding_mode, socket_id/*, agg_mode*/;
3134         int  arg_count, port_id;
3135         uint8_t agg_mode;
3136         struct rte_eth_dev *eth_dev;
3137
3138         if (!dev)
3139                 return -EINVAL;
3140
3141         name = rte_vdev_device_name(dev);
3142         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3143
3144         if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
3145             strlen(rte_vdev_device_args(dev)) == 0) {
3146                 eth_dev = rte_eth_dev_attach_secondary(name);
3147                 if (!eth_dev) {
3148                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3149                         return -1;
3150                 }
3151                 /* TODO: request info from primary to set up Rx and Tx */
3152                 eth_dev->dev_ops = &default_dev_ops;
3153                 eth_dev->device = &dev->device;
3154                 rte_eth_dev_probing_finish(eth_dev);
3155                 return 0;
3156         }
3157
3158         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3159                 pmd_bond_init_valid_arguments);
3160         if (kvlist == NULL)
3161                 return -1;
3162
3163         /* Parse link bonding mode */
3164         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3165                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3166                                 &bond_ethdev_parse_slave_mode_kvarg,
3167                                 &bonding_mode) != 0) {
3168                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3169                                         name);
3170                         goto parse_error;
3171                 }
3172         } else {
3173                 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3174                                 "device %s", name);
3175                 goto parse_error;
3176         }
3177
3178         /* Parse socket id to create bonding device on */
3179         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3180         if (arg_count == 1) {
3181                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3182                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3183                                 != 0) {
3184                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3185                                         "bonded device %s", name);
3186                         goto parse_error;
3187                 }
3188         } else if (arg_count > 1) {
3189                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3190                                 "bonded device %s", name);
3191                 goto parse_error;
3192         } else {
3193                 socket_id = rte_socket_id();
3194         }
3195
3196         dev->device.numa_node = socket_id;
3197
3198         /* Create link bonding eth device */
3199         port_id = bond_alloc(dev, bonding_mode);
3200         if (port_id < 0) {
3201                 RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on "
3202                                 "socket %u.",   name, bonding_mode, socket_id);
3203                 goto parse_error;
3204         }
3205         internals = rte_eth_devices[port_id].data->dev_private;
3206         internals->kvlist = kvlist;
3207
3208         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3209
3210         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3211                 if (rte_kvargs_process(kvlist,
3212                                 PMD_BOND_AGG_MODE_KVARG,
3213                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3214                                 &agg_mode) != 0) {
3215                         RTE_BOND_LOG(ERR,
3216                                         "Failed to parse agg selection mode for bonded device %s",
3217                                         name);
3218                         goto parse_error;
3219                 }
3220
3221                 if (internals->mode == BONDING_MODE_8023AD)
3222                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3223                                         agg_mode);
3224         } else {
3225                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3226         }
3227
3228         RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3229                         "socket %u.",   name, port_id, bonding_mode, socket_id);
3230         return 0;
3231
3232 parse_error:
3233         rte_kvargs_free(kvlist);
3234
3235         return -1;
3236 }
3237
3238 static int
3239 bond_remove(struct rte_vdev_device *dev)
3240 {
3241         struct rte_eth_dev *eth_dev;
3242         struct bond_dev_private *internals;
3243         const char *name;
3244
3245         if (!dev)
3246                 return -EINVAL;
3247
3248         name = rte_vdev_device_name(dev);
3249         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3250
3251         /* now free all data allocation - for eth_dev structure,
3252          * dummy pci driver and internal (private) data
3253          */
3254
3255         /* find an ethdev entry */
3256         eth_dev = rte_eth_dev_allocated(name);
3257         if (eth_dev == NULL)
3258                 return -ENODEV;
3259
3260         RTE_ASSERT(eth_dev->device == &dev->device);
3261
3262         internals = eth_dev->data->dev_private;
3263         if (internals->slave_count != 0)
3264                 return -EBUSY;
3265
3266         if (eth_dev->data->dev_started == 1) {
3267                 bond_ethdev_stop(eth_dev);
3268                 bond_ethdev_close(eth_dev);
3269         }
3270
3271         eth_dev->dev_ops = NULL;
3272         eth_dev->rx_pkt_burst = NULL;
3273         eth_dev->tx_pkt_burst = NULL;
3274
3275         internals = eth_dev->data->dev_private;
3276         /* Try to release mempool used in mode6. If the bond
3277          * device is not mode6, free the NULL is not problem.
3278          */
3279         rte_mempool_free(internals->mode6.mempool);
3280         rte_bitmap_free(internals->vlan_filter_bmp);
3281         rte_free(internals->vlan_filter_bmpmem);
3282         rte_free(eth_dev->data->dev_private);
3283         rte_free(eth_dev->data->mac_addrs);
3284
3285         rte_eth_dev_release_port(eth_dev);
3286
3287         return 0;
3288 }
3289
3290 /* this part will resolve the slave portids after all the other pdev and vdev
3291  * have been allocated */
3292 static int
3293 bond_ethdev_configure(struct rte_eth_dev *dev)
3294 {
3295         const char *name = dev->device->name;
3296         struct bond_dev_private *internals = dev->data->dev_private;
3297         struct rte_kvargs *kvlist = internals->kvlist;
3298         int arg_count;
3299         uint16_t port_id = dev - rte_eth_devices;
3300         uint8_t agg_mode;
3301
3302         static const uint8_t default_rss_key[40] = {
3303                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3304                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3305                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3306                 0xBE, 0xAC, 0x01, 0xFA
3307         };
3308
3309         unsigned i, j;
3310
3311         /*
3312          * If RSS is enabled, fill table with default values and
3313          * set key to the the value specified in port RSS configuration.
3314          * Fall back to default RSS key if the key is not specified
3315          */
3316         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3317                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3318                         internals->rss_key_len =
3319                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3320                         memcpy(internals->rss_key,
3321                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3322                                internals->rss_key_len);
3323                 } else {
3324                         internals->rss_key_len = sizeof(default_rss_key);
3325                         memcpy(internals->rss_key, default_rss_key,
3326                                internals->rss_key_len);
3327                 }
3328
3329                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3330                         internals->reta_conf[i].mask = ~0LL;
3331                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3332                                 internals->reta_conf[i].reta[j] =
3333                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3334                                                 dev->data->nb_rx_queues;
3335                 }
3336         }
3337
3338         /* set the max_rx_pktlen */
3339         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3340
3341         /*
3342          * if no kvlist, it means that this bonded device has been created
3343          * through the bonding api.
3344          */
3345         if (!kvlist)
3346                 return 0;
3347
3348         /* Parse MAC address for bonded device */
3349         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3350         if (arg_count == 1) {
3351                 struct ether_addr bond_mac;
3352
3353                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3354                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3355                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3356                                      name);
3357                         return -1;
3358                 }
3359
3360                 /* Set MAC address */
3361                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3362                         RTE_BOND_LOG(ERR,
3363                                      "Failed to set mac address on bonded device %s",
3364                                      name);
3365                         return -1;
3366                 }
3367         } else if (arg_count > 1) {
3368                 RTE_BOND_LOG(ERR,
3369                              "MAC address can be specified only once for bonded device %s",
3370                              name);
3371                 return -1;
3372         }
3373
3374         /* Parse/set balance mode transmit policy */
3375         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3376         if (arg_count == 1) {
3377                 uint8_t xmit_policy;
3378
3379                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3380                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3381                     0) {
3382                         RTE_BOND_LOG(INFO,
3383                                      "Invalid xmit policy specified for bonded device %s",
3384                                      name);
3385                         return -1;
3386                 }
3387
3388                 /* Set balance mode transmit policy*/
3389                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3390                         RTE_BOND_LOG(ERR,
3391                                      "Failed to set balance xmit policy on bonded device %s",
3392                                      name);
3393                         return -1;
3394                 }
3395         } else if (arg_count > 1) {
3396                 RTE_BOND_LOG(ERR,
3397                              "Transmit policy can be specified only once for bonded device %s",
3398                              name);
3399                 return -1;
3400         }
3401
3402         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3403                 if (rte_kvargs_process(kvlist,
3404                                        PMD_BOND_AGG_MODE_KVARG,
3405                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3406                                        &agg_mode) != 0) {
3407                         RTE_BOND_LOG(ERR,
3408                                      "Failed to parse agg selection mode for bonded device %s",
3409                                      name);
3410                 }
3411                 if (internals->mode == BONDING_MODE_8023AD)
3412                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3413                                                               agg_mode);
3414         }
3415
3416         /* Parse/add slave ports to bonded device */
3417         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3418                 struct bond_ethdev_slave_ports slave_ports;
3419                 unsigned i;
3420
3421                 memset(&slave_ports, 0, sizeof(slave_ports));
3422
3423                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3424                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3425                         RTE_BOND_LOG(ERR,
3426                                      "Failed to parse slave ports for bonded device %s",
3427                                      name);
3428                         return -1;
3429                 }
3430
3431                 for (i = 0; i < slave_ports.slave_count; i++) {
3432                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3433                                 RTE_BOND_LOG(ERR,
3434                                              "Failed to add port %d as slave to bonded device %s",
3435                                              slave_ports.slaves[i], name);
3436                         }
3437                 }
3438
3439         } else {
3440                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3441                 return -1;
3442         }
3443
3444         /* Parse/set primary slave port id*/
3445         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3446         if (arg_count == 1) {
3447                 uint16_t primary_slave_port_id;
3448
3449                 if (rte_kvargs_process(kvlist,
3450                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3451                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3452                                        &primary_slave_port_id) < 0) {
3453                         RTE_BOND_LOG(INFO,
3454                                      "Invalid primary slave port id specified for bonded device %s",
3455                                      name);
3456                         return -1;
3457                 }
3458
3459                 /* Set balance mode transmit policy*/
3460                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3461                     != 0) {
3462                         RTE_BOND_LOG(ERR,
3463                                      "Failed to set primary slave port %d on bonded device %s",
3464                                      primary_slave_port_id, name);
3465                         return -1;
3466                 }
3467         } else if (arg_count > 1) {
3468                 RTE_BOND_LOG(INFO,
3469                              "Primary slave can be specified only once for bonded device %s",
3470                              name);
3471                 return -1;
3472         }
3473
3474         /* Parse link status monitor polling interval */
3475         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3476         if (arg_count == 1) {
3477                 uint32_t lsc_poll_interval_ms;
3478
3479                 if (rte_kvargs_process(kvlist,
3480                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3481                                        &bond_ethdev_parse_time_ms_kvarg,
3482                                        &lsc_poll_interval_ms) < 0) {
3483                         RTE_BOND_LOG(INFO,
3484                                      "Invalid lsc polling interval value specified for bonded"
3485                                      " device %s", name);
3486                         return -1;
3487                 }
3488
3489                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3490                     != 0) {
3491                         RTE_BOND_LOG(ERR,
3492                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3493                                      lsc_poll_interval_ms, name);
3494                         return -1;
3495                 }
3496         } else if (arg_count > 1) {
3497                 RTE_BOND_LOG(INFO,
3498                              "LSC polling interval can be specified only once for bonded"
3499                              " device %s", name);
3500                 return -1;
3501         }
3502
3503         /* Parse link up interrupt propagation delay */
3504         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3505         if (arg_count == 1) {
3506                 uint32_t link_up_delay_ms;
3507
3508                 if (rte_kvargs_process(kvlist,
3509                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3510                                        &bond_ethdev_parse_time_ms_kvarg,
3511                                        &link_up_delay_ms) < 0) {
3512                         RTE_BOND_LOG(INFO,
3513                                      "Invalid link up propagation delay value specified for"
3514                                      " bonded device %s", name);
3515                         return -1;
3516                 }
3517
3518                 /* Set balance mode transmit policy*/
3519                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3520                     != 0) {
3521                         RTE_BOND_LOG(ERR,
3522                                      "Failed to set link up propagation delay (%u ms) on bonded"
3523                                      " device %s", link_up_delay_ms, name);
3524                         return -1;
3525                 }
3526         } else if (arg_count > 1) {
3527                 RTE_BOND_LOG(INFO,
3528                              "Link up propagation delay can be specified only once for"
3529                              " bonded device %s", name);
3530                 return -1;
3531         }
3532
3533         /* Parse link down interrupt propagation delay */
3534         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3535         if (arg_count == 1) {
3536                 uint32_t link_down_delay_ms;
3537
3538                 if (rte_kvargs_process(kvlist,
3539                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3540                                        &bond_ethdev_parse_time_ms_kvarg,
3541                                        &link_down_delay_ms) < 0) {
3542                         RTE_BOND_LOG(INFO,
3543                                      "Invalid link down propagation delay value specified for"
3544                                      " bonded device %s", name);
3545                         return -1;
3546                 }
3547
3548                 /* Set balance mode transmit policy*/
3549                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3550                     != 0) {
3551                         RTE_BOND_LOG(ERR,
3552                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3553                                      link_down_delay_ms, name);
3554                         return -1;
3555                 }
3556         } else if (arg_count > 1) {
3557                 RTE_BOND_LOG(INFO,
3558                              "Link down propagation delay can be specified only once for  bonded device %s",
3559                              name);
3560                 return -1;
3561         }
3562
3563         return 0;
3564 }
3565
3566 struct rte_vdev_driver pmd_bond_drv = {
3567         .probe = bond_probe,
3568         .remove = bond_remove,
3569 };
3570
3571 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3572 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3573
3574 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3575         "slave=<ifc> "
3576         "primary=<ifc> "
3577         "mode=[0-6] "
3578         "xmit_policy=[l2 | l23 | l34] "
3579         "agg_mode=[count | stable | bandwidth] "
3580         "socket_id=<int> "
3581         "mac=<mac addr> "
3582         "lsc_poll_period_ms=<int> "
3583         "up_delay=<int> "
3584         "down_delay=<int>");
3585
3586 int bond_logtype;
3587
3588 RTE_INIT(bond_init_log)
3589 {
3590         bond_logtype = rte_log_register("pmd.net.bon");
3591         if (bond_logtype >= 0)
3592                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3593 }