drivers/net/bonding/rte_eth_bond_pmd.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <netinet/in.h>
6
7 #include <rte_mbuf.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_ethdev_vdev.h>
11 #include <rte_tcp.h>
12 #include <rte_udp.h>
13 #include <rte_ip.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
20 #include <rte_string_fns.h>
21
22 #include "rte_eth_bond.h"
23 #include "rte_eth_bond_private.h"
24 #include "rte_eth_bond_8023ad_private.h"
25
26 #define REORDER_PERIOD_MS 10
27 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
28 #define BOND_MAX_MAC_ADDRS 16
29
30 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
31
32 /* Table for statistics in mode 5 TLB */
33 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
34
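/*
 * Return the combined size of any VLAN headers (up to two, for QinQ)
 * following the Ethernet header, updating *proto to the inner EtherType.
 */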
35 static inline size_t
36 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
37 {
38         size_t vlan_offset = 0;
39
40         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
41                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
42
43                 vlan_offset = sizeof(struct vlan_hdr);
44                 *proto = vlan_hdr->eth_proto;
45
46                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
47                         vlan_hdr = vlan_hdr + 1;
48                         *proto = vlan_hdr->eth_proto;
49                         vlan_offset += sizeof(struct vlan_hdr);
50                 }
51         }
52         return vlan_offset;
53 }
54
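/*
 * Default RX burst: poll each active slave's queue in turn, appending the
 * received packets to bufs until nb_pkts is reached or all slaves are polled.
 */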
55 static uint16_t
56 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
57 {
58         struct bond_dev_private *internals;
59
60         uint16_t num_rx_slave = 0;
61         uint16_t num_rx_total = 0;
62
63         int i;
64
65         /* Cast to structure, containing bonded device's port id and queue id */
66         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
67
68         internals = bd_rx_q->dev_private;
69
70
71         for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
72                 /* Offset of pointer to *bufs increases as packets are received
73                  * from other slaves */
74                 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
75                                 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
76                 if (num_rx_slave) {
77                         num_rx_total += num_rx_slave;
78                         nb_pkts -= num_rx_slave;
79                 }
80         }
81
82         return num_rx_total;
83 }
84
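/* Active-backup RX: receive only from the current primary slave. */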
85 static uint16_t
86 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
87                 uint16_t nb_pkts)
88 {
89         struct bond_dev_private *internals;
90
91         /* Cast to structure, containing bonded device's port id and queue id */
92         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
93
94         internals = bd_rx_q->dev_private;
95
96         return rte_eth_rx_burst(internals->current_primary_port,
97                         bd_rx_q->queue_id, bufs, nb_pkts);
98 }
99
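/*
 * Return non-zero if the frame is an untagged slow protocols frame
 * (LACPDU or marker PDU).
 */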
100 static inline uint8_t
101 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
102 {
103         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
104
105         return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
106                 (ethertype == ether_type_slow_be &&
107                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
108 }
109
110 /*****************************************************************************
111  * Flow director's setup for mode 4 optimization
112  */
113
114 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
115         .dst.addr_bytes = { 0 },
116         .src.addr_bytes = { 0 },
117         .type = RTE_BE16(ETHER_TYPE_SLOW),
118 };
119
120 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
121         .dst.addr_bytes = { 0 },
122         .src.addr_bytes = { 0 },
123         .type = 0xFFFF,
124 };
125
126 static struct rte_flow_item flow_item_8023ad[] = {
127         {
128                 .type = RTE_FLOW_ITEM_TYPE_ETH,
129                 .spec = &flow_item_eth_type_8023ad,
130                 .last = NULL,
131                 .mask = &flow_item_eth_mask_type_8023ad,
132         },
133         {
134                 .type = RTE_FLOW_ITEM_TYPE_END,
135                 .spec = NULL,
136                 .last = NULL,
137                 .mask = NULL,
138         }
139 };
140
141 const struct rte_flow_attr flow_attr_8023ad = {
142         .group = 0,
143         .priority = 0,
144         .ingress = 1,
145         .egress = 0,
146         .reserved = 0,
147 };
148
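/*
 * Validate that a slave would accept the LACP flow rule used for the
 * dedicated control queue and that its queue limits can accommodate the
 * bonded device's RX/TX queues.
 */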
149 int
150 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
151                 uint16_t slave_port) {
152         struct rte_eth_dev_info slave_info;
153         struct rte_flow_error error;
154         struct bond_dev_private *internals = (struct bond_dev_private *)
155                         (bond_dev->data->dev_private);
156
157         const struct rte_flow_action_queue lacp_queue_conf = {
158                 .index = 0,
159         };
160
161         const struct rte_flow_action actions[] = {
162                 {
163                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
164                         .conf = &lacp_queue_conf
165                 },
166                 {
167                         .type = RTE_FLOW_ACTION_TYPE_END,
168                 }
169         };
170
171         int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
172                         flow_item_8023ad, actions, &error);
173         if (ret < 0) {
174                 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
175                                 __func__, error.message, slave_port,
176                                 internals->mode4.dedicated_queues.rx_qid);
177                 return -1;
178         }
179
180         rte_eth_dev_info_get(slave_port, &slave_info);
181         if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
182                         slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
183                 RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
185                         __func__, slave_port);
186                 return -1;
187         }
188
189         return 0;
190 }
191
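/*
 * Check whether every slave of the bonded device can steer LACP control
 * frames to a dedicated RX queue via rte_flow, recording the queue ids to
 * use for the dedicated queues.
 */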
192 int
193 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
194         struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
195         struct bond_dev_private *internals = (struct bond_dev_private *)
196                         (bond_dev->data->dev_private);
197         struct rte_eth_dev_info bond_info;
198         uint16_t idx;
199
        /* Verify that all slaves in the bonding device support flow director */
201         if (internals->slave_count > 0) {
202                 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
203
204                 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
205                 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
206
207                 for (idx = 0; idx < internals->slave_count; idx++) {
208                         if (bond_ethdev_8023ad_flow_verify(bond_dev,
209                                         internals->slaves[idx].port_id) != 0)
210                                 return -1;
211                 }
212         }
213
214         return 0;
215 }
216
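/*
 * Install the rte_flow rule on a slave that directs slow protocol frames to
 * the dedicated LACP RX queue.
 */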
217 int
218 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
219
220         struct rte_flow_error error;
221         struct bond_dev_private *internals = (struct bond_dev_private *)
222                         (bond_dev->data->dev_private);
223
224         struct rte_flow_action_queue lacp_queue_conf = {
225                 .index = internals->mode4.dedicated_queues.rx_qid,
226         };
227
228         const struct rte_flow_action actions[] = {
229                 {
230                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
231                         .conf = &lacp_queue_conf
232                 },
233                 {
234                         .type = RTE_FLOW_ACTION_TYPE_END,
235                 }
236         };
237
238         internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
239                         &flow_attr_8023ad, flow_item_8023ad, actions, &error);
240         if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
241                 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
242                                 "(slave_port=%d queue_id=%d)",
243                                 error.message, slave_port,
244                                 internals->mode4.dedicated_queues.rx_qid);
245                 return -1;
246         }
247
248         return 0;
249 }
250
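/*
 * Mode 4 RX when dedicated queues are enabled: LACP frames are filtered to a
 * separate queue in hardware, so data packets can be read from the slaves in
 * round-robin order without inspecting them.
 */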
251 static uint16_t
252 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
253                 uint16_t nb_pkts)
254 {
255         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
256         struct bond_dev_private *internals = bd_rx_q->dev_private;
257         uint16_t num_rx_total = 0;      /* Total number of received packets */
258         uint16_t slaves[RTE_MAX_ETHPORTS];
259         uint16_t slave_count;
260
261         uint16_t i, idx;
262
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
265         slave_count = internals->active_slave_count;
266         memcpy(slaves, internals->active_slaves,
267                         sizeof(internals->active_slaves[0]) * slave_count);
268
269         for (i = 0, idx = internals->active_slave;
270                         i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
271                 idx = idx % slave_count;
272
273                 /* Read packets from this slave */
274                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
275                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
276         }
277
278         internals->active_slave = idx;
279
280         return num_rx_total;
281 }
282
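/*
 * Mode 4 TX when dedicated queues are enabled: hash each packet onto one of
 * the slaves currently in DISTRIBUTING state and transmit the per-slave
 * bursts; packets that fail to transmit are moved to the end of bufs.
 */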
283 static uint16_t
284 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
285                 uint16_t nb_bufs)
286 {
287         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
288         struct bond_dev_private *internals = bd_tx_q->dev_private;
289
290         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
291         uint16_t slave_count;
292
293         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
294         uint16_t dist_slave_count;
295
296         /* 2-D array to sort mbufs for transmission on each slave into */
297         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
298         /* Number of mbufs for transmission on each slave */
299         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
300         /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];
302
303         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
304         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
305
306         uint16_t i, j;
307
308         if (unlikely(nb_bufs == 0))
309                 return 0;
310
311         /* Copy slave list to protect against slave up/down changes during tx
312          * bursting */
313         slave_count = internals->active_slave_count;
314         if (unlikely(slave_count < 1))
315                 return 0;
316
317         memcpy(slave_port_ids, internals->active_slaves,
318                         sizeof(slave_port_ids[0]) * slave_count);
319
320
321         dist_slave_count = 0;
322         for (i = 0; i < slave_count; i++) {
323                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
324
325                 if (ACTOR_STATE(port, DISTRIBUTING))
326                         dist_slave_port_ids[dist_slave_count++] =
327                                         slave_port_ids[i];
328         }
329
330         if (unlikely(dist_slave_count < 1))
331                 return 0;
332
        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave with a hash based on the xmit policy
         */
337         internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
338                         bufs_slave_port_idxs);
339
340         for (i = 0; i < nb_bufs; i++) {
341                 /* Populate slave mbuf arrays with mbufs for that slave. */
342                 uint8_t slave_idx = bufs_slave_port_idxs[i];
343
344                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
345         }
346
347
348         /* Send packet burst on each slave device */
349         for (i = 0; i < dist_slave_count; i++) {
350                 if (slave_nb_bufs[i] == 0)
351                         continue;
352
353                 slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
354                                 bd_tx_q->queue_id, slave_bufs[i],
355                                 slave_nb_bufs[i]);
356
357                 total_tx_count += slave_tx_count;
358
359                 /* If tx burst fails move packets to end of bufs */
360                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
361                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
362                                         slave_tx_count;
363                         total_tx_fail_count += slave_tx_fail_count[i];
364
365                         /*
366                          * Shift bufs to beginning of array to allow reordering
367                          * later
368                          */
369                         for (j = 0; j < slave_tx_fail_count[i]; j++) {
370                                 slave_bufs[i][j] =
                                        slave_bufs[i][slave_tx_count + j];
372                         }
373                 }
374         }
375
        /*
         * If there are tx burst failures we move packets to the end of bufs to
         * preserve the expected PMD behaviour of all packets that failed to
         * transmit being at the end of the input mbuf array
         */
        if (unlikely(total_tx_fail_count > 0)) {
                int bufs_idx = nb_bufs - total_tx_fail_count;
383
384                 for (i = 0; i < slave_count; i++) {
385                         if (slave_tx_fail_count[i] > 0) {
386                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
387                                         bufs[bufs_idx++] = slave_bufs[i][j];
388                         }
389                 }
390         }
391
392         return total_tx_count;
393 }
394
395
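/*
 * Mode 4 RX without dedicated queues: receive from each slave in turn,
 * hand LACP/marker frames to the mode 4 state machine, and drop packets
 * received on a slave that is not collecting or that are not addressed to
 * the bonded MAC when promiscuous mode is off.
 */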
396 static uint16_t
397 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
398                 uint16_t nb_pkts)
399 {
400         /* Cast to structure, containing bonded device's port id and queue id */
401         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
402         struct bond_dev_private *internals = bd_rx_q->dev_private;
403         struct ether_addr bond_mac;
404
405         struct ether_hdr *hdr;
406
407         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
408         uint16_t num_rx_total = 0;      /* Total number of received packets */
409         uint16_t slaves[RTE_MAX_ETHPORTS];
410         uint16_t slave_count, idx;
411
412         uint8_t collecting;  /* current slave collecting status */
413         const uint8_t promisc = internals->promiscuous_en;
        uint16_t i, j, k;
415         uint8_t subtype;
416
417         rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
420         slave_count = internals->active_slave_count;
421         memcpy(slaves, internals->active_slaves,
422                         sizeof(internals->active_slaves[0]) * slave_count);
423
424         idx = internals->active_slave;
425         if (idx >= slave_count) {
426                 internals->active_slave = 0;
427                 idx = 0;
428         }
429         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
430                 j = num_rx_total;
431                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
432                                          COLLECTING);
433
434                 /* Read packets from this slave */
435                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
436                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
437
438                 for (k = j; k < 2 && k < num_rx_total; k++)
439                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
440
441                 /* Handle slow protocol packets. */
442                 while (j < num_rx_total) {
443
                        /* Skip packets known to carry more than plain L2; they cannot be slow frames */
445                         if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
446                                 j++;
447                                 continue;
448                         }
449
450                         if (j + 3 < num_rx_total)
451                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
452
453                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
454                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
455
                        /* Remove the packet from the array if it is a slow packet, if the
                         * slave is not in collecting state, or if the bonding interface is
                         * not in promiscuous mode and the destination MAC does not match. */
459                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
460                                 !collecting || (!promisc &&
461                                         !is_multicast_ether_addr(&hdr->d_addr) &&
462                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
463
464                                 if (hdr->ether_type == ether_type_slow_be) {
465                                         bond_mode_8023ad_handle_slow_pkt(
466                                             internals, slaves[idx], bufs[j]);
467                                 } else
468                                         rte_pktmbuf_free(bufs[j]);
469
470                                 /* Packet is managed by mode 4 or dropped, shift the array */
471                                 num_rx_total--;
472                                 if (j < num_rx_total) {
473                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
474                                                 (num_rx_total - j));
475                                 }
476                         } else
477                                 j++;
478                 }
479                 if (unlikely(++idx == slave_count))
480                         idx = 0;
481         }
482
483         internals->active_slave = idx;
484         return num_rx_total;
485 }
486
487 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
488 uint32_t burstnumberRX;
489 uint32_t burstnumberTX;
490
491 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
492
493 static void
494 arp_op_name(uint16_t arp_op, char *buf)
495 {
496         switch (arp_op) {
497         case ARP_OP_REQUEST:
498                 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
499                 return;
500         case ARP_OP_REPLY:
501                 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
502                 return;
503         case ARP_OP_REVREQUEST:
504                 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
505                                 "Reverse ARP Request");
506                 return;
507         case ARP_OP_REVREPLY:
508                 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
509                                 "Reverse ARP Reply");
510                 return;
511         case ARP_OP_INVREQUEST:
512                 snprintf(buf, sizeof("Peer Identify Request"), "%s",
513                                 "Peer Identify Request");
514                 return;
515         case ARP_OP_INVREPLY:
516                 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
517                                 "Peer Identify Reply");
518                 return;
519         default:
520                 break;
521         }
522         snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
523         return;
524 }
525 #endif
526 #define MaxIPv4String   16
527 static void
528 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
529 {
530         uint32_t ipv4_addr;
531
532         ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
533         snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
534                 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
535                 ipv4_addr & 0xFF);
536 }
537
538 #define MAX_CLIENTS_NUMBER      128
539 uint8_t active_clients;
540 struct client_stats_t {
541         uint16_t port;
542         uint32_t ipv4_addr;
543         uint32_t ipv4_rx_packets;
544         uint32_t ipv4_tx_packets;
545 };
546 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
547
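/*
 * Account one RX or TX packet for the given client (IPv4 address and port
 * id), adding a new entry to client_stats[] the first time the client is seen.
 */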
548 static void
549 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
550 {
551         int i = 0;
552
553         for (; i < MAX_CLIENTS_NUMBER; i++)     {
554                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
                        /* Update the RX or TX packet count for this client */
556                         if (TXorRXindicator == &burstnumberRX)
557                                 client_stats[i].ipv4_rx_packets++;
558                         else
559                                 client_stats[i].ipv4_tx_packets++;
560                         return;
561                 }
562         }
        /* We have a new client. Insert it into the table and update its stats. */
        if (active_clients == MAX_CLIENTS_NUMBER)
                return;
        if (TXorRXindicator == &burstnumberRX)
565                 client_stats[active_clients].ipv4_rx_packets++;
566         else
567                 client_stats[active_clients].ipv4_tx_packets++;
568         client_stats[active_clients].ipv4_addr = addr;
569         client_stats[active_clients].port = port;
570         active_clients++;
571
572 }
573
574 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
575 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
576         rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
577                 "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
578                 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
579                 info,                                                   \
580                 port,                                                   \
581                 eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
582                 eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
583                 eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
584                 src_ip,                                                 \
585                 eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
586                 eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
587                 eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
588                 dst_ip,                                                 \
589                 arp_op, ++burstnumber)
590 #endif
591
592 static void
593 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
594                 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
595 {
596         struct ipv4_hdr *ipv4_h;
597 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
598         struct arp_hdr *arp_h;
599         char dst_ip[16];
600         char ArpOp[24];
601         char buf[16];
602 #endif
603         char src_ip[16];
604
605         uint16_t ether_type = eth_h->ether_type;
606         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
607
608 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
609         strlcpy(buf, info, 16);
610 #endif
611
612         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
613                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
614                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
615 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
616                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
617                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
618 #endif
619                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
620         }
621 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
622         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
623                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
624                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
625                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
626                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
627                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
628         }
629 #endif
630 }
631 #endif
632
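/*
 * Mode 6 (ALB) RX: receive as in the default burst, then pass any ARP
 * packets to the ALB logic so the client table can be updated.
 */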
633 static uint16_t
634 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
635 {
636         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
637         struct bond_dev_private *internals = bd_tx_q->dev_private;
638         struct ether_hdr *eth_h;
639         uint16_t ether_type, offset;
640         uint16_t nb_recv_pkts;
641         int i;
642
643         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
644
645         for (i = 0; i < nb_recv_pkts; i++) {
646                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
647                 ether_type = eth_h->ether_type;
648                 offset = get_vlan_offset(eth_h, &ether_type);
649
650                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
651 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
652                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
653 #endif
654                         bond_mode_alb_arp_recv(eth_h, offset, internals);
655                 }
656 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
657                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
658                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
659 #endif
660         }
661
662         return nb_recv_pkts;
663 }
664
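/*
 * Round-robin TX: spread the burst across the active slaves, continuing
 * from the slave used by the previous call.
 */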
665 static uint16_t
666 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
667                 uint16_t nb_pkts)
668 {
669         struct bond_dev_private *internals;
670         struct bond_tx_queue *bd_tx_q;
671
672         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
673         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
674
675         uint16_t num_of_slaves;
676         uint16_t slaves[RTE_MAX_ETHPORTS];
677
678         uint16_t num_tx_total = 0, num_tx_slave;
679
680         static int slave_idx = 0;
681         int i, cslave_idx = 0, tx_fail_total = 0;
682
683         bd_tx_q = (struct bond_tx_queue *)queue;
684         internals = bd_tx_q->dev_private;
685
686         /* Copy slave list to protect against slave up/down changes during tx
687          * bursting */
688         num_of_slaves = internals->active_slave_count;
689         memcpy(slaves, internals->active_slaves,
690                         sizeof(internals->active_slaves[0]) * num_of_slaves);
691
692         if (num_of_slaves < 1)
693                 return num_tx_total;
694
        /* Populate each slave's mbuf array with the packets to be sent on it */
696         for (i = 0; i < nb_pkts; i++) {
697                 cslave_idx = (slave_idx + i) % num_of_slaves;
698                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
699         }
700
701         /* increment current slave index so the next call to tx burst starts on the
702          * next slave */
703         slave_idx = ++cslave_idx;
704
705         /* Send packet burst on each slave device */
706         for (i = 0; i < num_of_slaves; i++) {
707                 if (slave_nb_pkts[i] > 0) {
708                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
709                                         slave_bufs[i], slave_nb_pkts[i]);
710
711                         /* if tx burst fails move packets to end of bufs */
712                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
713                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
714
715                                 tx_fail_total += tx_fail_slave;
716
717                                 memcpy(&bufs[nb_pkts - tx_fail_total],
718                                                 &slave_bufs[i][num_tx_slave],
719                                                 tx_fail_slave * sizeof(bufs[0]));
720                         }
721                         num_tx_total += num_tx_slave;
722                 }
723         }
724
725         return num_tx_total;
726 }
727
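/* Active-backup TX: transmit only on the current primary slave. */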
728 static uint16_t
729 bond_ethdev_tx_burst_active_backup(void *queue,
730                 struct rte_mbuf **bufs, uint16_t nb_pkts)
731 {
732         struct bond_dev_private *internals;
733         struct bond_tx_queue *bd_tx_q;
734
735         bd_tx_q = (struct bond_tx_queue *)queue;
736         internals = bd_tx_q->dev_private;
737
738         if (internals->active_slave_count < 1)
739                 return 0;
740
741         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
742                         bufs, nb_pkts);
743 }
744
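/* XOR fold of the 16-bit words of the source and destination MAC addresses. */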
745 static inline uint16_t
746 ether_hash(struct ether_hdr *eth_hdr)
747 {
748         unaligned_uint16_t *word_src_addr =
749                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
750         unaligned_uint16_t *word_dst_addr =
751                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
752
753         return (word_src_addr[0] ^ word_dst_addr[0]) ^
754                         (word_src_addr[1] ^ word_dst_addr[1]) ^
755                         (word_src_addr[2] ^ word_dst_addr[2]);
756 }
757
758 static inline uint32_t
759 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
760 {
761         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
762 }
763
764 static inline uint32_t
765 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
766 {
767         unaligned_uint32_t *word_src_addr =
768                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
769         unaligned_uint32_t *word_dst_addr =
770                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
771
772         return (word_src_addr[0] ^ word_dst_addr[0]) ^
773                         (word_src_addr[1] ^ word_dst_addr[1]) ^
774                         (word_src_addr[2] ^ word_dst_addr[2]) ^
775                         (word_src_addr[3] ^ word_dst_addr[3]);
776 }
777
778
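/*
 * Layer 2 transmit hash: map each packet to a slave index from the XOR of
 * its source and destination MAC addresses.
 */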
779 void
780 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
781                 uint8_t slave_count, uint16_t *slaves)
782 {
783         struct ether_hdr *eth_hdr;
784         uint32_t hash;
785         int i;
786
787         for (i = 0; i < nb_pkts; i++) {
788                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
789
790                 hash = ether_hash(eth_hdr);
791
792                 slaves[i] = (hash ^= hash >> 8) % slave_count;
793         }
794 }
795
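/*
 * Layer 2+3 transmit hash: combine the MAC hash with an IPv4/IPv6 address
 * hash when an IP header is present.
 */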
796 void
797 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
798                 uint8_t slave_count, uint16_t *slaves)
799 {
800         uint16_t i;
801         struct ether_hdr *eth_hdr;
802         uint16_t proto;
803         size_t vlan_offset;
804         uint32_t hash, l3hash;
805
806         for (i = 0; i < nb_pkts; i++) {
807                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
808                 l3hash = 0;
809
810                 proto = eth_hdr->ether_type;
811                 hash = ether_hash(eth_hdr);
812
813                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
814
815                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
816                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
817                                         ((char *)(eth_hdr + 1) + vlan_offset);
818                         l3hash = ipv4_hash(ipv4_hdr);
819
820                 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
821                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
822                                         ((char *)(eth_hdr + 1) + vlan_offset);
823                         l3hash = ipv6_hash(ipv6_hdr);
824                 }
825
826                 hash = hash ^ l3hash;
827                 hash ^= hash >> 16;
828                 hash ^= hash >> 8;
829
830                 slaves[i] = hash % slave_count;
831         }
832 }
833
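/*
 * Layer 3+4 transmit hash: combine IPv4/IPv6 address hashes with TCP/UDP
 * port hashes; fragmented IPv4 packets are hashed on addresses only.
 */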
834 void
835 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
836                 uint8_t slave_count, uint16_t *slaves)
837 {
838         struct ether_hdr *eth_hdr;
839         uint16_t proto;
840         size_t vlan_offset;
841         int i;
842
843         struct udp_hdr *udp_hdr;
844         struct tcp_hdr *tcp_hdr;
845         uint32_t hash, l3hash, l4hash;
846
847         for (i = 0; i < nb_pkts; i++) {
848                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
849                 proto = eth_hdr->ether_type;
850                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
851                 l3hash = 0;
852                 l4hash = 0;
853
854                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
855                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
856                                         ((char *)(eth_hdr + 1) + vlan_offset);
857                         size_t ip_hdr_offset;
858
859                         l3hash = ipv4_hash(ipv4_hdr);
860
861                         /* there is no L4 header in fragmented packet */
862                         if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
863                                                                 == 0)) {
864                                 ip_hdr_offset = (ipv4_hdr->version_ihl
865                                         & IPV4_HDR_IHL_MASK) *
866                                         IPV4_IHL_MULTIPLIER;
867
868                                 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
869                                         tcp_hdr = (struct tcp_hdr *)
870                                                 ((char *)ipv4_hdr +
871                                                         ip_hdr_offset);
872                                         l4hash = HASH_L4_PORTS(tcp_hdr);
873                                 } else if (ipv4_hdr->next_proto_id ==
874                                                                 IPPROTO_UDP) {
875                                         udp_hdr = (struct udp_hdr *)
876                                                 ((char *)ipv4_hdr +
877                                                         ip_hdr_offset);
878                                         l4hash = HASH_L4_PORTS(udp_hdr);
879                                 }
880                         }
881                 } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
882                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
883                                         ((char *)(eth_hdr + 1) + vlan_offset);
884                         l3hash = ipv6_hash(ipv6_hdr);
885
886                         if (ipv6_hdr->proto == IPPROTO_TCP) {
887                                 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
888                                 l4hash = HASH_L4_PORTS(tcp_hdr);
889                         } else if (ipv6_hdr->proto == IPPROTO_UDP) {
890                                 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
891                                 l4hash = HASH_L4_PORTS(udp_hdr);
892                         }
893                 }
894
895                 hash = l3hash ^ l4hash;
896                 hash ^= hash >> 16;
897                 hash ^= hash >> 8;
898
899                 slaves[i] = hash % slave_count;
900         }
901 }
902
903 struct bwg_slave {
904         uint64_t bwg_left_int;
905         uint64_t bwg_left_remainder;
906         uint8_t slave;
907 };
908
909 void
910 bond_tlb_activate_slave(struct bond_dev_private *internals) {
911         int i;
912
913         for (i = 0; i < internals->active_slave_count; i++) {
914                 tlb_last_obytets[internals->active_slaves[i]] = 0;
915         }
916 }
917
918 static int
919 bandwidth_cmp(const void *a, const void *b)
920 {
921         const struct bwg_slave *bwg_a = a;
922         const struct bwg_slave *bwg_b = b;
923         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
924         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
925                         (int64_t)bwg_a->bwg_left_remainder;
926         if (diff > 0)
927                 return 1;
928         else if (diff < 0)
929                 return -1;
930         else if (diff2 > 0)
931                 return 1;
932         else if (diff2 < 0)
933                 return -1;
934         else
935                 return 0;
936 }
937
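/*
 * Estimate how much transmit bandwidth is left on a slave link for the
 * current measurement window, given the bytes already sent on it (load);
 * used to order slaves for TLB transmission.
 */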
938 static void
939 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
940                 struct bwg_slave *bwg_slave)
941 {
942         struct rte_eth_link link_status;
943
944         rte_eth_link_get_nowait(port_id, &link_status);
945         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
946         if (link_bwg == 0)
947                 return;
948         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
949         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
950         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
951 }
952
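/*
 * Alarm callback run every REORDER_PERIOD_MS: sample each active slave's TX
 * byte counter, compute its remaining bandwidth and re-sort tlb_slaves_order
 * so TLB transmission prefers the least loaded slaves.
 */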
953 static void
954 bond_ethdev_update_tlb_slave_cb(void *arg)
955 {
956         struct bond_dev_private *internals = arg;
957         struct rte_eth_stats slave_stats;
958         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
959         uint8_t slave_count;
960         uint64_t tx_bytes;
961
962         uint8_t update_stats = 0;
963         uint8_t i, slave_id;
964
965         internals->slave_update_idx++;
966
967
968         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
969                 update_stats = 1;
970
971         for (i = 0; i < internals->active_slave_count; i++) {
972                 slave_id = internals->active_slaves[i];
973                 rte_eth_stats_get(slave_id, &slave_stats);
974                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
975                 bandwidth_left(slave_id, tx_bytes,
976                                 internals->slave_update_idx, &bwg_array[i]);
977                 bwg_array[i].slave = slave_id;
978
979                 if (update_stats) {
980                         tlb_last_obytets[slave_id] = slave_stats.obytes;
981                 }
982         }
983
984         if (update_stats == 1)
985                 internals->slave_update_idx = 0;
986
987         slave_count = i;
988         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
989         for (i = 0; i < slave_count; i++)
990                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
991
992         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
993                         (struct bond_dev_private *)internals);
994 }
995
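/*
 * TLB TX: try slaves in the bandwidth-sorted order, rewriting the source MAC
 * of outgoing packets from the primary's address to the transmitting slave's.
 */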
996 static uint16_t
997 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
998 {
999         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1000         struct bond_dev_private *internals = bd_tx_q->dev_private;
1001
1002         struct rte_eth_dev *primary_port =
1003                         &rte_eth_devices[internals->primary_port];
1004         uint16_t num_tx_total = 0;
1005         uint16_t i, j;
1006
1007         uint16_t num_of_slaves = internals->active_slave_count;
1008         uint16_t slaves[RTE_MAX_ETHPORTS];
1009
1010         struct ether_hdr *ether_hdr;
1011         struct ether_addr primary_slave_addr;
1012         struct ether_addr active_slave_addr;
1013
1014         if (num_of_slaves < 1)
1015                 return num_tx_total;
1016
1017         memcpy(slaves, internals->tlb_slaves_order,
1018                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
1019
1020
1021         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
1022
1023         if (nb_pkts > 3) {
1024                 for (i = 0; i < 3; i++)
1025                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
1026         }
1027
1028         for (i = 0; i < num_of_slaves; i++) {
1029                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
1030                 for (j = num_tx_total; j < nb_pkts; j++) {
1031                         if (j + 3 < nb_pkts)
1032                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1033
1034                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1035                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
1036                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
1037 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1039 #endif
1040                 }
1041
1042                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1043                                 bufs + num_tx_total, nb_pkts - num_tx_total);
1044
1045                 if (num_tx_total == nb_pkts)
1046                         break;
1047         }
1048
1049         return num_tx_total;
1050 }
1051
1052 void
1053 bond_tlb_disable(struct bond_dev_private *internals)
1054 {
1055         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1056 }
1057
1058 void
1059 bond_tlb_enable(struct bond_dev_private *internals)
1060 {
1061         bond_ethdev_update_tlb_slave_cb(internals);
1062 }
1063
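/*
 * Mode 6 (ALB) TX: ARP packets are assigned to slaves by the ALB client
 * table (with the source MAC rewritten per slave), generated ARP updates are
 * sent when needed, and all other traffic falls back to the TLB policy.
 */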
1064 static uint16_t
1065 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1066 {
1067         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1068         struct bond_dev_private *internals = bd_tx_q->dev_private;
1069
1070         struct ether_hdr *eth_h;
1071         uint16_t ether_type, offset;
1072
1073         struct client_data *client_info;
1074
1075         /*
1076          * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
1078          */
1079         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1080         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1081
1082         /*
1083          * We create separate transmit buffers for update packets as they won't
1084          * be counted in num_tx_total.
1085          */
1086         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1087         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1088
1089         struct rte_mbuf *upd_pkt;
1090         size_t pkt_size;
1091
1092         uint16_t num_send, num_not_send = 0;
1093         uint16_t num_tx_total = 0;
1094         uint16_t slave_idx;
1095
1096         int i, j;
1097
1098         /* Search tx buffer for ARP packets and forward them to alb */
1099         for (i = 0; i < nb_pkts; i++) {
1100                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1101                 ether_type = eth_h->ether_type;
1102                 offset = get_vlan_offset(eth_h, &ether_type);
1103
1104                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1105                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1106
1107                         /* Change src mac in eth header */
1108                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1109
1110                         /* Add packet to slave tx buffer */
1111                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1112                         slave_bufs_pkts[slave_idx]++;
1113                 } else {
1114                         /* If packet is not ARP, send it with TLB policy */
1115                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1116                                         bufs[i];
1117                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1118                 }
1119         }
1120
1121         /* Update connected client ARP tables */
1122         if (internals->mode6.ntt) {
1123                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1124                         client_info = &internals->mode6.client_table[i];
1125
1126                         if (client_info->in_use) {
1127                                 /* Allocate new packet to send ARP update on current slave */
1128                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1129                                 if (upd_pkt == NULL) {
1130                                         RTE_BOND_LOG(ERR,
1131                                                      "Failed to allocate ARP packet from pool");
1132                                         continue;
1133                                 }
1134                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1135                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
1136                                 upd_pkt->data_len = pkt_size;
1137                                 upd_pkt->pkt_len = pkt_size;
1138
1139                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1140                                                 internals);
1141
1142                                 /* Add packet to update tx buffer */
1143                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1144                                 update_bufs_pkts[slave_idx]++;
1145                         }
1146                 }
1147                 internals->mode6.ntt = 0;
1148         }
1149
1150         /* Send ARP packets on proper slaves */
1151         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1152                 if (slave_bufs_pkts[i] > 0) {
1153                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1154                                         slave_bufs[i], slave_bufs_pkts[i]);
1155                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1156                                 bufs[nb_pkts - 1 - num_not_send - j] =
1157                                                 slave_bufs[i][nb_pkts - 1 - j];
1158                         }
1159
1160                         num_tx_total += num_send;
1161                         num_not_send += slave_bufs_pkts[i] - num_send;
1162
1163 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1164         /* Print TX stats including update packets */
1165                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
1166                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1167                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1168                         }
1169 #endif
1170                 }
1171         }
1172
1173         /* Send update packets on proper slaves */
1174         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1175                 if (update_bufs_pkts[i] > 0) {
1176                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1177                                         update_bufs_pkts[i]);
1178                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
1179                                 rte_pktmbuf_free(update_bufs[i][j]);
1180                         }
1181 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1182                         for (j = 0; j < update_bufs_pkts[i]; j++) {
1183                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1184                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1185                         }
1186 #endif
1187                 }
1188         }
1189
1190         /* Send non-ARP packets using tlb policy */
1191         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1192                 num_send = bond_ethdev_tx_burst_tlb(queue,
1193                                 slave_bufs[RTE_MAX_ETHPORTS],
1194                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1195
1196                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1197                         bufs[nb_pkts - 1 - num_not_send - j] =
1198                                         slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1199                 }
1200
1201                 num_tx_total += num_send;
1202         }
1203
1204         return num_tx_total;
1205 }
1206
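/*
 * Balance mode TX: hash each packet to an active slave according to the
 * configured transmit policy and burst the per-slave arrays.
 */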
1207 static uint16_t
1208 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1209                 uint16_t nb_bufs)
1210 {
1211         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1212         struct bond_dev_private *internals = bd_tx_q->dev_private;
1213
1214         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1215         uint16_t slave_count;
1216
1217         /* Array to sort mbufs for transmission on each slave into */
1218         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1219         /* Number of mbufs for transmission on each slave */
1220         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1221         /* Mapping array generated by hash function to map mbufs to slaves */
1222         uint16_t bufs_slave_port_idxs[nb_bufs];
1223
1224         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1225         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1226
1227         uint16_t i, j;
1228
1229         if (unlikely(nb_bufs == 0))
1230                 return 0;
1231
1232         /* Copy slave list to protect against slave up/down changes during tx
1233          * bursting */
1234         slave_count = internals->active_slave_count;
1235         if (unlikely(slave_count < 1))
1236                 return 0;
1237
1238         memcpy(slave_port_ids, internals->active_slaves,
1239                         sizeof(slave_port_ids[0]) * slave_count);
1240
        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave with a hash based on the xmit policy
         */
1245         internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1246                         bufs_slave_port_idxs);
1247
1248         for (i = 0; i < nb_bufs; i++) {
1249                 /* Populate slave mbuf arrays with mbufs for that slave. */
1250                 uint8_t slave_idx = bufs_slave_port_idxs[i];
1251
1252                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1253         }
1254
1255         /* Send packet burst on each slave device */
1256         for (i = 0; i < slave_count; i++) {
1257                 if (slave_nb_bufs[i] == 0)
1258                         continue;
1259
1260                 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1261                                 bd_tx_q->queue_id, slave_bufs[i],
1262                                 slave_nb_bufs[i]);
1263
1264                 total_tx_count += slave_tx_count;
1265
1266                 /* If tx burst fails move packets to end of bufs */
1267                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1268                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
1269                                         slave_tx_count;
1270                         total_tx_fail_count += slave_tx_fail_count[i];
1271
1272                         /*
1273                          * Shift bufs to beginning of array to allow reordering
1274                          * later
1275                          */
1276                         for (j = 0; j < slave_tx_fail_count[i]; j++) {
1277                                 slave_bufs[i][j] =
                                        slave_bufs[i][slave_tx_count + j];
1279                         }
1280                 }
1281         }
1282
        /*
         * If there are tx burst failures we move packets to the end of bufs to
         * preserve the expected PMD behaviour of all packets that failed to
         * transmit being at the end of the input mbuf array
         */
        if (unlikely(total_tx_fail_count > 0)) {
                int bufs_idx = nb_bufs - total_tx_fail_count;
1290
1291                 for (i = 0; i < slave_count; i++) {
1292                         if (slave_tx_fail_count[i] > 0) {
1293                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
1294                                         bufs[bufs_idx++] = slave_bufs[i][j];
1295                         }
1296                 }
1297         }
1298
1299         return total_tx_count;
1300 }
1301
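/*
 * Mode 4 TX without dedicated queues: hash data packets across the slaves in
 * DISTRIBUTING state, then drain any pending LACP control frames queued by
 * the state machine for each slave.
 */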
1302 static uint16_t
1303 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1304                 uint16_t nb_bufs)
1305 {
1306         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1307         struct bond_dev_private *internals = bd_tx_q->dev_private;
1308
1309         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1310         uint16_t slave_count;
1311
1312         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1313         uint16_t dist_slave_count;
1314
1315         /* 2-D array to sort mbufs for transmission on each slave into */
1316         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1317         /* Number of mbufs for transmission on each slave */
1318         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1319         /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];
1321
1322         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1323         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1324
1325         uint16_t i, j;
1326
1327         if (unlikely(nb_bufs == 0))
1328                 return 0;
1329
1330         /* Copy slave list to protect against slave up/down changes during tx
1331          * bursting */
1332         slave_count = internals->active_slave_count;
1333         if (unlikely(slave_count < 1))
1334                 return 0;
1335
1336         memcpy(slave_port_ids, internals->active_slaves,
1337                         sizeof(slave_port_ids[0]) * slave_count);
1338
1339         dist_slave_count = 0;
1340         for (i = 0; i < slave_count; i++) {
1341                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1342
1343                 if (ACTOR_STATE(port, DISTRIBUTING))
1344                         dist_slave_port_ids[dist_slave_count++] =
1345                                         slave_port_ids[i];
1346         }
1347
        if (likely(dist_slave_count > 0)) {
1349
                /*
                 * Populate each slave's mbuf array with the packets to be sent
                 * on it, selecting the output slave with a hash based on the
                 * xmit policy
                 */
1354                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1355                                 bufs_slave_port_idxs);
1356
1357                 for (i = 0; i < nb_bufs; i++) {
1358                         /*
1359                          * Populate slave mbuf arrays with mbufs for that
1360                          * slave
1361                          */
1362                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1363
1364                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1365                                         bufs[i];
1366                 }
1367
1368
1369                 /* Send packet burst on each slave device */
1370                 for (i = 0; i < dist_slave_count; i++) {
1371                         if (slave_nb_bufs[i] == 0)
1372                                 continue;
1373
1374                         slave_tx_count = rte_eth_tx_burst(
1375                                         dist_slave_port_ids[i],
1376                                         bd_tx_q->queue_id, slave_bufs[i],
1377                                         slave_nb_bufs[i]);
1378
1379                         total_tx_count += slave_tx_count;
1380
1381                         /* If tx burst fails move packets to end of bufs */
1382                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1383                                 slave_tx_fail_count[i] = slave_nb_bufs[i] -
1384                                                 slave_tx_count;
1385                                 total_tx_fail_count += slave_tx_fail_count[i];
1386
1387                                 /*
1388                                  * Shift bufs to beginning of array to allow
1389                                  * reordering later
1390                                  */
1391                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
1392                                         slave_bufs[i][j] =
1393                                                 slave_bufs[i]
1394                                                         [slave_tx_count
1395                                                         + j];
1396                         }
1397                 }
1398
1399                 /*
1400                  * If there were tx burst failures, move the failed packets to
1401                  * the end of bufs to preserve the expected PMD behaviour that
1402                  * all untransmitted packets are at the end of the input mbuf array
1403                  */
1404                 if (unlikely(total_tx_fail_count > 0)) {
1405                         int bufs_idx = nb_bufs - total_tx_fail_count;
1406
1407                         for (i = 0; i < slave_count; i++) {
1408                                 if (slave_tx_fail_count[i] > 0) {
1409                                         for (j = 0;
1410                                                 j < slave_tx_fail_count[i];
1411                                                 j++) {
1412                                                 bufs[bufs_idx++] =
1413                                                         slave_bufs[i][j];
1414                                         }
1415                                 }
1416                         }
1417                 }
1418         }
1419
1420         /* Check for LACP control packets and send if available */
1421         for (i = 0; i < slave_count; i++) {
1422                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1423                 struct rte_mbuf *ctrl_pkt = NULL;
1424
1425                 if (likely(rte_ring_empty(port->tx_ring)))
1426                         continue;
1427
1428                 if (rte_ring_dequeue(port->tx_ring,
1429                                      (void **)&ctrl_pkt) != -ENOENT) {
1430                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1431                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1432                         /*
1433                          * re-enqueue LAG control plane packets to buffering
1434                          * ring if transmission fails so the packet isn't lost.
1435                          */
1436                         if (slave_tx_count != 1)
1437                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1438                 }
1439         }
1440
1441         return total_tx_count;
1442 }
1443
1444 static uint16_t
1445 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1446                 uint16_t nb_pkts)
1447 {
1448         struct bond_dev_private *internals;
1449         struct bond_tx_queue *bd_tx_q;
1450
1451         uint8_t tx_failed_flag = 0, num_of_slaves;
1452         uint16_t slaves[RTE_MAX_ETHPORTS];
1453
1454         uint16_t max_nb_of_tx_pkts = 0;
1455
1456         int slave_tx_total[RTE_MAX_ETHPORTS];
1457         int i, most_successful_tx_slave = -1;
1458
1459         bd_tx_q = (struct bond_tx_queue *)queue;
1460         internals = bd_tx_q->dev_private;
1461
1462         /* Copy slave list to protect against slave up/down changes during tx
1463          * bursting */
1464         num_of_slaves = internals->active_slave_count;
1465         memcpy(slaves, internals->active_slaves,
1466                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1467
1468         if (num_of_slaves < 1)
1469                 return 0;
1470
1471         /* Increment reference count on mbufs */
1472         for (i = 0; i < nb_pkts; i++)
1473                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1474
1475         /* Transmit burst on each active slave */
1476         for (i = 0; i < num_of_slaves; i++) {
1477                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1478                                         bufs, nb_pkts);
1479
1480                 if (unlikely(slave_tx_total[i] < nb_pkts))
1481                         tx_failed_flag = 1;
1482
1483                 /* record the value and slave index for the slave which transmits the
1484                  * maximum number of packets */
1485                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1486                         max_nb_of_tx_pkts = slave_tx_total[i];
1487                         most_successful_tx_slave = i;
1488                 }
1489         }
1490
1491         /* if slaves fail to transmit packets from burst, the calling application
1492          * is not expected to know about multiple references to packets so we must
1493          * handle failures of all packets except those of the most successful slave
1494          */
1495         if (unlikely(tx_failed_flag))
1496                 for (i = 0; i < num_of_slaves; i++)
1497                         if (i != most_successful_tx_slave)
1498                                 while (slave_tx_total[i] < nb_pkts)
1499                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1500
1501         return max_nb_of_tx_pkts;
1502 }
1503
1504 void
1505 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1506 {
1507         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1508
1509         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1510                 /**
1511                  * If in mode 4 then save the link properties of the first
1512                  * slave, all subsequent slaves must match these properties
1513                  */
1514                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1515
1516                 bond_link->link_autoneg = slave_link->link_autoneg;
1517                 bond_link->link_duplex = slave_link->link_duplex;
1518                 bond_link->link_speed = slave_link->link_speed;
1519         } else {
1520                 /**
1521                  * In any other mode the link properties are set to default
1522                  * values of AUTONEG/DUPLEX
1523                  */
1524                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1525                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1526         }
1527 }
1528
1529 int
1530 link_properties_valid(struct rte_eth_dev *ethdev,
1531                 struct rte_eth_link *slave_link)
1532 {
1533         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1534
1535         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1536                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1537
1538                 if (bond_link->link_duplex != slave_link->link_duplex ||
1539                         bond_link->link_autoneg != slave_link->link_autoneg ||
1540                         bond_link->link_speed != slave_link->link_speed)
1541                         return -1;
1542         }
1543
1544         return 0;
1545 }
1546
1547 int
1548 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1549 {
1550         struct ether_addr *mac_addr;
1551
1552         if (eth_dev == NULL) {
1553                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1554                 return -1;
1555         }
1556
1557         if (dst_mac_addr == NULL) {
1558                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1559                 return -1;
1560         }
1561
1562         mac_addr = eth_dev->data->mac_addrs;
1563
1564         ether_addr_copy(mac_addr, dst_mac_addr);
1565         return 0;
1566 }
1567
1568 int
1569 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1570 {
1571         struct ether_addr *mac_addr;
1572
1573         if (eth_dev == NULL) {
1574                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1575                 return -1;
1576         }
1577
1578         if (new_mac_addr == NULL) {
1579                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1580                 return -1;
1581         }
1582
1583         mac_addr = eth_dev->data->mac_addrs;
1584
1585         /* If new MAC is different to current MAC then update */
1586         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1587                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1588
1589         return 0;
1590 }
1591
1592 static const struct ether_addr null_mac_addr;
1593
1594 /*
1595  * Add additional MAC addresses to the slave
1596  */
1597 int
1598 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1599                 uint16_t slave_port_id)
1600 {
1601         int i, ret;
1602         struct ether_addr *mac_addr;
1603
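        /* mac_addrs[0] holds the primary MAC; additional addresses start at
         * index 1 and a zeroed entry marks the end of the configured list */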
1604         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1605                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1606                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1607                         break;
1608
1609                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1610                 if (ret < 0) {
1611                         /* rollback */
1612                         for (i--; i > 0; i--)
1613                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1614                                         &bonded_eth_dev->data->mac_addrs[i]);
1615                         return ret;
1616                 }
1617         }
1618
1619         return 0;
1620 }
1621
1622 /*
1623  * Remove additional MAC addresses from the slave
1624  */
1625 int
1626 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1627                 uint16_t slave_port_id)
1628 {
1629         int i, rc, ret;
1630         struct ether_addr *mac_addr;
1631
1632         rc = 0;
1633         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1634                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1635                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1636                         break;
1637
1638                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1639                 /* save only the first error */
1640                 if (ret < 0 && rc == 0)
1641                         rc = ret;
1642         }
1643
1644         return rc;
1645 }
1646
1647 int
1648 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1649 {
1650         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1651         int i;
1652
1653         /* Update slave devices MAC addresses */
1654         if (internals->slave_count < 1)
1655                 return -1;
1656
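        /* In modes that transmit on all slaves every slave carries the bonded
         * MAC address; otherwise only the current primary does and the other
         * slaves keep their original (persisted) MAC addresses */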
1657         switch (internals->mode) {
1658         case BONDING_MODE_ROUND_ROBIN:
1659         case BONDING_MODE_BALANCE:
1660         case BONDING_MODE_BROADCAST:
1661                 for (i = 0; i < internals->slave_count; i++) {
1662                         if (rte_eth_dev_default_mac_addr_set(
1663                                         internals->slaves[i].port_id,
1664                                         bonded_eth_dev->data->mac_addrs)) {
1665                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1666                                                 internals->slaves[i].port_id);
1667                                 return -1;
1668                         }
1669                 }
1670                 break;
1671         case BONDING_MODE_8023AD:
1672                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1673                 break;
1674         case BONDING_MODE_ACTIVE_BACKUP:
1675         case BONDING_MODE_TLB:
1676         case BONDING_MODE_ALB:
1677         default:
1678                 for (i = 0; i < internals->slave_count; i++) {
1679                         if (internals->slaves[i].port_id ==
1680                                         internals->current_primary_port) {
1681                                 if (rte_eth_dev_default_mac_addr_set(
1682                                                 internals->current_primary_port,
1683                                                 bonded_eth_dev->data->mac_addrs)) {
1684                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1685                                                         internals->current_primary_port);
1686                                         return -1;
1687                                 }
1688                         } else {
1689                                 if (rte_eth_dev_default_mac_addr_set(
1690                                                 internals->slaves[i].port_id,
1691                                                 &internals->slaves[i].persisted_mac_addr)) {
1692                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1693                                                         internals->slaves[i].port_id);
1694                                         return -1;
1695                                 }
1696                         }
1697                 }
1698         }
1699
1700         return 0;
1701 }
1702
1703 int
1704 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1705 {
1706         struct bond_dev_private *internals;
1707
1708         internals = eth_dev->data->dev_private;
1709
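        /* Install the rx/tx burst handlers that implement the requested mode */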
1710         switch (mode) {
1711         case BONDING_MODE_ROUND_ROBIN:
1712                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1713                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1714                 break;
1715         case BONDING_MODE_ACTIVE_BACKUP:
1716                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1717                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1718                 break;
1719         case BONDING_MODE_BALANCE:
1720                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1721                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1722                 break;
1723         case BONDING_MODE_BROADCAST:
1724                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1725                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1726                 break;
1727         case BONDING_MODE_8023AD:
1728                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1729                         return -1;
1730
1731                 if (internals->mode4.dedicated_queues.enabled == 0) {
1732                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1733                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1734                         RTE_BOND_LOG(WARNING,
1735                                 "Using mode 4, it is necessary to do TX burst "
1736                                 "and RX burst at least every 100ms.");
1737                 } else {
1738                         /* Use flow director's optimization */
1739                         eth_dev->rx_pkt_burst =
1740                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1741                         eth_dev->tx_pkt_burst =
1742                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1743                 }
1744                 break;
1745         case BONDING_MODE_TLB:
1746                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1747                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1748                 break;
1749         case BONDING_MODE_ALB:
1750                 if (bond_mode_alb_enable(eth_dev) != 0)
1751                         return -1;
1752
1753                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1754                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1755                 break;
1756         default:
1757                 return -1;
1758         }
1759
1760         internals->mode = mode;
1761
1762         return 0;
1763 }
1764
1765
1766 static int
1767 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1768                 struct rte_eth_dev *slave_eth_dev)
1769 {
1770         int errval = 0;
1771         struct bond_dev_private *internals = (struct bond_dev_private *)
1772                 bonded_eth_dev->data->dev_private;
1773         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1774
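        /* Lazily create a per-slave mempool for slow protocol (LACP) frames;
         * once created it is reused across slave reconfigurations */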
1775         if (port->slow_pool == NULL) {
1776                 char mem_name[256];
1777                 int slave_id = slave_eth_dev->data->port_id;
1778
1779                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1780                                 slave_id);
1781                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1782                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1783                         slave_eth_dev->data->numa_node);
1784
1785                 /* Any memory allocation failure in initialization is critical because
1786                  * resources can't be freed, so reinitialization is impossible. */
1787                 if (port->slow_pool == NULL) {
1788                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1789                                 slave_id, mem_name, rte_strerror(rte_errno));
1790                 }
1791         }
1792
1793         if (internals->mode4.dedicated_queues.enabled == 1) {
1794                 /* Configure slow Rx queue */
1795
1796                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1797                                 internals->mode4.dedicated_queues.rx_qid, 128,
1798                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1799                                 NULL, port->slow_pool);
1800                 if (errval != 0) {
1801                         RTE_BOND_LOG(ERR,
1802                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1803                                         slave_eth_dev->data->port_id,
1804                                         internals->mode4.dedicated_queues.rx_qid,
1805                                         errval);
1806                         return errval;
1807                 }
1808
1809                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1810                                 internals->mode4.dedicated_queues.tx_qid, 512,
1811                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1812                                 NULL);
1813                 if (errval != 0) {
1814                         RTE_BOND_LOG(ERR,
1815                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1816                                 slave_eth_dev->data->port_id,
1817                                 internals->mode4.dedicated_queues.tx_qid,
1818                                 errval);
1819                         return errval;
1820                 }
1821         }
1822         return 0;
1823 }
1824
1825 int
1826 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1827                 struct rte_eth_dev *slave_eth_dev)
1828 {
1829         struct bond_rx_queue *bd_rx_q;
1830         struct bond_tx_queue *bd_tx_q;
1831         uint16_t nb_rx_queues;
1832         uint16_t nb_tx_queues;
1833
1834         int errval;
1835         uint16_t q_id;
1836         struct rte_flow_error flow_error;
1837
1838         struct bond_dev_private *internals = (struct bond_dev_private *)
1839                 bonded_eth_dev->data->dev_private;
1840
1841         /* Stop slave */
1842         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1843
1844         /* Enable interrupts on slave device if supported */
1845         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1846                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1847
1848         /* If RSS is enabled for bonding, try to enable it for slaves  */
1849         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1850                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1851                                 != 0) {
1852                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1853                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1854                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1855                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1856                 } else {
1857                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1858                 }
1859
1860                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1861                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1862                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1863                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1864         }
1865
1866         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1867                         DEV_RX_OFFLOAD_VLAN_FILTER)
1868                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1869                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1870         else
1871                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1872                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1873
1874         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1875         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1876
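        /* With dedicated queues enabled in mode 4, reserve one extra rx/tx
         * queue pair on the slave for LACP control traffic */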
1877         if (internals->mode == BONDING_MODE_8023AD) {
1878                 if (internals->mode4.dedicated_queues.enabled == 1) {
1879                         nb_rx_queues++;
1880                         nb_tx_queues++;
1881                 }
1882         }
1883
1884         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1885                                      bonded_eth_dev->data->mtu);
1886         if (errval != 0 && errval != -ENOTSUP) {
1887                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1888                                 slave_eth_dev->data->port_id, errval);
1889                 return errval;
1890         }
1891
1892         /* Configure device */
1893         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1894                         nb_rx_queues, nb_tx_queues,
1895                         &(slave_eth_dev->data->dev_conf));
1896         if (errval != 0) {
1897                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1898                                 slave_eth_dev->data->port_id, errval);
1899                 return errval;
1900         }
1901
1902         /* Setup Rx Queues */
1903         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1904                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1905
1906                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1907                                 bd_rx_q->nb_rx_desc,
1908                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1909                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1910                 if (errval != 0) {
1911                         RTE_BOND_LOG(ERR,
1912                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1913                                         slave_eth_dev->data->port_id, q_id, errval);
1914                         return errval;
1915                 }
1916         }
1917
1918         /* Setup Tx Queues */
1919         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1920                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1921
1922                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1923                                 bd_tx_q->nb_tx_desc,
1924                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1925                                 &bd_tx_q->tx_conf);
1926                 if (errval != 0) {
1927                         RTE_BOND_LOG(ERR,
1928                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1929                                 slave_eth_dev->data->port_id, q_id, errval);
1930                         return errval;
1931                 }
1932         }
1933
1934         if (internals->mode == BONDING_MODE_8023AD &&
1935                         internals->mode4.dedicated_queues.enabled == 1) {
1936                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1937                 if (errval != 0)
1938                         return errval;
1939
1940                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1941                                 slave_eth_dev->data->port_id) != 0) {
1942                         RTE_BOND_LOG(ERR,
1943                                 "bond_ethdev_8023ad_flow_verify: port=%d failed",
1944                                 slave_eth_dev->data->port_id);
1945                         return -1;
1946                 }
1947
1948                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1949                         rte_flow_destroy(slave_eth_dev->data->port_id,
1950                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1951                                         &flow_error);
1952
1953                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1954                                 slave_eth_dev->data->port_id);
1955         }
1956
1957         /* Start device */
1958         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1959         if (errval != 0) {
1960                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1961                                 slave_eth_dev->data->port_id, errval);
1962                 return -1;
1963         }
1964
1965         /* If RSS is enabled for bonding, synchronize RETA */
1966         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1967                 int i;
1968                 struct bond_dev_private *internals;
1969
1970                 internals = bonded_eth_dev->data->dev_private;
1971
1972                 for (i = 0; i < internals->slave_count; i++) {
1973                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1974                                 errval = rte_eth_dev_rss_reta_update(
1975                                                 slave_eth_dev->data->port_id,
1976                                                 &internals->reta_conf[0],
1977                                                 internals->slaves[i].reta_size);
1978                                 if (errval != 0) {
1979                                         RTE_BOND_LOG(WARNING,
1980                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1981                                                      " RSS Configuration for bonding may be inconsistent.",
1982                                                      slave_eth_dev->data->port_id, errval);
1983                                 }
1984                                 break;
1985                         }
1986                 }
1987         }
1988
1989         /* If lsc interrupt is set, check initial slave's link status */
1990         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1991                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1992                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1993                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1994                         NULL);
1995         }
1996
1997         return 0;
1998 }
1999
2000 void
2001 slave_remove(struct bond_dev_private *internals,
2002                 struct rte_eth_dev *slave_eth_dev)
2003 {
2004         uint8_t i;
2005
2006         for (i = 0; i < internals->slave_count; i++)
2007                 if (internals->slaves[i].port_id ==
2008                                 slave_eth_dev->data->port_id)
2009                         break;
2010
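        /* If the slave is not the last entry, close the gap in the slave array
         * and in each flow's per-slave flow handle array */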
2011         if (i < (internals->slave_count - 1)) {
2012                 struct rte_flow *flow;
2013
2014                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
2015                                 sizeof(internals->slaves[0]) *
2016                                 (internals->slave_count - i - 1));
2017                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
2018                         memmove(&flow->flows[i], &flow->flows[i + 1],
2019                                 sizeof(flow->flows[0]) *
2020                                 (internals->slave_count - i - 1));
2021                         flow->flows[internals->slave_count - 1] = NULL;
2022                 }
2023         }
2024
2025         internals->slave_count--;
2026
2027         /* force reconfiguration of slave interfaces */
2028         _rte_eth_dev_reset(slave_eth_dev);
2029 }
2030
2031 static void
2032 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
2033
2034 void
2035 slave_add(struct bond_dev_private *internals,
2036                 struct rte_eth_dev *slave_eth_dev)
2037 {
2038         struct bond_slave_details *slave_details =
2039                         &internals->slaves[internals->slave_count];
2040
2041         slave_details->port_id = slave_eth_dev->data->port_id;
2042         slave_details->last_link_status = 0;
2043
2044         /* Mark slave devices that don't support interrupts so we can
2045          * compensate when we start the bond
2046          */
2047         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2048                 slave_details->link_status_poll_enabled = 1;
2049         }
2050
2051         slave_details->link_status_wait_to_complete = 0;
2052         /* save the slave's original MAC so it can be restored later */
2053         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2054                         sizeof(struct ether_addr));
2055 }
2056
2057 void
2058 bond_ethdev_primary_set(struct bond_dev_private *internals,
2059                 uint16_t slave_port_id)
2060 {
2061         int i;
2062
2063         if (internals->active_slave_count < 1)
2064                 internals->current_primary_port = slave_port_id;
2065         else
2066                 /* Search bonded device slave ports for new proposed primary port */
2067                 for (i = 0; i < internals->active_slave_count; i++) {
2068                         if (internals->active_slaves[i] == slave_port_id)
2069                                 internals->current_primary_port = slave_port_id;
2070                 }
2071 }
2072
2073 static void
2074 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2075
2076 static int
2077 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2078 {
2079         struct bond_dev_private *internals;
2080         int i;
2081
2082         /* slave eth dev will be started by bonded device */
2083         if (check_for_bonded_ethdev(eth_dev)) {
2084                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2085                                 eth_dev->data->port_id);
2086                 return -1;
2087         }
2088
2089         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2090         eth_dev->data->dev_started = 1;
2091
2092         internals = eth_dev->data->dev_private;
2093
2094         if (internals->slave_count == 0) {
2095                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2096                 goto out_err;
2097         }
2098
2099         if (internals->user_defined_mac == 0) {
2100                 struct ether_addr *new_mac_addr = NULL;
2101
2102                 for (i = 0; i < internals->slave_count; i++)
2103                         if (internals->slaves[i].port_id == internals->primary_port)
2104                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2105
2106                 if (new_mac_addr == NULL)
2107                         goto out_err;
2108
2109                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2110                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2111                                         eth_dev->data->port_id);
2112                         goto out_err;
2113                 }
2114         }
2115
2116         /* Update all slave devices MACs*/
2117         if (mac_address_slaves_update(eth_dev) != 0)
2118                 goto out_err;
2119
2120         /* If bonded device is configured in promiscuous mode then re-apply config */
2121         if (internals->promiscuous_en)
2122                 bond_ethdev_promiscuous_enable(eth_dev);
2123
2124         if (internals->mode == BONDING_MODE_8023AD) {
2125                 if (internals->mode4.dedicated_queues.enabled == 1) {
2126                         internals->mode4.dedicated_queues.rx_qid =
2127                                         eth_dev->data->nb_rx_queues;
2128                         internals->mode4.dedicated_queues.tx_qid =
2129                                         eth_dev->data->nb_tx_queues;
2130                 }
2131         }
2132
2133
2134         /* Reconfigure each slave device if starting bonded device */
2135         for (i = 0; i < internals->slave_count; i++) {
2136                 struct rte_eth_dev *slave_ethdev =
2137                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2138                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2139                         RTE_BOND_LOG(ERR,
2140                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2141                                 eth_dev->data->port_id,
2142                                 internals->slaves[i].port_id);
2143                         goto out_err;
2144                 }
2145                 /* We will need to poll for link status if any slave doesn't
2146                  * support interrupts
2147                  */
2148                 if (internals->slaves[i].link_status_poll_enabled)
2149                         internals->link_status_polling_enabled = 1;
2150         }
2151
2152         /* start polling if needed */
2153         if (internals->link_status_polling_enabled) {
2154                 rte_eal_alarm_set(
2155                         internals->link_status_polling_interval_ms * 1000,
2156                         bond_ethdev_slave_link_status_change_monitor,
2157                         (void *)&rte_eth_devices[internals->port_id]);
2158         }
2159
2160         if (internals->user_defined_primary_port)
2161                 bond_ethdev_primary_set(internals, internals->primary_port);
2162
2163         if (internals->mode == BONDING_MODE_8023AD)
2164                 bond_mode_8023ad_start(eth_dev);
2165
2166         if (internals->mode == BONDING_MODE_TLB ||
2167                         internals->mode == BONDING_MODE_ALB)
2168                 bond_tlb_enable(internals);
2169
2170         return 0;
2171
2172 out_err:
2173         eth_dev->data->dev_started = 0;
2174         return -1;
2175 }
2176
2177 static void
2178 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2179 {
2180         uint16_t i;
2181
2182         if (dev->data->rx_queues != NULL) {
2183                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2184                         rte_free(dev->data->rx_queues[i]);
2185                         dev->data->rx_queues[i] = NULL;
2186                 }
2187                 dev->data->nb_rx_queues = 0;
2188         }
2189
2190         if (dev->data->tx_queues != NULL) {
2191                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2192                         rte_free(dev->data->tx_queues[i]);
2193                         dev->data->tx_queues[i] = NULL;
2194                 }
2195                 dev->data->nb_tx_queues = 0;
2196         }
2197 }
2198
2199 void
2200 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2201 {
2202         struct bond_dev_private *internals = eth_dev->data->dev_private;
2203         uint8_t i;
2204
2205         if (internals->mode == BONDING_MODE_8023AD) {
2206                 struct port *port;
2207                 void *pkt = NULL;
2208
2209                 bond_mode_8023ad_stop(eth_dev);
2210
2211                 /* Discard all messages to/from mode 4 state machines */
2212                 for (i = 0; i < internals->active_slave_count; i++) {
2213                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2214
2215                         RTE_ASSERT(port->rx_ring != NULL);
2216                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2217                                 rte_pktmbuf_free(pkt);
2218
2219                         RTE_ASSERT(port->tx_ring != NULL);
2220                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2221                                 rte_pktmbuf_free(pkt);
2222                 }
2223         }
2224
2225         if (internals->mode == BONDING_MODE_TLB ||
2226                         internals->mode == BONDING_MODE_ALB) {
2227                 bond_tlb_disable(internals);
2228                 for (i = 0; i < internals->active_slave_count; i++)
2229                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2230         }
2231
2232         internals->active_slave_count = 0;
2233         internals->link_status_polling_enabled = 0;
2234         for (i = 0; i < internals->slave_count; i++)
2235                 internals->slaves[i].last_link_status = 0;
2236
2237         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2238         eth_dev->data->dev_started = 0;
2239 }
2240
2241 void
2242 bond_ethdev_close(struct rte_eth_dev *dev)
2243 {
2244         struct bond_dev_private *internals = dev->data->dev_private;
2245         uint16_t bond_port_id = internals->port_id;
2246         int skipped = 0;
2247         struct rte_flow_error ferror;
2248
2249         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2250         while (internals->slave_count != skipped) {
2251                 uint16_t port_id = internals->slaves[skipped].port_id;
2252
2253                 rte_eth_dev_stop(port_id);
2254
2255                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2256                         RTE_BOND_LOG(ERR,
2257                                      "Failed to remove port %d from bonded device %s",
2258                                      port_id, dev->device->name);
2259                         skipped++;
2260                 }
2261         }
2262         bond_flow_ops.flush(dev, &ferror);
2263         bond_ethdev_free_queues(dev);
2264         rte_bitmap_reset(internals->vlan_filter_bmp);
2265 }
2266
2267 /* forward declaration */
2268 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2269
2270 static void
2271 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2272 {
2273         struct bond_dev_private *internals = dev->data->dev_private;
2274
2275         uint16_t max_nb_rx_queues = UINT16_MAX;
2276         uint16_t max_nb_tx_queues = UINT16_MAX;
2277
2278         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2279
2280         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2281                         internals->candidate_max_rx_pktlen :
2282                         ETHER_MAX_JUMBO_FRAME_LEN;
2283
2284         /* The max number of tx/rx queues that the bonded device can support is
2285          * the minimum value reported by the bonded slaves, as all slaves must be
2286          * capable of supporting the same number of tx/rx queues.
2287          */
2288         if (internals->slave_count > 0) {
2289                 struct rte_eth_dev_info slave_info;
2290                 uint8_t idx;
2291
2292                 for (idx = 0; idx < internals->slave_count; idx++) {
2293                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2294                                         &slave_info);
2295
2296                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2297                                 max_nb_rx_queues = slave_info.max_rx_queues;
2298
2299                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2300                                 max_nb_tx_queues = slave_info.max_tx_queues;
2301                 }
2302         }
2303
2304         dev_info->max_rx_queues = max_nb_rx_queues;
2305         dev_info->max_tx_queues = max_nb_tx_queues;
2306
2307         /**
2308          * If dedicated hw queues enabled for link bonding device in LACP mode
2309          * then we need to reduce the maximum number of data path queues by 1.
2310          */
2311         if (internals->mode == BONDING_MODE_8023AD &&
2312                 internals->mode4.dedicated_queues.enabled == 1) {
2313                 dev_info->max_rx_queues--;
2314                 dev_info->max_tx_queues--;
2315         }
2316
2317         dev_info->min_rx_bufsize = 0;
2318
2319         dev_info->rx_offload_capa = internals->rx_offload_capa;
2320         dev_info->tx_offload_capa = internals->tx_offload_capa;
2321         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2322         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2323         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2324
2325         dev_info->reta_size = internals->reta_size;
2326 }
2327
2328 static int
2329 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2330 {
2331         int res;
2332         uint16_t i;
2333         struct bond_dev_private *internals = dev->data->dev_private;
2334
2335         /* don't do this while a slave is being added */
2336         rte_spinlock_lock(&internals->lock);
2337
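        /* Record the requested VLAN filter state in the bitmap, then propagate
         * the change to every current slave */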
2338         if (on)
2339                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2340         else
2341                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2342
2343         for (i = 0; i < internals->slave_count; i++) {
2344                 uint16_t port_id = internals->slaves[i].port_id;
2345
2346                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2347                 if (res == -ENOTSUP)
2348                         RTE_BOND_LOG(WARNING,
2349                                      "Setting VLAN filter on slave port %u not supported.",
2350                                      port_id);
2351         }
2352
2353         rte_spinlock_unlock(&internals->lock);
2354         return 0;
2355 }
2356
2357 static int
2358 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2359                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2360                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2361 {
2362         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2363                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2364                                         0, dev->data->numa_node);
2365         if (bd_rx_q == NULL)
2366                 return -1;
2367
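        /* Only record the queue configuration here; it is applied to each
         * slave's queues when the slaves are configured in slave_configure() */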
2368         bd_rx_q->queue_id = rx_queue_id;
2369         bd_rx_q->dev_private = dev->data->dev_private;
2370
2371         bd_rx_q->nb_rx_desc = nb_rx_desc;
2372
2373         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2374         bd_rx_q->mb_pool = mb_pool;
2375
2376         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2377
2378         return 0;
2379 }
2380
2381 static int
2382 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2383                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2384                 const struct rte_eth_txconf *tx_conf)
2385 {
2386         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2387                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2388                                         0, dev->data->numa_node);
2389
2390         if (bd_tx_q == NULL)
2391                 return -1;
2392
2393         bd_tx_q->queue_id = tx_queue_id;
2394         bd_tx_q->dev_private = dev->data->dev_private;
2395
2396         bd_tx_q->nb_tx_desc = nb_tx_desc;
2397         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2398
2399         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2400
2401         return 0;
2402 }
2403
2404 static void
2405 bond_ethdev_rx_queue_release(void *queue)
2406 {
2407         if (queue == NULL)
2408                 return;
2409
2410         rte_free(queue);
2411 }
2412
2413 static void
2414 bond_ethdev_tx_queue_release(void *queue)
2415 {
2416         if (queue == NULL)
2417                 return;
2418
2419         rte_free(queue);
2420 }
2421
2422 static void
2423 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2424 {
2425         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2426         struct bond_dev_private *internals;
2427
2428         /* Default value for polling slave found is true as we don't want to
2429          * disable the polling thread if we cannot get the lock */
2430         int i, polling_slave_found = 1;
2431
2432         if (cb_arg == NULL)
2433                 return;
2434
2435         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2436         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2437
2438         if (!bonded_ethdev->data->dev_started ||
2439                 !internals->link_status_polling_enabled)
2440                 return;
2441
2442         /* If device is currently being configured then don't check slaves link
2443          * status, wait until next period */
2444         if (rte_spinlock_trylock(&internals->lock)) {
2445                 if (internals->slave_count > 0)
2446                         polling_slave_found = 0;
2447
2448                 for (i = 0; i < internals->slave_count; i++) {
2449                         if (!internals->slaves[i].link_status_poll_enabled)
2450                                 continue;
2451
2452                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2453                         polling_slave_found = 1;
2454
2455                         /* Update slave link status */
2456                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2457                                         internals->slaves[i].link_status_wait_to_complete);
2458
2459                         /* if link status has changed since last checked then call lsc
2460                          * event callback */
2461                         if (slave_ethdev->data->dev_link.link_status !=
2462                                         internals->slaves[i].last_link_status) {
2463                                 internals->slaves[i].last_link_status =
2464                                                 slave_ethdev->data->dev_link.link_status;
2465
2466                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2467                                                 RTE_ETH_EVENT_INTR_LSC,
2468                                                 &bonded_ethdev->data->port_id,
2469                                                 NULL);
2470                         }
2471                 }
2472                 rte_spinlock_unlock(&internals->lock);
2473         }
2474
2475         if (polling_slave_found)
2476                 /* Set alarm to continue monitoring link status of slave ethdev's */
2477                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2478                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2479 }
2480
2481 static int
2482 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2483 {
2484         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2485
2486         struct bond_dev_private *bond_ctx;
2487         struct rte_eth_link slave_link;
2488
2489         uint32_t idx;
2490
2491         bond_ctx = ethdev->data->dev_private;
2492
2493         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2494
2495         if (ethdev->data->dev_started == 0 ||
2496                         bond_ctx->active_slave_count == 0) {
2497                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2498                 return 0;
2499         }
2500
2501         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2502
2503         if (wait_to_complete)
2504                 link_update = rte_eth_link_get;
2505         else
2506                 link_update = rte_eth_link_get_nowait;
2507
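        /* Derive the bonded device's link speed from the active slaves
         * according to the bonding mode */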
2508         switch (bond_ctx->mode) {
2509         case BONDING_MODE_BROADCAST:
2510                 /**
2511                  * Setting link speed to UINT32_MAX to ensure we pick up the
2512                  * value of the first active slave
2513                  */
2514                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2515
2516                 /**
2517                  * link speed is minimum value of all the slaves link speed as
2518                  * packet loss will occur on this slave if transmission at rates
2519                  * greater than this are attempted
2520                  */
2521                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2522                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2523
2524                         if (slave_link.link_speed <
2525                                         ethdev->data->dev_link.link_speed)
2526                                 ethdev->data->dev_link.link_speed =
2527                                                 slave_link.link_speed;
2528                 }
2529                 break;
2530         case BONDING_MODE_ACTIVE_BACKUP:
2531                 /* Current primary slave */
2532                 link_update(bond_ctx->current_primary_port, &slave_link);
2533
2534                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2535                 break;
2536         case BONDING_MODE_8023AD:
2537                 ethdev->data->dev_link.link_autoneg =
2538                                 bond_ctx->mode4.slave_link.link_autoneg;
2539                 ethdev->data->dev_link.link_duplex =
2540                                 bond_ctx->mode4.slave_link.link_duplex;
2541                 /* fall through to update link speed */
2542         case BONDING_MODE_ROUND_ROBIN:
2543         case BONDING_MODE_BALANCE:
2544         case BONDING_MODE_TLB:
2545         case BONDING_MODE_ALB:
2546         default:
2547                 /**
2548                  * In these modes the maximum theoretical link speed is the sum
2549                  * of all the slaves' link speeds
2550                  */
2551                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2552
2553                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2554                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2555
2556                         ethdev->data->dev_link.link_speed +=
2557                                         slave_link.link_speed;
2558                 }
2559         }
2560
2561
2562         return 0;
2563 }
2564
2565
2566 static int
2567 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2568 {
2569         struct bond_dev_private *internals = dev->data->dev_private;
2570         struct rte_eth_stats slave_stats;
2571         int i, j;
2572
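        /* Bonded device statistics are the sum of all slave statistics */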
2573         for (i = 0; i < internals->slave_count; i++) {
2574                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2575
2576                 stats->ipackets += slave_stats.ipackets;
2577                 stats->opackets += slave_stats.opackets;
2578                 stats->ibytes += slave_stats.ibytes;
2579                 stats->obytes += slave_stats.obytes;
2580                 stats->imissed += slave_stats.imissed;
2581                 stats->ierrors += slave_stats.ierrors;
2582                 stats->oerrors += slave_stats.oerrors;
2583                 stats->rx_nombuf += slave_stats.rx_nombuf;
2584
2585                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2586                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2587                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2588                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2589                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2590                         stats->q_errors[j] += slave_stats.q_errors[j];
2591                 }
2592
2593         }
2594
2595         return 0;
2596 }
2597
2598 static void
2599 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2600 {
2601         struct bond_dev_private *internals = dev->data->dev_private;
2602         int i;
2603
2604         for (i = 0; i < internals->slave_count; i++)
2605                 rte_eth_stats_reset(internals->slaves[i].port_id);
2606 }
2607
2608 static void
2609 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2610 {
2611         struct bond_dev_private *internals = eth_dev->data->dev_private;
2612         int i;
2613
2614         internals->promiscuous_en = 1;
2615
2616         switch (internals->mode) {
2617         /* Promiscuous mode is propagated to all slaves */
2618         case BONDING_MODE_ROUND_ROBIN:
2619         case BONDING_MODE_BALANCE:
2620         case BONDING_MODE_BROADCAST:
2621                 for (i = 0; i < internals->slave_count; i++)
2622                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2623                 break;
2624         /* In mode4 promiscuous mode is managed when a slave is added/removed */
2625         case BONDING_MODE_8023AD:
2626                 break;
2627         /* Promiscuous mode is propagated only to primary slave */
2628         case BONDING_MODE_ACTIVE_BACKUP:
2629         case BONDING_MODE_TLB:
2630         case BONDING_MODE_ALB:
2631         default:
2632                 rte_eth_promiscuous_enable(internals->current_primary_port);
2633         }
2634 }
2635
2636 static void
2637 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2638 {
2639         struct bond_dev_private *internals = dev->data->dev_private;
2640         int i;
2641
2642         internals->promiscuous_en = 0;
2643
2644         switch (internals->mode) {
2645         /* Promiscuous mode is propagated to all slaves */
2646         case BONDING_MODE_ROUND_ROBIN:
2647         case BONDING_MODE_BALANCE:
2648         case BONDING_MODE_BROADCAST:
2649                 for (i = 0; i < internals->slave_count; i++)
2650                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2651                 break;
2652         /* In mode4 promiscuous mode is managed when a slave is added/removed */
2653         case BONDING_MODE_8023AD:
2654                 break;
2655         /* Promiscuous mode is propagated only to primary slave */
2656         case BONDING_MODE_ACTIVE_BACKUP:
2657         case BONDING_MODE_TLB:
2658         case BONDING_MODE_ALB:
2659         default:
2660                 rte_eth_promiscuous_disable(internals->current_primary_port);
2661         }
2662 }
2663
2664 static void
2665 bond_ethdev_delayed_lsc_propagation(void *arg)
2666 {
2667         if (arg == NULL)
2668                 return;
2669
2670         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2671                         RTE_ETH_EVENT_INTR_LSC, NULL);
2672 }
2673
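/*
 * Link status change callback registered on each slave port. It checks that
 * the reporting port is a valid slave of a started bonded device, activates
 * or deactivates the slave according to the new link state, updates the
 * primary port and the bonded device's link properties, and finally
 * propagates an LSC event to the application, optionally after the
 * configured up/down delay.
 */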
2674 int
2675 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2676                 void *param, void *ret_param __rte_unused)
2677 {
2678         struct rte_eth_dev *bonded_eth_dev;
2679         struct bond_dev_private *internals;
2680         struct rte_eth_link link;
2681         int rc = -1;
2682
2683         int i, valid_slave = 0;
2684         uint8_t active_pos;
2685         uint8_t lsc_flag = 0;
2686
2687         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2688                 return rc;
2689
2690         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2691
2692         if (check_for_bonded_ethdev(bonded_eth_dev))
2693                 return rc;
2694
2695         internals = bonded_eth_dev->data->dev_private;
2696
2697         /* If the device isn't started don't handle interrupts */
2698         if (!bonded_eth_dev->data->dev_started)
2699                 return rc;
2700
2701         /* verify that port_id is a valid slave of bonded port */
2702         for (i = 0; i < internals->slave_count; i++) {
2703                 if (internals->slaves[i].port_id == port_id) {
2704                         valid_slave = 1;
2705                         break;
2706                 }
2707         }
2708
2709         if (!valid_slave)
2710                 return rc;
2711
2712         /* Synchronize lsc callback parallel calls either by real link event
2713          * from the slave PMDs or by the bonding PMD itself.
2714          */
2715         rte_spinlock_lock(&internals->lsc_lock);
2716
2717         /* Search for port in active port list */
2718         active_pos = find_slave_by_id(internals->active_slaves,
2719                         internals->active_slave_count, port_id);
2720
2721         rte_eth_link_get_nowait(port_id, &link);
2722         if (link.link_status) {
2723                 if (active_pos < internals->active_slave_count)
2724                         goto link_update;
2725
2726                 /* if no active slave ports then set this port to be primary port */
2727                 if (internals->active_slave_count < 1) {
2728                         /* If first active slave, then change link status */
2729                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2730                         internals->current_primary_port = port_id;
2731                         lsc_flag = 1;
2732
2733                         mac_address_slaves_update(bonded_eth_dev);
2734                 }
2735
2736                 activate_slave(bonded_eth_dev, port_id);
2737
2738                 /* If user has defined the primary port then default to using it */
2739                 if (internals->user_defined_primary_port &&
2740                                 internals->primary_port == port_id)
2741                         bond_ethdev_primary_set(internals, port_id);
2742         } else {
2743                 if (active_pos == internals->active_slave_count)
2744                         goto link_update;
2745
2746                 /* Remove from active slave list */
2747                 deactivate_slave(bonded_eth_dev, port_id);
2748
2749                 if (internals->active_slave_count < 1)
2750                         lsc_flag = 1;
2751
2752                 /* Update primary id, take first active slave from list or, if none
2753                  * is available, fall back to the configured primary port */
2754                 if (port_id == internals->current_primary_port) {
2755                         if (internals->active_slave_count > 0)
2756                                 bond_ethdev_primary_set(internals,
2757                                                 internals->active_slaves[0]);
2758                         else
2759                                 internals->current_primary_port = internals->primary_port;
2760                 }
2761         }
2762
2763 link_update:
2764         /**
2765          * Update bonded device link properties after any change to active
2766          * slaves
2767          */
2768         bond_ethdev_link_update(bonded_eth_dev, 0);
2769
2770         if (lsc_flag) {
2771                 /* Cancel any possible outstanding interrupts if delays are enabled */
2772                 if (internals->link_up_delay_ms > 0 ||
2773                         internals->link_down_delay_ms > 0)
2774                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2775                                         bonded_eth_dev);
2776
2777                 if (bonded_eth_dev->data->dev_link.link_status) {
2778                         if (internals->link_up_delay_ms > 0)
2779                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2780                                                 bond_ethdev_delayed_lsc_propagation,
2781                                                 (void *)bonded_eth_dev);
2782                         else
2783                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2784                                                 RTE_ETH_EVENT_INTR_LSC,
2785                                                 NULL);
2786
2787                 } else {
2788                         if (internals->link_down_delay_ms > 0)
2789                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2790                                                 bond_ethdev_delayed_lsc_propagation,
2791                                                 (void *)bonded_eth_dev);
2792                         else
2793                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2794                                                 RTE_ETH_EVENT_INTR_LSC,
2795                                                 NULL);
2796                 }
2797         }
2798
2799         rte_spinlock_unlock(&internals->lsc_lock);
2800
2801         return rc;
2802 }
2803
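/*
 * The bonded device keeps a full-size RETA of its own: an update is stored
 * locally (and replicated to fill the whole table) and then pushed to every
 * slave using that slave's own RETA size.
 */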
2804 static int
2805 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2806                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2807 {
2808         unsigned i, j;
2809         int result = 0;
2810         int slave_reta_size;
2811         unsigned reta_count;
2812         struct bond_dev_private *internals = dev->data->dev_private;
2813
2814         if (reta_size != internals->reta_size)
2815                 return -EINVAL;
2816
2817         /* Copy RETA table */
2818         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2819
2820         for (i = 0; i < reta_count; i++) {
2821                 internals->reta_conf[i].mask = reta_conf[i].mask;
2822                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2823                         if ((reta_conf[i].mask >> j) & 0x01)
2824                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2825         }
2826
2827         /* Fill rest of array */
2828         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2829                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2830                                 sizeof(internals->reta_conf[0]) * reta_count);
2831
2832         /* Propagate RETA over slaves */
2833         for (i = 0; i < internals->slave_count; i++) {
2834                 slave_reta_size = internals->slaves[i].reta_size;
2835                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2836                                 &internals->reta_conf[0], slave_reta_size);
2837                 if (result < 0)
2838                         return result;
2839         }
2840
2841         return 0;
2842 }
2843
2844 static int
2845 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2846                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2847 {
2848         int i, j;
2849         struct bond_dev_private *internals = dev->data->dev_private;
2850
2851         if (reta_size != internals->reta_size)
2852                 return -EINVAL;
2853
2854         /* Copy RETA table */
2855         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2856                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2857                         if ((reta_conf[i].mask >> j) & 0x01)
2858                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2859
2860         return 0;
2861 }
2862
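/*
 * RSS hash update: the requested hash functions are masked with what the
 * bonded device advertises (internals->flow_type_rss_offloads) and the
 * resulting configuration, including any new RSS key, is propagated to
 * every slave.
 */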
2863 static int
2864 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2865                 struct rte_eth_rss_conf *rss_conf)
2866 {
2867         int i, result = 0;
2868         struct bond_dev_private *internals = dev->data->dev_private;
2869         struct rte_eth_rss_conf bond_rss_conf;
2870
2871         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2872
2873         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2874
2875         if (bond_rss_conf.rss_hf != 0)
2876                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2877
2878         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2879                         sizeof(internals->rss_key)) {
2880                 if (bond_rss_conf.rss_key_len == 0)
2881                         bond_rss_conf.rss_key_len = 40;
2882                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2883                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2884                                 internals->rss_key_len);
2885         }
2886
2887         for (i = 0; i < internals->slave_count; i++) {
2888                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2889                                 &bond_rss_conf);
2890                 if (result < 0)
2891                         return result;
2892         }
2893
2894         return 0;
2895 }
2896
2897 static int
2898 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2899                 struct rte_eth_rss_conf *rss_conf)
2900 {
2901         struct bond_dev_private *internals = dev->data->dev_private;
2902
2903         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2904         rss_conf->rss_key_len = internals->rss_key_len;
2905         if (rss_conf->rss_key)
2906                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2907
2908         return 0;
2909 }
2910
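/*
 * MTU changes are accepted only if every slave implements mtu_set; the new
 * MTU is then applied to each slave in turn.
 */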
2911 static int
2912 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2913 {
2914         struct rte_eth_dev *slave_eth_dev;
2915         struct bond_dev_private *internals = dev->data->dev_private;
2916         int ret, i;
2917
2918         rte_spinlock_lock(&internals->lock);
2919
2920         for (i = 0; i < internals->slave_count; i++) {
2921                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2922                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2923                         rte_spinlock_unlock(&internals->lock);
2924                         return -ENOTSUP;
2925                 }
2926         }
2927         for (i = 0; i < internals->slave_count; i++) {
2928                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2929                 if (ret < 0) {
2930                         rte_spinlock_unlock(&internals->lock);
2931                         return ret;
2932                 }
2933         }
2934
2935         rte_spinlock_unlock(&internals->lock);
2936         return 0;
2937 }
2938
2939 static int
2940 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2941 {
2942         if (mac_address_set(dev, addr)) {
2943                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2944                 return -EINVAL;
2945         }
2946
2947         return 0;
2948 }
2949
2950 static int
2951 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2952                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2953 {
2954         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2955                 *(const void **)arg = &bond_flow_ops;
2956                 return 0;
2957         }
2958         return -ENOTSUP;
2959 }
2960
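/*
 * Add a secondary MAC address on every slave; if any slave rejects it, the
 * address is removed again from the slaves already updated (rollback).
 */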
2961 static int
2962 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2963                                 __rte_unused uint32_t index, uint32_t vmdq)
2964 {
2965         struct rte_eth_dev *slave_eth_dev;
2966         struct bond_dev_private *internals = dev->data->dev_private;
2967         int ret, i;
2968
2969         rte_spinlock_lock(&internals->lock);
2970
2971         for (i = 0; i < internals->slave_count; i++) {
2972                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2973                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2974                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2975                         ret = -ENOTSUP;
2976                         goto end;
2977                 }
2978         }
2979
2980         for (i = 0; i < internals->slave_count; i++) {
2981                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2982                                 mac_addr, vmdq);
2983                 if (ret < 0) {
2984                         /* rollback */
2985                         for (i--; i >= 0; i--)
2986                                 rte_eth_dev_mac_addr_remove(
2987                                         internals->slaves[i].port_id, mac_addr);
2988                         goto end;
2989                 }
2990         }
2991
2992         ret = 0;
2993 end:
2994         rte_spinlock_unlock(&internals->lock);
2995         return ret;
2996 }
2997
2998 static void
2999 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3000 {
3001         struct rte_eth_dev *slave_eth_dev;
3002         struct bond_dev_private *internals = dev->data->dev_private;
3003         int i;
3004
3005         rte_spinlock_lock(&internals->lock);
3006
3007         for (i = 0; i < internals->slave_count; i++) {
3008                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3009                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3010                         goto end;
3011         }
3012
3013         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
3014
3015         for (i = 0; i < internals->slave_count; i++)
3016                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3017                                 mac_addr);
3018
3019 end:
3020         rte_spinlock_unlock(&internals->lock);
3021 }
3022
3023 const struct eth_dev_ops default_dev_ops = {
3024         .dev_start            = bond_ethdev_start,
3025         .dev_stop             = bond_ethdev_stop,
3026         .dev_close            = bond_ethdev_close,
3027         .dev_configure        = bond_ethdev_configure,
3028         .dev_infos_get        = bond_ethdev_info,
3029         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3030         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3031         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3032         .rx_queue_release     = bond_ethdev_rx_queue_release,
3033         .tx_queue_release     = bond_ethdev_tx_queue_release,
3034         .link_update          = bond_ethdev_link_update,
3035         .stats_get            = bond_ethdev_stats_get,
3036         .stats_reset          = bond_ethdev_stats_reset,
3037         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3038         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3039         .reta_update          = bond_ethdev_rss_reta_update,
3040         .reta_query           = bond_ethdev_rss_reta_query,
3041         .rss_hash_update      = bond_ethdev_rss_hash_update,
3042         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3043         .mtu_set              = bond_ethdev_mtu_set,
3044         .mac_addr_set         = bond_ethdev_mac_address_set,
3045         .mac_addr_add         = bond_ethdev_mac_addr_add,
3046         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3047         .filter_ctrl          = bond_filter_ctrl
3048 };
3049
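/*
 * Allocate the bonded ethdev together with its MAC address table and VLAN
 * filter bitmap, initialise the private data with defaults and apply the
 * requested bonding mode.
 */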
3050 static int
3051 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3052 {
3053         const char *name = rte_vdev_device_name(dev);
3054         uint8_t socket_id = dev->device.numa_node;
3055         struct bond_dev_private *internals = NULL;
3056         struct rte_eth_dev *eth_dev = NULL;
3057         uint32_t vlan_filter_bmp_size;
3058
3059         /* now do all data allocation - for the eth_dev structure
3060          * and internal (private) data
3061          */
3062
3063         /* reserve an ethdev entry */
3064         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3065         if (eth_dev == NULL) {
3066                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3067                 goto err;
3068         }
3069
3070         internals = eth_dev->data->dev_private;
3071         eth_dev->data->nb_rx_queues = (uint16_t)1;
3072         eth_dev->data->nb_tx_queues = (uint16_t)1;
3073
3074         /* Allocate memory for storing MAC addresses */
3075         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3076                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3077         if (eth_dev->data->mac_addrs == NULL) {
3078                 RTE_BOND_LOG(ERR,
3079                              "Failed to allocate %u bytes needed to store MAC addresses",
3080                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3081                 goto err;
3082         }
3083
3084         eth_dev->dev_ops = &default_dev_ops;
3085         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3086
3087         rte_spinlock_init(&internals->lock);
3088         rte_spinlock_init(&internals->lsc_lock);
3089
3090         internals->port_id = eth_dev->data->port_id;
3091         internals->mode = BONDING_MODE_INVALID;
3092         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3093         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3094         internals->burst_xmit_hash = burst_xmit_l2_hash;
3095         internals->user_defined_mac = 0;
3096
3097         internals->link_status_polling_enabled = 0;
3098
3099         internals->link_status_polling_interval_ms =
3100                 DEFAULT_POLLING_INTERVAL_10_MS;
3101         internals->link_down_delay_ms = 0;
3102         internals->link_up_delay_ms = 0;
3103
3104         internals->slave_count = 0;
3105         internals->active_slave_count = 0;
3106         internals->rx_offload_capa = 0;
3107         internals->tx_offload_capa = 0;
3108         internals->rx_queue_offload_capa = 0;
3109         internals->tx_queue_offload_capa = 0;
3110         internals->candidate_max_rx_pktlen = 0;
3111         internals->max_rx_pktlen = 0;
3112
3113         /* Initially allow any RSS offload type to be chosen */
3114         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3115
3116         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3117         memset(internals->slaves, 0, sizeof(internals->slaves));
3118
3119         TAILQ_INIT(&internals->flow_list);
3120         internals->flow_isolated_valid = 0;
3121
3122         /* Set mode 4 default configuration */
3123         bond_mode_8023ad_setup(eth_dev, NULL);
3124         if (bond_ethdev_mode_set(eth_dev, mode)) {
3125                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3126                                  eth_dev->data->port_id, mode);
3127                 goto err;
3128         }
3129
3130         vlan_filter_bmp_size =
3131                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3132         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3133                                                    RTE_CACHE_LINE_SIZE);
3134         if (internals->vlan_filter_bmpmem == NULL) {
3135                 RTE_BOND_LOG(ERR,
3136                              "Failed to allocate vlan bitmap for bonded device %u",
3137                              eth_dev->data->port_id);
3138                 goto err;
3139         }
3140
3141         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3142                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3143         if (internals->vlan_filter_bmp == NULL) {
3144                 RTE_BOND_LOG(ERR,
3145                              "Failed to init vlan bitmap for bonded device %u",
3146                              eth_dev->data->port_id);
3147                 rte_free(internals->vlan_filter_bmpmem);
3148                 goto err;
3149         }
3150
3151         return eth_dev->data->port_id;
3152
3153 err:
3154         rte_free(internals);
3155         if (eth_dev != NULL) {
3156                 rte_free(eth_dev->data->mac_addrs);
3157                 rte_eth_dev_release_port(eth_dev);
3158         }
3159         return -1;
3160 }
3161
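/*
 * vdev probe entry point: parse the mandatory bonding mode and the optional
 * socket id from the devargs, create the bonded device via bond_alloc() and
 * apply the 802.3ad aggregator selection mode (AGG_STABLE by default).
 */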
3162 static int
3163 bond_probe(struct rte_vdev_device *dev)
3164 {
3165         const char *name;
3166         struct bond_dev_private *internals;
3167         struct rte_kvargs *kvlist;
3168         uint8_t bonding_mode, socket_id;
3169         int  arg_count, port_id;
3170         uint8_t agg_mode;
3171         struct rte_eth_dev *eth_dev;
3172
3173         if (!dev)
3174                 return -EINVAL;
3175
3176         name = rte_vdev_device_name(dev);
3177         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3178
3179         if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
3180             strlen(rte_vdev_device_args(dev)) == 0) {
3181                 eth_dev = rte_eth_dev_attach_secondary(name);
3182                 if (!eth_dev) {
3183                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3184                         return -1;
3185                 }
3186                 /* TODO: request info from primary to set up Rx and Tx */
3187                 eth_dev->dev_ops = &default_dev_ops;
3188                 rte_eth_dev_probing_finish(eth_dev);
3189                 return 0;
3190         }
3191
3192         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3193                 pmd_bond_init_valid_arguments);
3194         if (kvlist == NULL)
3195                 return -1;
3196
3197         /* Parse link bonding mode */
3198         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3199                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3200                                 &bond_ethdev_parse_slave_mode_kvarg,
3201                                 &bonding_mode) != 0) {
3202                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3203                                         name);
3204                         goto parse_error;
3205                 }
3206         } else {
3207                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3208                                 "device %s", name);
3209                 goto parse_error;
3210         }
3211
3212         /* Parse socket id to create bonding device on */
3213         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3214         if (arg_count == 1) {
3215                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3216                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3217                                 != 0) {
3218                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3219                                         "bonded device %s", name);
3220                         goto parse_error;
3221                 }
3222         } else if (arg_count > 1) {
3223                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3224                                 "bonded device %s", name);
3225                 goto parse_error;
3226         } else {
3227                 socket_id = rte_socket_id();
3228         }
3229
3230         dev->device.numa_node = socket_id;
3231
3232         /* Create link bonding eth device */
3233         port_id = bond_alloc(dev, bonding_mode);
3234         if (port_id < 0) {
3235                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3236                                 "socket %u.", name, bonding_mode, socket_id);
3237                 goto parse_error;
3238         }
3239         internals = rte_eth_devices[port_id].data->dev_private;
3240         internals->kvlist = kvlist;
3241
3242
3243         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3244                 if (rte_kvargs_process(kvlist,
3245                                 PMD_BOND_AGG_MODE_KVARG,
3246                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3247                                 &agg_mode) != 0) {
3248                         RTE_BOND_LOG(ERR,
3249                                         "Failed to parse agg selection mode for bonded device %s",
3250                                         name);
3251                         goto parse_error;
3252                 }
3253
3254                 if (internals->mode == BONDING_MODE_8023AD)
3255                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3256                                         agg_mode);
3257         } else {
3258                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3259         }
3260
3261         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3262         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3263                         "socket %u.", name, port_id, bonding_mode, socket_id);
3264         return 0;
3265
3266 parse_error:
3267         rte_kvargs_free(kvlist);
3268
3269         return -1;
3270 }
3271
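/*
 * vdev remove entry point: removal is refused while slaves are still
 * attached; otherwise the device is stopped and closed if needed and all
 * private resources are released.
 */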
3272 static int
3273 bond_remove(struct rte_vdev_device *dev)
3274 {
3275         struct rte_eth_dev *eth_dev;
3276         struct bond_dev_private *internals;
3277         const char *name;
3278
3279         if (!dev)
3280                 return -EINVAL;
3281
3282         name = rte_vdev_device_name(dev);
3283         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3284
3285         /* now free all data allocation - for the eth_dev structure
3286          * and internal (private) data
3287          */
3288
3289         /* find an ethdev entry */
3290         eth_dev = rte_eth_dev_allocated(name);
3291         if (eth_dev == NULL)
3292                 return -ENODEV;
3293
3294         RTE_ASSERT(eth_dev->device == &dev->device);
3295
3296         internals = eth_dev->data->dev_private;
3297         if (internals->slave_count != 0)
3298                 return -EBUSY;
3299
3300         if (eth_dev->data->dev_started == 1) {
3301                 bond_ethdev_stop(eth_dev);
3302                 bond_ethdev_close(eth_dev);
3303         }
3304
3305         eth_dev->dev_ops = NULL;
3306         eth_dev->rx_pkt_burst = NULL;
3307         eth_dev->tx_pkt_burst = NULL;
3308
3309         internals = eth_dev->data->dev_private;
3310         /* Try to release the mempool used in mode 6. If the bonded
3311          * device is not in mode 6, the pointer is NULL and freeing it is harmless.
3312          */
3313         rte_mempool_free(internals->mode6.mempool);
3314         rte_bitmap_free(internals->vlan_filter_bmp);
3315         rte_free(internals->vlan_filter_bmpmem);
3316         rte_free(eth_dev->data->dev_private);
3317         rte_free(eth_dev->data->mac_addrs);
3318
3319         rte_eth_dev_release_port(eth_dev);
3320
3321         return 0;
3322 }
3323
3324 /* This part resolves the slave port ids after all the other pdevs and vdevs
3325  * have been allocated */
3326 static int
3327 bond_ethdev_configure(struct rte_eth_dev *dev)
3328 {
3329         const char *name = dev->device->name;
3330         struct bond_dev_private *internals = dev->data->dev_private;
3331         struct rte_kvargs *kvlist = internals->kvlist;
3332         int arg_count;
3333         uint16_t port_id = dev - rte_eth_devices;
3334         uint8_t agg_mode;
3335
3336         static const uint8_t default_rss_key[40] = {
3337                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3338                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3339                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3340                 0xBE, 0xAC, 0x01, 0xFA
3341         };
3342
3343         unsigned i, j;
3344
3345         /* If RSS is enabled, fill table and key with default values */
3346         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3347                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
3348                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
3349                 memcpy(internals->rss_key, default_rss_key, 40);
3350
3351                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3352                         internals->reta_conf[i].mask = ~0LL;
3353                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3354                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
3355                 }
3356         }
3357
3358         /* set the max_rx_pktlen */
3359         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3360
3361         /*
3362          * if no kvlist, it means that this bonded device has been created
3363          * through the bonding api.
3364          */
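        /*
         * Illustrative only (bond_port and slave_port_id below are
         * hypothetical): such a device would typically have been set up with
         * the public API, e.g.
         *   bond_port = rte_eth_bond_create("net_bonding0",
         *                   BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
         *   rte_eth_bond_slave_add(bond_port, slave_port_id);
         * in which case there are no devargs to parse here.
         */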
3365         if (!kvlist)
3366                 return 0;
3367
3368         /* Parse MAC address for bonded device */
3369         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3370         if (arg_count == 1) {
3371                 struct ether_addr bond_mac;
3372
3373                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3374                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3375                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3376                                      name);
3377                         return -1;
3378                 }
3379
3380                 /* Set MAC address */
3381                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3382                         RTE_BOND_LOG(ERR,
3383                                      "Failed to set mac address on bonded device %s",
3384                                      name);
3385                         return -1;
3386                 }
3387         } else if (arg_count > 1) {
3388                 RTE_BOND_LOG(ERR,
3389                              "MAC address can be specified only once for bonded device %s",
3390                              name);
3391                 return -1;
3392         }
3393
3394         /* Parse/set balance mode transmit policy */
3395         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3396         if (arg_count == 1) {
3397                 uint8_t xmit_policy;
3398
3399                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3400                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3401                     0) {
3402                         RTE_BOND_LOG(INFO,
3403                                      "Invalid xmit policy specified for bonded device %s",
3404                                      name);
3405                         return -1;
3406                 }
3407
3408                 /* Set balance mode transmit policy */
3409                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3410                         RTE_BOND_LOG(ERR,
3411                                      "Failed to set balance xmit policy on bonded device %s",
3412                                      name);
3413                         return -1;
3414                 }
3415         } else if (arg_count > 1) {
3416                 RTE_BOND_LOG(ERR,
3417                              "Transmit policy can be specified only once for bonded device %s",
3418                              name);
3419                 return -1;
3420         }
3421
3422         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3423                 if (rte_kvargs_process(kvlist,
3424                                        PMD_BOND_AGG_MODE_KVARG,
3425                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3426                                        &agg_mode) != 0) {
3427                         RTE_BOND_LOG(ERR,
3428                                      "Failed to parse agg selection mode for bonded device %s",
3429                                      name);
3430                 }
3431                 if (internals->mode == BONDING_MODE_8023AD)
3432                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3433                                                               agg_mode);
3434         }
3435
3436         /* Parse/add slave ports to bonded device */
3437         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3438                 struct bond_ethdev_slave_ports slave_ports;
3439                 unsigned i;
3440
3441                 memset(&slave_ports, 0, sizeof(slave_ports));
3442
3443                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3444                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3445                         RTE_BOND_LOG(ERR,
3446                                      "Failed to parse slave ports for bonded device %s",
3447                                      name);
3448                         return -1;
3449                 }
3450
3451                 for (i = 0; i < slave_ports.slave_count; i++) {
3452                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3453                                 RTE_BOND_LOG(ERR,
3454                                              "Failed to add port %d as slave to bonded device %s",
3455                                              slave_ports.slaves[i], name);
3456                         }
3457                 }
3458
3459         } else {
3460                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3461                 return -1;
3462         }
3463
3464         /* Parse/set primary slave port id*/
3465         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3466         if (arg_count == 1) {
3467                 uint16_t primary_slave_port_id;
3468
3469                 if (rte_kvargs_process(kvlist,
3470                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3471                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3472                                        &primary_slave_port_id) < 0) {
3473                         RTE_BOND_LOG(INFO,
3474                                      "Invalid primary slave port id specified for bonded device %s",
3475                                      name);
3476                         return -1;
3477                 }
3478
3479                 /* Set the primary slave port */
3480                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3481                     != 0) {
3482                         RTE_BOND_LOG(ERR,
3483                                      "Failed to set primary slave port %d on bonded device %s",
3484                                      primary_slave_port_id, name);
3485                         return -1;
3486                 }
3487         } else if (arg_count > 1) {
3488                 RTE_BOND_LOG(INFO,
3489                              "Primary slave can be specified only once for bonded device %s",
3490                              name);
3491                 return -1;
3492         }
3493
3494         /* Parse link status monitor polling interval */
3495         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3496         if (arg_count == 1) {
3497                 uint32_t lsc_poll_interval_ms;
3498
3499                 if (rte_kvargs_process(kvlist,
3500                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3501                                        &bond_ethdev_parse_time_ms_kvarg,
3502                                        &lsc_poll_interval_ms) < 0) {
3503                         RTE_BOND_LOG(INFO,
3504                                      "Invalid lsc polling interval value specified for bonded"
3505                                      " device %s", name);
3506                         return -1;
3507                 }
3508
3509                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3510                     != 0) {
3511                         RTE_BOND_LOG(ERR,
3512                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3513                                      lsc_poll_interval_ms, name);
3514                         return -1;
3515                 }
3516         } else if (arg_count > 1) {
3517                 RTE_BOND_LOG(INFO,
3518                              "LSC polling interval can be specified only once for bonded"
3519                              " device %s", name);
3520                 return -1;
3521         }
3522
3523         /* Parse link up interrupt propagation delay */
3524         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3525         if (arg_count == 1) {
3526                 uint32_t link_up_delay_ms;
3527
3528                 if (rte_kvargs_process(kvlist,
3529                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3530                                        &bond_ethdev_parse_time_ms_kvarg,
3531                                        &link_up_delay_ms) < 0) {
3532                         RTE_BOND_LOG(INFO,
3533                                      "Invalid link up propagation delay value specified for"
3534                                      " bonded device %s", name);
3535                         return -1;
3536                 }
3537
3538                 /* Set link up propagation delay */
3539                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3540                     != 0) {
3541                         RTE_BOND_LOG(ERR,
3542                                      "Failed to set link up propagation delay (%u ms) on bonded"
3543                                      " device %s", link_up_delay_ms, name);
3544                         return -1;
3545                 }
3546         } else if (arg_count > 1) {
3547                 RTE_BOND_LOG(INFO,
3548                              "Link up propagation delay can be specified only once for"
3549                              " bonded device %s", name);
3550                 return -1;
3551         }
3552
3553         /* Parse link down interrupt propagation delay */
3554         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3555         if (arg_count == 1) {
3556                 uint32_t link_down_delay_ms;
3557
3558                 if (rte_kvargs_process(kvlist,
3559                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3560                                        &bond_ethdev_parse_time_ms_kvarg,
3561                                        &link_down_delay_ms) < 0) {
3562                         RTE_BOND_LOG(INFO,
3563                                      "Invalid link down propagation delay value specified for"
3564                                      " bonded device %s", name);
3565                         return -1;
3566                 }
3567
3568                 /* Set link down propagation delay */
3569                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3570                     != 0) {
3571                         RTE_BOND_LOG(ERR,
3572                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3573                                      link_down_delay_ms, name);
3574                         return -1;
3575                 }
3576         } else if (arg_count > 1) {
3577                 RTE_BOND_LOG(INFO,
3578                              "Link down propagation delay can be specified only once for bonded device %s",
3579                              name);
3580                 return -1;
3581         }
3582
3583         return 0;
3584 }
3585
3586 struct rte_vdev_driver pmd_bond_drv = {
3587         .probe = bond_probe,
3588         .remove = bond_remove,
3589 };
3590
3591 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3592 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3593
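/*
 * Example (illustrative only; the device name, slave identifiers and values
 * below are hypothetical): a mode 4 bonded device over two slaves could be
 * created from the EAL command line with
 *   --vdev 'net_bonding0,mode=4,slave=0000:01:00.0,slave=0000:01:00.1,agg_mode=stable'
 */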
3594 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3595         "slave=<ifc> "
3596         "primary=<ifc> "
3597         "mode=[0-6] "
3598         "xmit_policy=[l2 | l23 | l34] "
3599         "agg_mode=[count | stable | bandwidth] "
3600         "socket_id=<int> "
3601         "mac=<mac addr> "
3602         "lsc_poll_period_ms=<int> "
3603         "up_delay=<int> "
3604         "down_delay=<int>");
3605
3606 int bond_logtype;
3607
3608 RTE_INIT(bond_init_log);
3609 static void
3610 bond_init_log(void)
3611 {
3612         bond_logtype = rte_log_register("pmd.net.bon");
3613         if (bond_logtype >= 0)
3614                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3615 }