net: add rte prefix to ether structures
[dpdk.git] drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

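/*
 * Return the byte offset of the payload past any VLAN tags following the
 * Ethernet header (at most one inner tag after a QinQ outer tag), updating
 * *proto to the encapsulated ethertype as the tags are walked.
 */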
static inline size_t
get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
                struct rte_vlan_hdr *vlan_hdr =
                        (struct rte_vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct rte_vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct rte_vlan_hdr);
                }
        }
        return vlan_offset;
}

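/*
 * Receive burst for round robin mode (mode 0): poll each active slave in
 * turn, starting from the slave after the one polled on the previous call,
 * until nb_pkts packets have been gathered or all slaves have been polled.
 */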
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_total = 0;
        uint16_t slave_count;
        uint16_t active_slave;
        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave =
                        rte_eth_rx_burst(internals->active_slaves[active_slave],
                                         bd_rx_q->queue_id,
                                         bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
        return num_rx_total;
}

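/*
 * Receive burst for active backup mode (mode 1): packets are read from the
 * current primary slave only.
 */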
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

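/*
 * Return non-zero if the frame is an untagged slow protocol frame
 * (ethertype 0x8809) carrying a LACP or marker PDU; such frames are
 * diverted to the mode 4 state machine instead of being delivered to the
 * application.
 */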
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

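/*
 * Flow spec/mask pair matching only on the slow protocol ethertype; used to
 * steer LACP frames to the dedicated control queue on each slave.
 */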
static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

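/*
 * Check that a slave can support the mode 4 dedicated queue setup: the flow
 * rule steering slow protocol frames must validate on the slave, and the
 * slave must expose at least as many Rx/Tx queues as the bonded device is
 * configured with.
 */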
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

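/*
 * Called when dedicated control queues are requested: record the queue ids
 * the dedicated LACP Rx/Tx queues will use (one past the currently
 * configured queues) and verify the flow rule against every slave.
 */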
int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that all slaves in the bonding device support flow director */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

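/*
 * Install the flow rule redirecting slow protocol frames to the dedicated
 * Rx queue on the given slave; the created flow handle is stored so it can
 * be destroyed later.
 */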
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

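/*
 * Receive burst for mode 4 when dedicated control queues are enabled: LACP
 * frames arrive on their own queue, so the data queues can be polled round
 * robin with no slow protocol filtering on the fast path.
 */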
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint16_t active_slave;
        uint16_t i;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Read packets from this slave */
                num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
                                                bd_rx_q->queue_id,
                                                bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;

                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

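/*
 * Transmit burst for mode 4 when dedicated control queues are enabled:
 * distribute the burst across the slaves currently in DISTRIBUTING state
 * using the configured transmit hash; control frames are handled elsewhere.
 */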
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        /* 2-D array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave using a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < dist_slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

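/*
 * Receive burst for mode 4 without dedicated queues: poll the slaves round
 * robin and filter the result, handing LACP/marker frames to the mode 4
 * state machine and dropping frames that the collecting check or the
 * destination MAC filter says the bonded device should not deliver.
 */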
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_eth_dev *bonded_eth_dev =
                                        &rte_eth_devices[internals->port_id];
        struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
        struct rte_ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t subtype;
        uint16_t i;
        uint16_t j;
        uint16_t k;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* If packet is not pure L2 and is known, skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow
                         * packet or the slave is not in collecting state, or
                         * if the bonding interface is not in promiscuous mode
                         * and the destination address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting ||
                                (!promisc &&
                                 !is_multicast_ether_addr(&hdr->d_addr) &&
                                 !is_same_ether_addr(bond_mac,
                                                     &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
        switch (arp_op) {
        case RTE_ARP_OP_REQUEST:
                strlcpy(buf, "ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REPLY:
                strlcpy(buf, "ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_REVREQUEST:
                strlcpy(buf, "Reverse ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REVREPLY:
                strlcpy(buf, "Reverse ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_INVREQUEST:
                strlcpy(buf, "Peer Identify Request", buf_len);
                return;
        case RTE_ARP_OP_INVREPLY:
                strlcpy(buf, "Peer Identify Reply", buf_len);
                return;
        default:
                break;
        }
        strlcpy(buf, "Unknown", buf_len);
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Update the RX or TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update its stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
                src_ip,                                                 \
                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
#endif

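/*
 * Debug helper for mode 6 (ALB): update the per-client counters for IPv4
 * frames and, when ALB debugging is compiled in, log the MAC/IP addresses
 * of IPv4 and ARP frames.
 */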
static void
mode6_debug(const char __attribute__((unused)) *info,
        struct rte_ether_hdr *eth_h, uint16_t port,
        uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct rte_arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
                                ArpOp, sizeof(ArpOp));
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

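/*
 * Receive burst for mode 6 (ALB): receive as in round robin mode, then pass
 * any ARP frames to the ALB logic so the client table stays current.
 */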
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

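/*
 * Transmit burst for round robin mode (mode 0). Note that slave_idx below
 * is static, so the rotation position is shared by all queues and bonded
 * devices using this Tx function.
 */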
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* Increment current slave index so the next call to tx burst starts on
         * the next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                       &slave_bufs[i][num_tx_slave],
                                       tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

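/* Transmit burst for active backup mode: send on the primary slave only. */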
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

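/*
 * L2 hash: XOR of the 16-bit words of the source and destination MAC
 * addresses.
 */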
static inline uint16_t
ether_hash(struct rte_ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

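/*
 * Map each mbuf of the burst to an output slave using the layer 2 (MAC)
 * hash.
 */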
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

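/*
 * Map each mbuf to an output slave by combining the layer 2 hash with an
 * IPv4/IPv6 source/destination address hash when an IP header is present.
 */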
void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

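/*
 * Map each mbuf to an output slave by hashing the IP addresses and, for
 * non-fragmented TCP/UDP packets, the layer 4 ports. Note that only the
 * IPv4 path bounds-checks the L4 header against the mbuf data length.
 */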
void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct udp_hdr *udp_hdr;
        struct tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)udp_hdr + sizeof(*udp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

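/*
 * Estimate the unused bandwidth of a slave over the current measurement
 * window from its link speed and the bytes transmitted since the last
 * snapshot, split into an integer part and a remainder for sorting.
 */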
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

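/*
 * Periodic alarm callback for TLB mode: sample the per-slave Tx byte
 * counters, compute each slave's remaining bandwidth and re-sort the slave
 * order so the least loaded slaves are tried first; re-arms itself every
 * REORDER_PERIOD_MS milliseconds.
 */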
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint16_t slave_id;
        uint16_t i;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

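/*
 * Transmit burst for TLB mode (mode 5): walk the slaves in bandwidth order,
 * rewriting the source MAC of packets that still carry the primary slave's
 * address, and push the remainder of the burst to each slave until it is
 * fully sent.
 */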
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct rte_ether_hdr *ether_hdr;
        struct rte_ether_addr primary_slave_addr;
        struct rte_ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j],
                                                struct rte_ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

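/*
 * Transmit burst for ALB mode (mode 6): ARP packets are assigned to slaves
 * by the ALB client table (with the source MAC rewritten accordingly),
 * generated ARP update packets are sent on their designated slaves, and
 * all other traffic falls back to the TLB transmit policy.
 */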
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_BOND_LOG(ERR,
                                                     "Failed to allocate ARP packet from pool");
                                        continue;
                                }
                                pkt_size = sizeof(struct rte_ether_hdr) +
                                        sizeof(struct rte_arp_hdr) +
                                        client_info->vlan_count *
                                        sizeof(struct rte_vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

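/*
 * Transmit burst for balance mode (mode 2): distribute the burst across all
 * active slaves using the configured transmit hash policy; any packets a
 * slave fails to send are moved to the tail of bufs so the caller can retry.
 */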
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave using a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

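/*
 * Transmit burst for mode 4 without dedicated queues: first drain any
 * pending LACP control frames from each slave's tx_ring, then distribute
 * the data burst across the slaves in DISTRIBUTING state using the
 * transmit hash.
 */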
1288 static uint16_t
1289 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1290                 uint16_t nb_bufs)
1291 {
1292         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1293         struct bond_dev_private *internals = bd_tx_q->dev_private;
1294
1295         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1296         uint16_t slave_count;
1297
1298         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1299         uint16_t dist_slave_count;
1300
1301         /* 2-D array to sort mbufs for transmission on each slave into */
1302         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1303         /* Number of mbufs for transmission on each slave */
1304         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1305         /* Mapping array generated by hash function to map mbufs to slaves */
1306         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1307
1308         uint16_t slave_tx_count;
1309         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1310
1311         uint16_t i;
1312
1313         /* Copy slave list to protect against slave up/down changes during tx
1314          * bursting */
1315         slave_count = internals->active_slave_count;
1316         if (unlikely(slave_count < 1))
1317                 return 0;
1318
1319         memcpy(slave_port_ids, internals->active_slaves,
1320                         sizeof(slave_port_ids[0]) * slave_count);
1321
1322         /* Check for LACP control packets and send if available */
1323         for (i = 0; i < slave_count; i++) {
1324                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1325                 struct rte_mbuf *ctrl_pkt = NULL;
1326
1327                 if (likely(rte_ring_empty(port->tx_ring)))
1328                         continue;
1329
1330                 if (rte_ring_dequeue(port->tx_ring,
1331                                      (void **)&ctrl_pkt) != -ENOENT) {
1332                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1333                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1334                         /*
1335                          * re-enqueue LAG control plane packets to buffering
1336                          * ring if transmission fails so the packet isn't lost.
1337                          */
1338                         if (slave_tx_count != 1)
1339                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1340                 }
1341         }
1342
1343         if (unlikely(nb_bufs == 0))
1344                 return 0;
1345
1346         dist_slave_count = 0;
1347         for (i = 0; i < slave_count; i++) {
1348                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1349
1350                 if (ACTOR_STATE(port, DISTRIBUTING))
1351                         dist_slave_port_ids[dist_slave_count++] =
1352                                         slave_port_ids[i];
1353         }
1354
1355         if (likely(dist_slave_count > 0)) {
1356
1357                 /*
1358                  * Populate each slave's mbuf array with the packets to be sent
1359                  * on it, selecting the output slave via a hash on the xmit policy
1360                  */
1361                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1362                                 bufs_slave_port_idxs);
1363
1364                 for (i = 0; i < nb_bufs; i++) {
1365                         /*
1366                          * Populate slave mbuf arrays with mbufs for that
1367                          * slave
1368                          */
1369                         uint16_t slave_idx = bufs_slave_port_idxs[i];
1370
1371                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1372                                         bufs[i];
1373                 }
1374
1375
1376                 /* Send packet burst on each slave device */
1377                 for (i = 0; i < dist_slave_count; i++) {
1378                         if (slave_nb_bufs[i] == 0)
1379                                 continue;
1380
1381                         slave_tx_count = rte_eth_tx_burst(
1382                                         dist_slave_port_ids[i],
1383                                         bd_tx_q->queue_id, slave_bufs[i],
1384                                         slave_nb_bufs[i]);
1385
1386                         total_tx_count += slave_tx_count;
1387
1388                         /* If tx burst fails move packets to end of bufs */
1389                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1390                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1391                                                 slave_tx_count;
1392                                 total_tx_fail_count += slave_tx_fail_count;
1393
1394                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1395                                        &slave_bufs[i][slave_tx_count],
1396                                        slave_tx_fail_count * sizeof(bufs[0]));
1397                         }
1398                 }
1399         }
1400
1401         return total_tx_count;
1402 }
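/*
 * In the mode 4 tx path above, only slaves whose actor state is DISTRIBUTING
 * carry data traffic (per 802.3ad a link must reach the distributing state
 * before frames may be sent on it); LACPDUs queued by the state machines on
 * each slave's tx_ring are drained before the nb_bufs == 0 early return, so
 * control traffic keeps flowing even when there are no data packets or no
 * slave is distributing yet.
 */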
1403
1404 static uint16_t
1405 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1406                 uint16_t nb_pkts)
1407 {
1408         struct bond_dev_private *internals;
1409         struct bond_tx_queue *bd_tx_q;
1410
1411         uint16_t slaves[RTE_MAX_ETHPORTS];
1412         uint8_t tx_failed_flag = 0;
1413         uint16_t num_of_slaves;
1414
1415         uint16_t max_nb_of_tx_pkts = 0;
1416
1417         int slave_tx_total[RTE_MAX_ETHPORTS];
1418         int i, most_successful_tx_slave = -1;
1419
1420         bd_tx_q = (struct bond_tx_queue *)queue;
1421         internals = bd_tx_q->dev_private;
1422
1423         /* Copy slave list to protect against slave up/down changes during tx
1424          * bursting */
1425         num_of_slaves = internals->active_slave_count;
1426         memcpy(slaves, internals->active_slaves,
1427                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1428
1429         if (num_of_slaves < 1)
1430                 return 0;
1431
1432         /* Increment reference count on mbufs */
1433         for (i = 0; i < nb_pkts; i++)
1434                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1435
1436         /* Transmit burst on each active slave */
1437         for (i = 0; i < num_of_slaves; i++) {
1438                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1439                                         bufs, nb_pkts);
1440
1441                 if (unlikely(slave_tx_total[i] < nb_pkts))
1442                         tx_failed_flag = 1;
1443
1444                 /* record the value and slave index for the slave which transmits the
1445                  * maximum number of packets */
1446                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1447                         max_nb_of_tx_pkts = slave_tx_total[i];
1448                         most_successful_tx_slave = i;
1449                 }
1450         }
1451
1452         /* If slaves fail to transmit packets from the burst, the calling
1453          * application is unaware of the extra mbuf references taken above, so we
1454          * must free the failed packets of every slave except the most successful one
1455          */
1456         if (unlikely(tx_failed_flag))
1457                 for (i = 0; i < num_of_slaves; i++)
1458                         if (i != most_successful_tx_slave)
1459                                 while (slave_tx_total[i] < nb_pkts)
1460                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1461
1462         return max_nb_of_tx_pkts;
1463 }
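/*
 * Reference counting example for the broadcast path above: with three active
 * slaves each mbuf's refcnt is raised by 2, so after the three tx bursts every
 * fully transmitted packet has been consumed exactly three times. If slave A
 * sends the whole burst but slave B stops at index k, the cleanup loop frees
 * bufs[k .. nb_pkts - 1] once on B's behalf, dropping the references B never
 * consumed, and the function reports the most successful slave's count.
 */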
1464
1465 static void
1466 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1467 {
1468         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1469
1470         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1471                 /**
1472                  * If in mode 4 then save the link properties of the first
1473                  * slave; all subsequent slaves must match these properties
1474                  */
1475                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1476
1477                 bond_link->link_autoneg = slave_link->link_autoneg;
1478                 bond_link->link_duplex = slave_link->link_duplex;
1479                 bond_link->link_speed = slave_link->link_speed;
1480         } else {
1481                 /**
1482                  * In any other mode the link properties are set to default
1483                  * values of AUTONEG/DUPLEX
1484                  */
1485                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1486                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1487         }
1488 }
1489
1490 static int
1491 link_properties_valid(struct rte_eth_dev *ethdev,
1492                 struct rte_eth_link *slave_link)
1493 {
1494         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1495
1496         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1497                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1498
1499                 if (bond_link->link_duplex != slave_link->link_duplex ||
1500                         bond_link->link_autoneg != slave_link->link_autoneg ||
1501                         bond_link->link_speed != slave_link->link_speed)
1502                         return -1;
1503         }
1504
1505         return 0;
1506 }
1507
1508 int
1509 mac_address_get(struct rte_eth_dev *eth_dev,
1510                 struct rte_ether_addr *dst_mac_addr)
1511 {
1512         struct rte_ether_addr *mac_addr;
1513
1514         if (eth_dev == NULL) {
1515                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1516                 return -1;
1517         }
1518
1519         if (dst_mac_addr == NULL) {
1520                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1521                 return -1;
1522         }
1523
1524         mac_addr = eth_dev->data->mac_addrs;
1525
1526         ether_addr_copy(mac_addr, dst_mac_addr);
1527         return 0;
1528 }
1529
1530 int
1531 mac_address_set(struct rte_eth_dev *eth_dev,
1532                 struct rte_ether_addr *new_mac_addr)
1533 {
1534         struct rte_ether_addr *mac_addr;
1535
1536         if (eth_dev == NULL) {
1537                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1538                 return -1;
1539         }
1540
1541         if (new_mac_addr == NULL) {
1542                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1543                 return -1;
1544         }
1545
1546         mac_addr = eth_dev->data->mac_addrs;
1547
1548         /* If the new MAC is different from the current MAC then update */
1549         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1550                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1551
1552         return 0;
1553 }
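/*
 * A minimal usage sketch of the two helpers above (hypothetical caller code;
 * "bond_dev" and "new_addr" are assumed to be supplied by the caller):
 *
 *	struct rte_ether_addr cur;
 *	mac_address_get(bond_dev, &cur);	// read current bonded MAC
 *	mac_address_set(bond_dev, &new_addr);	// overwrite MAC slot 0
 */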
1554
1555 static const struct rte_ether_addr null_mac_addr;
1556
1557 /*
1558  * Add additional MAC addresses to the slave
1559  */
1560 int
1561 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1562                 uint16_t slave_port_id)
1563 {
1564         int i, ret;
1565         struct rte_ether_addr *mac_addr;
1566
1567         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1568                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1569                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1570                         break;
1571
1572                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1573                 if (ret < 0) {
1574                         /* rollback */
1575                         for (i--; i > 0; i--)
1576                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1577                                         &bonded_eth_dev->data->mac_addrs[i]);
1578                         return ret;
1579                 }
1580         }
1581
1582         return 0;
1583 }
1584
1585 /*
1586  * Remove additional MAC addresses from the slave
1587  */
1588 int
1589 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1590                 uint16_t slave_port_id)
1591 {
1592         int i, rc, ret;
1593         struct rte_ether_addr *mac_addr;
1594
1595         rc = 0;
1596         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1597                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1598                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1599                         break;
1600
1601                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1602                 /* save only the first error */
1603                 if (ret < 0 && rc == 0)
1604                         rc = ret;
1605         }
1606
1607         return rc;
1608 }
1609
1610 int
1611 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1612 {
1613         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1614         int i;
1615
1616         /* Update slave devices MAC addresses */
1617         if (internals->slave_count < 1)
1618                 return -1;
1619
1620         switch (internals->mode) {
1621         case BONDING_MODE_ROUND_ROBIN:
1622         case BONDING_MODE_BALANCE:
1623         case BONDING_MODE_BROADCAST:
1624                 for (i = 0; i < internals->slave_count; i++) {
1625                         if (rte_eth_dev_default_mac_addr_set(
1626                                         internals->slaves[i].port_id,
1627                                         bonded_eth_dev->data->mac_addrs)) {
1628                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1629                                                 internals->slaves[i].port_id);
1630                                 return -1;
1631                         }
1632                 }
1633                 break;
1634         case BONDING_MODE_8023AD:
1635                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1636                 break;
1637         case BONDING_MODE_ACTIVE_BACKUP:
1638         case BONDING_MODE_TLB:
1639         case BONDING_MODE_ALB:
1640         default:
1641                 for (i = 0; i < internals->slave_count; i++) {
1642                         if (internals->slaves[i].port_id ==
1643                                         internals->current_primary_port) {
1644                                 if (rte_eth_dev_default_mac_addr_set(
1645                                                 internals->current_primary_port,
1646                                                 bonded_eth_dev->data->mac_addrs)) {
1647                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1648                                                         internals->current_primary_port);
1649                                         return -1;
1650                                 }
1651                         } else {
1652                                 if (rte_eth_dev_default_mac_addr_set(
1653                                                 internals->slaves[i].port_id,
1654                                                 &internals->slaves[i].persisted_mac_addr)) {
1655                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1656                                                         internals->slaves[i].port_id);
1657                                         return -1;
1658                                 }
1659                         }
1660                 }
1661         }
1662
1663         return 0;
1664 }
1665
1666 int
1667 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1668 {
1669         struct bond_dev_private *internals;
1670
1671         internals = eth_dev->data->dev_private;
1672
1673         switch (mode) {
1674         case BONDING_MODE_ROUND_ROBIN:
1675                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1676                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1677                 break;
1678         case BONDING_MODE_ACTIVE_BACKUP:
1679                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1680                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1681                 break;
1682         case BONDING_MODE_BALANCE:
1683                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1684                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1685                 break;
1686         case BONDING_MODE_BROADCAST:
1687                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1688                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1689                 break;
1690         case BONDING_MODE_8023AD:
1691                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1692                         return -1;
1693
1694                 if (internals->mode4.dedicated_queues.enabled == 0) {
1695                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1696                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1697                         RTE_BOND_LOG(WARNING,
1698                                 "Using mode 4, it is necessary to do TX burst "
1699                                 "and RX burst at least every 100ms.");
1700                 } else {
1701                         /* Use flow director's optimization */
1702                         eth_dev->rx_pkt_burst =
1703                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1704                         eth_dev->tx_pkt_burst =
1705                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1706                 }
1707                 break;
1708         case BONDING_MODE_TLB:
1709                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1710                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1711                 break;
1712         case BONDING_MODE_ALB:
1713                 if (bond_mode_alb_enable(eth_dev) != 0)
1714                         return -1;
1715
1716                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1717                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1718                 break;
1719         default:
1720                 return -1;
1721         }
1722
1723         internals->mode = mode;
1724
1725         return 0;
1726 }
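/*
 * The burst handlers installed above are what an application selects
 * indirectly through the bonding API, e.g. (sketch, assuming the public
 * rte_eth_bond.h API; error handling elided):
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_8023AD, rte_socket_id());
 *	if (bond_port >= 0)
 *		rte_eth_bond_mode_set(bond_port, BONDING_MODE_ACTIVE_BACKUP);
 */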
1727
1728
1729 static int
1730 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1731                 struct rte_eth_dev *slave_eth_dev)
1732 {
1733         int errval = 0;
1734         struct bond_dev_private *internals = (struct bond_dev_private *)
1735                 bonded_eth_dev->data->dev_private;
1736         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1737
1738         if (port->slow_pool == NULL) {
1739                 char mem_name[256];
1740                 int slave_id = slave_eth_dev->data->port_id;
1741
1742                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1743                                 slave_id);
1744                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1745                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1746                         slave_eth_dev->data->numa_node);
1747
1748                 /* Any memory allocation failure in initialization is critical because
1749                  * resources can't be freed, so reinitialization is impossible. */
1750                 if (port->slow_pool == NULL) {
1751                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1752                                 slave_id, mem_name, rte_strerror(rte_errno));
1753                 }
1754         }
1755
1756         if (internals->mode4.dedicated_queues.enabled == 1) {
1757                 /* Configure slow Rx queue */
1758
1759                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1760                                 internals->mode4.dedicated_queues.rx_qid, 128,
1761                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1762                                 NULL, port->slow_pool);
1763                 if (errval != 0) {
1764                         RTE_BOND_LOG(ERR,
1765                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1766                                         slave_eth_dev->data->port_id,
1767                                         internals->mode4.dedicated_queues.rx_qid,
1768                                         errval);
1769                         return errval;
1770                 }
1771
1772                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1773                                 internals->mode4.dedicated_queues.tx_qid, 512,
1774                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1775                                 NULL);
1776                 if (errval != 0) {
1777                         RTE_BOND_LOG(ERR,
1778                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1779                                 slave_eth_dev->data->port_id,
1780                                 internals->mode4.dedicated_queues.tx_qid,
1781                                 errval);
1782                         return errval;
1783                 }
1784         }
1785         return 0;
1786 }
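/*
 * The slow (dedicated) rx/tx queues above only exist when the application has
 * enabled them, e.g. via rte_eth_bond_8023ad_dedicated_queues_enable() before
 * starting the bonded port; LACP frames are then steered to their own hardware
 * queue by a flow rule instead of being filtered out of the data path in
 * software.
 */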
1787
1788 int
1789 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1790                 struct rte_eth_dev *slave_eth_dev)
1791 {
1792         struct bond_rx_queue *bd_rx_q;
1793         struct bond_tx_queue *bd_tx_q;
1794         uint16_t nb_rx_queues;
1795         uint16_t nb_tx_queues;
1796
1797         int errval;
1798         uint16_t q_id;
1799         struct rte_flow_error flow_error;
1800
1801         struct bond_dev_private *internals = (struct bond_dev_private *)
1802                 bonded_eth_dev->data->dev_private;
1803
1804         /* Stop slave */
1805         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1806
1807         /* Enable interrupts on slave device if supported */
1808         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1809                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1810
1811         /* If RSS is enabled for bonding, try to enable it for slaves */
1812         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1813                 if (internals->rss_key_len != 0) {
1814                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1815                                         internals->rss_key_len;
1816                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1817                                         internals->rss_key;
1818                 } else {
1819                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1820                 }
1821
1822                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1823                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1824                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1825                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1826         }
1827
1828         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1829                         DEV_RX_OFFLOAD_VLAN_FILTER)
1830                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1831                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1832         else
1833                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1834                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1835
1836         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1837         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1838
1839         if (internals->mode == BONDING_MODE_8023AD) {
1840                 if (internals->mode4.dedicated_queues.enabled == 1) {
1841                         nb_rx_queues++;
1842                         nb_tx_queues++;
1843                 }
1844         }
1845
1846         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1847                                      bonded_eth_dev->data->mtu);
1848         if (errval != 0 && errval != -ENOTSUP) {
1849                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1850                                 slave_eth_dev->data->port_id, errval);
1851                 return errval;
1852         }
1853
1854         /* Configure device */
1855         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1856                         nb_rx_queues, nb_tx_queues,
1857                         &(slave_eth_dev->data->dev_conf));
1858         if (errval != 0) {
1859                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1860                                 slave_eth_dev->data->port_id, errval);
1861                 return errval;
1862         }
1863
1864         /* Setup Rx Queues */
1865         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1866                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1867
1868                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1869                                 bd_rx_q->nb_rx_desc,
1870                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1871                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1872                 if (errval != 0) {
1873                         RTE_BOND_LOG(ERR,
1874                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1875                                         slave_eth_dev->data->port_id, q_id, errval);
1876                         return errval;
1877                 }
1878         }
1879
1880         /* Setup Tx Queues */
1881         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1882                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1883
1884                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1885                                 bd_tx_q->nb_tx_desc,
1886                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1887                                 &bd_tx_q->tx_conf);
1888                 if (errval != 0) {
1889                         RTE_BOND_LOG(ERR,
1890                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1891                                 slave_eth_dev->data->port_id, q_id, errval);
1892                         return errval;
1893                 }
1894         }
1895
1896         if (internals->mode == BONDING_MODE_8023AD &&
1897                         internals->mode4.dedicated_queues.enabled == 1) {
1898                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1899                 if (errval != 0)
1900                         return errval;
1901
1902                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1903                                 slave_eth_dev->data->port_id) != 0) {
1904                         RTE_BOND_LOG(ERR,
1905                                 "bond_ethdev_8023ad_flow_verify: port=%d failed",
1906                                 slave_eth_dev->data->port_id);
1907                         return -1;
1908                 }
1909
1910                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1911                         rte_flow_destroy(slave_eth_dev->data->port_id,
1912                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1913                                         &flow_error);
1914
1915                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1916                                 slave_eth_dev->data->port_id);
1917         }
1918
1919         /* Start device */
1920         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1921         if (errval != 0) {
1922                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1923                                 slave_eth_dev->data->port_id, errval);
1924                 return -1;
1925         }
1926
1927         /* If RSS is enabled for bonding, synchronize RETA */
1928         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1929                 int i;
1930                 struct bond_dev_private *internals;
1931
1932                 internals = bonded_eth_dev->data->dev_private;
1933
1934                 for (i = 0; i < internals->slave_count; i++) {
1935                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1936                                 errval = rte_eth_dev_rss_reta_update(
1937                                                 slave_eth_dev->data->port_id,
1938                                                 &internals->reta_conf[0],
1939                                                 internals->slaves[i].reta_size);
1940                                 if (errval != 0) {
1941                                         RTE_BOND_LOG(WARNING,
1942                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1943                                                      " RSS Configuration for bonding may be inconsistent.",
1944                                                      slave_eth_dev->data->port_id, errval);
1945                                 }
1946                                 break;
1947                         }
1948                 }
1949         }
1950
1951         /* If lsc interrupt is set, check initial slave's link status */
1952         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1953                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1954                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1955                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1956                         NULL);
1957         }
1958
1959         return 0;
1960 }
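/*
 * slave_configure() mirrors the bonded device's configuration onto a slave in
 * the canonical ethdev order: stop, propagate RSS/VLAN/MTU settings,
 * rte_eth_dev_configure(), per-queue setup (plus the mode 4 slow queue and
 * its flow rule when dedicated queues are enabled), rte_eth_dev_start(),
 * then RETA synchronization and an initial link status check.
 */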
1961
1962 void
1963 slave_remove(struct bond_dev_private *internals,
1964                 struct rte_eth_dev *slave_eth_dev)
1965 {
1966         uint16_t i;
1967
1968         for (i = 0; i < internals->slave_count; i++)
1969                 if (internals->slaves[i].port_id ==
1970                                 slave_eth_dev->data->port_id)
1971                         break;
1972
1973         if (i < (internals->slave_count - 1)) {
1974                 struct rte_flow *flow;
1975
1976                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1977                                 sizeof(internals->slaves[0]) *
1978                                 (internals->slave_count - i - 1));
1979                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1980                         memmove(&flow->flows[i], &flow->flows[i + 1],
1981                                 sizeof(flow->flows[0]) *
1982                                 (internals->slave_count - i - 1));
1983                         flow->flows[internals->slave_count - 1] = NULL;
1984                 }
1985         }
1986
1987         internals->slave_count--;
1988
1989         /* force reconfiguration of slave interfaces */
1990         _rte_eth_dev_reset(slave_eth_dev);
1991 }
1992
1993 static void
1994 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1995
1996 void
1997 slave_add(struct bond_dev_private *internals,
1998                 struct rte_eth_dev *slave_eth_dev)
1999 {
2000         struct bond_slave_details *slave_details =
2001                         &internals->slaves[internals->slave_count];
2002
2003         slave_details->port_id = slave_eth_dev->data->port_id;
2004         slave_details->last_link_status = 0;
2005
2006         /* Mark slave devices that don't support interrupts so we can
2007          * compensate when we start the bond
2008          */
2009         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2010                 slave_details->link_status_poll_enabled = 1;
2011         }
2012
2013         slave_details->link_status_wait_to_complete = 0;
2014         /* record the slave's original MAC so it can be restored later */
2015         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2016                         sizeof(struct rte_ether_addr));
2017 }
2018
2019 void
2020 bond_ethdev_primary_set(struct bond_dev_private *internals,
2021                 uint16_t slave_port_id)
2022 {
2023         int i;
2024
2025         if (internals->active_slave_count < 1)
2026                 internals->current_primary_port = slave_port_id;
2027         else
2028                 /* Search bonded device slave ports for new proposed primary port */
2029                 for (i = 0; i < internals->active_slave_count; i++) {
2030                         if (internals->active_slaves[i] == slave_port_id)
2031                                 internals->current_primary_port = slave_port_id;
2032                 }
2033 }
2034
2035 static void
2036 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2037
2038 static int
2039 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2040 {
2041         struct bond_dev_private *internals;
2042         int i;
2043
2044         /* slave eth dev will be started by bonded device */
2045         if (check_for_bonded_ethdev(eth_dev)) {
2046                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2047                                 eth_dev->data->port_id);
2048                 return -1;
2049         }
2050
2051         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2052         eth_dev->data->dev_started = 1;
2053
2054         internals = eth_dev->data->dev_private;
2055
2056         if (internals->slave_count == 0) {
2057                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2058                 goto out_err;
2059         }
2060
2061         if (internals->user_defined_mac == 0) {
2062                 struct rte_ether_addr *new_mac_addr = NULL;
2063
2064                 for (i = 0; i < internals->slave_count; i++)
2065                         if (internals->slaves[i].port_id == internals->primary_port)
2066                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2067
2068                 if (new_mac_addr == NULL)
2069                         goto out_err;
2070
2071                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2072                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2073                                         eth_dev->data->port_id);
2074                         goto out_err;
2075                 }
2076         }
2077
2078         /* If the bonded device is configured in promiscuous mode then re-apply it */
2079         if (internals->promiscuous_en)
2080                 bond_ethdev_promiscuous_enable(eth_dev);
2081
2082         if (internals->mode == BONDING_MODE_8023AD) {
2083                 if (internals->mode4.dedicated_queues.enabled == 1) {
2084                         internals->mode4.dedicated_queues.rx_qid =
2085                                         eth_dev->data->nb_rx_queues;
2086                         internals->mode4.dedicated_queues.tx_qid =
2087                                         eth_dev->data->nb_tx_queues;
2088                 }
2089         }
2090
2091
2092         /* Reconfigure each slave device if starting bonded device */
2093         for (i = 0; i < internals->slave_count; i++) {
2094                 struct rte_eth_dev *slave_ethdev =
2095                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2096                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2097                         RTE_BOND_LOG(ERR,
2098                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2099                                 eth_dev->data->port_id,
2100                                 internals->slaves[i].port_id);
2101                         goto out_err;
2102                 }
2103                 /* We will need to poll for link status if any slave doesn't
2104                  * support interrupts
2105                  */
2106                 if (internals->slaves[i].link_status_poll_enabled)
2107                         internals->link_status_polling_enabled = 1;
2108         }
2109
2110         /* start polling if needed */
2111         if (internals->link_status_polling_enabled) {
2112                 rte_eal_alarm_set(
2113                         internals->link_status_polling_interval_ms * 1000,
2114                         bond_ethdev_slave_link_status_change_monitor,
2115                         (void *)&rte_eth_devices[internals->port_id]);
2116         }
2117
2118         /* Update all slave devices' MACs */
2119         if (mac_address_slaves_update(eth_dev) != 0)
2120                 goto out_err;
2121
2122         if (internals->user_defined_primary_port)
2123                 bond_ethdev_primary_set(internals, internals->primary_port);
2124
2125         if (internals->mode == BONDING_MODE_8023AD)
2126                 bond_mode_8023ad_start(eth_dev);
2127
2128         if (internals->mode == BONDING_MODE_TLB ||
2129                         internals->mode == BONDING_MODE_ALB)
2130                 bond_tlb_enable(internals);
2131
2132         return 0;
2133
2134 out_err:
2135         eth_dev->data->dev_started = 0;
2136         return -1;
2137 }
2138
2139 static void
2140 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2141 {
2142         uint16_t i;
2143
2144         if (dev->data->rx_queues != NULL) {
2145                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2146                         rte_free(dev->data->rx_queues[i]);
2147                         dev->data->rx_queues[i] = NULL;
2148                 }
2149                 dev->data->nb_rx_queues = 0;
2150         }
2151
2152         if (dev->data->tx_queues != NULL) {
2153                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2154                         rte_free(dev->data->tx_queues[i]);
2155                         dev->data->tx_queues[i] = NULL;
2156                 }
2157                 dev->data->nb_tx_queues = 0;
2158         }
2159 }
2160
2161 void
2162 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2163 {
2164         struct bond_dev_private *internals = eth_dev->data->dev_private;
2165         uint16_t i;
2166
2167         if (internals->mode == BONDING_MODE_8023AD) {
2168                 struct port *port;
2169                 void *pkt = NULL;
2170
2171                 bond_mode_8023ad_stop(eth_dev);
2172
2173                 /* Discard all messages to/from mode 4 state machines */
2174                 for (i = 0; i < internals->active_slave_count; i++) {
2175                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2176
2177                         RTE_ASSERT(port->rx_ring != NULL);
2178                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2179                                 rte_pktmbuf_free(pkt);
2180
2181                         RTE_ASSERT(port->tx_ring != NULL);
2182                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2183                                 rte_pktmbuf_free(pkt);
2184                 }
2185         }
2186
2187         if (internals->mode == BONDING_MODE_TLB ||
2188                         internals->mode == BONDING_MODE_ALB) {
2189                 bond_tlb_disable(internals);
2190                 for (i = 0; i < internals->active_slave_count; i++)
2191                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2192         }
2193
2194         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2195         eth_dev->data->dev_started = 0;
2196
2197         internals->link_status_polling_enabled = 0;
2198         for (i = 0; i < internals->slave_count; i++) {
2199                 uint16_t slave_id = internals->slaves[i].port_id;
2200                 if (find_slave_by_id(internals->active_slaves,
2201                                 internals->active_slave_count, slave_id) !=
2202                                                 internals->active_slave_count) {
2203                         internals->slaves[i].last_link_status = 0;
2204                         rte_eth_dev_stop(slave_id);
2205                         deactivate_slave(eth_dev, slave_id);
2206                 }
2207         }
2208 }
2209
2210 void
2211 bond_ethdev_close(struct rte_eth_dev *dev)
2212 {
2213         struct bond_dev_private *internals = dev->data->dev_private;
2214         uint16_t bond_port_id = internals->port_id;
2215         int skipped = 0;
2216         struct rte_flow_error ferror;
2217
2218         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2219         while (internals->slave_count != skipped) {
2220                 uint16_t port_id = internals->slaves[skipped].port_id;
2221
2222                 rte_eth_dev_stop(port_id);
2223
2224                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2225                         RTE_BOND_LOG(ERR,
2226                                      "Failed to remove port %d from bonded device %s",
2227                                      port_id, dev->device->name);
2228                         skipped++;
2229                 }
2230         }
2231         bond_flow_ops.flush(dev, &ferror);
2232         bond_ethdev_free_queues(dev);
2233         rte_bitmap_reset(internals->vlan_filter_bmp);
2234 }
2235
2236 /* forward declaration */
2237 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2238
2239 static void
2240 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2241 {
2242         struct bond_dev_private *internals = dev->data->dev_private;
2243
2244         uint16_t max_nb_rx_queues = UINT16_MAX;
2245         uint16_t max_nb_tx_queues = UINT16_MAX;
2246         uint16_t max_rx_desc_lim = UINT16_MAX;
2247         uint16_t max_tx_desc_lim = UINT16_MAX;
2248
2249         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2250
2251         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2252                         internals->candidate_max_rx_pktlen :
2253                         ETHER_MAX_JUMBO_FRAME_LEN;
2254
2255         /* Max number of tx/rx queues that the bonded device can support is the
2256          * minimum values of the bonded slaves, as all slaves must be capable
2257          * of supporting the same number of tx/rx queues.
2258          */
2259         if (internals->slave_count > 0) {
2260                 struct rte_eth_dev_info slave_info;
2261                 uint16_t idx;
2262
2263                 for (idx = 0; idx < internals->slave_count; idx++) {
2264                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2265                                         &slave_info);
2266
2267                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2268                                 max_nb_rx_queues = slave_info.max_rx_queues;
2269
2270                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2271                                 max_nb_tx_queues = slave_info.max_tx_queues;
2272
2273                         if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2274                                 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2275
2276                         if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2277                                 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2278                 }
2279         }
2280
2281         dev_info->max_rx_queues = max_nb_rx_queues;
2282         dev_info->max_tx_queues = max_nb_tx_queues;
2283
2284         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2285                sizeof(dev_info->default_rxconf));
2286         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2287                sizeof(dev_info->default_txconf));
2288
2289         dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2290         dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2291
2292         /**
2293          * If dedicated hw queues enabled for link bonding device in LACP mode
2294          * then we need to reduce the maximum number of data path queues by 1.
2295          */
2296         if (internals->mode == BONDING_MODE_8023AD &&
2297                 internals->mode4.dedicated_queues.enabled == 1) {
2298                 dev_info->max_rx_queues--;
2299                 dev_info->max_tx_queues--;
2300         }
2301
2302         dev_info->min_rx_bufsize = 0;
2303
2304         dev_info->rx_offload_capa = internals->rx_offload_capa;
2305         dev_info->tx_offload_capa = internals->tx_offload_capa;
2306         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2307         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2308         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2309
2310         dev_info->reta_size = internals->reta_size;
2311 }
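/*
 * Example of the capability intersection above: with two slaves reporting 8
 * and 16 rx queues the bonded device advertises 8; in mode 4 with dedicated
 * queues enabled it advertises 7, since one queue pair is reserved for LACP
 * traffic.
 */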
2312
2313 static int
2314 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2315 {
2316         int res;
2317         uint16_t i;
2318         struct bond_dev_private *internals = dev->data->dev_private;
2319
2320         /* don't do this while a slave is being added */
2321         rte_spinlock_lock(&internals->lock);
2322
2323         if (on)
2324                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2325         else
2326                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2327
2328         for (i = 0; i < internals->slave_count; i++) {
2329                 uint16_t port_id = internals->slaves[i].port_id;
2330
2331                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2332                 if (res == -ENOTSUP)
2333                         RTE_BOND_LOG(WARNING,
2334                                      "Setting VLAN filter on slave port %u not supported.",
2335                                      port_id);
2336         }
2337
2338         rte_spinlock_unlock(&internals->lock);
2339         return 0;
2340 }
2341
2342 static int
2343 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2344                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2345                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2346 {
2347         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2348                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2349                                         0, dev->data->numa_node);
2350         if (bd_rx_q == NULL)
2351                 return -1;
2352
2353         bd_rx_q->queue_id = rx_queue_id;
2354         bd_rx_q->dev_private = dev->data->dev_private;
2355
2356         bd_rx_q->nb_rx_desc = nb_rx_desc;
2357
2358         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2359         bd_rx_q->mb_pool = mb_pool;
2360
2361         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2362
2363         return 0;
2364 }
2365
2366 static int
2367 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2368                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2369                 const struct rte_eth_txconf *tx_conf)
2370 {
2371         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2372                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2373                                         0, dev->data->numa_node);
2374
2375         if (bd_tx_q == NULL)
2376                 return -1;
2377
2378         bd_tx_q->queue_id = tx_queue_id;
2379         bd_tx_q->dev_private = dev->data->dev_private;
2380
2381         bd_tx_q->nb_tx_desc = nb_tx_desc;
2382         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2383
2384         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2385
2386         return 0;
2387 }
2388
2389 static void
2390 bond_ethdev_rx_queue_release(void *queue)
2391 {
2392         if (queue == NULL)
2393                 return;
2394
2395         rte_free(queue);
2396 }
2397
2398 static void
2399 bond_ethdev_tx_queue_release(void *queue)
2400 {
2401         if (queue == NULL)
2402                 return;
2403
2404         rte_free(queue);
2405 }
2406
2407 static void
2408 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2409 {
2410         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2411         struct bond_dev_private *internals;
2412
2413         /* Default value for polling slave found is true as we don't want to
2414          * disable the polling thread if we cannot get the lock */
2415         int i, polling_slave_found = 1;
2416
2417         if (cb_arg == NULL)
2418                 return;
2419
2420         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2421         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2422
2423         if (!bonded_ethdev->data->dev_started ||
2424                 !internals->link_status_polling_enabled)
2425                 return;
2426
2427         /* If the device is currently being configured then don't check the
2428          * slaves' link status; wait until the next period */
2429         if (rte_spinlock_trylock(&internals->lock)) {
2430                 if (internals->slave_count > 0)
2431                         polling_slave_found = 0;
2432
2433                 for (i = 0; i < internals->slave_count; i++) {
2434                         if (!internals->slaves[i].link_status_poll_enabled)
2435                                 continue;
2436
2437                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2438                         polling_slave_found = 1;
2439
2440                         /* Update slave link status */
2441                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2442                                         internals->slaves[i].link_status_wait_to_complete);
2443
2444                         /* if link status has changed since last checked then call lsc
2445                          * event callback */
2446                         if (slave_ethdev->data->dev_link.link_status !=
2447                                         internals->slaves[i].last_link_status) {
2448                                 internals->slaves[i].last_link_status =
2449                                                 slave_ethdev->data->dev_link.link_status;
2450
2451                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2452                                                 RTE_ETH_EVENT_INTR_LSC,
2453                                                 &bonded_ethdev->data->port_id,
2454                                                 NULL);
2455                         }
2456                 }
2457                 rte_spinlock_unlock(&internals->lock);
2458         }
2459
2460         if (polling_slave_found)
2461                 /* Set alarm to continue monitoring link status of slave ethdevs */
2462                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2463                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2464 }
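/*
 * The monitor above re-arms itself with rte_eal_alarm_set(), whose period is
 * expressed in microseconds, hence the "* 1000" conversion from the
 * millisecond polling interval; polling stops automatically once every slave
 * supports LSC interrupts or the bonded device is stopped.
 */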
2465
2466 static int
2467 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2468 {
2469         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2470
2471         struct bond_dev_private *bond_ctx;
2472         struct rte_eth_link slave_link;
2473
2474         uint32_t idx;
2475
2476         bond_ctx = ethdev->data->dev_private;
2477
2478         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2479
2480         if (ethdev->data->dev_started == 0 ||
2481                         bond_ctx->active_slave_count == 0) {
2482                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2483                 return 0;
2484         }
2485
2486         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2487
2488         if (wait_to_complete)
2489                 link_update = rte_eth_link_get;
2490         else
2491                 link_update = rte_eth_link_get_nowait;
2492
2493         switch (bond_ctx->mode) {
2494         case BONDING_MODE_BROADCAST:
2495                 /**
2496                  * Setting link speed to UINT32_MAX to ensure we pick up the
2497                  * minimum value across all the active slaves
2498                  */
2499                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2500
2501                 /**
2502                  * link speed is minimum value of all the slaves link speed as
2503                  * packet loss will occur on this slave if transmission at rates
2504                  * greater than this are attempted
2505                  */
2506                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2507                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2508
2509                         if (slave_link.link_speed <
2510                                         ethdev->data->dev_link.link_speed)
2511                                 ethdev->data->dev_link.link_speed =
2512                                                 slave_link.link_speed;
2513                 }
2514                 break;
2515         case BONDING_MODE_ACTIVE_BACKUP:
2516                 /* Current primary slave */
2517                 link_update(bond_ctx->current_primary_port, &slave_link);
2518
2519                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2520                 break;
2521         case BONDING_MODE_8023AD:
2522                 ethdev->data->dev_link.link_autoneg =
2523                                 bond_ctx->mode4.slave_link.link_autoneg;
2524                 ethdev->data->dev_link.link_duplex =
2525                                 bond_ctx->mode4.slave_link.link_duplex;
2526                 /* fall through to update link speed */
2527         case BONDING_MODE_ROUND_ROBIN:
2528         case BONDING_MODE_BALANCE:
2529         case BONDING_MODE_TLB:
2530         case BONDING_MODE_ALB:
2531         default:
2532                 /**
2533                  * In these modes the maximum theoretical link speed is the
2534                  * sum of all the slaves' speeds
2535                  */
2536                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2537
2538                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2539                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2540
2541                         ethdev->data->dev_link.link_speed +=
2542                                         slave_link.link_speed;
2543                 }
2544         }
2545
2546
2547         return 0;
2548 }
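/*
 * Worked example for the speed reporting above, with two active slaves at
 * 10G and 25G: broadcast mode reports 10G (the minimum, since transmitting
 * faster would overrun the slower slave), active-backup reports the current
 * primary's speed, and the round-robin/balance/TLB/ALB/LACP modes report
 * 35G, the theoretical aggregate.
 */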
2549
2550
2551 static int
2552 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2553 {
2554         struct bond_dev_private *internals = dev->data->dev_private;
2555         struct rte_eth_stats slave_stats;
2556         int i, j;
2557
2558         for (i = 0; i < internals->slave_count; i++) {
2559                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2560
2561                 stats->ipackets += slave_stats.ipackets;
2562                 stats->opackets += slave_stats.opackets;
2563                 stats->ibytes += slave_stats.ibytes;
2564                 stats->obytes += slave_stats.obytes;
2565                 stats->imissed += slave_stats.imissed;
2566                 stats->ierrors += slave_stats.ierrors;
2567                 stats->oerrors += slave_stats.oerrors;
2568                 stats->rx_nombuf += slave_stats.rx_nombuf;
2569
2570                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2571                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2572                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2573                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2574                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2575                         stats->q_errors[j] += slave_stats.q_errors[j];
2576                 }
2577
2578         }
2579
2580         return 0;
2581 }
2582
2583 static void
2584 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2585 {
2586         struct bond_dev_private *internals = dev->data->dev_private;
2587         int i;
2588
2589         for (i = 0; i < internals->slave_count; i++)
2590                 rte_eth_stats_reset(internals->slaves[i].port_id);
2591 }
2592
2593 static void
2594 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2595 {
2596         struct bond_dev_private *internals = eth_dev->data->dev_private;
2597         int i;
2598
2599         internals->promiscuous_en = 1;
2600
2601         switch (internals->mode) {
2602         /* Promiscuous mode is propagated to all slaves */
2603         case BONDING_MODE_ROUND_ROBIN:
2604         case BONDING_MODE_BALANCE:
2605         case BONDING_MODE_BROADCAST:
2606                 for (i = 0; i < internals->slave_count; i++)
2607                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2608                 break;
2609         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2610         case BONDING_MODE_8023AD:
2611                 break;
2612         /* Promiscuous mode is propagated only to primary slave */
2613         case BONDING_MODE_ACTIVE_BACKUP:
2614         case BONDING_MODE_TLB:
2615         case BONDING_MODE_ALB:
2616         default:
2617                 /* Do not touch promisc when there cannot be primary ports */
2618                 if (internals->slave_count == 0)
2619                         break;
2620                 rte_eth_promiscuous_enable(internals->current_primary_port);
2621         }
2622 }
2623
2624 static void
2625 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2626 {
2627         struct bond_dev_private *internals = dev->data->dev_private;
2628         int i;
2629
2630         internals->promiscuous_en = 0;
2631
2632         switch (internals->mode) {
2633         /* Promiscuous mode is propagated to all slaves */
2634         case BONDING_MODE_ROUND_ROBIN:
2635         case BONDING_MODE_BALANCE:
2636         case BONDING_MODE_BROADCAST:
2637                 for (i = 0; i < internals->slave_count; i++)
2638                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2639                 break;
2640         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2641         case BONDING_MODE_8023AD:
2642                 break;
2643         /* Promiscuous mode is propagated only to primary slave */
2644         case BONDING_MODE_ACTIVE_BACKUP:
2645         case BONDING_MODE_TLB:
2646         case BONDING_MODE_ALB:
2647         default:
2648                 /* Do not touch promisc when there cannot be primary ports */
2649                 if (internals->slave_count == 0)
2650                         break;
2651                 rte_eth_promiscuous_disable(internals->current_primary_port);
2652         }
2653 }
2654
2655 static void
2656 bond_ethdev_delayed_lsc_propagation(void *arg)
2657 {
2658         if (arg == NULL)
2659                 return;
2660
2661         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2662                         RTE_ETH_EVENT_INTR_LSC, NULL);
2663 }
2664
2665 int
2666 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2667                 void *param, void *ret_param __rte_unused)
2668 {
2669         struct rte_eth_dev *bonded_eth_dev;
2670         struct bond_dev_private *internals;
2671         struct rte_eth_link link;
2672         int rc = -1;
2673
2674         uint8_t lsc_flag = 0;
2675         int valid_slave = 0;
2676         uint16_t active_pos;
2677         uint16_t i;
2678
2679         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2680                 return rc;
2681
2682         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2683
2684         if (check_for_bonded_ethdev(bonded_eth_dev))
2685                 return rc;
2686
2687         internals = bonded_eth_dev->data->dev_private;
2688
2689         /* If the device isn't started don't handle interrupts */
2690         if (!bonded_eth_dev->data->dev_started)
2691                 return rc;
2692
2693         /* verify that port_id is a valid slave of bonded port */
2694         for (i = 0; i < internals->slave_count; i++) {
2695                 if (internals->slaves[i].port_id == port_id) {
2696                         valid_slave = 1;
2697                         break;
2698                 }
2699         }
2700
2701         if (!valid_slave)
2702                 return rc;
2703
2704         /* Synchronize lsc callback parallel calls either by real link event
2705          * from the slave PMDs or by the bonding PMD itself.
2706          */
2707         rte_spinlock_lock(&internals->lsc_lock);
2708
2709         /* Search for port in active port list */
2710         active_pos = find_slave_by_id(internals->active_slaves,
2711                         internals->active_slave_count, port_id);
2712
2713         rte_eth_link_get_nowait(port_id, &link);
2714         if (link.link_status) {
2715                 if (active_pos < internals->active_slave_count)
2716                         goto link_update;
2717
2718                 /* check link state properties if bonded link is up */
2719                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2720                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2721                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2722                                              "for slave %d in bonding mode %d",
2723                                              port_id, internals->mode);
2724                 } else {
2725                         /* inherit slave link properties */
2726                         link_properties_set(bonded_eth_dev, &link);
2727                 }
2728
2729                 /* If no active slave ports then set this port to be
2730                  * the primary port.
2731                  */
2732                 if (internals->active_slave_count < 1) {
2733                         /* If first active slave, then change link status */
2734                         bonded_eth_dev->data->dev_link.link_status =
2735                                                                 ETH_LINK_UP;
2736                         internals->current_primary_port = port_id;
2737                         lsc_flag = 1;
2738
2739                         mac_address_slaves_update(bonded_eth_dev);
2740                 }
2741
2742                 activate_slave(bonded_eth_dev, port_id);
2743
2744                 /* If the user has defined the primary port then default to
2745                  * using it.
2746                  */
2747                 if (internals->user_defined_primary_port &&
2748                                 internals->primary_port == port_id)
2749                         bond_ethdev_primary_set(internals, port_id);
2750         } else {
2751                 if (active_pos == internals->active_slave_count)
2752                         goto link_update;
2753
2754                 /* Remove from active slave list */
2755                 deactivate_slave(bonded_eth_dev, port_id);
2756
2757                 if (internals->active_slave_count < 1)
2758                         lsc_flag = 1;
2759
2760                 /* Update primary id: take the first active slave from the list,
2761                  * or fall back to the configured primary port if none is available */
2762                 if (port_id == internals->current_primary_port) {
2763                         if (internals->active_slave_count > 0)
2764                                 bond_ethdev_primary_set(internals,
2765                                                 internals->active_slaves[0]);
2766                         else
2767                                 internals->current_primary_port = internals->primary_port;
2768                 }
2769         }
2770
2771 link_update:
2772         /**
2773          * Update bonded device link properties after any change to active
2774          * slaves
2775          */
2776         bond_ethdev_link_update(bonded_eth_dev, 0);
2777
2778         if (lsc_flag) {
2779                 /* Cancel any possible outstanding interrupts if delays are enabled */
2780                 if (internals->link_up_delay_ms > 0 ||
2781                         internals->link_down_delay_ms > 0)
2782                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2783                                         bonded_eth_dev);
2784
2785                 if (bonded_eth_dev->data->dev_link.link_status) {
2786                         if (internals->link_up_delay_ms > 0)
2787                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2788                                                 bond_ethdev_delayed_lsc_propagation,
2789                                                 (void *)bonded_eth_dev);
2790                         else
2791                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2792                                                 RTE_ETH_EVENT_INTR_LSC,
2793                                                 NULL);
2794
2795                 } else {
2796                         if (internals->link_down_delay_ms > 0)
2797                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2798                                                 bond_ethdev_delayed_lsc_propagation,
2799                                                 (void *)bonded_eth_dev);
2800                         else
2801                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2802                                                 RTE_ETH_EVENT_INTR_LSC,
2803                                                 NULL);
2804                 }
2805         }
2806
2807         rte_spinlock_unlock(&internals->lsc_lock);
2808
2809         return 0;
2810 }
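
/*
 * Usage sketch (illustrative, not part of the driver): an application can
 * observe the bonded port's own link transitions by registering for the
 * RTE_ETH_EVENT_INTR_LSC event that the callback above raises (possibly
 * delayed by up_delay/down_delay). "example_app_lsc_cb" is a hypothetical
 * application callback, not part of this PMD.
 */
static int
example_app_lsc_cb(uint16_t port_id, enum rte_eth_event_type type,
                void *cb_arg __rte_unused, void *ret_param __rte_unused)
{
        struct rte_eth_link link;

        if (type != RTE_ETH_EVENT_INTR_LSC)
                return 0;
        rte_eth_link_get_nowait(port_id, &link);
        return 0;
}

/* Registration, typically done once after the bonded port is configured:
 * rte_eth_dev_callback_register(bond_port, RTE_ETH_EVENT_INTR_LSC,
 *                               example_app_lsc_cb, NULL);
 */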
2811
2812 static int
2813 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2814                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2815 {
2816         unsigned i, j;
2817         int result = 0;
2818         int slave_reta_size;
2819         unsigned reta_count;
2820         struct bond_dev_private *internals = dev->data->dev_private;
2821
2822         if (reta_size != internals->reta_size)
2823                 return -EINVAL;
2824
2825         /* Copy RETA table */
2826         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2827
2828         for (i = 0; i < reta_count; i++) {
2829                 internals->reta_conf[i].mask = reta_conf[i].mask;
2830                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2831                         if ((reta_conf[i].mask >> j) & 0x01)
2832                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2833         }
2834
2835         /* Fill rest of array */
2836         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2837                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2838                                 sizeof(internals->reta_conf[0]) * reta_count);
2839
2840         /* Propagate RETA over slaves */
2841         for (i = 0; i < internals->slave_count; i++) {
2842                 slave_reta_size = internals->slaves[i].reta_size;
2843                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2844                                 &internals->reta_conf[0], slave_reta_size);
2845                 if (result < 0)
2846                         return result;
2847         }
2848
2849         return 0;
2850 }
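
/*
 * Usage sketch (illustrative, not part of the driver): programming the RETA
 * of a bonded port. The update above is accepted only if reta_size matches
 * the bonded port's, and is then replicated to every slave at the slave's
 * own reta_size. This helper spreads entries round-robin over nb_queues.
 */
static inline int
example_set_bond_reta(uint16_t bond_port, uint16_t reta_size,
                uint16_t nb_queues)
{
        struct rte_eth_rss_reta_entry64 conf[reta_size / RTE_RETA_GROUP_SIZE];
        unsigned int i, j;

        for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++) {
                conf[i].mask = ~0ULL;   /* update all 64 entries in the group */
                for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
                        conf[i].reta[j] =
                                (i * RTE_RETA_GROUP_SIZE + j) % nb_queues;
        }
        return rte_eth_dev_rss_reta_update(bond_port, conf, reta_size);
}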
2851
2852 static int
2853 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2854                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2855 {
2856         int i, j;
2857         struct bond_dev_private *internals = dev->data->dev_private;
2858
2859         if (reta_size != internals->reta_size)
2860                 return -EINVAL;
2861
2862         /* Copy RETA table */
2863         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2864                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2865                         if ((reta_conf[i].mask >> j) & 0x01)
2866                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2867
2868         return 0;
2869 }
2870
2871 static int
2872 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2873                 struct rte_eth_rss_conf *rss_conf)
2874 {
2875         int i, result = 0;
2876         struct bond_dev_private *internals = dev->data->dev_private;
2877         struct rte_eth_rss_conf bond_rss_conf;
2878
2879         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2880
2881         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2882
2883         if (bond_rss_conf.rss_hf != 0)
2884                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2885
2886         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2887                         sizeof(internals->rss_key)) {
2888                 if (bond_rss_conf.rss_key_len == 0)
2889                         bond_rss_conf.rss_key_len = 40; /* default 40-byte RSS key */
2890                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2891                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2892                                 internals->rss_key_len);
2893         }
2894
2895         for (i = 0; i < internals->slave_count; i++) {
2896                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2897                                 &bond_rss_conf);
2898                 if (result < 0)
2899                         return result;
2900         }
2901
2902         return 0;
2903 }
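
/*
 * Usage sketch (illustrative, not part of the driver): retargeting the RSS
 * hash of a bonded port at runtime. The requested rss_hf is masked above by
 * flow_type_rss_offloads (the intersection of slave capabilities) before
 * being pushed to each slave; a NULL key keeps the current key.
 */
static inline int
example_set_bond_rss_ip(uint16_t bond_port)
{
        struct rte_eth_rss_conf conf = {
                .rss_key = NULL,        /* keep the currently programmed key */
                .rss_key_len = 0,
                .rss_hf = ETH_RSS_IP,   /* hash on IPv4/IPv6 headers */
        };

        return rte_eth_dev_rss_hash_update(bond_port, &conf);
}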
2904
2905 static int
2906 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2907                 struct rte_eth_rss_conf *rss_conf)
2908 {
2909         struct bond_dev_private *internals = dev->data->dev_private;
2910
2911         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2912         rss_conf->rss_key_len = internals->rss_key_len;
2913         if (rss_conf->rss_key)
2914                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2915
2916         return 0;
2917 }
2918
2919 static int
2920 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2921 {
2922         struct rte_eth_dev *slave_eth_dev;
2923         struct bond_dev_private *internals = dev->data->dev_private;
2924         int ret, i;
2925
2926         rte_spinlock_lock(&internals->lock);
2927
2928         for (i = 0; i < internals->slave_count; i++) {
2929                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2930                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2931                         rte_spinlock_unlock(&internals->lock);
2932                         return -ENOTSUP;
2933                 }
2934         }
2935         for (i = 0; i < internals->slave_count; i++) {
2936                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2937                 if (ret < 0) {
2938                         rte_spinlock_unlock(&internals->lock);
2939                         return ret;
2940                 }
2941         }
2942
2943         rte_spinlock_unlock(&internals->lock);
2944         return 0;
2945 }
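
/*
 * Usage sketch (illustrative, not part of the driver): setting the MTU on a
 * bonded port. As implemented above, the call fails with -ENOTSUP unless
 * every slave implements mtu_set, and otherwise applies the MTU to all
 * slaves under the bonding lock. The 9000-byte value is illustrative.
 */
static inline int
example_set_bond_jumbo_mtu(uint16_t bond_port)
{
        return rte_eth_dev_set_mtu(bond_port, 9000);
}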
2946
2947 static int
2948 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
2949                         struct rte_ether_addr *addr)
2950 {
2951         if (mac_address_set(dev, addr)) {
2952                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2953                 return -EINVAL;
2954         }
2955
2956         return 0;
2957 }
2958
2959 static int
2960 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2961                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2962 {
2963         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2964                 *(const void **)arg = &bond_flow_ops;
2965                 return 0;
2966         }
2967         return -ENOTSUP;
2968 }
2969
2970 static int
2971 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
2972                         struct rte_ether_addr *mac_addr,
2973                         __rte_unused uint32_t index, uint32_t vmdq)
2974 {
2975         struct rte_eth_dev *slave_eth_dev;
2976         struct bond_dev_private *internals = dev->data->dev_private;
2977         int ret, i;
2978
2979         rte_spinlock_lock(&internals->lock);
2980
2981         for (i = 0; i < internals->slave_count; i++) {
2982                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2983                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2984                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2985                         ret = -ENOTSUP;
2986                         goto end;
2987                 }
2988         }
2989
2990         for (i = 0; i < internals->slave_count; i++) {
2991                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2992                                 mac_addr, vmdq);
2993                 if (ret < 0) {
2994                         /* rollback */
2995                         for (i--; i >= 0; i--)
2996                                 rte_eth_dev_mac_addr_remove(
2997                                         internals->slaves[i].port_id, mac_addr);
2998                         goto end;
2999                 }
3000         }
3001
3002         ret = 0;
3003 end:
3004         rte_spinlock_unlock(&internals->lock);
3005         return ret;
3006 }
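
/*
 * Usage sketch (illustrative, not part of the driver): adding a secondary
 * unicast MAC address to a bonded port. The implementation above programs
 * the address on every slave and rolls back the slaves already programmed
 * if any of them fails, so the operation is all-or-nothing. The locally
 * administered address below is illustrative.
 */
static inline int
example_add_bond_mac(uint16_t bond_port)
{
        struct rte_ether_addr mac = {
                .addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }
        };

        return rte_eth_dev_mac_addr_add(bond_port, &mac, 0 /* vmdq pool */);
}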
3007
3008 static void
3009 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3010 {
3011         struct rte_eth_dev *slave_eth_dev;
3012         struct bond_dev_private *internals = dev->data->dev_private;
3013         int i;
3014
3015         rte_spinlock_lock(&internals->lock);
3016
3017         for (i = 0; i < internals->slave_count; i++) {
3018                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3019                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3020                         goto end;
3021         }
3022
3023         struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3024
3025         for (i = 0; i < internals->slave_count; i++)
3026                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3027                                 mac_addr);
3028
3029 end:
3030         rte_spinlock_unlock(&internals->lock);
3031 }
3032
3033 const struct eth_dev_ops default_dev_ops = {
3034         .dev_start            = bond_ethdev_start,
3035         .dev_stop             = bond_ethdev_stop,
3036         .dev_close            = bond_ethdev_close,
3037         .dev_configure        = bond_ethdev_configure,
3038         .dev_infos_get        = bond_ethdev_info,
3039         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3040         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3041         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3042         .rx_queue_release     = bond_ethdev_rx_queue_release,
3043         .tx_queue_release     = bond_ethdev_tx_queue_release,
3044         .link_update          = bond_ethdev_link_update,
3045         .stats_get            = bond_ethdev_stats_get,
3046         .stats_reset          = bond_ethdev_stats_reset,
3047         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3048         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3049         .reta_update          = bond_ethdev_rss_reta_update,
3050         .reta_query           = bond_ethdev_rss_reta_query,
3051         .rss_hash_update      = bond_ethdev_rss_hash_update,
3052         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3053         .mtu_set              = bond_ethdev_mtu_set,
3054         .mac_addr_set         = bond_ethdev_mac_address_set,
3055         .mac_addr_add         = bond_ethdev_mac_addr_add,
3056         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3057         .filter_ctrl          = bond_filter_ctrl
3058 };
3059
3060 static int
3061 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3062 {
3063         const char *name = rte_vdev_device_name(dev);
3064         uint8_t socket_id = dev->device.numa_node;
3065         struct bond_dev_private *internals = NULL;
3066         struct rte_eth_dev *eth_dev = NULL;
3067         uint32_t vlan_filter_bmp_size;
3068
3069         /* now do all data allocation - for eth_dev structure, dummy pci driver
3070          * and internal (private) data
3071          */
3072
3073         /* reserve an ethdev entry */
3074         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3075         if (eth_dev == NULL) {
3076                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3077                 goto err;
3078         }
3079
3080         internals = eth_dev->data->dev_private;
3081         eth_dev->data->nb_rx_queues = (uint16_t)1;
3082         eth_dev->data->nb_tx_queues = (uint16_t)1;
3083
3084         /* Allocate memory for storing MAC addresses */
3085         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3086                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3087         if (eth_dev->data->mac_addrs == NULL) {
3088                 RTE_BOND_LOG(ERR,
3089                              "Failed to allocate %u bytes needed to store MAC addresses",
3090                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3091                 goto err;
3092         }
3093
3094         eth_dev->dev_ops = &default_dev_ops;
3095         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3096
3097         rte_spinlock_init(&internals->lock);
3098         rte_spinlock_init(&internals->lsc_lock);
3099
3100         internals->port_id = eth_dev->data->port_id;
3101         internals->mode = BONDING_MODE_INVALID;
3102         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3103         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3104         internals->burst_xmit_hash = burst_xmit_l2_hash;
3105         internals->user_defined_mac = 0;
3106
3107         internals->link_status_polling_enabled = 0;
3108
3109         internals->link_status_polling_interval_ms =
3110                 DEFAULT_POLLING_INTERVAL_10_MS;
3111         internals->link_down_delay_ms = 0;
3112         internals->link_up_delay_ms = 0;
3113
3114         internals->slave_count = 0;
3115         internals->active_slave_count = 0;
3116         internals->rx_offload_capa = 0;
3117         internals->tx_offload_capa = 0;
3118         internals->rx_queue_offload_capa = 0;
3119         internals->tx_queue_offload_capa = 0;
3120         internals->candidate_max_rx_pktlen = 0;
3121         internals->max_rx_pktlen = 0;
3122
3123         /* Initially allow to choose any offload type */
3124         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3125
3126         memset(&internals->default_rxconf, 0,
3127                sizeof(internals->default_rxconf));
3128         memset(&internals->default_txconf, 0,
3129                sizeof(internals->default_txconf));
3130
3131         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3132         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3133
3134         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3135         memset(internals->slaves, 0, sizeof(internals->slaves));
3136
3137         TAILQ_INIT(&internals->flow_list);
3138         internals->flow_isolated_valid = 0;
3139
3140         /* Set mode 4 default configuration */
3141         bond_mode_8023ad_setup(eth_dev, NULL);
3142         if (bond_ethdev_mode_set(eth_dev, mode)) {
3143                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3144                                  eth_dev->data->port_id, mode);
3145                 goto err;
3146         }
3147
3148         vlan_filter_bmp_size =
3149                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3150         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3151                                                    RTE_CACHE_LINE_SIZE);
3152         if (internals->vlan_filter_bmpmem == NULL) {
3153                 RTE_BOND_LOG(ERR,
3154                              "Failed to allocate vlan bitmap for bonded device %u",
3155                              eth_dev->data->port_id);
3156                 goto err;
3157         }
3158
3159         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3160                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3161         if (internals->vlan_filter_bmp == NULL) {
3162                 RTE_BOND_LOG(ERR,
3163                              "Failed to init vlan bitmap for bonded device %u",
3164                              eth_dev->data->port_id);
3165                 rte_free(internals->vlan_filter_bmpmem);
3166                 goto err;
3167         }
3168
3169         return eth_dev->data->port_id;
3170
3171 err:
3172         rte_free(internals);
3173         if (eth_dev != NULL)
3174                 eth_dev->data->dev_private = NULL;
3175         rte_eth_dev_release_port(eth_dev);
3176         return -1;
3177 }
3178
3179 static int
3180 bond_probe(struct rte_vdev_device *dev)
3181 {
3182         const char *name;
3183         struct bond_dev_private *internals;
3184         struct rte_kvargs *kvlist;
3185         uint8_t bonding_mode, socket_id;
3186         int  arg_count, port_id;
3187         uint8_t agg_mode;
3188         struct rte_eth_dev *eth_dev;
3189
3190         if (!dev)
3191                 return -EINVAL;
3192
3193         name = rte_vdev_device_name(dev);
3194         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3195
3196         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3197                 eth_dev = rte_eth_dev_attach_secondary(name);
3198                 if (!eth_dev) {
3199                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3200                         return -1;
3201                 }
3202                 /* TODO: request info from primary to set up Rx and Tx */
3203                 eth_dev->dev_ops = &default_dev_ops;
3204                 eth_dev->device = &dev->device;
3205                 rte_eth_dev_probing_finish(eth_dev);
3206                 return 0;
3207         }
3208
3209         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3210                 pmd_bond_init_valid_arguments);
3211         if (kvlist == NULL)
3212                 return -1;
3213
3214         /* Parse link bonding mode */
3215         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3216                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3217                                 &bond_ethdev_parse_slave_mode_kvarg,
3218                                 &bonding_mode) != 0) {
3219                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3220                                         name);
3221                         goto parse_error;
3222                 }
3223         } else {
3224                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3225                                 "device %s", name);
3226                 goto parse_error;
3227         }
3228
3229         /* Parse socket id to create bonding device on */
3230         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3231         if (arg_count == 1) {
3232                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3233                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3234                                 != 0) {
3235                         RTE_BOND_LOG(ERR, "Invalid socket id specified for "
3236                                         "bonded device %s", name);
3237                         goto parse_error;
3238                 }
3239         } else if (arg_count > 1) {
3240                 RTE_BOND_LOG(ERR, "Socket id can be specified only once for "
3241                                 "bonded device %s", name);
3242                 goto parse_error;
3243         } else {
3244                 socket_id = rte_socket_id();
3245         }
3246
3247         dev->device.numa_node = socket_id;
3248
3249         /* Create link bonding eth device */
3250         port_id = bond_alloc(dev, bonding_mode);
3251         if (port_id < 0) {
3252                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3253                                 "socket %u.", name, bonding_mode, socket_id);
3254                 goto parse_error;
3255         }
3256         internals = rte_eth_devices[port_id].data->dev_private;
3257         internals->kvlist = kvlist;
3258
3259         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3260                 if (rte_kvargs_process(kvlist,
3261                                 PMD_BOND_AGG_MODE_KVARG,
3262                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3263                                 &agg_mode) != 0) {
3264                         RTE_BOND_LOG(ERR,
3265                                         "Failed to parse agg selection mode for bonded device %s",
3266                                         name);
3267                         goto parse_error;
3268                 }
3269
3270                 if (internals->mode == BONDING_MODE_8023AD)
3271                         internals->mode4.agg_selection = agg_mode;
3272         } else {
3273                 internals->mode4.agg_selection = AGG_STABLE;
3274         }
3275
3276         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3277         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3278                         "socket %u.", name, port_id, bonding_mode, socket_id);
3279         return 0;
3280
3281 parse_error:
3282         rte_kvargs_free(kvlist);
3283
3284         return -1;
3285 }
3286
3287 static int
3288 bond_remove(struct rte_vdev_device *dev)
3289 {
3290         struct rte_eth_dev *eth_dev;
3291         struct bond_dev_private *internals;
3292         const char *name;
3293
3294         if (!dev)
3295                 return -EINVAL;
3296
3297         name = rte_vdev_device_name(dev);
3298         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3299
3300         /* now free all allocated data - the eth_dev structure,
3301          * dummy pci driver and internal (private) data
3302          */
3303
3304         /* find an ethdev entry */
3305         eth_dev = rte_eth_dev_allocated(name);
3306         if (eth_dev == NULL)
3307                 return -ENODEV;
3308
3309         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3310                 return rte_eth_dev_release_port(eth_dev);
3311
3312         RTE_ASSERT(eth_dev->device == &dev->device);
3313
3314         internals = eth_dev->data->dev_private;
3315         if (internals->slave_count != 0)
3316                 return -EBUSY;
3317
3318         if (eth_dev->data->dev_started == 1) {
3319                 bond_ethdev_stop(eth_dev);
3320                 bond_ethdev_close(eth_dev);
3321         }
3322
3323         eth_dev->dev_ops = NULL;
3324         eth_dev->rx_pkt_burst = NULL;
3325         eth_dev->tx_pkt_burst = NULL;
3326
3327         internals = eth_dev->data->dev_private;
3328         /* Try to release the mempool used in mode 6. If the bonded
3329          * device is not in mode 6, freeing a NULL pointer is harmless.
3330          */
3331         rte_mempool_free(internals->mode6.mempool);
3332         rte_bitmap_free(internals->vlan_filter_bmp);
3333         rte_free(internals->vlan_filter_bmpmem);
3334
3335         rte_eth_dev_release_port(eth_dev);
3336
3337         return 0;
3338 }
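
/*
 * Usage sketch (illustrative, not part of the driver): creating and tearing
 * down a bonded device through the bonding API instead of --vdev arguments.
 * rte_eth_bond_free() ends up in bond_remove() above, which returns -EBUSY
 * while slaves are still attached, hence the explicit removals first.
 * "slave_a"/"slave_b" are assumed to be valid, already-probed port ids.
 */
static inline int
example_bond_lifecycle(uint16_t slave_a, uint16_t slave_b)
{
        int bond_port = rte_eth_bond_create("net_bonding0",
                        BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());

        if (bond_port < 0)
                return bond_port;
        if (rte_eth_bond_slave_add(bond_port, slave_a) != 0 ||
            rte_eth_bond_slave_add(bond_port, slave_b) != 0)
                return -1;
        /* ... configure, start and use bond_port here ... */
        rte_eth_bond_slave_remove(bond_port, slave_a);
        rte_eth_bond_slave_remove(bond_port, slave_b);
        return rte_eth_bond_free("net_bonding0");
}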
3339
3340 /* This part resolves the slave port ids after all the other pdevs and vdevs
3341  * have been allocated */
3342 static int
3343 bond_ethdev_configure(struct rte_eth_dev *dev)
3344 {
3345         const char *name = dev->device->name;
3346         struct bond_dev_private *internals = dev->data->dev_private;
3347         struct rte_kvargs *kvlist = internals->kvlist;
3348         int arg_count;
3349         uint16_t port_id = dev - rte_eth_devices;
3350         uint8_t agg_mode;
3351
3352         static const uint8_t default_rss_key[40] = {
3353                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3354                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3355                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3356                 0xBE, 0xAC, 0x01, 0xFA
3357         };
3358
3359         unsigned i, j;
3360
3361         /*
3362          * If RSS is enabled, fill table with default values and
3363          * set the key to the value specified in the port RSS configuration.
3364          * Fall back to default RSS key if the key is not specified
3365          */
3366         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3367                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3368                         internals->rss_key_len =
3369                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3370                         memcpy(internals->rss_key,
3371                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3372                                internals->rss_key_len);
3373                 } else {
3374                         internals->rss_key_len = sizeof(default_rss_key);
3375                         memcpy(internals->rss_key, default_rss_key,
3376                                internals->rss_key_len);
3377                 }
3378
3379                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3380                         internals->reta_conf[i].mask = ~0LL;
3381                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3382                                 internals->reta_conf[i].reta[j] =
3383                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3384                                                 dev->data->nb_rx_queues;
3385                 }
3386         }
3387
3388         /* set the max_rx_pktlen */
3389         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3390
3391         /*
3392          * if no kvlist, it means that this bonded device has been created
3393          * through the bonding api.
3394          */
3395         if (!kvlist)
3396                 return 0;
3397
3398         /* Parse MAC address for bonded device */
3399         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3400         if (arg_count == 1) {
3401                 struct rte_ether_addr bond_mac;
3402
3403                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3404                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3405                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3406                                      name);
3407                         return -1;
3408                 }
3409
3410                 /* Set MAC address */
3411                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3412                         RTE_BOND_LOG(ERR,
3413                                      "Failed to set mac address on bonded device %s",
3414                                      name);
3415                         return -1;
3416                 }
3417         } else if (arg_count > 1) {
3418                 RTE_BOND_LOG(ERR,
3419                              "MAC address can be specified only once for bonded device %s",
3420                              name);
3421                 return -1;
3422         }
3423
3424         /* Parse/set balance mode transmit policy */
3425         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3426         if (arg_count == 1) {
3427                 uint8_t xmit_policy;
3428
3429                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3430                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3431                     0) {
3432                         RTE_BOND_LOG(INFO,
3433                                      "Invalid xmit policy specified for bonded device %s",
3434                                      name);
3435                         return -1;
3436                 }
3437
3438                 /* Set balance mode transmit policy */
3439                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3440                         RTE_BOND_LOG(ERR,
3441                                      "Failed to set balance xmit policy on bonded device %s",
3442                                      name);
3443                         return -1;
3444                 }
3445         } else if (arg_count > 1) {
3446                 RTE_BOND_LOG(ERR,
3447                              "Transmit policy can be specified only once for bonded device %s",
3448                              name);
3449                 return -1;
3450         }
3451
3452         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3453                 if (rte_kvargs_process(kvlist,
3454                                        PMD_BOND_AGG_MODE_KVARG,
3455                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3456                                        &agg_mode) != 0) {
3457                         RTE_BOND_LOG(ERR,
3458                                      "Failed to parse agg selection mode for bonded device %s",
3459                                      name);
3460                 }
3461                 if (internals->mode == BONDING_MODE_8023AD) {
3462                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3463                                         agg_mode);
3464                         if (ret < 0) {
3465                                 RTE_BOND_LOG(ERR,
3466                                         "Invalid args for agg selection set for bonded device %s",
3467                                         name);
3468                                 return -1;
3469                         }
3470                 }
3471         }
3472
3473         /* Parse/add slave ports to bonded device */
3474         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3475                 struct bond_ethdev_slave_ports slave_ports;
3476                 unsigned i;
3477
3478                 memset(&slave_ports, 0, sizeof(slave_ports));
3479
3480                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3481                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3482                         RTE_BOND_LOG(ERR,
3483                                      "Failed to parse slave ports for bonded device %s",
3484                                      name);
3485                         return -1;
3486                 }
3487
3488                 for (i = 0; i < slave_ports.slave_count; i++) {
3489                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3490                                 RTE_BOND_LOG(ERR,
3491                                              "Failed to add port %d as slave to bonded device %s",
3492                                              slave_ports.slaves[i], name);
3493                         }
3494                 }
3495
3496         } else {
3497                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3498                 return -1;
3499         }
3500
3501         /* Parse/set primary slave port id*/
3502         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3503         if (arg_count == 1) {
3504                 uint16_t primary_slave_port_id;
3505
3506                 if (rte_kvargs_process(kvlist,
3507                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3508                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3509                                        &primary_slave_port_id) < 0) {
3510                         RTE_BOND_LOG(INFO,
3511                                      "Invalid primary slave port id specified for bonded device %s",
3512                                      name);
3513                         return -1;
3514                 }
3515
3516                 /* Set primary slave port id */
3517                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3518                     != 0) {
3519                         RTE_BOND_LOG(ERR,
3520                                      "Failed to set primary slave port %d on bonded device %s",
3521                                      primary_slave_port_id, name);
3522                         return -1;
3523                 }
3524         } else if (arg_count > 1) {
3525                 RTE_BOND_LOG(INFO,
3526                              "Primary slave can be specified only once for bonded device %s",
3527                              name);
3528                 return -1;
3529         }
3530
3531         /* Parse link status monitor polling interval */
3532         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3533         if (arg_count == 1) {
3534                 uint32_t lsc_poll_interval_ms;
3535
3536                 if (rte_kvargs_process(kvlist,
3537                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3538                                        &bond_ethdev_parse_time_ms_kvarg,
3539                                        &lsc_poll_interval_ms) < 0) {
3540                         RTE_BOND_LOG(INFO,
3541                                      "Invalid lsc polling interval value specified for bonded"
3542                                      " device %s", name);
3543                         return -1;
3544                 }
3545
3546                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3547                     != 0) {
3548                         RTE_BOND_LOG(ERR,
3549                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3550                                      lsc_poll_interval_ms, name);
3551                         return -1;
3552                 }
3553         } else if (arg_count > 1) {
3554                 RTE_BOND_LOG(INFO,
3555                              "LSC polling interval can be specified only once for bonded"
3556                              " device %s", name);
3557                 return -1;
3558         }
3559
3560         /* Parse link up interrupt propagation delay */
3561         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3562         if (arg_count == 1) {
3563                 uint32_t link_up_delay_ms;
3564
3565                 if (rte_kvargs_process(kvlist,
3566                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3567                                        &bond_ethdev_parse_time_ms_kvarg,
3568                                        &link_up_delay_ms) < 0) {
3569                         RTE_BOND_LOG(INFO,
3570                                      "Invalid link up propagation delay value specified for"
3571                                      " bonded device %s", name);
3572                         return -1;
3573                 }
3574
3575                 /* Set link up propagation delay */
3576                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3577                     != 0) {
3578                         RTE_BOND_LOG(ERR,
3579                                      "Failed to set link up propagation delay (%u ms) on bonded"
3580                                      " device %s", link_up_delay_ms, name);
3581                         return -1;
3582                 }
3583         } else if (arg_count > 1) {
3584                 RTE_BOND_LOG(INFO,
3585                              "Link up propagation delay can be specified only once for"
3586                              " bonded device %s", name);
3587                 return -1;
3588         }
3589
3590         /* Parse link down interrupt propagation delay */
3591         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3592         if (arg_count == 1) {
3593                 uint32_t link_down_delay_ms;
3594
3595                 if (rte_kvargs_process(kvlist,
3596                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3597                                        &bond_ethdev_parse_time_ms_kvarg,
3598                                        &link_down_delay_ms) < 0) {
3599                         RTE_BOND_LOG(INFO,
3600                                      "Invalid link down propagation delay value specified for"
3601                                      " bonded device %s", name);
3602                         return -1;
3603                 }
3604
3605                 /* Set link down propagation delay */
3606                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3607                     != 0) {
3608                         RTE_BOND_LOG(ERR,
3609                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3610                                      link_down_delay_ms, name);
3611                         return -1;
3612                 }
3613         } else if (arg_count > 1) {
3614                 RTE_BOND_LOG(INFO,
3615                              "Link down propagation delay can be specified only once for bonded device %s",
3616                              name);
3617                 return -1;
3618         }
3619
3620         return 0;
3621 }
3622
3623 struct rte_vdev_driver pmd_bond_drv = {
3624         .probe = bond_probe,
3625         .remove = bond_remove,
3626 };
3627
3628 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3629 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3630
3631 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3632         "slave=<ifc> "
3633         "primary=<ifc> "
3634         "mode=[0-6] "
3635         "xmit_policy=[l2 | l23 | l34] "
3636         "agg_mode=[count | stable | bandwidth] "
3637         "socket_id=<int> "
3638         "mac=<mac addr> "
3639         "lsc_poll_period_ms=<int> "
3640         "up_delay=<int> "
3641         "down_delay=<int>");
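
/*
 * Illustrative invocation (device names and PCI addresses are examples
 * only): creating a mode 1 (active-backup) bond over two ports at EAL
 * start-up, matching the parameter string above:
 *
 *   testpmd -l 0-3 -n 4 \
 *     --vdev 'net_bonding0,mode=1,slave=0000:00:08.0,slave=0000:00:09.0,\
 *             primary=0000:00:08.0,up_delay=500,down_delay=500' -- -i
 *
 * The kvargs are parsed in bond_probe() and bond_ethdev_configure() above.
 */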
3642
3643 int bond_logtype;
3644
3645 RTE_INIT(bond_init_log)
3646 {
3647         bond_logtype = rte_log_register("pmd.net.bond");
3648         if (bond_logtype >= 0)
3649                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3650 }