net: add rte prefix to ether functions
[dpdk.git] drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

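/*
 * Return the size in bytes of the VLAN tagging (up to two stacked
 * rte_vlan_hdr for QinQ) that follows the Ethernet header, and advance
 * *proto to the EtherType of the encapsulated payload.
 */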
static inline size_t
get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
		rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
		struct rte_vlan_hdr *vlan_hdr =
			(struct rte_vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct rte_vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct rte_vlan_hdr);
		}
	}
	return vlan_offset;
}

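/*
 * Round-robin RX burst: poll each active slave in turn, starting with
 * the slave after the one used by the previous burst, until nb_pkts
 * packets have been gathered or every slave has been polled once. The
 * starting slave index is persisted in internals->active_slave so that
 * receive load is spread across bursts.
 */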
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_total = 0;
	uint16_t slave_count;
	uint16_t active_slave;
	int i;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	internals = bd_rx_q->dev_private;
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave =
			rte_eth_rx_burst(internals->active_slaves[active_slave],
					 bd_rx_q->queue_id,
					 bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;
	return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}

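/*
 * Return true for untagged slow-protocol frames (LACPDUs and marker
 * PDUs), which must be diverted to the 802.3ad state machine instead of
 * being delivered to the application.
 */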
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be = rte_cpu_to_be_16(ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};

int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities do not allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify that all slaves in the bonding device support flow director */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}

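/*
 * Mode 4 RX when dedicated hardware queues are enabled: the rte_flow
 * rules installed above steer slow-protocol frames to a separate slave
 * queue, so this path can collect packets round-robin without
 * inspecting them in software.
 */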
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint16_t active_slave;
	uint16_t i;

	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Read packets from this slave */
		num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
						bd_rx_q->queue_id,
						bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;

		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}

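/*
 * Mode 4 TX when dedicated hardware queues are enabled: hash each mbuf
 * onto one of the slaves currently in the DISTRIBUTING state and send
 * the resulting per-slave bursts; packets a slave could not accept are
 * moved to the tail of bufs for the caller to retry or free.
 */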
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	/* 2-D array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves;
	 * must hold one entry per mbuf, not per port */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (unlikely(dist_slave_count < 1))
		return 0;

	/*
	 * Populate the per-slave mbuf arrays with the packets to be sent,
	 * selecting the output slave with a hash based on the xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < dist_slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
			       &slave_bufs[i][slave_tx_count],
			       slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}

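/*
 * Mode 4 RX (default path): receive round-robin from the active slaves,
 * then post-process the burst in software. LACPDUs and marker PDUs are
 * handed to the 802.3ad state machine, and packets from slaves that are
 * not COLLECTING, or whose destination MAC fails the filter when the
 * bonded device is not promiscuous, are dropped and compacted out of
 * the returned array.
 */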
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct rte_eth_dev *bonded_eth_dev =
					&rte_eth_devices[internals->port_id];
	struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
	struct rte_ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;  /* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t subtype;
	uint16_t i;
	uint16_t j;
	uint16_t k;

	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
					 COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {

			/* If the packet type is known and not pure L2, it
			 * cannot be a slow protocol frame; leave it in place */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove the packet from the array if it is a slow
			 * packet, or the slave is not in collecting state, or
			 * the bonding interface is not in promiscuous mode and
			 * the destination address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
				!collecting ||
				(!promisc &&
				 !rte_is_multicast_ether_addr(&hdr->d_addr) &&
				 !rte_is_same_ether_addr(bond_mac,
						     &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
					    internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
	switch (arp_op) {
	case RTE_ARP_OP_REQUEST:
		strlcpy(buf, "ARP Request", buf_len);
		return;
	case RTE_ARP_OP_REPLY:
		strlcpy(buf, "ARP Reply", buf_len);
		return;
	case RTE_ARP_OP_REVREQUEST:
		strlcpy(buf, "Reverse ARP Request", buf_len);
		return;
	case RTE_ARP_OP_REVREPLY:
		strlcpy(buf, "Reverse ARP Reply", buf_len);
		return;
	case RTE_ARP_OP_INVREQUEST:
		strlcpy(buf, "Peer Identify Request", buf_len);
		return;
	case RTE_ARP_OP_INVREPLY:
		strlcpy(buf, "Peer Identify Reply", buf_len);
		return;
	default:
		break;
	}
	strlcpy(buf, "Unknown", buf_len);
	return;
}
#endif

#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Update the RX or TX packet count for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update its stats */
	if (active_clients >= MAX_CLIENTS_NUMBER)
		return; /* table is full, drop the new client */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
		info,							\
		port,							\
		eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
		src_ip,							\
		eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
		dst_ip,							\
		arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info,
	struct rte_ether_hdr *eth_h, uint16_t port,
	uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct rte_arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	strlcpy(buf, info, 16);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
				ArpOp, sizeof(ArpOp));
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif

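/*
 * Mode 6 (ALB) RX: receive as in round-robin mode, then feed incoming
 * ARP packets to the mode 6 logic so the client table used for receive
 * load balancing stays up to date.
 */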
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct rte_ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}

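/*
 * Mode 0 TX: spread the burst over the active slaves in round-robin
 * order. The rotation index is static, so it is shared by all queues of
 * all bonded devices; unsent packets are moved to the tail of bufs.
 */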
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate the per-slave mbuf arrays with the packets to be sent on each slave */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
				       &slave_bufs[i][num_tx_slave],
				       tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}

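/*
 * Fold the source and destination MAC addresses into a 16-bit value,
 * one 16-bit word at a time; this is the L2 component of the transmit
 * hash policies below.
 */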
static inline uint16_t
ether_hash(struct rte_ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}

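/*
 * Transmit hash policies: map each mbuf of a burst to a slave index in
 * [0, slave_count) from its L2, L2+L3 or L3+L4 header fields, according
 * to the configured xmit policy.
 */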
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct rte_ether_hdr *eth_hdr;
	uint32_t hash;
	int i;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);

		hash = ether_hash(eth_hdr);

		slaves[i] = (hash ^= hash >> 8) % slave_count;
	}
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	uint16_t i;
	struct rte_ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	uint32_t hash, l3hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
		l3hash = 0;

		proto = eth_hdr->ether_type;
		hash = ether_hash(eth_hdr);

		vlan_offset = get_vlan_offset(eth_hdr, &proto);

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv4_hash(ipv4_hdr);

		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);
		}

		hash = hash ^ l3hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct rte_ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	int i;

	struct udp_hdr *udp_hdr;
	struct tcp_hdr *tcp_hdr;
	uint32_t hash, l3hash, l4hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
		size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
		proto = eth_hdr->ether_type;
		vlan_offset = get_vlan_offset(eth_hdr, &proto);
		l3hash = 0;
		l4hash = 0;

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			size_t ip_hdr_offset;

			l3hash = ipv4_hash(ipv4_hdr);

			/* there is no L4 header in fragmented packet */
			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
								== 0)) {
				ip_hdr_offset = (ipv4_hdr->version_ihl
					& IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
					tcp_hdr = (struct tcp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
							< pkt_end)
						l4hash = HASH_L4_PORTS(tcp_hdr);
				} else if (ipv4_hdr->next_proto_id ==
								IPPROTO_UDP) {
					udp_hdr = (struct udp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					if ((size_t)udp_hdr + sizeof(*udp_hdr)
							< pkt_end)
						l4hash = HASH_L4_PORTS(udp_hdr);
				}
			}
		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);

			if (ipv6_hdr->proto == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}

		hash = l3hash ^ l4hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}

struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}

static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}

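/*
 * Estimate the bandwidth still unused on a slave, as an integer part
 * and remainder, from the bytes it transmitted (load) during the
 * current reordering window. bandwidth_cmp() sorts slaves so those with
 * the most headroom come first.
 */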
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint16_t slave_id;
	uint16_t i;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}

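/*
 * Mode 5 (TLB) TX: walk the slaves in the order computed by the
 * periodic bandwidth callback (most spare bandwidth first) and fill
 * each slave until it stops accepting packets. Frames still carrying
 * the primary slave's source MAC are rewritten to the transmitting
 * slave's address, so the same MAC is not seen on several switch ports.
 */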
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct rte_ether_hdr *ether_hdr;
	struct rte_ether_addr primary_slave_addr;
	struct rte_ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j],
						struct rte_ether_hdr *);
			if (rte_is_same_ether_addr(&ether_hdr->s_addr,
							&primary_slave_addr))
				rte_ether_addr_copy(&active_slave_addr,
						&ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}

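/*
 * Mode 6 (ALB) TX: ARP packets are assigned to slaves through the mode
 * 6 client table, with their source MAC rewritten to that slave's
 * address, so each peer consistently learns one slave; all other
 * traffic is sent with the TLB policy. When the client table is marked
 * dirty (ntt), ARP update packets are also generated and transmitted.
 */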
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_BOND_LOG(ERR,
						     "Failed to allocate ARP packet from pool");
					continue;
				}
				pkt_size = sizeof(struct rte_ether_hdr) +
					sizeof(struct rte_arp_hdr) +
					client_info->vlan_count *
					sizeof(struct rte_vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			/* Move unsent packets to the end of bufs */
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
							struct rte_ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
							struct rte_ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	/* Array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	/*
	 * Populate the per-slave mbuf arrays with the packets to be sent,
	 * selecting the output slave with a hash based on the xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
			       &slave_bufs[i][slave_tx_count],
			       slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}

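/*
 * Mode 4 TX (default path): first drain any LACPDUs queued by the
 * control thread on each slave's tx_ring, then hash the data burst
 * across the slaves currently in the DISTRIBUTING state, as in the
 * dedicated-queue variant above.
 */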
1290 static uint16_t
1291 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1292                 uint16_t nb_bufs)
1293 {
1294         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1295         struct bond_dev_private *internals = bd_tx_q->dev_private;
1296
1297         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1298         uint16_t slave_count;
1299
1300         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1301         uint16_t dist_slave_count;
1302
1303         /* 2-D array to sort mbufs for transmission on each slave into */
1304         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1305         /* Number of mbufs for transmission on each slave */
1306         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1307         /* Mapping array generated by hash function to map mbufs to slaves */
1308         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1309
1310         uint16_t slave_tx_count;
1311         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1312
1313         uint16_t i;
1314
1315         /* Copy slave list to protect against slave up/down changes during tx
1316          * bursting */
1317         slave_count = internals->active_slave_count;
1318         if (unlikely(slave_count < 1))
1319                 return 0;
1320
1321         memcpy(slave_port_ids, internals->active_slaves,
1322                         sizeof(slave_port_ids[0]) * slave_count);
1323
1324         /* Check for LACP control packets and send if available */
1325         for (i = 0; i < slave_count; i++) {
1326                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1327                 struct rte_mbuf *ctrl_pkt = NULL;
1328
1329                 if (likely(rte_ring_empty(port->tx_ring)))
1330                         continue;
1331
1332                 if (rte_ring_dequeue(port->tx_ring,
1333                                      (void **)&ctrl_pkt) != -ENOENT) {
1334                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1335                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1336                         /*
1337                          * re-enqueue LAG control plane packets to buffering
1338                          * ring if transmission fails so the packet isn't lost.
1339                          */
1340                         if (slave_tx_count != 1)
1341                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1342                 }
1343         }
1344
1345         if (unlikely(nb_bufs == 0))
1346                 return 0;
1347
1348         dist_slave_count = 0;
1349         for (i = 0; i < slave_count; i++) {
1350                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1351
1352                 if (ACTOR_STATE(port, DISTRIBUTING))
1353                         dist_slave_port_ids[dist_slave_count++] =
1354                                         slave_port_ids[i];
1355         }
1356
1357         if (likely(dist_slave_count > 0)) {
1358
1359                 /*
1360                  * Distribute the packets among the distributing slaves,
1361                  * selecting each packet's output slave with a hash based
1362                  * on the configured xmit policy
1363                  */
1363                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1364                                 bufs_slave_port_idxs);
1365
1366                 for (i = 0; i < nb_bufs; i++) {
1367                         /*
1368                          * Populate slave mbuf arrays with mbufs for that
1369                          * slave
1370                          */
1371                         uint16_t slave_idx = bufs_slave_port_idxs[i];
1372
1373                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1374                                         bufs[i];
1375                 }
1376
1377
1378                 /* Send packet burst on each slave device */
1379                 for (i = 0; i < dist_slave_count; i++) {
1380                         if (slave_nb_bufs[i] == 0)
1381                                 continue;
1382
1383                         slave_tx_count = rte_eth_tx_burst(
1384                                         dist_slave_port_ids[i],
1385                                         bd_tx_q->queue_id, slave_bufs[i],
1386                                         slave_nb_bufs[i]);
1387
1388                         total_tx_count += slave_tx_count;
1389
1390                         /* If tx burst fails move packets to end of bufs */
1391                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1392                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1393                                                 slave_tx_count;
1394                                 total_tx_fail_count += slave_tx_fail_count;
1395
1396                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1397                                        &slave_bufs[i][slave_tx_count],
1398                                        slave_tx_fail_count * sizeof(bufs[0]));
1399                         }
1400                 }
1401         }
1402
1403         return total_tx_count;
1404 }
1405
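/*
 * Transmit a burst in broadcast mode: every active slave sends every
 * packet, so each mbuf's reference count is first raised by
 * (num_of_slaves - 1). On partial failure the untransmitted copies are
 * freed for every slave except the most successful one, since the
 * caller only knows about a single reference per packet; the count for
 * the most successful slave is what gets returned.
 */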
1406 static uint16_t
1407 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1408                 uint16_t nb_pkts)
1409 {
1410         struct bond_dev_private *internals;
1411         struct bond_tx_queue *bd_tx_q;
1412
1413         uint16_t slaves[RTE_MAX_ETHPORTS];
1414         uint8_t tx_failed_flag = 0;
1415         uint16_t num_of_slaves;
1416
1417         uint16_t max_nb_of_tx_pkts = 0;
1418
1419         int slave_tx_total[RTE_MAX_ETHPORTS];
1420         int i, most_successful_tx_slave = -1;
1421
1422         bd_tx_q = (struct bond_tx_queue *)queue;
1423         internals = bd_tx_q->dev_private;
1424
1425         /* Copy slave list to protect against slave up/down changes during tx
1426          * bursting */
1427         num_of_slaves = internals->active_slave_count;
1428         memcpy(slaves, internals->active_slaves,
1429                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1430
1431         if (num_of_slaves < 1)
1432                 return 0;
1433
1434         /* Increment reference count on mbufs */
1435         for (i = 0; i < nb_pkts; i++)
1436                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1437
1438         /* Transmit burst on each active slave */
1439         for (i = 0; i < num_of_slaves; i++) {
1440                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1441                                         bufs, nb_pkts);
1442
1443                 if (unlikely(slave_tx_total[i] < nb_pkts))
1444                         tx_failed_flag = 1;
1445
1446                 /* record the value and slave index for the slave which transmits the
1447                  * maximum number of packets */
1448                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1449                         max_nb_of_tx_pkts = slave_tx_total[i];
1450                         most_successful_tx_slave = i;
1451                 }
1452         }
1453
1454         /* if slaves fail to transmit packets from burst, the calling application
1455          * is not expected to know about multiple references to packets so we must
1456          * handle failures of all packets except those of the most successful slave
1457          */
1458         if (unlikely(tx_failed_flag))
1459                 for (i = 0; i < num_of_slaves; i++)
1460                         if (i != most_successful_tx_slave)
1461                                 while (slave_tx_total[i] < nb_pkts)
1462                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1463
1464         return max_nb_of_tx_pkts;
1465 }
1466
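/*
 * Record the link properties the bonded device should advertise. In
 * mode 4 the first slave's speed/duplex/autoneg are saved so that all
 * subsequently added slaves can be checked against them (see
 * link_properties_valid() below); other modes simply report autoneg
 * and full duplex.
 */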
1467 static void
1468 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1469 {
1470         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1471
1472         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1473                 /**
1474                  * If in mode 4 then save the link properties of the first
1475                  * slave, all subsequent slaves must match these properties
1476                  */
1477                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1478
1479                 bond_link->link_autoneg = slave_link->link_autoneg;
1480                 bond_link->link_duplex = slave_link->link_duplex;
1481                 bond_link->link_speed = slave_link->link_speed;
1482         } else {
1483                 /**
1484                  * In any other mode the link properties are set to default
1485                  * values of AUTONEG/DUPLEX
1486                  */
1487                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1488                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1489         }
1490 }
1491
1492 static int
1493 link_properties_valid(struct rte_eth_dev *ethdev,
1494                 struct rte_eth_link *slave_link)
1495 {
1496         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1497
1498         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1499                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1500
1501                 if (bond_link->link_duplex != slave_link->link_duplex ||
1502                         bond_link->link_autoneg != slave_link->link_autoneg ||
1503                         bond_link->link_speed != slave_link->link_speed)
1504                         return -1;
1505         }
1506
1507         return 0;
1508 }
1509
1510 int
1511 mac_address_get(struct rte_eth_dev *eth_dev,
1512                 struct rte_ether_addr *dst_mac_addr)
1513 {
1514         struct rte_ether_addr *mac_addr;
1515
1516         if (eth_dev == NULL) {
1517                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1518                 return -1;
1519         }
1520
1521         if (dst_mac_addr == NULL) {
1522                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1523                 return -1;
1524         }
1525
1526         mac_addr = eth_dev->data->mac_addrs;
1527
1528         rte_ether_addr_copy(mac_addr, dst_mac_addr);
1529         return 0;
1530 }
1531
1532 int
1533 mac_address_set(struct rte_eth_dev *eth_dev,
1534                 struct rte_ether_addr *new_mac_addr)
1535 {
1536         struct rte_ether_addr *mac_addr;
1537
1538         if (eth_dev == NULL) {
1539                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1540                 return -1;
1541         }
1542
1543         if (new_mac_addr == NULL) {
1544                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1545                 return -1;
1546         }
1547
1548         mac_addr = eth_dev->data->mac_addrs;
1549
1550         /* If new MAC is different to current MAC then update */
1551         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1552                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1553
1554         return 0;
1555 }
1556
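/* All-zero address (static const objects are zero-initialised); used as
 * an end-of-list sentinel when scanning the device's mac_addrs array. */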
1557 static const struct rte_ether_addr null_mac_addr;
1558
1559 /*
1560  * Add additional MAC addresses to the slave
1561  */
1562 int
1563 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1564                 uint16_t slave_port_id)
1565 {
1566         int i, ret;
1567         struct rte_ether_addr *mac_addr;
1568
1569         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1570                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1571                 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1572                         break;
1573
1574                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1575                 if (ret < 0) {
1576                         /* rollback */
1577                         for (i--; i > 0; i--)
1578                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1579                                         &bonded_eth_dev->data->mac_addrs[i]);
1580                         return ret;
1581                 }
1582         }
1583
1584         return 0;
1585 }
1586
1587 /*
1588  * Remove additional MAC addresses from the slave
1589  */
1590 int
1591 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1592                 uint16_t slave_port_id)
1593 {
1594         int i, rc, ret;
1595         struct rte_ether_addr *mac_addr;
1596
1597         rc = 0;
1598         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1599                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1600                 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1601                         break;
1602
1603                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1604                 /* save only the first error */
1605                 if (ret < 0 && rc == 0)
1606                         rc = ret;
1607         }
1608
1609         return rc;
1610 }
1611
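/*
 * Push MAC addresses down to the slaves according to the bonding mode:
 * round-robin/balance/broadcast program the bonded MAC on every slave,
 * mode 4 defers to the 802.3AD state machine, and active-backup/TLB/ALB
 * program the bonded MAC only on the current primary while restoring
 * each other slave's persisted (original) MAC.
 */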
1612 int
1613 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1614 {
1615         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1616         int i;
1617
1618         /* Update slave devices MAC addresses */
1619         if (internals->slave_count < 1)
1620                 return -1;
1621
1622         switch (internals->mode) {
1623         case BONDING_MODE_ROUND_ROBIN:
1624         case BONDING_MODE_BALANCE:
1625         case BONDING_MODE_BROADCAST:
1626                 for (i = 0; i < internals->slave_count; i++) {
1627                         if (rte_eth_dev_default_mac_addr_set(
1628                                         internals->slaves[i].port_id,
1629                                         bonded_eth_dev->data->mac_addrs)) {
1630                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1631                                                 internals->slaves[i].port_id);
1632                                 return -1;
1633                         }
1634                 }
1635                 break;
1636         case BONDING_MODE_8023AD:
1637                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1638                 break;
1639         case BONDING_MODE_ACTIVE_BACKUP:
1640         case BONDING_MODE_TLB:
1641         case BONDING_MODE_ALB:
1642         default:
1643                 for (i = 0; i < internals->slave_count; i++) {
1644                         if (internals->slaves[i].port_id ==
1645                                         internals->current_primary_port) {
1646                                 if (rte_eth_dev_default_mac_addr_set(
1647                                                 internals->primary_port,
1648                                                 bonded_eth_dev->data->mac_addrs)) {
1649                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1650                                                         internals->current_primary_port);
1651                                         return -1;
1652                                 }
1653                         } else {
1654                                 if (rte_eth_dev_default_mac_addr_set(
1655                                                 internals->slaves[i].port_id,
1656                                                 &internals->slaves[i].persisted_mac_addr)) {
1657                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1658                                                         internals->slaves[i].port_id);
1659                                         return -1;
1660                                 }
1661                         }
1662                 }
1663         }
1664
1665         return 0;
1666 }
1667
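/*
 * Install the rx/tx burst handlers matching the requested bonding mode.
 * For mode 4 and ALB this also runs the mode-specific enable hook; with
 * dedicated queues enabled in mode 4 the "fast queue" handlers are used,
 * as LACP traffic is steered to its own queue pair by flow rules (see
 * slave_configure()).
 */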
1668 int
1669 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1670 {
1671         struct bond_dev_private *internals;
1672
1673         internals = eth_dev->data->dev_private;
1674
1675         switch (mode) {
1676         case BONDING_MODE_ROUND_ROBIN:
1677                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1678                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1679                 break;
1680         case BONDING_MODE_ACTIVE_BACKUP:
1681                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1682                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1683                 break;
1684         case BONDING_MODE_BALANCE:
1685                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1686                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1687                 break;
1688         case BONDING_MODE_BROADCAST:
1689                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1690                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1691                 break;
1692         case BONDING_MODE_8023AD:
1693                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1694                         return -1;
1695
1696                 if (internals->mode4.dedicated_queues.enabled == 0) {
1697                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1698                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1699                         RTE_BOND_LOG(WARNING,
1700                                 "Using mode 4, it is necessary to do TX burst "
1701                                 "and RX burst at least every 100ms.");
1702                 } else {
1703                         /* Use flow director's optimization */
1704                         eth_dev->rx_pkt_burst =
1705                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1706                         eth_dev->tx_pkt_burst =
1707                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1708                 }
1709                 break;
1710         case BONDING_MODE_TLB:
1711                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1712                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1713                 break;
1714         case BONDING_MODE_ALB:
1715                 if (bond_mode_alb_enable(eth_dev) != 0)
1716                         return -1;
1717
1718                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1719                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1720                 break;
1721         default:
1722                 return -1;
1723         }
1724
1725         internals->mode = mode;
1726
1727         return 0;
1728 }
1729
1730
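/*
 * Prepare a slave's resources for LACP "slow protocol" frames: lazily
 * create a per-slave mempool for control packets and, when dedicated
 * queues are enabled, set up the extra rx/tx queue pair reserved for
 * them (the rx queue draws its mbufs from that slow pool).
 */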
1731 static int
1732 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1733                 struct rte_eth_dev *slave_eth_dev)
1734 {
1735         int errval = 0;
1736         struct bond_dev_private *internals = (struct bond_dev_private *)
1737                 bonded_eth_dev->data->dev_private;
1738         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1739
1740         if (port->slow_pool == NULL) {
1741                 char mem_name[256];
1742                 int slave_id = slave_eth_dev->data->port_id;
1743
1744                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1745                                 slave_id);
1746                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1747                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1748                         slave_eth_dev->data->numa_node);
1749
1750                 /* Any memory allocation failure in initialization is critical because
1751                  * resources can't be freed, so reinitialization is impossible. */
1752                 if (port->slow_pool == NULL) {
1753                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1754                                 slave_id, mem_name, rte_strerror(rte_errno));
1755                 }
1756         }
1757
1758         if (internals->mode4.dedicated_queues.enabled == 1) {
1759                 /* Configure slow Rx queue */
1760
1761                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1762                                 internals->mode4.dedicated_queues.rx_qid, 128,
1763                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1764                                 NULL, port->slow_pool);
1765                 if (errval != 0) {
1766                         RTE_BOND_LOG(ERR,
1767                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1768                                         slave_eth_dev->data->port_id,
1769                                         internals->mode4.dedicated_queues.rx_qid,
1770                                         errval);
1771                         return errval;
1772                 }
1773
1774                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1775                                 internals->mode4.dedicated_queues.tx_qid, 512,
1776                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1777                                 NULL);
1778                 if (errval != 0) {
1779                         RTE_BOND_LOG(ERR,
1780                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1781                                 slave_eth_dev->data->port_id,
1782                                 internals->mode4.dedicated_queues.tx_qid,
1783                                 errval);
1784                         return errval;
1785                 }
1786         }
1787         return 0;
1788 }
1789
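/*
 * (Re)configure a slave to mirror the bonded device: stop it, propagate
 * LSC interrupt, RSS and VLAN-filter settings and the MTU, configure it
 * with the bonded device's queue counts (plus one queue pair if mode-4
 * dedicated queues are enabled), replicate every rx/tx queue setup,
 * install the LACP flow rule if needed, start the port, and finally
 * synchronise the RSS RETA and initial link status.
 */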
1790 int
1791 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1792                 struct rte_eth_dev *slave_eth_dev)
1793 {
1794         struct bond_rx_queue *bd_rx_q;
1795         struct bond_tx_queue *bd_tx_q;
1796         uint16_t nb_rx_queues;
1797         uint16_t nb_tx_queues;
1798
1799         int errval;
1800         uint16_t q_id;
1801         struct rte_flow_error flow_error;
1802
1803         struct bond_dev_private *internals = (struct bond_dev_private *)
1804                 bonded_eth_dev->data->dev_private;
1805
1806         /* Stop slave */
1807         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1808
1809         /* Enable interrupts on slave device if supported */
1810         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1811                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1812
1813         /* If RSS is enabled for bonding, try to enable it for slaves  */
1814         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1815                 if (internals->rss_key_len != 0) {
1816                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1817                                         internals->rss_key_len;
1818                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1819                                         internals->rss_key;
1820                 } else {
1821                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1822                 }
1823
1824                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1825                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1826                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1827                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1828         }
1829
1830         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1831                         DEV_RX_OFFLOAD_VLAN_FILTER)
1832                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1833                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1834         else
1835                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1836                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1837
1838         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1839         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1840
1841         if (internals->mode == BONDING_MODE_8023AD) {
1842                 if (internals->mode4.dedicated_queues.enabled == 1) {
1843                         nb_rx_queues++;
1844                         nb_tx_queues++;
1845                 }
1846         }
1847
1848         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1849                                      bonded_eth_dev->data->mtu);
1850         if (errval != 0 && errval != -ENOTSUP) {
1851                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1852                                 slave_eth_dev->data->port_id, errval);
1853                 return errval;
1854         }
1855
1856         /* Configure device */
1857         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1858                         nb_rx_queues, nb_tx_queues,
1859                         &(slave_eth_dev->data->dev_conf));
1860         if (errval != 0) {
1861                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1862                                 slave_eth_dev->data->port_id, errval);
1863                 return errval;
1864         }
1865
1866         /* Setup Rx Queues */
1867         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1868                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1869
1870                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1871                                 bd_rx_q->nb_rx_desc,
1872                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1873                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1874                 if (errval != 0) {
1875                         RTE_BOND_LOG(ERR,
1876                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1877                                         slave_eth_dev->data->port_id, q_id, errval);
1878                         return errval;
1879                 }
1880         }
1881
1882         /* Setup Tx Queues */
1883         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1884                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1885
1886                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1887                                 bd_tx_q->nb_tx_desc,
1888                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1889                                 &bd_tx_q->tx_conf);
1890                 if (errval != 0) {
1891                         RTE_BOND_LOG(ERR,
1892                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1893                                 slave_eth_dev->data->port_id, q_id, errval);
1894                         return errval;
1895                 }
1896         }
1897
1898         if (internals->mode == BONDING_MODE_8023AD &&
1899                         internals->mode4.dedicated_queues.enabled == 1) {
1900                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1901                 if (errval != 0)
1902                         return errval;
1903
1904                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1905                                 slave_eth_dev->data->port_id) != 0) {
1906                         RTE_BOND_LOG(ERR,
1907                                 "bond_ethdev_8023ad_flow_verify: port=%d",
1908                                 slave_eth_dev->data->port_id);
1909                         return -1;
1910                 }
1911
1912                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1913                         rte_flow_destroy(slave_eth_dev->data->port_id,
1914                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1915                                         &flow_error);
1916
1917                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1918                                 slave_eth_dev->data->port_id);
1919         }
1920
1921         /* Start device */
1922         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1923         if (errval != 0) {
1924                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1925                                 slave_eth_dev->data->port_id, errval);
1926                 return -1;
1927         }
1928
1929         /* If RSS is enabled for bonding, synchronize RETA */
1930         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1931                 int i;
1932                 struct bond_dev_private *internals;
1933
1934                 internals = bonded_eth_dev->data->dev_private;
1935
1936                 for (i = 0; i < internals->slave_count; i++) {
1937                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1938                                 errval = rte_eth_dev_rss_reta_update(
1939                                                 slave_eth_dev->data->port_id,
1940                                                 &internals->reta_conf[0],
1941                                                 internals->slaves[i].reta_size);
1942                                 if (errval != 0) {
1943                                         RTE_BOND_LOG(WARNING,
1944                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1945                                                      " RSS Configuration for bonding may be inconsistent.",
1946                                                      slave_eth_dev->data->port_id, errval);
1947                                 }
1948                                 break;
1949                         }
1950                 }
1951         }
1952
1953         /* If lsc interrupt is set, check initial slave's link status */
1954         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1955                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1956                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1957                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1958                         NULL);
1959         }
1960
1961         return 0;
1962 }
1963
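/*
 * Remove a slave from the internal bookkeeping: compact the slaves
 * array and the per-flow arrays of slave flow handles over the removed
 * entry, then reset the slave ethdev so it must be reconfigured before
 * reuse.
 */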
1964 void
1965 slave_remove(struct bond_dev_private *internals,
1966                 struct rte_eth_dev *slave_eth_dev)
1967 {
1968         uint16_t i;
1969
1970         for (i = 0; i < internals->slave_count; i++)
1971                 if (internals->slaves[i].port_id ==
1972                                 slave_eth_dev->data->port_id)
1973                         break;
1974
1975         if (i < (internals->slave_count - 1)) {
1976                 struct rte_flow *flow;
1977
1978                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1979                                 sizeof(internals->slaves[0]) *
1980                                 (internals->slave_count - i - 1));
1981                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1982                         memmove(&flow->flows[i], &flow->flows[i + 1],
1983                                 sizeof(flow->flows[0]) *
1984                                 (internals->slave_count - i - 1));
1985                         flow->flows[internals->slave_count - 1] = NULL;
1986                 }
1987         }
1988
1989         internals->slave_count--;
1990
1991         /* force reconfiguration of slave interfaces */
1992         _rte_eth_dev_reset(slave_eth_dev);
1993 }
1994
1995 static void
1996 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1997
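/*
 * Record a new slave in the internal bookkeeping: remember its port id,
 * flag it for link-status polling if it cannot raise LSC interrupts,
 * and persist its current MAC so it can be restored later.
 */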
1998 void
1999 slave_add(struct bond_dev_private *internals,
2000                 struct rte_eth_dev *slave_eth_dev)
2001 {
2002         struct bond_slave_details *slave_details =
2003                         &internals->slaves[internals->slave_count];
2004
2005         slave_details->port_id = slave_eth_dev->data->port_id;
2006         slave_details->last_link_status = 0;
2007
2008         /* Mark slave devices that don't support interrupts so we can
2009          * compensate when we start the bond
2010          */
2011         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2012                 slave_details->link_status_poll_enabled = 1;
2013         }
2014
2015         slave_details->link_status_wait_to_complete = 0;
2016         /* save the slave's MAC so it can be restored if it leaves the bond */
2017         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2018                         sizeof(struct rte_ether_addr));
2019 }
2020
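/*
 * Set the current primary port. If there are no active slaves the
 * proposed port is accepted unconditionally; otherwise it only takes
 * effect when the port is found in the active slave list.
 */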
2021 void
2022 bond_ethdev_primary_set(struct bond_dev_private *internals,
2023                 uint16_t slave_port_id)
2024 {
2025         int i;
2026
2027         if (internals->active_slave_count < 1)
2028                 internals->current_primary_port = slave_port_id;
2029         else
2030                 /* Search bonded device slave ports for new proposed primary port */
2031                 for (i = 0; i < internals->active_slave_count; i++) {
2032                         if (internals->active_slaves[i] == slave_port_id)
2033                                 internals->current_primary_port = slave_port_id;
2034                 }
2035 }
2036
2037 static void
2038 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2039
2040 static int
2041 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2042 {
2043         struct bond_dev_private *internals;
2044         int i;
2045
2046         /* slave eth dev will be started by bonded device */
2047         if (check_for_bonded_ethdev(eth_dev)) {
2048                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2049                                 eth_dev->data->port_id);
2050                 return -1;
2051         }
2052
2053         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2054         eth_dev->data->dev_started = 1;
2055
2056         internals = eth_dev->data->dev_private;
2057
2058         if (internals->slave_count == 0) {
2059                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2060                 goto out_err;
2061         }
2062
2063         if (internals->user_defined_mac == 0) {
2064                 struct rte_ether_addr *new_mac_addr = NULL;
2065
2066                 for (i = 0; i < internals->slave_count; i++)
2067                         if (internals->slaves[i].port_id == internals->primary_port)
2068                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2069
2070                 if (new_mac_addr == NULL)
2071                         goto out_err;
2072
2073                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2074                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2075                                         eth_dev->data->port_id);
2076                         goto out_err;
2077                 }
2078         }
2079
2080         /* If bonded device is configured in promiscuous mode then re-apply config */
2081         if (internals->promiscuous_en)
2082                 bond_ethdev_promiscuous_enable(eth_dev);
2083
2084         if (internals->mode == BONDING_MODE_8023AD) {
2085                 if (internals->mode4.dedicated_queues.enabled == 1) {
2086                         internals->mode4.dedicated_queues.rx_qid =
2087                                         eth_dev->data->nb_rx_queues;
2088                         internals->mode4.dedicated_queues.tx_qid =
2089                                         eth_dev->data->nb_tx_queues;
2090                 }
2091         }
2092
2093
2094         /* Reconfigure each slave device if starting bonded device */
2095         for (i = 0; i < internals->slave_count; i++) {
2096                 struct rte_eth_dev *slave_ethdev =
2097                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2098                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2099                         RTE_BOND_LOG(ERR,
2100                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2101                                 eth_dev->data->port_id,
2102                                 internals->slaves[i].port_id);
2103                         goto out_err;
2104                 }
2105                 /* We will need to poll for link status if any slave doesn't
2106                  * support interrupts
2107                  */
2108                 if (internals->slaves[i].link_status_poll_enabled)
2109                         internals->link_status_polling_enabled = 1;
2110         }
2111
2112         /* start polling if needed */
2113         if (internals->link_status_polling_enabled) {
2114                 rte_eal_alarm_set(
2115                         internals->link_status_polling_interval_ms * 1000,
2116                         bond_ethdev_slave_link_status_change_monitor,
2117                         (void *)&rte_eth_devices[internals->port_id]);
2118         }
2119
2120         /* Update all slave devices MACs*/
2121         if (mac_address_slaves_update(eth_dev) != 0)
2122                 goto out_err;
2123
2124         if (internals->user_defined_primary_port)
2125                 bond_ethdev_primary_set(internals, internals->primary_port);
2126
2127         if (internals->mode == BONDING_MODE_8023AD)
2128                 bond_mode_8023ad_start(eth_dev);
2129
2130         if (internals->mode == BONDING_MODE_TLB ||
2131                         internals->mode == BONDING_MODE_ALB)
2132                 bond_tlb_enable(internals);
2133
2134         return 0;
2135
2136 out_err:
2137         eth_dev->data->dev_started = 0;
2138         return -1;
2139 }
2140
2141 static void
2142 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2143 {
2144         uint16_t i;
2145
2146         if (dev->data->rx_queues != NULL) {
2147                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2148                         rte_free(dev->data->rx_queues[i]);
2149                         dev->data->rx_queues[i] = NULL;
2150                 }
2151                 dev->data->nb_rx_queues = 0;
2152         }
2153
2154         if (dev->data->tx_queues != NULL) {
2155                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2156                         rte_free(dev->data->tx_queues[i]);
2157                         dev->data->tx_queues[i] = NULL;
2158                 }
2159                 dev->data->nb_tx_queues = 0;
2160         }
2161 }
2162
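/*
 * Stop the bonded device: in mode 4 halt the 802.3AD state machines and
 * drain their rx/tx rings, in TLB/ALB disable the balancing callback
 * and clear the per-slave byte counters, then stop and deactivate every
 * currently active slave.
 */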
2163 void
2164 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2165 {
2166         struct bond_dev_private *internals = eth_dev->data->dev_private;
2167         uint16_t i;
2168
2169         if (internals->mode == BONDING_MODE_8023AD) {
2170                 struct port *port;
2171                 void *pkt = NULL;
2172
2173                 bond_mode_8023ad_stop(eth_dev);
2174
2175                 /* Discard all messages to/from mode 4 state machines */
2176                 for (i = 0; i < internals->active_slave_count; i++) {
2177                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2178
2179                         RTE_ASSERT(port->rx_ring != NULL);
2180                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2181                                 rte_pktmbuf_free(pkt);
2182
2183                         RTE_ASSERT(port->tx_ring != NULL);
2184                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2185                                 rte_pktmbuf_free(pkt);
2186                 }
2187         }
2188
2189         if (internals->mode == BONDING_MODE_TLB ||
2190                         internals->mode == BONDING_MODE_ALB) {
2191                 bond_tlb_disable(internals);
2192                 for (i = 0; i < internals->active_slave_count; i++)
2193                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2194         }
2195
2196         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2197         eth_dev->data->dev_started = 0;
2198
2199         internals->link_status_polling_enabled = 0;
2200         for (i = 0; i < internals->slave_count; i++) {
2201                 uint16_t slave_id = internals->slaves[i].port_id;
2202                 if (find_slave_by_id(internals->active_slaves,
2203                                 internals->active_slave_count, slave_id) !=
2204                                                 internals->active_slave_count) {
2205                         internals->slaves[i].last_link_status = 0;
2206                         rte_eth_dev_stop(slave_id);
2207                         deactivate_slave(eth_dev, slave_id);
2208                 }
2209         }
2210 }
2211
2212 void
2213 bond_ethdev_close(struct rte_eth_dev *dev)
2214 {
2215         struct bond_dev_private *internals = dev->data->dev_private;
2216         uint16_t bond_port_id = internals->port_id;
2217         int skipped = 0;
2218         struct rte_flow_error ferror;
2219
2220         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2221         while (internals->slave_count != skipped) {
2222                 uint16_t port_id = internals->slaves[skipped].port_id;
2223
2224                 rte_eth_dev_stop(port_id);
2225
2226                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2227                         RTE_BOND_LOG(ERR,
2228                                      "Failed to remove port %d from bonded device %s",
2229                                      port_id, dev->device->name);
2230                         skipped++;
2231                 }
2232         }
2233         bond_flow_ops.flush(dev, &ferror);
2234         bond_ethdev_free_queues(dev);
2235         rte_bitmap_reset(internals->vlan_filter_bmp);
2236 }
2237
2238 /* forward declaration */
2239 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2240
2241 static void
2242 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2243 {
2244         struct bond_dev_private *internals = dev->data->dev_private;
2245
2246         uint16_t max_nb_rx_queues = UINT16_MAX;
2247         uint16_t max_nb_tx_queues = UINT16_MAX;
2248         uint16_t max_rx_desc_lim = UINT16_MAX;
2249         uint16_t max_tx_desc_lim = UINT16_MAX;
2250
2251         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2252
2253         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2254                         internals->candidate_max_rx_pktlen :
2255                         ETHER_MAX_JUMBO_FRAME_LEN;
2256
2257         /* Max number of tx/rx queues that the bonded device can support is
2258          * the minimum of those of its slaves, as every slave must be capable
2259          * of supporting the same number of tx/rx queues.
2260          */
2261         if (internals->slave_count > 0) {
2262                 struct rte_eth_dev_info slave_info;
2263                 uint16_t idx;
2264
2265                 for (idx = 0; idx < internals->slave_count; idx++) {
2266                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2267                                         &slave_info);
2268
2269                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2270                                 max_nb_rx_queues = slave_info.max_rx_queues;
2271
2272                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2273                                 max_nb_tx_queues = slave_info.max_tx_queues;
2274
2275                         if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2276                                 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2277
2278                         if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2279                                 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2280                 }
2281         }
2282
2283         dev_info->max_rx_queues = max_nb_rx_queues;
2284         dev_info->max_tx_queues = max_nb_tx_queues;
2285
2286         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2287                sizeof(dev_info->default_rxconf));
2288         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2289                sizeof(dev_info->default_txconf));
2290
2291         dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2292         dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2293
2294         /**
2295          * If dedicated hw queues enabled for link bonding device in LACP mode
2296          * then we need to reduce the maximum number of data path queues by 1.
2297          */
2298         if (internals->mode == BONDING_MODE_8023AD &&
2299                 internals->mode4.dedicated_queues.enabled == 1) {
2300                 dev_info->max_rx_queues--;
2301                 dev_info->max_tx_queues--;
2302         }
2303
2304         dev_info->min_rx_bufsize = 0;
2305
2306         dev_info->rx_offload_capa = internals->rx_offload_capa;
2307         dev_info->tx_offload_capa = internals->tx_offload_capa;
2308         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2309         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2310         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2311
2312         dev_info->reta_size = internals->reta_size;
2313 }
2314
2315 static int
2316 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2317 {
2318         int res;
2319         uint16_t i;
2320         struct bond_dev_private *internals = dev->data->dev_private;
2321
2322         /* don't do this while a slave is being added */
2323         rte_spinlock_lock(&internals->lock);
2324
2325         if (on)
2326                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2327         else
2328                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2329
2330         for (i = 0; i < internals->slave_count; i++) {
2331                 uint16_t port_id = internals->slaves[i].port_id;
2332
2333                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2334                 if (res == -ENOTSUP)
2335                         RTE_BOND_LOG(WARNING,
2336                                      "Setting VLAN filter on slave port %u not supported.",
2337                                      port_id);
2338         }
2339
2340         rte_spinlock_unlock(&internals->lock);
2341         return 0;
2342 }
2343
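/*
 * Queue setup on the bonded device only records the requested
 * configuration; the corresponding hardware queues are created on each
 * slave in slave_configure() using these stored parameters.
 */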
2344 static int
2345 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2346                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2347                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2348 {
2349         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2350                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2351                                         0, dev->data->numa_node);
2352         if (bd_rx_q == NULL)
2353                 return -1;
2354
2355         bd_rx_q->queue_id = rx_queue_id;
2356         bd_rx_q->dev_private = dev->data->dev_private;
2357
2358         bd_rx_q->nb_rx_desc = nb_rx_desc;
2359
2360         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2361         bd_rx_q->mb_pool = mb_pool;
2362
2363         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2364
2365         return 0;
2366 }
2367
2368 static int
2369 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2370                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2371                 const struct rte_eth_txconf *tx_conf)
2372 {
2373         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2374                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2375                                         0, dev->data->numa_node);
2376
2377         if (bd_tx_q == NULL)
2378                 return -1;
2379
2380         bd_tx_q->queue_id = tx_queue_id;
2381         bd_tx_q->dev_private = dev->data->dev_private;
2382
2383         bd_tx_q->nb_tx_desc = nb_tx_desc;
2384         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2385
2386         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2387
2388         return 0;
2389 }
2390
2391 static void
2392 bond_ethdev_rx_queue_release(void *queue)
2393 {
2394         if (queue == NULL)
2395                 return;
2396
2397         rte_free(queue);
2398 }
2399
2400 static void
2401 bond_ethdev_tx_queue_release(void *queue)
2402 {
2403         if (queue == NULL)
2404                 return;
2405
2406         rte_free(queue);
2407 }
2408
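/*
 * Alarm callback used for slaves that cannot raise LSC interrupts:
 * polls each such slave's link state under the device lock, fires the
 * usual LSC callback on any change, and re-arms itself while at least
 * one polled slave remains.
 */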
2409 static void
2410 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2411 {
2412         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2413         struct bond_dev_private *internals;
2414
2415         /* Default value for polling slave found is true as we don't want to
2416          * disable the polling thread if we cannot get the lock */
2417         int i, polling_slave_found = 1;
2418
2419         if (cb_arg == NULL)
2420                 return;
2421
2422         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2423         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2424
2425         if (!bonded_ethdev->data->dev_started ||
2426                 !internals->link_status_polling_enabled)
2427                 return;
2428
2429         /* If the device is currently being configured then don't check the
2430          * slaves' link status; wait until the next period */
2431         if (rte_spinlock_trylock(&internals->lock)) {
2432                 if (internals->slave_count > 0)
2433                         polling_slave_found = 0;
2434
2435                 for (i = 0; i < internals->slave_count; i++) {
2436                         if (!internals->slaves[i].link_status_poll_enabled)
2437                                 continue;
2438
2439                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2440                         polling_slave_found = 1;
2441
2442                         /* Update slave link status */
2443                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2444                                         internals->slaves[i].link_status_wait_to_complete);
2445
2446                         /* if link status has changed since last checked then call lsc
2447                          * event callback */
2448                         if (slave_ethdev->data->dev_link.link_status !=
2449                                         internals->slaves[i].last_link_status) {
2450                                 internals->slaves[i].last_link_status =
2451                                                 slave_ethdev->data->dev_link.link_status;
2452
2453                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2454                                                 RTE_ETH_EVENT_INTR_LSC,
2455                                                 &bonded_ethdev->data->port_id,
2456                                                 NULL);
2457                         }
2458                 }
2459                 rte_spinlock_unlock(&internals->lock);
2460         }
2461
2462         if (polling_slave_found)
2463                 /* Set alarm to continue monitoring link status of slave ethdev's */
2464                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2465                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2466 }
2467
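/*
 * Report the bonded device's link. The bond is "up" as soon as it is
 * started with at least one active slave. Speed is aggregated per mode:
 * broadcast reports the minimum slave speed, active-backup the primary
 * slave's speed, and the remaining modes the sum of all active slaves'
 * speeds (mode 4 additionally inherits autoneg/duplex from the
 * negotiated slave link).
 */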
2468 static int
2469 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2470 {
2471         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2472
2473         struct bond_dev_private *bond_ctx;
2474         struct rte_eth_link slave_link;
2475
2476         uint32_t idx;
2477
2478         bond_ctx = ethdev->data->dev_private;
2479
2480         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2481
2482         if (ethdev->data->dev_started == 0 ||
2483                         bond_ctx->active_slave_count == 0) {
2484                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2485                 return 0;
2486         }
2487
2488         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2489
2490         if (wait_to_complete)
2491                 link_update = rte_eth_link_get;
2492         else
2493                 link_update = rte_eth_link_get_nowait;
2494
2495         switch (bond_ctx->mode) {
2496         case BONDING_MODE_BROADCAST:
2497                 /**
2498                  * Setting link speed to UINT32_MAX to ensure we pick up the
2499                  * value of the first active slave
2500                  */
2501                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2502
2503                 /**
2504                  * link speed is the minimum of all the slaves' link speeds, as
2505                  * packet loss will occur on the slowest slave if transmission
2506                  * at a greater rate is attempted
2507                  */
2508                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2509                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2510
2511                         if (slave_link.link_speed <
2512                                         ethdev->data->dev_link.link_speed)
2513                                 ethdev->data->dev_link.link_speed =
2514                                                 slave_link.link_speed;
2515                 }
2516                 break;
2517         case BONDING_MODE_ACTIVE_BACKUP:
2518                 /* Current primary slave */
2519                 link_update(bond_ctx->current_primary_port, &slave_link);
2520
2521                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2522                 break;
2523         case BONDING_MODE_8023AD:
2524                 ethdev->data->dev_link.link_autoneg =
2525                                 bond_ctx->mode4.slave_link.link_autoneg;
2526                 ethdev->data->dev_link.link_duplex =
2527                                 bond_ctx->mode4.slave_link.link_duplex;
2528                 /* fall through to update link speed */
2529         case BONDING_MODE_ROUND_ROBIN:
2530         case BONDING_MODE_BALANCE:
2531         case BONDING_MODE_TLB:
2532         case BONDING_MODE_ALB:
2533         default:
2534                 /**
2535                  * In these modes the maximum theoretical link speed is the
2536                  * sum of all the slaves' link speeds
2537                  */
2538                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2539
2540                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2541                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2542
2543                         ethdev->data->dev_link.link_speed +=
2544                                         slave_link.link_speed;
2545                 }
2546         }
2547
2548
2549         return 0;
2550 }
2551
2552
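/* Aggregate statistics as the sum over all slaves, including the
 * per-queue counters. */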
2553 static int
2554 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2555 {
2556         struct bond_dev_private *internals = dev->data->dev_private;
2557         struct rte_eth_stats slave_stats;
2558         int i, j;
2559
2560         for (i = 0; i < internals->slave_count; i++) {
2561                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2562
2563                 stats->ipackets += slave_stats.ipackets;
2564                 stats->opackets += slave_stats.opackets;
2565                 stats->ibytes += slave_stats.ibytes;
2566                 stats->obytes += slave_stats.obytes;
2567                 stats->imissed += slave_stats.imissed;
2568                 stats->ierrors += slave_stats.ierrors;
2569                 stats->oerrors += slave_stats.oerrors;
2570                 stats->rx_nombuf += slave_stats.rx_nombuf;
2571
2572                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2573                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2574                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2575                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2576                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2577                         stats->q_errors[j] += slave_stats.q_errors[j];
2578                 }
2579
2580         }
2581
2582         return 0;
2583 }
2584
2585 static void
2586 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2587 {
2588         struct bond_dev_private *internals = dev->data->dev_private;
2589         int i;
2590
2591         for (i = 0; i < internals->slave_count; i++)
2592                 rte_eth_stats_reset(internals->slaves[i].port_id);
2593 }
2594
2595 static void
2596 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2597 {
2598         struct bond_dev_private *internals = eth_dev->data->dev_private;
2599         int i;
2600
2601         internals->promiscuous_en = 1;
2602
2603         switch (internals->mode) {
2604         /* Promiscuous mode is propagated to all slaves */
2605         case BONDING_MODE_ROUND_ROBIN:
2606         case BONDING_MODE_BALANCE:
2607         case BONDING_MODE_BROADCAST:
2608                 for (i = 0; i < internals->slave_count; i++)
2609                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2610                 break;
2611         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2612         case BONDING_MODE_8023AD:
2613                 break;
2614         /* Promiscuous mode is propagated only to primary slave */
2615         case BONDING_MODE_ACTIVE_BACKUP:
2616         case BONDING_MODE_TLB:
2617         case BONDING_MODE_ALB:
2618         default:
2619                 /* Do not touch promisc when there cannot be primary ports */
2620                 if (internals->slave_count == 0)
2621                         break;
2622                 rte_eth_promiscuous_enable(internals->current_primary_port);
2623         }
2624 }
2625
2626 static void
2627 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2628 {
2629         struct bond_dev_private *internals = dev->data->dev_private;
2630         int i;
2631
2632         internals->promiscuous_en = 0;
2633
2634         switch (internals->mode) {
2635         /* Promiscuous mode is propagated to all slaves */
2636         case BONDING_MODE_ROUND_ROBIN:
2637         case BONDING_MODE_BALANCE:
2638         case BONDING_MODE_BROADCAST:
2639                 for (i = 0; i < internals->slave_count; i++)
2640                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2641                 break;
2642         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2643         case BONDING_MODE_8023AD:
2644                 break;
2645         /* Promiscuous mode is propagated only to primary slave */
2646         case BONDING_MODE_ACTIVE_BACKUP:
2647         case BONDING_MODE_TLB:
2648         case BONDING_MODE_ALB:
2649         default:
2650                 /* Do not touch promisc when there cannot be primary ports */
2651                 if (internals->slave_count == 0)
2652                         break;
2653                 rte_eth_promiscuous_disable(internals->current_primary_port);
2654         }
2655 }
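
/*
 * Illustrative usage (not part of the driver): an application only toggles
 * promiscuous mode on the bonded port; the handlers above decide how the
 * setting reaches the slaves for each bonding mode. bond_port_id is a
 * hypothetical port id.
 *
 *        rte_eth_promiscuous_enable(bond_port_id);
 *        if (rte_eth_promiscuous_get(bond_port_id) == 1)
 *                printf("bonded port %u is promiscuous\n", bond_port_id);
 *        rte_eth_promiscuous_disable(bond_port_id);
 */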
2656
2657 static void
2658 bond_ethdev_delayed_lsc_propagation(void *arg)
2659 {
2660         if (arg == NULL)
2661                 return;
2662
2663         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2664                         RTE_ETH_EVENT_INTR_LSC, NULL);
2665 }
2666
2667 int
2668 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2669                 void *param, void *ret_param __rte_unused)
2670 {
2671         struct rte_eth_dev *bonded_eth_dev;
2672         struct bond_dev_private *internals;
2673         struct rte_eth_link link;
2674         int rc = -1;
2675
2676         uint8_t lsc_flag = 0;
2677         int valid_slave = 0;
2678         uint16_t active_pos;
2679         uint16_t i;
2680
2681         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2682                 return rc;
2683
2684         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2685
2686         if (check_for_bonded_ethdev(bonded_eth_dev))
2687                 return rc;
2688
2689         internals = bonded_eth_dev->data->dev_private;
2690
2691         /* If the device isn't started don't handle interrupts */
2692         if (!bonded_eth_dev->data->dev_started)
2693                 return rc;
2694
2695         /* verify that port_id is a valid slave of bonded port */
2696         for (i = 0; i < internals->slave_count; i++) {
2697                 if (internals->slaves[i].port_id == port_id) {
2698                         valid_slave = 1;
2699                         break;
2700                 }
2701         }
2702
2703         if (!valid_slave)
2704                 return rc;
2705
2706         /* Synchronize lsc callback parallel calls either by real link event
2707          * from the slave PMDs or by the bonding PMD itself.
2708          */
2709         rte_spinlock_lock(&internals->lsc_lock);
2710
2711         /* Search for port in active port list */
2712         active_pos = find_slave_by_id(internals->active_slaves,
2713                         internals->active_slave_count, port_id);
2714
2715         rte_eth_link_get_nowait(port_id, &link);
2716         if (link.link_status) {
2717                 if (active_pos < internals->active_slave_count)
2718                         goto link_update;
2719
2720                 /* check link state properties if bonded link is up */
2721                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2722                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2723                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2724                                              "for slave %d in bonding mode %d",
2725                                              port_id, internals->mode);
2726                 } else {
2727                         /* inherit slave link properties */
2728                         link_properties_set(bonded_eth_dev, &link);
2729                 }
2730
2731                 /* If no active slave ports then set this port to be
2732                  * the primary port.
2733                  */
2734                 if (internals->active_slave_count < 1) {
2735                         /* If first active slave, then change link status */
2736                         bonded_eth_dev->data->dev_link.link_status =
2737                                                                 ETH_LINK_UP;
2738                         internals->current_primary_port = port_id;
2739                         lsc_flag = 1;
2740
2741                         mac_address_slaves_update(bonded_eth_dev);
2742                 }
2743
2744                 activate_slave(bonded_eth_dev, port_id);
2745
2746                 /* If the user has defined the primary port then default to
2747                  * using it.
2748                  */
2749                 if (internals->user_defined_primary_port &&
2750                                 internals->primary_port == port_id)
2751                         bond_ethdev_primary_set(internals, port_id);
2752         } else {
2753                 if (active_pos == internals->active_slave_count)
2754                         goto link_update;
2755
2756                 /* Remove from active slave list */
2757                 deactivate_slave(bonded_eth_dev, port_id);
2758
2759                 if (internals->active_slave_count < 1)
2760                         lsc_flag = 1;
2761
2762                 /* Update primary id: take the first active slave from the list
2763                  * or, if none is available, fall back to the configured primary port */
2764                 if (port_id == internals->current_primary_port) {
2765                         if (internals->active_slave_count > 0)
2766                                 bond_ethdev_primary_set(internals,
2767                                                 internals->active_slaves[0]);
2768                         else
2769                                 internals->current_primary_port = internals->primary_port;
2770                 }
2771         }
2772
2773 link_update:
2774         /**
2775          * Update bonded device link properties after any change to active
2776          * slaves
2777          */
2778         bond_ethdev_link_update(bonded_eth_dev, 0);
2779
2780         if (lsc_flag) {
2781                 /* Cancel any possible outstanding interrupts if delays are enabled */
2782                 if (internals->link_up_delay_ms > 0 ||
2783                         internals->link_down_delay_ms > 0)
2784                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2785                                         bonded_eth_dev);
2786
2787                 if (bonded_eth_dev->data->dev_link.link_status) {
2788                         if (internals->link_up_delay_ms > 0)
2789                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2790                                                 bond_ethdev_delayed_lsc_propagation,
2791                                                 (void *)bonded_eth_dev);
2792                         else
2793                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2794                                                 RTE_ETH_EVENT_INTR_LSC,
2795                                                 NULL);
2796
2797                 } else {
2798                         if (internals->link_down_delay_ms > 0)
2799                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2800                                                 bond_ethdev_delayed_lsc_propagation,
2801                                                 (void *)bonded_eth_dev);
2802                         else
2803                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2804                                                 RTE_ETH_EVENT_INTR_LSC,
2805                                                 NULL);
2806                 }
2807         }
2808
2809         rte_spinlock_unlock(&internals->lsc_lock);
2810
2811         return 0; /* event handled; failure paths above return rc */
2812 }
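
/*
 * Illustrative usage (not part of the driver): the callback above is
 * registered by the bonding PMD on each slave port. An application can
 * observe the propagated events by registering its own LSC callback on the
 * bonded port; app_lsc_cb and bond_port_id below are hypothetical.
 *
 *        static int
 *        app_lsc_cb(uint16_t port_id, enum rte_eth_event_type type,
 *                        void *cb_arg __rte_unused, void *ret_param __rte_unused)
 *        {
 *                if (type == RTE_ETH_EVENT_INTR_LSC)
 *                        printf("link state changed on port %u\n", port_id);
 *                return 0;
 *        }
 *
 *        rte_eth_dev_callback_register(bond_port_id, RTE_ETH_EVENT_INTR_LSC,
 *                        app_lsc_cb, NULL);
 */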
2813
2814 static int
2815 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2816                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2817 {
2818         unsigned i, j;
2819         int result = 0;
2820         int slave_reta_size;
2821         unsigned reta_count;
2822         struct bond_dev_private *internals = dev->data->dev_private;
2823
2824         if (reta_size != internals->reta_size)
2825                 return -EINVAL;
2826
2827         /* Copy RETA table */
2828         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2829
2830         for (i = 0; i < reta_count; i++) {
2831                 internals->reta_conf[i].mask = reta_conf[i].mask;
2832                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2833                         if ((reta_conf[i].mask >> j) & 0x01)
2834                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2835         }
2836
2837         /* Fill rest of array */
2838         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2839                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2840                                 sizeof(internals->reta_conf[0]) * reta_count);
2841
2842         /* Propagate RETA over slaves */
2843         for (i = 0; i < internals->slave_count; i++) {
2844                 slave_reta_size = internals->slaves[i].reta_size;
2845                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2846                                 &internals->reta_conf[0], slave_reta_size);
2847                 if (result < 0)
2848                         return result;
2849         }
2850
2851         return 0;
2852 }
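
/*
 * Illustrative usage (not part of the driver): a RETA update issued on the
 * bonded port is mirrored to every slave by the function above. A minimal
 * sketch that spreads entries over two queues; bond_port_id is hypothetical
 * and the table is assumed to fit 512 entries.
 *
 *        struct rte_eth_dev_info dev_info;
 *        struct rte_eth_rss_reta_entry64 reta_conf[512 / RTE_RETA_GROUP_SIZE];
 *        uint16_t k;
 *
 *        rte_eth_dev_info_get(bond_port_id, &dev_info);
 *        memset(reta_conf, 0, sizeof(reta_conf));
 *        for (k = 0; k < dev_info.reta_size; k++) {
 *                reta_conf[k / RTE_RETA_GROUP_SIZE].mask = UINT64_MAX;
 *                reta_conf[k / RTE_RETA_GROUP_SIZE].reta[k % RTE_RETA_GROUP_SIZE] =
 *                                k % 2;
 *        }
 *        rte_eth_dev_rss_reta_update(bond_port_id, reta_conf,
 *                        dev_info.reta_size);
 */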
2853
2854 static int
2855 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2856                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2857 {
2858         int i, j;
2859         struct bond_dev_private *internals = dev->data->dev_private;
2860
2861         if (reta_size != internals->reta_size)
2862                 return -EINVAL;
2863
2864         /* Copy RETA table */
2865         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2866                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2867                         if ((reta_conf[i].mask >> j) & 0x01)
2868                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2869
2870         return 0;
2871 }
2872
2873 static int
2874 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2875                 struct rte_eth_rss_conf *rss_conf)
2876 {
2877         int i, result = 0;
2878         struct bond_dev_private *internals = dev->data->dev_private;
2879         struct rte_eth_rss_conf bond_rss_conf;
2880
2881         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2882
2883         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2884
2885         if (bond_rss_conf.rss_hf != 0)
2886                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2887
2888         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2889                         sizeof(internals->rss_key)) {
2890                 if (bond_rss_conf.rss_key_len == 0)
2891                         bond_rss_conf.rss_key_len = 40; /* default RSS key length */
2892                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2893                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2894                                 internals->rss_key_len);
2895         }
2896
2897         for (i = 0; i < internals->slave_count; i++) {
2898                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2899                                 &bond_rss_conf);
2900                 if (result < 0)
2901                         return result;
2902         }
2903
2904         return 0;
2905 }
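
/*
 * Illustrative usage (not part of the driver): changing the RSS hash
 * functions on the bonded port is propagated to all slaves by the function
 * above. A minimal sketch requesting IP hashing only and keeping the current
 * key (rss_key == NULL); bond_port_id is hypothetical.
 *
 *        struct rte_eth_rss_conf rss_conf = {
 *                .rss_key = NULL,
 *                .rss_key_len = 0,
 *                .rss_hf = ETH_RSS_IP,
 *        };
 *
 *        if (rte_eth_dev_rss_hash_update(bond_port_id, &rss_conf) != 0)
 *                printf("RSS hash update failed\n");
 */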
2906
2907 static int
2908 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2909                 struct rte_eth_rss_conf *rss_conf)
2910 {
2911         struct bond_dev_private *internals = dev->data->dev_private;
2912
2913         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2914         rss_conf->rss_key_len = internals->rss_key_len;
2915         if (rss_conf->rss_key)
2916                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2917
2918         return 0;
2919 }
2920
2921 static int
2922 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2923 {
2924         struct rte_eth_dev *slave_eth_dev;
2925         struct bond_dev_private *internals = dev->data->dev_private;
2926         int ret, i;
2927
2928         rte_spinlock_lock(&internals->lock);
2929
2930         for (i = 0; i < internals->slave_count; i++) {
2931                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2932                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2933                         rte_spinlock_unlock(&internals->lock);
2934                         return -ENOTSUP;
2935                 }
2936         }
2937         for (i = 0; i < internals->slave_count; i++) {
2938                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2939                 if (ret < 0) {
2940                         rte_spinlock_unlock(&internals->lock);
2941                         return ret;
2942                 }
2943         }
2944
2945         rte_spinlock_unlock(&internals->lock);
2946         return 0;
2947 }
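
/*
 * Illustrative usage (not part of the driver): an MTU set on the bonded port
 * succeeds only if every slave implements mtu_set and accepts the value, as
 * checked above. bond_port_id and the jumbo MTU value are hypothetical.
 *
 *        if (rte_eth_dev_set_mtu(bond_port_id, 9000) != 0)
 *                printf("not all slaves accepted the new MTU\n");
 */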
2948
2949 static int
2950 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
2951                         struct rte_ether_addr *addr)
2952 {
2953         if (mac_address_set(dev, addr)) {
2954                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2955                 return -EINVAL;
2956         }
2957
2958         return 0;
2959 }
2960
2961 static int
2962 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2963                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2964 {
2965         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2966                 *(const void **)arg = &bond_flow_ops;
2967                 return 0;
2968         }
2969         return -ENOTSUP;
2970 }
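
/*
 * Illustrative usage (not part of the driver): the filter hook above only
 * exposes the generic flow ops, so flow rules are installed on the bonded
 * port through the regular rte_flow API. A minimal sketch dropping all
 * ingress traffic; bond_port_id is hypothetical.
 *
 *        struct rte_flow_attr attr = { .ingress = 1 };
 *        struct rte_flow_item pattern[] = {
 *                { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *                { .type = RTE_FLOW_ITEM_TYPE_END },
 *        };
 *        struct rte_flow_action actions[] = {
 *                { .type = RTE_FLOW_ACTION_TYPE_DROP },
 *                { .type = RTE_FLOW_ACTION_TYPE_END },
 *        };
 *        struct rte_flow_error error;
 *
 *        if (rte_flow_create(bond_port_id, &attr, pattern, actions,
 *                        &error) == NULL)
 *                printf("flow create failed: %s\n", error.message);
 */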
2971
2972 static int
2973 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
2974                         struct rte_ether_addr *mac_addr,
2975                         __rte_unused uint32_t index, uint32_t vmdq)
2976 {
2977         struct rte_eth_dev *slave_eth_dev;
2978         struct bond_dev_private *internals = dev->data->dev_private;
2979         int ret, i;
2980
2981         rte_spinlock_lock(&internals->lock);
2982
2983         for (i = 0; i < internals->slave_count; i++) {
2984                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2985                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2986                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2987                         ret = -ENOTSUP;
2988                         goto end;
2989                 }
2990         }
2991
2992         for (i = 0; i < internals->slave_count; i++) {
2993                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2994                                 mac_addr, vmdq);
2995                 if (ret < 0) {
2996                         /* rollback */
2997                         for (i--; i >= 0; i--)
2998                                 rte_eth_dev_mac_addr_remove(
2999                                         internals->slaves[i].port_id, mac_addr);
3000                         goto end;
3001                 }
3002         }
3003
3004         ret = 0;
3005 end:
3006         rte_spinlock_unlock(&internals->lock);
3007         return ret;
3008 }
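
/*
 * Illustrative usage (not part of the driver): adding a secondary MAC
 * address to the bonded port installs it on every slave, with the rollback
 * above if any slave fails. The address, pool index 0 and bond_port_id are
 * hypothetical.
 *
 *        struct rte_ether_addr extra = {
 *                .addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }
 *        };
 *
 *        if (rte_eth_dev_mac_addr_add(bond_port_id, &extra, 0) != 0)
 *                printf("failed to add MAC on all slaves\n");
 */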
3009
3010 static void
3011 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3012 {
3013         struct rte_eth_dev *slave_eth_dev;
3014         struct bond_dev_private *internals = dev->data->dev_private;
3015         int i;
3016
3017         rte_spinlock_lock(&internals->lock);
3018
3019         for (i = 0; i < internals->slave_count; i++) {
3020                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3021                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3022                         goto end;
3023         }
3024
3025         struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3026
3027         for (i = 0; i < internals->slave_count; i++)
3028                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3029                                 mac_addr);
3030
3031 end:
3032         rte_spinlock_unlock(&internals->lock);
3033 }
3034
3035 const struct eth_dev_ops default_dev_ops = {
3036         .dev_start            = bond_ethdev_start,
3037         .dev_stop             = bond_ethdev_stop,
3038         .dev_close            = bond_ethdev_close,
3039         .dev_configure        = bond_ethdev_configure,
3040         .dev_infos_get        = bond_ethdev_info,
3041         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3042         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3043         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3044         .rx_queue_release     = bond_ethdev_rx_queue_release,
3045         .tx_queue_release     = bond_ethdev_tx_queue_release,
3046         .link_update          = bond_ethdev_link_update,
3047         .stats_get            = bond_ethdev_stats_get,
3048         .stats_reset          = bond_ethdev_stats_reset,
3049         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3050         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3051         .reta_update          = bond_ethdev_rss_reta_update,
3052         .reta_query           = bond_ethdev_rss_reta_query,
3053         .rss_hash_update      = bond_ethdev_rss_hash_update,
3054         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3055         .mtu_set              = bond_ethdev_mtu_set,
3056         .mac_addr_set         = bond_ethdev_mac_address_set,
3057         .mac_addr_add         = bond_ethdev_mac_addr_add,
3058         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3059         .filter_ctrl          = bond_filter_ctrl
3060 };
3061
3062 static int
3063 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3064 {
3065         const char *name = rte_vdev_device_name(dev);
3066         uint8_t socket_id = dev->device.numa_node;
3067         struct bond_dev_private *internals = NULL;
3068         struct rte_eth_dev *eth_dev = NULL;
3069         uint32_t vlan_filter_bmp_size;
3070
3071         /* now do all data allocation - for the eth_dev structure
3072          * and internal (private) data
3073          */
3074
3075         /* reserve an ethdev entry */
3076         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3077         if (eth_dev == NULL) {
3078                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3079                 goto err;
3080         }
3081
3082         internals = eth_dev->data->dev_private;
3083         eth_dev->data->nb_rx_queues = (uint16_t)1;
3084         eth_dev->data->nb_tx_queues = (uint16_t)1;
3085
3086         /* Allocate memory for storing MAC addresses */
3087         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3088                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3089         if (eth_dev->data->mac_addrs == NULL) {
3090                 RTE_BOND_LOG(ERR,
3091                              "Failed to allocate %u bytes needed to store MAC addresses",
3092                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3093                 goto err;
3094         }
3095
3096         eth_dev->dev_ops = &default_dev_ops;
3097         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3098
3099         rte_spinlock_init(&internals->lock);
3100         rte_spinlock_init(&internals->lsc_lock);
3101
3102         internals->port_id = eth_dev->data->port_id;
3103         internals->mode = BONDING_MODE_INVALID;
3104         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3105         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3106         internals->burst_xmit_hash = burst_xmit_l2_hash;
3107         internals->user_defined_mac = 0;
3108
3109         internals->link_status_polling_enabled = 0;
3110
3111         internals->link_status_polling_interval_ms =
3112                 DEFAULT_POLLING_INTERVAL_10_MS;
3113         internals->link_down_delay_ms = 0;
3114         internals->link_up_delay_ms = 0;
3115
3116         internals->slave_count = 0;
3117         internals->active_slave_count = 0;
3118         internals->rx_offload_capa = 0;
3119         internals->tx_offload_capa = 0;
3120         internals->rx_queue_offload_capa = 0;
3121         internals->tx_queue_offload_capa = 0;
3122         internals->candidate_max_rx_pktlen = 0;
3123         internals->max_rx_pktlen = 0;
3124
3125         /* Initially allow any RSS offload type to be chosen */
3126         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3127
3128         memset(&internals->default_rxconf, 0,
3129                sizeof(internals->default_rxconf));
3130         memset(&internals->default_txconf, 0,
3131                sizeof(internals->default_txconf));
3132
3133         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3134         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3135
3136         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3137         memset(internals->slaves, 0, sizeof(internals->slaves));
3138
3139         TAILQ_INIT(&internals->flow_list);
3140         internals->flow_isolated_valid = 0;
3141
3142         /* Set mode 4 default configuration */
3143         bond_mode_8023ad_setup(eth_dev, NULL);
3144         if (bond_ethdev_mode_set(eth_dev, mode)) {
3145                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3146                                  eth_dev->data->port_id, mode);
3147                 goto err;
3148         }
3149
3150         vlan_filter_bmp_size =
3151                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3152         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3153                                                    RTE_CACHE_LINE_SIZE);
3154         if (internals->vlan_filter_bmpmem == NULL) {
3155                 RTE_BOND_LOG(ERR,
3156                              "Failed to allocate vlan bitmap for bonded device %u",
3157                              eth_dev->data->port_id);
3158                 goto err;
3159         }
3160
3161         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3162                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3163         if (internals->vlan_filter_bmp == NULL) {
3164                 RTE_BOND_LOG(ERR,
3165                              "Failed to init vlan bitmap for bonded device %u",
3166                              eth_dev->data->port_id);
3167                 rte_free(internals->vlan_filter_bmpmem);
3168                 goto err;
3169         }
3170
3171         return eth_dev->data->port_id;
3172
3173 err:
3174         rte_free(internals);
3175         if (eth_dev != NULL)
3176                 eth_dev->data->dev_private = NULL;
3177         rte_eth_dev_release_port(eth_dev);
3178         return -1;
3179 }
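
/*
 * Illustrative usage (not part of the driver): besides the --vdev probe path
 * below, a bonded device backed by this allocation logic can be created at
 * run time through the bonding API. The name, mode and slave_port_id are
 * hypothetical.
 *
 *        int bond_port_id = rte_eth_bond_create("net_bonding0",
 *                        BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *
 *        if (bond_port_id < 0)
 *                printf("failed to create bonded device\n");
 *        else if (rte_eth_bond_slave_add(bond_port_id, slave_port_id) != 0)
 *                printf("failed to add slave\n");
 */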
3180
3181 static int
3182 bond_probe(struct rte_vdev_device *dev)
3183 {
3184         const char *name;
3185         struct bond_dev_private *internals;
3186         struct rte_kvargs *kvlist;
3187         uint8_t bonding_mode, socket_id;
3188         int  arg_count, port_id;
3189         uint8_t agg_mode;
3190         struct rte_eth_dev *eth_dev;
3191
3192         if (!dev)
3193                 return -EINVAL;
3194
3195         name = rte_vdev_device_name(dev);
3196         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3197
3198         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3199                 eth_dev = rte_eth_dev_attach_secondary(name);
3200                 if (!eth_dev) {
3201                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3202                         return -1;
3203                 }
3204                 /* TODO: request info from primary to set up Rx and Tx */
3205                 eth_dev->dev_ops = &default_dev_ops;
3206                 eth_dev->device = &dev->device;
3207                 rte_eth_dev_probing_finish(eth_dev);
3208                 return 0;
3209         }
3210
3211         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3212                 pmd_bond_init_valid_arguments);
3213         if (kvlist == NULL)
3214                 return -1;
3215
3216         /* Parse link bonding mode */
3217         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3218                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3219                                 &bond_ethdev_parse_slave_mode_kvarg,
3220                                 &bonding_mode) != 0) {
3221                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3222                                         name);
3223                         goto parse_error;
3224                 }
3225         } else {
3226                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3227                                 "device %s", name);
3228                 goto parse_error;
3229         }
3230
3231         /* Parse socket id to create bonding device on */
3232         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3233         if (arg_count == 1) {
3234                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3235                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3236                                 != 0) {
3237                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3238                                         "bonded device %s", name);
3239                         goto parse_error;
3240                 }
3241         } else if (arg_count > 1) {
3242                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3243                                 "bonded device %s", name);
3244                 goto parse_error;
3245         } else {
3246                 socket_id = rte_socket_id();
3247         }
3248
3249         dev->device.numa_node = socket_id;
3250
3251         /* Create link bonding eth device */
3252         port_id = bond_alloc(dev, bonding_mode);
3253         if (port_id < 0) {
3254                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u "
3255                                 "on socket %u.", name, bonding_mode, socket_id);
3256                 goto parse_error;
3257         }
3258         internals = rte_eth_devices[port_id].data->dev_private;
3259         internals->kvlist = kvlist;
3260
3261         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3262                 if (rte_kvargs_process(kvlist,
3263                                 PMD_BOND_AGG_MODE_KVARG,
3264                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3265                                 &agg_mode) != 0) {
3266                         RTE_BOND_LOG(ERR,
3267                                         "Failed to parse agg selection mode for bonded device %s",
3268                                         name);
3269                         goto parse_error;
3270                 }
3271
3272                 if (internals->mode == BONDING_MODE_8023AD)
3273                         internals->mode4.agg_selection = agg_mode;
3274         } else {
3275                 internals->mode4.agg_selection = AGG_STABLE;
3276         }
3277
3278         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3279         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3280                         "socket %u.", name, port_id, bonding_mode, socket_id);
3281         return 0;
3282
3283 parse_error:
3284         rte_kvargs_free(kvlist);
3285
3286         return -1;
3287 }
3288
3289 static int
3290 bond_remove(struct rte_vdev_device *dev)
3291 {
3292         struct rte_eth_dev *eth_dev;
3293         struct bond_dev_private *internals;
3294         const char *name;
3295
3296         if (!dev)
3297                 return -EINVAL;
3298
3299         name = rte_vdev_device_name(dev);
3300         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3301
3302         /* now free all data allocation - for the eth_dev structure
3303          * and internal (private) data
3304          */
3305
3306         /* find an ethdev entry */
3307         eth_dev = rte_eth_dev_allocated(name);
3308         if (eth_dev == NULL)
3309                 return -ENODEV;
3310
3311         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3312                 return rte_eth_dev_release_port(eth_dev);
3313
3314         RTE_ASSERT(eth_dev->device == &dev->device);
3315
3316         internals = eth_dev->data->dev_private;
3317         if (internals->slave_count != 0)
3318                 return -EBUSY;
3319
3320         if (eth_dev->data->dev_started == 1) {
3321                 bond_ethdev_stop(eth_dev);
3322                 bond_ethdev_close(eth_dev);
3323         }
3324
3325         eth_dev->dev_ops = NULL;
3326         eth_dev->rx_pkt_burst = NULL;
3327         eth_dev->tx_pkt_burst = NULL;
3328
3329         internals = eth_dev->data->dev_private;
3330         /* Try to release the mempool used in mode 6. If the bonded
3331          * device is not in mode 6, freeing NULL is not a problem.
3332          */
3333         rte_mempool_free(internals->mode6.mempool);
3334         rte_bitmap_free(internals->vlan_filter_bmp);
3335         rte_free(internals->vlan_filter_bmpmem);
3336
3337         rte_eth_dev_release_port(eth_dev);
3338
3339         return 0;
3340 }
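
/*
 * Illustrative usage (not part of the driver): a bonded device created with
 * rte_eth_bond_create() is torn down through the same remove path by calling
 * the following once all slaves have been removed; the name is hypothetical.
 *
 *        rte_eth_bond_free("net_bonding0");
 */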
3341
3342 /* This part resolves the slave port ids after all the other pdevs and vdevs
3343  * have been allocated. */
3344 static int
3345 bond_ethdev_configure(struct rte_eth_dev *dev)
3346 {
3347         const char *name = dev->device->name;
3348         struct bond_dev_private *internals = dev->data->dev_private;
3349         struct rte_kvargs *kvlist = internals->kvlist;
3350         int arg_count;
3351         uint16_t port_id = dev - rte_eth_devices;
3352         uint8_t agg_mode;
3353
3354         static const uint8_t default_rss_key[40] = {
3355                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3356                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3357                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3358                 0xBE, 0xAC, 0x01, 0xFA
3359         };
3360
3361         unsigned i, j;
3362
3363         /*
3364          * If RSS is enabled, fill the table with default values and
3365          * set the key to the value specified in the port RSS configuration.
3366          * Fall back to the default RSS key if no key is specified.
3367          */
3368         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3369                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3370                         internals->rss_key_len =
3371                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3372                         memcpy(internals->rss_key,
3373                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3374                                internals->rss_key_len);
3375                 } else {
3376                         internals->rss_key_len = sizeof(default_rss_key);
3377                         memcpy(internals->rss_key, default_rss_key,
3378                                internals->rss_key_len);
3379                 }
3380
3381                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3382                         internals->reta_conf[i].mask = ~0LL;
3383                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3384                                 internals->reta_conf[i].reta[j] =
3385                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3386                                                 dev->data->nb_rx_queues;
3387                 }
3388         }
3389
3390         /* set the max_rx_pktlen */
3391         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3392
3393         /*
3394          * if no kvlist, it means that this bonded device has been created
3395          * through the bonding api.
3396          */
3397         if (!kvlist)
3398                 return 0;
3399
3400         /* Parse MAC address for bonded device */
3401         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3402         if (arg_count == 1) {
3403                 struct rte_ether_addr bond_mac;
3404
3405                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3406                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3407                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3408                                      name);
3409                         return -1;
3410                 }
3411
3412                 /* Set MAC address */
3413                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3414                         RTE_BOND_LOG(ERR,
3415                                      "Failed to set mac address on bonded device %s",
3416                                      name);
3417                         return -1;
3418                 }
3419         } else if (arg_count > 1) {
3420                 RTE_BOND_LOG(ERR,
3421                              "MAC address can be specified only once for bonded device %s",
3422                              name);
3423                 return -1;
3424         }
3425
3426         /* Parse/set balance mode transmit policy */
3427         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3428         if (arg_count == 1) {
3429                 uint8_t xmit_policy;
3430
3431                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3432                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3433                     0) {
3434                         RTE_BOND_LOG(INFO,
3435                                      "Invalid xmit policy specified for bonded device %s",
3436                                      name);
3437                         return -1;
3438                 }
3439
3440                 /* Set balance mode transmit policy */
3441                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3442                         RTE_BOND_LOG(ERR,
3443                                      "Failed to set balance xmit policy on bonded device %s",
3444                                      name);
3445                         return -1;
3446                 }
3447         } else if (arg_count > 1) {
3448                 RTE_BOND_LOG(ERR,
3449                              "Transmit policy can be specified only once for bonded device %s",
3450                              name);
3451                 return -1;
3452         }
3453
3454         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3455                 if (rte_kvargs_process(kvlist,
3456                                        PMD_BOND_AGG_MODE_KVARG,
3457                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3458                                        &agg_mode) != 0) {
3459                         RTE_BOND_LOG(ERR,
3460                                      "Failed to parse agg selection mode for bonded device %s",
3461                                      name);
3462                 }
3463                 if (internals->mode == BONDING_MODE_8023AD) {
3464                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3465                                         agg_mode);
3466                         if (ret < 0) {
3467                                 RTE_BOND_LOG(ERR,
3468                                         "Invalid args for agg selection set for bonded device %s",
3469                                         name);
3470                                 return -1;
3471                         }
3472                 }
3473         }
3474
3475         /* Parse/add slave ports to bonded device */
3476         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3477                 struct bond_ethdev_slave_ports slave_ports;
3478                 unsigned i;
3479
3480                 memset(&slave_ports, 0, sizeof(slave_ports));
3481
3482                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3483                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3484                         RTE_BOND_LOG(ERR,
3485                                      "Failed to parse slave ports for bonded device %s",
3486                                      name);
3487                         return -1;
3488                 }
3489
3490                 for (i = 0; i < slave_ports.slave_count; i++) {
3491                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3492                                 RTE_BOND_LOG(ERR,
3493                                              "Failed to add port %d as slave to bonded device %s",
3494                                              slave_ports.slaves[i], name);
3495                         }
3496                 }
3497
3498         } else {
3499                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3500                 return -1;
3501         }
3502
3503         /* Parse/set primary slave port id */
3504         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3505         if (arg_count == 1) {
3506                 uint16_t primary_slave_port_id;
3507
3508                 if (rte_kvargs_process(kvlist,
3509                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3510                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3511                                        &primary_slave_port_id) < 0) {
3512                         RTE_BOND_LOG(INFO,
3513                                      "Invalid primary slave port id specified for bonded device %s",
3514                                      name);
3515                         return -1;
3516                 }
3517
3518                 /* Set the primary slave port id */
3519                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3520                     != 0) {
3521                         RTE_BOND_LOG(ERR,
3522                                      "Failed to set primary slave port %d on bonded device %s",
3523                                      primary_slave_port_id, name);
3524                         return -1;
3525                 }
3526         } else if (arg_count > 1) {
3527                 RTE_BOND_LOG(INFO,
3528                              "Primary slave can be specified only once for bonded device %s",
3529                              name);
3530                 return -1;
3531         }
3532
3533         /* Parse link status monitor polling interval */
3534         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3535         if (arg_count == 1) {
3536                 uint32_t lsc_poll_interval_ms;
3537
3538                 if (rte_kvargs_process(kvlist,
3539                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3540                                        &bond_ethdev_parse_time_ms_kvarg,
3541                                        &lsc_poll_interval_ms) < 0) {
3542                         RTE_BOND_LOG(INFO,
3543                                      "Invalid lsc polling interval value specified for bonded"
3544                                      " device %s", name);
3545                         return -1;
3546                 }
3547
3548                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3549                     != 0) {
3550                         RTE_BOND_LOG(ERR,
3551                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3552                                      lsc_poll_interval_ms, name);
3553                         return -1;
3554                 }
3555         } else if (arg_count > 1) {
3556                 RTE_BOND_LOG(INFO,
3557                              "LSC polling interval can be specified only once for bonded"
3558                              " device %s", name);
3559                 return -1;
3560         }
3561
3562         /* Parse link up interrupt propagation delay */
3563         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3564         if (arg_count == 1) {
3565                 uint32_t link_up_delay_ms;
3566
3567                 if (rte_kvargs_process(kvlist,
3568                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3569                                        &bond_ethdev_parse_time_ms_kvarg,
3570                                        &link_up_delay_ms) < 0) {
3571                         RTE_BOND_LOG(INFO,
3572                                      "Invalid link up propagation delay value specified for"
3573                                      " bonded device %s", name);
3574                         return -1;
3575                 }
3576
3577                 /* Set the link up propagation delay */
3578                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3579                     != 0) {
3580                         RTE_BOND_LOG(ERR,
3581                                      "Failed to set link up propagation delay (%u ms) on bonded"
3582                                      " device %s", link_up_delay_ms, name);
3583                         return -1;
3584                 }
3585         } else if (arg_count > 1) {
3586                 RTE_BOND_LOG(INFO,
3587                              "Link up propagation delay can be specified only once for"
3588                              " bonded device %s", name);
3589                 return -1;
3590         }
3591
3592         /* Parse link down interrupt propagation delay */
3593         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3594         if (arg_count == 1) {
3595                 uint32_t link_down_delay_ms;
3596
3597                 if (rte_kvargs_process(kvlist,
3598                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3599                                        &bond_ethdev_parse_time_ms_kvarg,
3600                                        &link_down_delay_ms) < 0) {
3601                         RTE_BOND_LOG(INFO,
3602                                      "Invalid link down propagation delay value specified for"
3603                                      " bonded device %s", name);
3604                         return -1;
3605                 }
3606
3607                 /* Set the link down propagation delay */
3608                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3609                     != 0) {
3610                         RTE_BOND_LOG(ERR,
3611                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3612                                      link_down_delay_ms, name);
3613                         return -1;
3614                 }
3615         } else if (arg_count > 1) {
3616                 RTE_BOND_LOG(INFO,
3617                              "Link down propagation delay can be specified only once for bonded device %s",
3618                              name);
3619                 return -1;
3620         }
3621
3622         return 0;
3623 }
3624
3625 struct rte_vdev_driver pmd_bond_drv = {
3626         .probe = bond_probe,
3627         .remove = bond_remove,
3628 };
3629
3630 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3631 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3632
3633 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3634         "slave=<ifc> "
3635         "primary=<ifc> "
3636         "mode=[0-6] "
3637         "xmit_policy=[l2 | l23 | l34] "
3638         "agg_mode=[count | stable | bandwidth] "
3639         "socket_id=<int> "
3640         "mac=<mac addr> "
3641         "lsc_poll_period_ms=<int> "
3642         "up_delay=<int> "
3643         "down_delay=<int>");
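
/*
 * Illustrative usage (not part of the driver): the parameters registered
 * above are consumed from an EAL --vdev argument. The application name, core
 * mask and PCI addresses below are examples only.
 *
 *        ./testpmd -l 0-3 -n 4 --vdev \
 *                'net_bonding0,mode=2,slave=0000:04:00.0,slave=0000:05:00.0'
 */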
3644
3645 int bond_logtype;
3646
3647 RTE_INIT(bond_init_log)
3648 {
3649         bond_logtype = rte_log_register("pmd.net.bond");
3650         if (bond_logtype >= 0)
3651                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3652 }