drivers/net/bonding/rte_eth_bond_pmd.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <netinet/in.h>
6
7 #include <rte_mbuf.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_ethdev_vdev.h>
11 #include <rte_tcp.h>
12 #include <rte_udp.h>
13 #include <rte_ip.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
20 #include <rte_string_fns.h>
21
22 #include "rte_eth_bond.h"
23 #include "rte_eth_bond_private.h"
24 #include "rte_eth_bond_8023ad_private.h"
25
26 #define REORDER_PERIOD_MS 10
27 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
28 #define BOND_MAX_MAC_ADDRS 16
29
30 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
31
32 /* Table for statistics in mode 5 TLB */
33 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
34
35 static inline size_t
36 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
37 {
38         size_t vlan_offset = 0;
39
40         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
41                 rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
42                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
43
44                 vlan_offset = sizeof(struct vlan_hdr);
45                 *proto = vlan_hdr->eth_proto;
46
47                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
48                         vlan_hdr = vlan_hdr + 1;
49                         *proto = vlan_hdr->eth_proto;
50                         vlan_offset += sizeof(struct vlan_hdr);
51                 }
52         }
53         return vlan_offset;
54 }
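/*
 * Illustrative sketch, not part of the driver: how get_vlan_offset()
 * above walks one QinQ outer tag plus one inner VLAN tag. The frame
 * layout and names below are hypothetical.
 */
static void __rte_unused
example_get_vlan_offset(void)
{
	uint8_t frame[sizeof(struct ether_hdr) +
		      2 * sizeof(struct vlan_hdr)] __rte_aligned(2) = { 0 };
	struct ether_hdr *eth = (struct ether_hdr *)frame;
	struct vlan_hdr *outer = (struct vlan_hdr *)(eth + 1);
	uint16_t proto;

	eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_QINQ);
	outer->eth_proto = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
	(outer + 1)->eth_proto = rte_cpu_to_be_16(ETHER_TYPE_IPv4);

	proto = eth->ether_type;
	/* Both tags are skipped: the offset is 2 * sizeof(struct vlan_hdr)
	 * and proto is left holding the IPv4 ethertype (big endian). */
	size_t off = get_vlan_offset(eth, &proto);
	(void)off;
}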
55
56 static uint16_t
57 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
58 {
59         struct bond_dev_private *internals;
60
61         uint16_t num_rx_total = 0;
62         uint16_t slave_count;
63         uint16_t active_slave;
64         int i;
65
66         /* Cast to structure containing the bonded device's port id and queue id */
67         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
68         internals = bd_rx_q->dev_private;
69         slave_count = internals->active_slave_count;
70         active_slave = internals->active_slave;
71
72         for (i = 0; i < slave_count && nb_pkts; i++) {
73                 uint16_t num_rx_slave;
74
75                 /* The offset into *bufs increases as packets are received
76                  * from other slaves */
77                 num_rx_slave =
78                         rte_eth_rx_burst(internals->active_slaves[active_slave],
79                                          bd_rx_q->queue_id,
80                                          bufs + num_rx_total, nb_pkts);
81                 num_rx_total += num_rx_slave;
82                 nb_pkts -= num_rx_slave;
83                 if (++active_slave == slave_count)
84                         active_slave = 0;
85         }
86
87         if (++internals->active_slave >= slave_count)
88                 internals->active_slave = 0;
89         return num_rx_total;
90 }
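/*
 * Minimal sketch, not part of the driver, of the wrap rule used by the
 * round-robin RX path above: each burst starts at
 * internals->active_slave and wraps with a compare rather than a
 * modulo.
 */
static inline uint16_t __rte_unused
example_next_active_slave(uint16_t active_slave, uint16_t slave_count)
{
	if (++active_slave == slave_count)
		active_slave = 0;
	return active_slave;
}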
91
92 static uint16_t
93 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
94                 uint16_t nb_pkts)
95 {
96         struct bond_dev_private *internals;
97
98         /* Cast to structure containing the bonded device's port id and queue id */
99         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
100
101         internals = bd_rx_q->dev_private;
102
103         return rte_eth_rx_burst(internals->current_primary_port,
104                         bd_rx_q->queue_id, bufs, nb_pkts);
105 }
106
107 static inline uint8_t
108 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
109 {
110         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
111
112         return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
113                 (ethertype == ether_type_slow_be &&
114                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
115 }
116
117 /*****************************************************************************
118  * Flow director setup for mode 4 (802.3ad) optimization
119  */
120
121 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
122         .dst.addr_bytes = { 0 },
123         .src.addr_bytes = { 0 },
124         .type = RTE_BE16(ETHER_TYPE_SLOW),
125 };
126
127 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
128         .dst.addr_bytes = { 0 },
129         .src.addr_bytes = { 0 },
130         .type = 0xFFFF,
131 };
132
133 static struct rte_flow_item flow_item_8023ad[] = {
134         {
135                 .type = RTE_FLOW_ITEM_TYPE_ETH,
136                 .spec = &flow_item_eth_type_8023ad,
137                 .last = NULL,
138                 .mask = &flow_item_eth_mask_type_8023ad,
139         },
140         {
141                 .type = RTE_FLOW_ITEM_TYPE_END,
142                 .spec = NULL,
143                 .last = NULL,
144                 .mask = NULL,
145         }
146 };
147
148 const struct rte_flow_attr flow_attr_8023ad = {
149         .group = 0,
150         .priority = 0,
151         .ingress = 1,
152         .egress = 0,
153         .reserved = 0,
154 };
155
156 int
157 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
158                 uint16_t slave_port) {
159         struct rte_eth_dev_info slave_info;
160         struct rte_flow_error error;
161         struct bond_dev_private *internals = (struct bond_dev_private *)
162                         (bond_dev->data->dev_private);
163
164         const struct rte_flow_action_queue lacp_queue_conf = {
165                 .index = 0,
166         };
167
168         const struct rte_flow_action actions[] = {
169                 {
170                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
171                         .conf = &lacp_queue_conf
172                 },
173                 {
174                         .type = RTE_FLOW_ACTION_TYPE_END,
175                 }
176         };
177
178         int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
179                         flow_item_8023ad, actions, &error);
180         if (ret < 0) {
181                 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
182                                 __func__, error.message, slave_port,
183                                 internals->mode4.dedicated_queues.rx_qid);
184                 return -1;
185         }
186
187         rte_eth_dev_info_get(slave_port, &slave_info);
188         if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
189                         slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
190                 RTE_BOND_LOG(ERR,
191                         "%s: Slave %d capabilities don't allow allocating additional queues",
192                         __func__, slave_port);
193                 return -1;
194         }
195
196         return 0;
197 }
198
199 int
200 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
201         struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
202         struct bond_dev_private *internals = (struct bond_dev_private *)
203                         (bond_dev->data->dev_private);
204         struct rte_eth_dev_info bond_info;
205         uint16_t idx;
206
207         /* Verify that all slaves in the bonding device support flow director */
208         if (internals->slave_count > 0) {
209                 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
210
211                 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
212                 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
213
214                 for (idx = 0; idx < internals->slave_count; idx++) {
215                         if (bond_ethdev_8023ad_flow_verify(bond_dev,
216                                         internals->slaves[idx].port_id) != 0)
217                                 return -1;
218                 }
219         }
220
221         return 0;
222 }
223
224 int
225 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
226
227         struct rte_flow_error error;
228         struct bond_dev_private *internals = (struct bond_dev_private *)
229                         (bond_dev->data->dev_private);
230
231         struct rte_flow_action_queue lacp_queue_conf = {
232                 .index = internals->mode4.dedicated_queues.rx_qid,
233         };
234
235         const struct rte_flow_action actions[] = {
236                 {
237                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
238                         .conf = &lacp_queue_conf
239                 },
240                 {
241                         .type = RTE_FLOW_ACTION_TYPE_END,
242                 }
243         };
244
245         internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
246                         &flow_attr_8023ad, flow_item_8023ad, actions, &error);
247         if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
248                 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
249                                 "(slave_port=%d queue_id=%d)",
250                                 error.message, slave_port,
251                                 internals->mode4.dedicated_queues.rx_qid);
252                 return -1;
253         }
254
255         return 0;
256 }
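/*
 * Illustrative usage sketch (hypothetical helper, not part of the
 * driver): the dedicated-queue LACP filter is verified with
 * bond_ethdev_8023ad_flow_verify() before the flow is created on the
 * slave with bond_ethdev_8023ad_flow_set().
 */
static int __rte_unused
example_setup_lacp_filter(struct rte_eth_dev *bond_dev, uint16_t slave_port)
{
	if (bond_ethdev_8023ad_flow_verify(bond_dev, slave_port) != 0)
		return -1;
	return bond_ethdev_8023ad_flow_set(bond_dev, slave_port);
}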
257
258 static uint16_t
259 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
260                 uint16_t nb_pkts)
261 {
262         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
263         struct bond_dev_private *internals = bd_rx_q->dev_private;
264         uint16_t num_rx_total = 0;      /* Total number of received packets */
265         uint16_t slaves[RTE_MAX_ETHPORTS];
266         uint16_t slave_count;
267         uint16_t active_slave;
268         uint16_t i;
269
270         /* Copy slave list to protect against slave up/down changes during rx
271          * bursting */
272         slave_count = internals->active_slave_count;
273         active_slave = internals->active_slave;
274         memcpy(slaves, internals->active_slaves,
275                         sizeof(internals->active_slaves[0]) * slave_count);
276
277         for (i = 0; i < slave_count && nb_pkts; i++) {
278                 uint16_t num_rx_slave;
279
280                 /* Read packets from this slave */
281                 num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
282                                                 bd_rx_q->queue_id,
283                                                 bufs + num_rx_total, nb_pkts);
284                 num_rx_total += num_rx_slave;
285                 nb_pkts -= num_rx_slave;
286
287                 if (++active_slave == slave_count)
288                         active_slave = 0;
289         }
290
291         if (++internals->active_slave >= slave_count)
292                 internals->active_slave = 0;
293
294         return num_rx_total;
295 }
296
297 static uint16_t
298 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
299                 uint16_t nb_bufs)
300 {
301         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
302         struct bond_dev_private *internals = bd_tx_q->dev_private;
303
304         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
305         uint16_t slave_count;
306
307         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
308         uint16_t dist_slave_count;
309
310         /* 2-D array in which mbufs are sorted per slave for transmission */
311         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
312         /* Number of mbufs for transmission on each slave */
313         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
314         /* Mapping array generated by hash function to map mbufs to slaves */
315         uint16_t bufs_slave_port_idxs[nb_bufs];
316
317         uint16_t slave_tx_count;
318         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
319
320         uint16_t i;
321
322         if (unlikely(nb_bufs == 0))
323                 return 0;
324
325         /* Copy slave list to protect against slave up/down changes during tx
326          * bursting */
327         slave_count = internals->active_slave_count;
328         if (unlikely(slave_count < 1))
329                 return 0;
330
331         memcpy(slave_port_ids, internals->active_slaves,
332                         sizeof(slave_port_ids[0]) * slave_count);
333
334
335         dist_slave_count = 0;
336         for (i = 0; i < slave_count; i++) {
337                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
338
339                 if (ACTOR_STATE(port, DISTRIBUTING))
340                         dist_slave_port_ids[dist_slave_count++] =
341                                         slave_port_ids[i];
342         }
343
344         if (unlikely(dist_slave_count < 1))
345                 return 0;
346
347         /*
348          * Populate each slave's mbuf array with the packets to be sent on it,
349          * selecting the output slave using a hash based on the xmit policy
350          */
351         internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
352                         bufs_slave_port_idxs);
353
354         for (i = 0; i < nb_bufs; i++) {
355                 /* Populate slave mbuf arrays with mbufs for that slave. */
356                 uint16_t slave_idx = bufs_slave_port_idxs[i];
357
358                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
359         }
360
361
362         /* Send packet burst on each slave device */
363         for (i = 0; i < dist_slave_count; i++) {
364                 if (slave_nb_bufs[i] == 0)
365                         continue;
366
367                 slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
368                                 bd_tx_q->queue_id, slave_bufs[i],
369                                 slave_nb_bufs[i]);
370
371                 total_tx_count += slave_tx_count;
372
373                 /* If tx burst fails move packets to end of bufs */
374                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
375                         int slave_tx_fail_count = slave_nb_bufs[i] -
376                                         slave_tx_count;
377                         total_tx_fail_count += slave_tx_fail_count;
378                         memcpy(&bufs[nb_bufs - total_tx_fail_count],
379                                &slave_bufs[i][slave_tx_count],
380                                slave_tx_fail_count * sizeof(bufs[0]));
381                 }
382         }
383
384         return total_tx_count;
385 }
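/*
 * Hedged caller sketch (hypothetical, not part of the driver): on
 * partial transmission the burst functions above compact the unsent
 * mbufs to the tail of the caller's bufs[] array, so they occupy
 * bufs[sent..nb_bufs-1]. A real caller might retry instead of freeing.
 */
static void __rte_unused
example_handle_unsent(void *bd_tx_q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
	uint16_t sent = bond_ethdev_tx_burst_8023ad_fast_queue(bd_tx_q,
			bufs, nb_bufs);

	while (sent < nb_bufs)
		rte_pktmbuf_free(bufs[--nb_bufs]);
}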
386
387
388 static uint16_t
389 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
390                 uint16_t nb_pkts)
391 {
392         /* Cast to structure containing the bonded device's port id and queue id */
393         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
394         struct bond_dev_private *internals = bd_rx_q->dev_private;
395         struct rte_eth_dev *bonded_eth_dev =
396                                         &rte_eth_devices[internals->port_id];
397         struct ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
398         struct ether_hdr *hdr;
399
400         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
401         uint16_t num_rx_total = 0;      /* Total number of received packets */
402         uint16_t slaves[RTE_MAX_ETHPORTS];
403         uint16_t slave_count, idx;
404
405         uint8_t collecting;  /* current slave collecting status */
406         const uint8_t promisc = internals->promiscuous_en;
407         uint8_t subtype;
408         uint16_t i;
409         uint16_t j;
410         uint16_t k;
411
412         /* Copy slave list to protect against slave up/down changes during rx
413          * bursting */
414         slave_count = internals->active_slave_count;
415         memcpy(slaves, internals->active_slaves,
416                         sizeof(internals->active_slaves[0]) * slave_count);
417
418         idx = internals->active_slave;
419         if (idx >= slave_count) {
420                 internals->active_slave = 0;
421                 idx = 0;
422         }
423         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
424                 j = num_rx_total;
425                 collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
426                                          COLLECTING);
427
428                 /* Read packets from this slave */
429                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
430                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
431
432                 for (k = j; k < 2 && k < num_rx_total; k++)
433                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
434
435                 /* Handle slow protocol packets. */
436                 while (j < num_rx_total) {
437
438                         /* Skip packets classified above pure L2; they cannot be slow frames */
439                         if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
440                                 j++;
441                                 continue;
442                         }
443
444                         if (j + 3 < num_rx_total)
445                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
446
447                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
448                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
449
450                         /* Remove the packet from the array if it is a slow packet, the
451                          * slave is not in collecting state, or the bonding interface is
452                          * not in promiscuous mode and the destination MAC does not match. */
453                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
454                                 !collecting ||
455                                 (!promisc &&
456                                  !is_multicast_ether_addr(&hdr->d_addr) &&
457                                  !is_same_ether_addr(bond_mac,
458                                                      &hdr->d_addr)))) {
459
460                                 if (hdr->ether_type == ether_type_slow_be) {
461                                         bond_mode_8023ad_handle_slow_pkt(
462                                             internals, slaves[idx], bufs[j]);
463                                 } else
464                                         rte_pktmbuf_free(bufs[j]);
465
466                                 /* Packet is managed by mode 4 or dropped, shift the array */
467                                 num_rx_total--;
468                                 if (j < num_rx_total) {
469                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
470                                                 (num_rx_total - j));
471                                 }
472                         } else
473                                 j++;
474                 }
475                 if (unlikely(++idx == slave_count))
476                         idx = 0;
477         }
478
479         if (++internals->active_slave >= slave_count)
480                 internals->active_slave = 0;
481
482         return num_rx_total;
483 }
484
485 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
486 uint32_t burstnumberRX;
487 uint32_t burstnumberTX;
488
489 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
490
491 static void
492 arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
493 {
494         switch (arp_op) {
495         case ARP_OP_REQUEST:
496                 strlcpy(buf, "ARP Request", buf_len);
497                 return;
498         case ARP_OP_REPLY:
499                 strlcpy(buf, "ARP Reply", buf_len);
500                 return;
501         case ARP_OP_REVREQUEST:
502                 strlcpy(buf, "Reverse ARP Request", buf_len);
503                 return;
504         case ARP_OP_REVREPLY:
505                 strlcpy(buf, "Reverse ARP Reply", buf_len);
506                 return;
507         case ARP_OP_INVREQUEST:
508                 strlcpy(buf, "Peer Identify Request", buf_len);
509                 return;
510         case ARP_OP_INVREPLY:
511                 strlcpy(buf, "Peer Identify Reply", buf_len);
512                 return;
513         default:
514                 break;
515         }
516         strlcpy(buf, "Unknown", buf_len);
517         return;
518 }
519 #endif
520 #define MaxIPv4String   16
521 static void
522 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
523 {
524         uint32_t ipv4_addr;
525
526         ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
527         snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
528                 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
529                 ipv4_addr & 0xFF);
530 }
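/*
 * Usage sketch with assumed values, not part of the driver:
 * ipv4_addr_to_dot() expects the address in network byte order.
 */
static void __rte_unused
example_ipv4_addr_to_dot(void)
{
	char buf[MaxIPv4String];

	/* 0xC0A80001 is 192.168.0.1; convert to big endian first */
	ipv4_addr_to_dot(rte_cpu_to_be_32(0xC0A80001), buf, MaxIPv4String);
	/* buf now holds "192.168.0.1" */
}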
531
532 #define MAX_CLIENTS_NUMBER      128
533 uint8_t active_clients;
534 struct client_stats_t {
535         uint16_t port;
536         uint32_t ipv4_addr;
537         uint32_t ipv4_rx_packets;
538         uint32_t ipv4_tx_packets;
539 };
540 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
541
542 static void
543 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
544 {
545         int i = 0;
546
547         for (; i < MAX_CLIENTS_NUMBER; i++)     {
548                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
549                         /* Just update RX packets number for this client */
550                         if (TXorRXindicator == &burstnumberRX)
551                                 client_stats[i].ipv4_rx_packets++;
552                         else
553                                 client_stats[i].ipv4_tx_packets++;
554                         return;
555                 }
556         }
557         /* We have a new client. Insert it into the table and increment its stats */
558         if (TXorRXindicator == &burstnumberRX)
559                 client_stats[active_clients].ipv4_rx_packets++;
560         else
561                 client_stats[active_clients].ipv4_tx_packets++;
562         client_stats[active_clients].ipv4_addr = addr;
563         client_stats[active_clients].port = port;
564         active_clients++;
565
566 }
567
568 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
569 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
570         rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
571                 "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
572                 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
573                 info,                                                   \
574                 port,                                                   \
575                 eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
576                 eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
577                 eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
578                 src_ip,                                                 \
579                 eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
580                 eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
581                 eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
582                 dst_ip,                                                 \
583                 arp_op, ++burstnumber)
584 #endif
585
586 static void
587 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
588                 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
589 {
590         struct ipv4_hdr *ipv4_h;
591 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
592         struct arp_hdr *arp_h;
593         char dst_ip[16];
594         char ArpOp[24];
595         char buf[16];
596 #endif
597         char src_ip[16];
598
599         uint16_t ether_type = eth_h->ether_type;
600         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
601
602 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
603         strlcpy(buf, info, 16);
604 #endif
605
606         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
607                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
608                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
609 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
610                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
611                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
612 #endif
613                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
614         }
615 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
616         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
617                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
618                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
619                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
620                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op),
621                                 ArpOp, sizeof(ArpOp));
622                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
623         }
624 #endif
625 }
626 #endif
627
628 static uint16_t
629 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
630 {
631         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
632         struct bond_dev_private *internals = bd_tx_q->dev_private;
633         struct ether_hdr *eth_h;
634         uint16_t ether_type, offset;
635         uint16_t nb_recv_pkts;
636         int i;
637
638         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
639
640         for (i = 0; i < nb_recv_pkts; i++) {
641                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
642                 ether_type = eth_h->ether_type;
643                 offset = get_vlan_offset(eth_h, &ether_type);
644
645                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
646 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
647                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
648 #endif
649                         bond_mode_alb_arp_recv(eth_h, offset, internals);
650                 }
651 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
652                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
653                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
654 #endif
655         }
656
657         return nb_recv_pkts;
658 }
659
660 static uint16_t
661 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
662                 uint16_t nb_pkts)
663 {
664         struct bond_dev_private *internals;
665         struct bond_tx_queue *bd_tx_q;
666
667         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
668         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
669
670         uint16_t num_of_slaves;
671         uint16_t slaves[RTE_MAX_ETHPORTS];
672
673         uint16_t num_tx_total = 0, num_tx_slave;
674
675         static int slave_idx = 0;
676         int i, cslave_idx = 0, tx_fail_total = 0;
677
678         bd_tx_q = (struct bond_tx_queue *)queue;
679         internals = bd_tx_q->dev_private;
680
681         /* Copy slave list to protect against slave up/down changes during tx
682          * bursting */
683         num_of_slaves = internals->active_slave_count;
684         memcpy(slaves, internals->active_slaves,
685                         sizeof(internals->active_slaves[0]) * num_of_slaves);
686
687         if (num_of_slaves < 1)
688                 return num_tx_total;
689
690         /* Populate each slave's mbuf array with the packets to be sent on it */
691         for (i = 0; i < nb_pkts; i++) {
692                 cslave_idx = (slave_idx + i) % num_of_slaves;
693                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
694         }
695
696         /* increment current slave index so the next call to tx burst starts on the
697          * next slave */
698         slave_idx = ++cslave_idx;
699
700         /* Send packet burst on each slave device */
701         for (i = 0; i < num_of_slaves; i++) {
702                 if (slave_nb_pkts[i] > 0) {
703                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
704                                         slave_bufs[i], slave_nb_pkts[i]);
705
706                         /* if tx burst fails move packets to end of bufs */
707                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
708                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
709
710                                 tx_fail_total += tx_fail_slave;
711
712                                 memcpy(&bufs[nb_pkts - tx_fail_total],
713                                        &slave_bufs[i][num_tx_slave],
714                                        tx_fail_slave * sizeof(bufs[0]));
715                         }
716                         num_tx_total += num_tx_slave;
717                 }
718         }
719
720         return num_tx_total;
721 }
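/*
 * Minimal sketch, not part of the driver: in the round-robin burst
 * above, packet i of the burst is assigned to slave
 * (slave_idx + i) % num_of_slaves.
 */
static inline uint16_t __rte_unused
example_rr_slave_for_pkt(uint16_t start_idx, uint16_t pkt_idx,
		uint16_t num_of_slaves)
{
	return (start_idx + pkt_idx) % num_of_slaves;
}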
722
723 static uint16_t
724 bond_ethdev_tx_burst_active_backup(void *queue,
725                 struct rte_mbuf **bufs, uint16_t nb_pkts)
726 {
727         struct bond_dev_private *internals;
728         struct bond_tx_queue *bd_tx_q;
729
730         bd_tx_q = (struct bond_tx_queue *)queue;
731         internals = bd_tx_q->dev_private;
732
733         if (internals->active_slave_count < 1)
734                 return 0;
735
736         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
737                         bufs, nb_pkts);
738 }
739
740 static inline uint16_t
741 ether_hash(struct ether_hdr *eth_hdr)
742 {
743         unaligned_uint16_t *word_src_addr =
744                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
745         unaligned_uint16_t *word_dst_addr =
746                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
747
748         return (word_src_addr[0] ^ word_dst_addr[0]) ^
749                         (word_src_addr[1] ^ word_dst_addr[1]) ^
750                         (word_src_addr[2] ^ word_dst_addr[2]);
751 }
752
753 static inline uint32_t
754 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
755 {
756         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
757 }
758
759 static inline uint32_t
760 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
761 {
762         unaligned_uint32_t *word_src_addr =
763                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
764         unaligned_uint32_t *word_dst_addr =
765                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
766
767         return (word_src_addr[0] ^ word_dst_addr[0]) ^
768                         (word_src_addr[1] ^ word_dst_addr[1]) ^
769                         (word_src_addr[2] ^ word_dst_addr[2]) ^
770                         (word_src_addr[3] ^ word_dst_addr[3]);
771 }
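/*
 * Minimal sketch, not part of the driver, of the fold used by the
 * L23/L34 xmit policies below: the 32-bit hash is XOR-folded
 * (>> 16, then >> 8) before the modulo so all input bits influence
 * slave selection; the L2 policy folds only once.
 */
static inline uint16_t __rte_unused
example_fold_hash(uint32_t hash, uint16_t slave_count)
{
	hash ^= hash >> 16;
	hash ^= hash >> 8;
	return hash % slave_count;
}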
772
773
774 void
775 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
776                 uint16_t slave_count, uint16_t *slaves)
777 {
778         struct ether_hdr *eth_hdr;
779         uint32_t hash;
780         int i;
781
782         for (i = 0; i < nb_pkts; i++) {
783                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
784
785                 hash = ether_hash(eth_hdr);
786
787                 slaves[i] = (hash ^= hash >> 8) % slave_count;
788         }
789 }
790
791 void
792 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
793                 uint16_t slave_count, uint16_t *slaves)
794 {
795         uint16_t i;
796         struct ether_hdr *eth_hdr;
797         uint16_t proto;
798         size_t vlan_offset;
799         uint32_t hash, l3hash;
800
801         for (i = 0; i < nb_pkts; i++) {
802                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
803                 l3hash = 0;
804
805                 proto = eth_hdr->ether_type;
806                 hash = ether_hash(eth_hdr);
807
808                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
809
810                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
811                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
812                                         ((char *)(eth_hdr + 1) + vlan_offset);
813                         l3hash = ipv4_hash(ipv4_hdr);
814
815                 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
816                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
817                                         ((char *)(eth_hdr + 1) + vlan_offset);
818                         l3hash = ipv6_hash(ipv6_hdr);
819                 }
820
821                 hash = hash ^ l3hash;
822                 hash ^= hash >> 16;
823                 hash ^= hash >> 8;
824
825                 slaves[i] = hash % slave_count;
826         }
827 }
828
829 void
830 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
831                 uint16_t slave_count, uint16_t *slaves)
832 {
833         struct ether_hdr *eth_hdr;
834         uint16_t proto;
835         size_t vlan_offset;
836         int i;
837
838         struct udp_hdr *udp_hdr;
839         struct tcp_hdr *tcp_hdr;
840         uint32_t hash, l3hash, l4hash;
841
842         for (i = 0; i < nb_pkts; i++) {
843                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
844                 size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
845                 proto = eth_hdr->ether_type;
846                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
847                 l3hash = 0;
848                 l4hash = 0;
849
850                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
851                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
852                                         ((char *)(eth_hdr + 1) + vlan_offset);
853                         size_t ip_hdr_offset;
854
855                         l3hash = ipv4_hash(ipv4_hdr);
856
857                         /* there is no L4 header in a fragmented packet */
858                         if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
859                                                                 == 0)) {
860                                 ip_hdr_offset = (ipv4_hdr->version_ihl
861                                         & IPV4_HDR_IHL_MASK) *
862                                         IPV4_IHL_MULTIPLIER;
863
864                                 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
865                                         tcp_hdr = (struct tcp_hdr *)
866                                                 ((char *)ipv4_hdr +
867                                                         ip_hdr_offset);
868                                         if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
869                                                         < pkt_end)
870                                                 l4hash = HASH_L4_PORTS(tcp_hdr);
871                                 } else if (ipv4_hdr->next_proto_id ==
872                                                                 IPPROTO_UDP) {
873                                         udp_hdr = (struct udp_hdr *)
874                                                 ((char *)ipv4_hdr +
875                                                         ip_hdr_offset);
876                                         if ((size_t)udp_hdr + sizeof(*udp_hdr)
877                                                         < pkt_end)
878                                                 l4hash = HASH_L4_PORTS(udp_hdr);
879                                 }
880                         }
881                 } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
882                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
883                                         ((char *)(eth_hdr + 1) + vlan_offset);
884                         l3hash = ipv6_hash(ipv6_hdr);
885
886                         if (ipv6_hdr->proto == IPPROTO_TCP) {
887                                 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
888                                 l4hash = HASH_L4_PORTS(tcp_hdr);
889                         } else if (ipv6_hdr->proto == IPPROTO_UDP) {
890                                 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
891                                 l4hash = HASH_L4_PORTS(udp_hdr);
892                         }
893                 }
894
895                 hash = l3hash ^ l4hash;
896                 hash ^= hash >> 16;
897                 hash ^= hash >> 8;
898
899                 slaves[i] = hash % slave_count;
900         }
901 }
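/*
 * Usage sketch (hypothetical, not part of the driver; assumes
 * nb_pkts > 0): each burst_xmit_*_hash() fills slaves[i] with the
 * output slave index chosen for buf[i]; the balance and 802.3ad TX
 * paths then bucket mbufs per slave.
 */
static void __rte_unused
example_burst_hash_usage(struct rte_mbuf **pkts, uint16_t nb_pkts,
		uint16_t slave_count)
{
	uint16_t dest_slaves[nb_pkts];

	burst_xmit_l34_hash(pkts, nb_pkts, slave_count, dest_slaves);
	/* dest_slaves[i] now holds the slave index for pkts[i],
	 * always in the range [0, slave_count) */
	(void)dest_slaves;
}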
902
903 struct bwg_slave {
904         uint64_t bwg_left_int;
905         uint64_t bwg_left_remainder;
906         uint16_t slave;
907 };
908
909 void
910 bond_tlb_activate_slave(struct bond_dev_private *internals) {
911         int i;
912
913         for (i = 0; i < internals->active_slave_count; i++) {
914                 tlb_last_obytets[internals->active_slaves[i]] = 0;
915         }
916 }
917
918 static int
919 bandwidth_cmp(const void *a, const void *b)
920 {
921         const struct bwg_slave *bwg_a = a;
922         const struct bwg_slave *bwg_b = b;
923         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
924         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
925                         (int64_t)bwg_a->bwg_left_remainder;
926         if (diff > 0)
927                 return 1;
928         else if (diff < 0)
929                 return -1;
930         else if (diff2 > 0)
931                 return 1;
932         else if (diff2 < 0)
933                 return -1;
934         else
935                 return 0;
936 }
937
938 static void
939 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
940                 struct bwg_slave *bwg_slave)
941 {
942         struct rte_eth_link link_status;
943
944         rte_eth_link_get_nowait(port_id, &link_status);
945         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
946         if (link_bwg == 0)
947                 return;
948         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
949         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
950         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
951 }
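/*
 * Worked example with assumed numbers, not part of the driver: for a
 * 10G slave, link_bwg = 10000 * 1000000 / 8 = 1.25e9 bytes/s before
 * the REORDER_PERIOD_MS scaling; the quotient/remainder pair stored
 * in struct bwg_slave is what bandwidth_cmp() sorts on, most
 * remaining bandwidth first:
 */
static void __rte_unused
example_sort_by_bandwidth(struct bwg_slave *bwg_array, uint16_t slave_count)
{
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
}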
952
953 static void
954 bond_ethdev_update_tlb_slave_cb(void *arg)
955 {
956         struct bond_dev_private *internals = arg;
957         struct rte_eth_stats slave_stats;
958         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
959         uint16_t slave_count;
960         uint64_t tx_bytes;
961
962         uint8_t update_stats = 0;
963         uint16_t slave_id;
964         uint16_t i;
965
966         internals->slave_update_idx++;
967
968
969         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
970                 update_stats = 1;
971
972         for (i = 0; i < internals->active_slave_count; i++) {
973                 slave_id = internals->active_slaves[i];
974                 rte_eth_stats_get(slave_id, &slave_stats);
975                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
976                 bandwidth_left(slave_id, tx_bytes,
977                                 internals->slave_update_idx, &bwg_array[i]);
978                 bwg_array[i].slave = slave_id;
979
980                 if (update_stats) {
981                         tlb_last_obytets[slave_id] = slave_stats.obytes;
982                 }
983         }
984
985         if (update_stats == 1)
986                 internals->slave_update_idx = 0;
987
988         slave_count = i;
989         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
990         for (i = 0; i < slave_count; i++)
991                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
992
993         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
994                         (struct bond_dev_private *)internals);
995 }
996
997 static uint16_t
998 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
999 {
1000         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1001         struct bond_dev_private *internals = bd_tx_q->dev_private;
1002
1003         struct rte_eth_dev *primary_port =
1004                         &rte_eth_devices[internals->primary_port];
1005         uint16_t num_tx_total = 0;
1006         uint16_t i, j;
1007
1008         uint16_t num_of_slaves = internals->active_slave_count;
1009         uint16_t slaves[RTE_MAX_ETHPORTS];
1010
1011         struct ether_hdr *ether_hdr;
1012         struct ether_addr primary_slave_addr;
1013         struct ether_addr active_slave_addr;
1014
1015         if (num_of_slaves < 1)
1016                 return num_tx_total;
1017
1018         memcpy(slaves, internals->tlb_slaves_order,
1019                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
1020
1021
1022         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
1023
1024         if (nb_pkts > 3) {
1025                 for (i = 0; i < 3; i++)
1026                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
1027         }
1028
1029         for (i = 0; i < num_of_slaves; i++) {
1030                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
1031                 for (j = num_tx_total; j < nb_pkts; j++) {
1032                         if (j + 3 < nb_pkts)
1033                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1034
1035                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1036                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
1037                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
1038 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1039                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1040 #endif
1041                 }
1042
1043                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1044                                 bufs + num_tx_total, nb_pkts - num_tx_total);
1045
1046                 if (num_tx_total == nb_pkts)
1047                         break;
1048         }
1049
1050         return num_tx_total;
1051 }
1052
1053 void
1054 bond_tlb_disable(struct bond_dev_private *internals)
1055 {
1056         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1057 }
1058
1059 void
1060 bond_tlb_enable(struct bond_dev_private *internals)
1061 {
1062         bond_ethdev_update_tlb_slave_cb(internals);
1063 }
1064
1065 static uint16_t
1066 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1067 {
1068         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1069         struct bond_dev_private *internals = bd_tx_q->dev_private;
1070
1071         struct ether_hdr *eth_h;
1072         uint16_t ether_type, offset;
1073
1074         struct client_data *client_info;
1075
1076         /*
1077          * We create transmit buffers for every slave and one additional to send
1078          * through TLB. In the worst case every packet will be sent on one port.
1079          */
1080         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1081         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1082
1083         /*
1084          * We create separate transmit buffers for update packets as they won't
1085          * be counted in num_tx_total.
1086          */
1087         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1088         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1089
1090         struct rte_mbuf *upd_pkt;
1091         size_t pkt_size;
1092
1093         uint16_t num_send, num_not_send = 0;
1094         uint16_t num_tx_total = 0;
1095         uint16_t slave_idx;
1096
1097         int i, j;
1098
1099         /* Search the tx buffer for ARP packets and hand them to the ALB logic */
1100         for (i = 0; i < nb_pkts; i++) {
1101                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1102                 ether_type = eth_h->ether_type;
1103                 offset = get_vlan_offset(eth_h, &ether_type);
1104
1105                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1106                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1107
1108                         /* Change src mac in eth header */
1109                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1110
1111                         /* Add packet to slave tx buffer */
1112                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1113                         slave_bufs_pkts[slave_idx]++;
1114                 } else {
1115                         /* If packet is not ARP, send it with TLB policy */
1116                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1117                                         bufs[i];
1118                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1119                 }
1120         }
1121
1122         /* Update connected client ARP tables */
1123         if (internals->mode6.ntt) {
1124                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1125                         client_info = &internals->mode6.client_table[i];
1126
1127                         if (client_info->in_use) {
1128                                 /* Allocate new packet to send ARP update on current slave */
1129                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1130                                 if (upd_pkt == NULL) {
1131                                         RTE_BOND_LOG(ERR,
1132                                                      "Failed to allocate ARP packet from pool");
1133                                         continue;
1134                                 }
1135                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1136                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
1137                                 upd_pkt->data_len = pkt_size;
1138                                 upd_pkt->pkt_len = pkt_size;
1139
1140                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1141                                                 internals);
1142
1143                                 /* Add packet to update tx buffer */
1144                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1145                                 update_bufs_pkts[slave_idx]++;
1146                         }
1147                 }
1148                 internals->mode6.ntt = 0;
1149         }
1150
1151         /* Send ARP packets on proper slaves */
1152         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1153                 if (slave_bufs_pkts[i] > 0) {
1154                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1155                                         slave_bufs[i], slave_bufs_pkts[i]);
1156                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1157                                 bufs[nb_pkts - 1 - num_not_send - j] =
1158                                                 slave_bufs[i][nb_pkts - 1 - j];
1159                         }
1160
1161                         num_tx_total += num_send;
1162                         num_not_send += slave_bufs_pkts[i] - num_send;
1163
1164 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1165                         /* Print TX stats including update packets */
1166                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
1167                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1168                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1169                         }
1170 #endif
1171                 }
1172         }
1173
1174         /* Send update packets on proper slaves */
1175         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1176                 if (update_bufs_pkts[i] > 0) {
1177                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1178                                         update_bufs_pkts[i]);
1179                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
1180                                 rte_pktmbuf_free(update_bufs[i][j]);
1181                         }
1182 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1183                         for (j = 0; j < update_bufs_pkts[i]; j++) {
1184                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1185                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1186                         }
1187 #endif
1188                 }
1189         }
1190
1191         /* Send non-ARP packets using tlb policy */
1192         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1193                 num_send = bond_ethdev_tx_burst_tlb(queue,
1194                                 slave_bufs[RTE_MAX_ETHPORTS],
1195                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1196
1197                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1198                         bufs[nb_pkts - 1 - num_not_send - j] =
1199                                         slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1200                 }
1201
1202                 num_tx_total += num_send;
1203         }
1204
1205         return num_tx_total;
1206 }
1207
1208 static uint16_t
1209 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1210                 uint16_t nb_bufs)
1211 {
1212         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1213         struct bond_dev_private *internals = bd_tx_q->dev_private;
1214
1215         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1216         uint16_t slave_count;
1217
1218         /* 2-D array in which mbufs are sorted per slave for transmission */
1219         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1220         /* Number of mbufs for transmission on each slave */
1221         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1222         /* Mapping array generated by hash function to map mbufs to slaves */
1223         uint16_t bufs_slave_port_idxs[nb_bufs];
1224
1225         uint16_t slave_tx_count;
1226         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1227
1228         uint16_t i;
1229
1230         if (unlikely(nb_bufs == 0))
1231                 return 0;
1232
1233         /* Copy slave list to protect against slave up/down changes during tx
1234          * bursting */
1235         slave_count = internals->active_slave_count;
1236         if (unlikely(slave_count < 1))
1237                 return 0;
1238
1239         memcpy(slave_port_ids, internals->active_slaves,
1240                         sizeof(slave_port_ids[0]) * slave_count);
1241
1242         /*
1243          * Populate each slave's mbuf array with the packets to be sent on it,
1244          * selecting the output slave using a hash based on the xmit policy
1245          */
1246         internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1247                         bufs_slave_port_idxs);
1248
1249         for (i = 0; i < nb_bufs; i++) {
1250                 /* Populate slave mbuf arrays with mbufs for that slave. */
1251                 uint16_t slave_idx = bufs_slave_port_idxs[i];
1252
1253                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1254         }
1255
1256         /* Send packet burst on each slave device */
1257         for (i = 0; i < slave_count; i++) {
1258                 if (slave_nb_bufs[i] == 0)
1259                         continue;
1260
1261                 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1262                                 bd_tx_q->queue_id, slave_bufs[i],
1263                                 slave_nb_bufs[i]);
1264
1265                 total_tx_count += slave_tx_count;
1266
1267                 /* If tx burst fails move packets to end of bufs */
1268                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1269                         int slave_tx_fail_count = slave_nb_bufs[i] -
1270                                         slave_tx_count;
1271                         total_tx_fail_count += slave_tx_fail_count;
1272                         memcpy(&bufs[nb_bufs - total_tx_fail_count],
1273                                &slave_bufs[i][slave_tx_count],
1274                                slave_tx_fail_count * sizeof(bufs[0]));
1275                 }
1276         }
1277
1278         return total_tx_count;
1279 }
1280
1281 static uint16_t
1282 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1283                 uint16_t nb_bufs)
1284 {
1285         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1286         struct bond_dev_private *internals = bd_tx_q->dev_private;
1287
1288         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1289         uint16_t slave_count;
1290
1291         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1292         uint16_t dist_slave_count;
1293
1294         /* 2-D array in which mbufs are sorted per slave for transmission */
1295         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1296         /* Number of mbufs for transmission on each slave */
1297         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1298         /* Mapping array generated by hash function to map mbufs to slaves */
1299         uint16_t bufs_slave_port_idxs[nb_bufs];
1300
1301         uint16_t slave_tx_count;
1302         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1303
1304         uint16_t i;
1305
1306         /* Copy slave list to protect against slave up/down changes during tx
1307          * bursting */
1308         slave_count = internals->active_slave_count;
1309         if (unlikely(slave_count < 1))
1310                 return 0;
1311
1312         memcpy(slave_port_ids, internals->active_slaves,
1313                         sizeof(slave_port_ids[0]) * slave_count);
1314
1315         /* Check for LACP control packets and send if available */
1316         for (i = 0; i < slave_count; i++) {
1317                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1318                 struct rte_mbuf *ctrl_pkt = NULL;
1319
1320                 if (likely(rte_ring_empty(port->tx_ring)))
1321                         continue;
1322
1323                 if (rte_ring_dequeue(port->tx_ring,
1324                                      (void **)&ctrl_pkt) != -ENOENT) {
1325                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1326                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1327                         /*
1328                          * re-enqueue LAG control plane packets to buffering
1329                          * ring if transmission fails so the packet isn't lost.
1330                          */
1331                         if (slave_tx_count != 1)
1332                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1333                 }
1334         }
1335
1336         if (unlikely(nb_bufs == 0))
1337                 return 0;
1338
1339         dist_slave_count = 0;
1340         for (i = 0; i < slave_count; i++) {
1341                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1342
1343                 if (ACTOR_STATE(port, DISTRIBUTING))
1344                         dist_slave_port_ids[dist_slave_count++] =
1345                                         slave_port_ids[i];
1346         }
1347
1348         if (likely(dist_slave_count > 0)) {
1349
1350                 /*
1351                  * Populate each slave's mbuf array with the packets to be
1352                  * sent on it, selecting the output slave using a hash based on the xmit policy
1353                  */
1354                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1355                                 bufs_slave_port_idxs);
1356
1357                 for (i = 0; i < nb_bufs; i++) {
1358                         /*
1359                          * Populate slave mbuf arrays with mbufs for that
1360                          * slave
1361                          */
1362                         uint16_t slave_idx = bufs_slave_port_idxs[i];
1363
1364                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1365                                         bufs[i];
1366                 }
1367
1368
1369                 /* Send packet burst on each slave device */
1370                 for (i = 0; i < dist_slave_count; i++) {
1371                         if (slave_nb_bufs[i] == 0)
1372                                 continue;
1373
1374                         slave_tx_count = rte_eth_tx_burst(
1375                                         dist_slave_port_ids[i],
1376                                         bd_tx_q->queue_id, slave_bufs[i],
1377                                         slave_nb_bufs[i]);
1378
1379                         total_tx_count += slave_tx_count;
1380
1381                         /* If tx burst fails move packets to end of bufs */
1382                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1383                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1384                                                 slave_tx_count;
1385                                 total_tx_fail_count += slave_tx_fail_count;
1386
1387                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1388                                        &slave_bufs[i][slave_tx_count],
1389                                        slave_tx_fail_count * sizeof(bufs[0]));
1390                         }
1391                 }
1392         }
1393
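             /* As in the other tx burst handlers, packets that could not be
              * sent have been compacted to the tail of bufs[] for the caller
              * to retry; only the count actually transmitted is returned. */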
1394         return total_tx_count;
1395 }
1396
1397 static uint16_t
1398 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1399                 uint16_t nb_pkts)
1400 {
1401         struct bond_dev_private *internals;
1402         struct bond_tx_queue *bd_tx_q;
1403
1404         uint16_t slaves[RTE_MAX_ETHPORTS];
1405         uint8_t tx_failed_flag = 0;
1406         uint16_t num_of_slaves;
1407
1408         uint16_t max_nb_of_tx_pkts = 0;
1409
1410         int slave_tx_total[RTE_MAX_ETHPORTS];
1411         int i, most_successful_tx_slave = -1;
1412
1413         bd_tx_q = (struct bond_tx_queue *)queue;
1414         internals = bd_tx_q->dev_private;
1415
1416         /* Copy slave list to protect against slave up/down changes during tx
1417          * bursting */
1418         num_of_slaves = internals->active_slave_count;
1419         memcpy(slaves, internals->active_slaves,
1420                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1421
1422         if (num_of_slaves < 1)
1423                 return 0;
1424
1425         /* Increment reference count on mbufs */
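             /* (each slave's tx burst, or the failure cleanup below, drops
              * one reference per packet, so every packet needs one reference
              * per slave in total) */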
1426         for (i = 0; i < nb_pkts; i++)
1427                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1428
1429         /* Transmit burst on each active slave */
1430         for (i = 0; i < num_of_slaves; i++) {
1431                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1432                                         bufs, nb_pkts);
1433
1434                 if (unlikely(slave_tx_total[i] < nb_pkts))
1435                         tx_failed_flag = 1;
1436
1437                 /* record the value and slave index for the slave which transmits the
1438                  * maximum number of packets */
1439                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1440                         max_nb_of_tx_pkts = slave_tx_total[i];
1441                         most_successful_tx_slave = i;
1442                 }
1443         }
1444
1445         /* if slaves fail to transmit packets from burst, the calling application
1446          * is not expected to know about multiple references to packets so we must
1447          * handle failures of all packets except those of the most successful slave
1448          */
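             /* rte_pktmbuf_free() below only drops the per-slave reference
              * taken above; the mbuf itself is freed when the last reference
              * is released. */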
1449         if (unlikely(tx_failed_flag))
1450                 for (i = 0; i < num_of_slaves; i++)
1451                         if (i != most_successful_tx_slave)
1452                                 while (slave_tx_total[i] < nb_pkts)
1453                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1454
1455         return max_nb_of_tx_pkts;
1456 }
1457
1458 static void
1459 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1460 {
1461         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1462
1463         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1464                 /**
1465                  * If in mode 4 then save the link properties of the first
1466                  * slave, all subsequent slaves must match these properties
1467                  */
1468                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1469
1470                 bond_link->link_autoneg = slave_link->link_autoneg;
1471                 bond_link->link_duplex = slave_link->link_duplex;
1472                 bond_link->link_speed = slave_link->link_speed;
1473         } else {
1474                 /**
1475                  * In any other mode the link properties are set to default
1476                  * values of AUTONEG/DUPLEX
1477                  */
1478                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1479                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1480         }
1481 }
1482
1483 static int
1484 link_properties_valid(struct rte_eth_dev *ethdev,
1485                 struct rte_eth_link *slave_link)
1486 {
1487         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1488
1489         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1490                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1491
1492                 if (bond_link->link_duplex != slave_link->link_duplex ||
1493                         bond_link->link_autoneg != slave_link->link_autoneg ||
1494                         bond_link->link_speed != slave_link->link_speed)
1495                         return -1;
1496         }
1497
1498         return 0;
1499 }
1500
1501 int
1502 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1503 {
1504         struct ether_addr *mac_addr;
1505
1506         if (eth_dev == NULL) {
1507                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1508                 return -1;
1509         }
1510
1511         if (dst_mac_addr == NULL) {
1512                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1513                 return -1;
1514         }
1515
1516         mac_addr = eth_dev->data->mac_addrs;
1517
1518         ether_addr_copy(mac_addr, dst_mac_addr);
1519         return 0;
1520 }
1521
1522 int
1523 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1524 {
1525         struct ether_addr *mac_addr;
1526
1527         if (eth_dev == NULL) {
1528                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1529                 return -1;
1530         }
1531
1532         if (new_mac_addr == NULL) {
1533                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1534                 return -1;
1535         }
1536
1537         mac_addr = eth_dev->data->mac_addrs;
1538
1539         /* If new MAC is different to current MAC then update */
1540         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1541                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1542
1543         return 0;
1544 }
1545
1546 static const struct ether_addr null_mac_addr;
1547
1548 /*
1549  * Add additional MAC addresses to the slave
1550  */
1551 int
1552 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1553                 uint16_t slave_port_id)
1554 {
1555         int i, ret;
1556         struct ether_addr *mac_addr;
1557
1558         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1559                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1560                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1561                         break;
1562
1563                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1564                 if (ret < 0) {
1565                         /* rollback */
1566                         for (i--; i > 0; i--)
1567                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1568                                         &bonded_eth_dev->data->mac_addrs[i]);
1569                         return ret;
1570                 }
1571         }
1572
1573         return 0;
1574 }
1575
1576 /*
1577  * Remove additional MAC addresses from the slave
1578  */
1579 int
1580 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1581                 uint16_t slave_port_id)
1582 {
1583         int i, rc, ret;
1584         struct ether_addr *mac_addr;
1585
1586         rc = 0;
1587         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1588                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1589                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1590                         break;
1591
1592                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1593                 /* save only the first error */
1594                 if (ret < 0 && rc == 0)
1595                         rc = ret;
1596         }
1597
1598         return rc;
1599 }
1600
1601 int
1602 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1603 {
1604         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1605         int i;
1606
1607         /* Update slave devices MAC addresses */
1608         if (internals->slave_count < 1)
1609                 return -1;
1610
1611         switch (internals->mode) {
1612         case BONDING_MODE_ROUND_ROBIN:
1613         case BONDING_MODE_BALANCE:
1614         case BONDING_MODE_BROADCAST:
1615                 for (i = 0; i < internals->slave_count; i++) {
1616                         if (rte_eth_dev_default_mac_addr_set(
1617                                         internals->slaves[i].port_id,
1618                                         bonded_eth_dev->data->mac_addrs)) {
1619                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1620                                                 internals->slaves[i].port_id);
1621                                 return -1;
1622                         }
1623                 }
1624                 break;
1625         case BONDING_MODE_8023AD:
1626                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1627                 break;
1628         case BONDING_MODE_ACTIVE_BACKUP:
1629         case BONDING_MODE_TLB:
1630         case BONDING_MODE_ALB:
1631         default:
1632                 for (i = 0; i < internals->slave_count; i++) {
1633                         if (internals->slaves[i].port_id ==
1634                                         internals->current_primary_port) {
1635                                 if (rte_eth_dev_default_mac_addr_set(
1636                                                 internals->current_primary_port,
1637                                                 bonded_eth_dev->data->mac_addrs)) {
1638                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1639                                                         internals->current_primary_port);
1640                                         return -1;
1641                                 }
1642                         } else {
1643                                 if (rte_eth_dev_default_mac_addr_set(
1644                                                 internals->slaves[i].port_id,
1645                                                 &internals->slaves[i].persisted_mac_addr)) {
1646                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1647                                                         internals->slaves[i].port_id);
1648                                         return -1;
1649                                 }
1650                         }
1651                 }
1652         }
1653
1654         return 0;
1655 }
1656
1657 int
1658 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1659 {
1660         struct bond_dev_private *internals;
1661
1662         internals = eth_dev->data->dev_private;
1663
1664         switch (mode) {
1665         case BONDING_MODE_ROUND_ROBIN:
1666                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1667                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1668                 break;
1669         case BONDING_MODE_ACTIVE_BACKUP:
1670                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1671                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1672                 break;
1673         case BONDING_MODE_BALANCE:
1674                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1675                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1676                 break;
1677         case BONDING_MODE_BROADCAST:
1678                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1679                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1680                 break;
1681         case BONDING_MODE_8023AD:
1682                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1683                         return -1;
1684
1685                 if (internals->mode4.dedicated_queues.enabled == 0) {
1686                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1687                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1688                         RTE_BOND_LOG(WARNING,
1689                                 "Using mode 4, it is necessary to do TX burst "
1690                                 "and RX burst at least every 100ms.");
1691                 } else {
1692                         /* Use flow director's optimization */
1693                         eth_dev->rx_pkt_burst =
1694                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1695                         eth_dev->tx_pkt_burst =
1696                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1697                 }
1698                 break;
1699         case BONDING_MODE_TLB:
1700                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1701                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1702                 break;
1703         case BONDING_MODE_ALB:
1704                 if (bond_mode_alb_enable(eth_dev) != 0)
1705                         return -1;
1706
1707                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1708                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1709                 break;
1710         default:
1711                 return -1;
1712         }
1713
1714         internals->mode = mode;
1715
1716         return 0;
1717 }
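
     /*
      * Illustrative usage sketch (not part of the driver): the mode handled
      * above is normally chosen when the bonded device is created, either
      * from devargs (assuming the standard net_bonding vdev arguments), e.g.
      *
      *   --vdev 'net_bonding0,mode=4,slave=0000:02:00.0,slave=0000:02:00.1'
      *
      * or via the API, e.g.
      *
      *   int port = rte_eth_bond_create("net_bonding0", BONDING_MODE_8023AD,
      *                                  rte_socket_id());
      *   rte_eth_bond_slave_add(port, slave_port_id);
      *
      * both paths end up selecting the rx/tx burst handlers through
      * bond_ethdev_mode_set() above.
      */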
1718
1719
1720 static int
1721 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1722                 struct rte_eth_dev *slave_eth_dev)
1723 {
1724         int errval = 0;
1725         struct bond_dev_private *internals = (struct bond_dev_private *)
1726                 bonded_eth_dev->data->dev_private;
1727         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1728
1729         if (port->slow_pool == NULL) {
1730                 char mem_name[256];
1731                 int slave_id = slave_eth_dev->data->port_id;
1732
1733                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1734                                 slave_id);
1735                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1736                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1737                         slave_eth_dev->data->numa_node);
1738
1739         /* Any memory allocation failure during initialization is critical
1740          * because resources cannot be freed, so reinitialization is impossible. */
1741                 if (port->slow_pool == NULL) {
1742                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1743                                 slave_id, mem_name, rte_strerror(rte_errno));
1744                 }
1745         }
1746
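             /* When dedicated queues are enabled, each slave gets one extra
              * rx and one extra tx queue beyond the bonded device's data-path
              * queues (slave_configure() bumps nb_rx/tx_queues accordingly),
              * reserved for LACPDUs; slave_configure() then installs a flow
              * rule via bond_ethdev_8023ad_flow_set() to steer slow protocol
              * frames into it. */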
1747         if (internals->mode4.dedicated_queues.enabled == 1) {
1748                 /* Configure slow Rx queue */
1749
1750                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1751                                 internals->mode4.dedicated_queues.rx_qid, 128,
1752                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1753                                 NULL, port->slow_pool);
1754                 if (errval != 0) {
1755                         RTE_BOND_LOG(ERR,
1756                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1757                                         slave_eth_dev->data->port_id,
1758                                         internals->mode4.dedicated_queues.rx_qid,
1759                                         errval);
1760                         return errval;
1761                 }
1762
1763                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1764                                 internals->mode4.dedicated_queues.tx_qid, 512,
1765                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1766                                 NULL);
1767                 if (errval != 0) {
1768                         RTE_BOND_LOG(ERR,
1769                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1770                                 slave_eth_dev->data->port_id,
1771                                 internals->mode4.dedicated_queues.tx_qid,
1772                                 errval);
1773                         return errval;
1774                 }
1775         }
1776         return 0;
1777 }
1778
1779 int
1780 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1781                 struct rte_eth_dev *slave_eth_dev)
1782 {
1783         struct bond_rx_queue *bd_rx_q;
1784         struct bond_tx_queue *bd_tx_q;
1785         uint16_t nb_rx_queues;
1786         uint16_t nb_tx_queues;
1787
1788         int errval;
1789         uint16_t q_id;
1790         struct rte_flow_error flow_error;
1791
1792         struct bond_dev_private *internals = (struct bond_dev_private *)
1793                 bonded_eth_dev->data->dev_private;
1794
1795         /* Stop slave */
1796         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1797
1798         /* Enable interrupts on slave device if supported */
1799         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1800                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1801
1802         /* If RSS is enabled for bonding, try to enable it for slaves  */
1803         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1804                 if (internals->rss_key_len != 0) {
1805                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1806                                         internals->rss_key_len;
1807                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1808                                         internals->rss_key;
1809                 } else {
1810                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1811                 }
1812
1813                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1814                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1815                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1816                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1817         }
1818
1819         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1820                         DEV_RX_OFFLOAD_VLAN_FILTER)
1821                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1822                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1823         else
1824                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1825                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1826
1827         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1828         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1829
1830         if (internals->mode == BONDING_MODE_8023AD) {
1831                 if (internals->mode4.dedicated_queues.enabled == 1) {
1832                         nb_rx_queues++;
1833                         nb_tx_queues++;
1834                 }
1835         }
1836
1837         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1838                                      bonded_eth_dev->data->mtu);
1839         if (errval != 0 && errval != -ENOTSUP) {
1840                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1841                                 slave_eth_dev->data->port_id, errval);
1842                 return errval;
1843         }
1844
1845         /* Configure device */
1846         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1847                         nb_rx_queues, nb_tx_queues,
1848                         &(slave_eth_dev->data->dev_conf));
1849         if (errval != 0) {
1850                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1851                                 slave_eth_dev->data->port_id, errval);
1852                 return errval;
1853         }
1854
1855         /* Setup Rx Queues */
1856         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1857                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1858
1859                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1860                                 bd_rx_q->nb_rx_desc,
1861                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1862                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1863                 if (errval != 0) {
1864                         RTE_BOND_LOG(ERR,
1865                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1866                                         slave_eth_dev->data->port_id, q_id, errval);
1867                         return errval;
1868                 }
1869         }
1870
1871         /* Setup Tx Queues */
1872         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1873                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1874
1875                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1876                                 bd_tx_q->nb_tx_desc,
1877                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1878                                 &bd_tx_q->tx_conf);
1879                 if (errval != 0) {
1880                         RTE_BOND_LOG(ERR,
1881                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1882                                 slave_eth_dev->data->port_id, q_id, errval);
1883                         return errval;
1884                 }
1885         }
1886
1887         if (internals->mode == BONDING_MODE_8023AD &&
1888                         internals->mode4.dedicated_queues.enabled == 1) {
1889                 errval = slave_configure_slow_queue(bonded_eth_dev,
1890                                 slave_eth_dev);
1891                 if (errval != 0)
1892                         return errval;
1893
1894                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1895                                 slave_eth_dev->data->port_id) != 0) {
1896                         RTE_BOND_LOG(ERR, "8023ad flow verify failed: port=%d",
1897                                 slave_eth_dev->data->port_id);
1898                         return -1;
1899                 }
1900
1901                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1902                         rte_flow_destroy(slave_eth_dev->data->port_id,
1903                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1904                                         &flow_error);
1905
1906                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1907                                 slave_eth_dev->data->port_id);
1908         }
1909
1910         /* Start device */
1911         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1912         if (errval != 0) {
1913                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1914                                 slave_eth_dev->data->port_id, errval);
1915                 return -1;
1916         }
1917
1918         /* If RSS is enabled for bonding, synchronize RETA */
1919         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1920                 int i;
1921                 struct bond_dev_private *internals;
1922
1923                 internals = bonded_eth_dev->data->dev_private;
1924
1925                 for (i = 0; i < internals->slave_count; i++) {
1926                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1927                                 errval = rte_eth_dev_rss_reta_update(
1928                                                 slave_eth_dev->data->port_id,
1929                                                 &internals->reta_conf[0],
1930                                                 internals->slaves[i].reta_size);
1931                                 if (errval != 0) {
1932                                         RTE_BOND_LOG(WARNING,
1933                                                      "rte_eth_dev_rss_reta_update on slave port %d failed (err %d)."
1934                                                      " RSS configuration for bonding may be inconsistent.",
1935                                                      slave_eth_dev->data->port_id, errval);
1936                                 }
1937                                 break;
1938                         }
1939                 }
1940         }
1941
1942         /* If lsc interrupt is set, check initial slave's link status */
1943         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1944                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1945                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1946                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1947                         NULL);
1948         }
1949
1950         return 0;
1951 }
1952
1953 void
1954 slave_remove(struct bond_dev_private *internals,
1955                 struct rte_eth_dev *slave_eth_dev)
1956 {
1957         uint16_t i;
1958
1959         for (i = 0; i < internals->slave_count; i++)
1960                 if (internals->slaves[i].port_id ==
1961                                 slave_eth_dev->data->port_id)
1962                         break;
1963
1964         if (i < (internals->slave_count - 1)) {
1965                 struct rte_flow *flow;
1966
1967                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1968                                 sizeof(internals->slaves[0]) *
1969                                 (internals->slave_count - i - 1));
1970                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1971                         memmove(&flow->flows[i], &flow->flows[i + 1],
1972                                 sizeof(flow->flows[0]) *
1973                                 (internals->slave_count - i - 1));
1974                         flow->flows[internals->slave_count - 1] = NULL;
1975                 }
1976         }
1977
1978         internals->slave_count--;
1979
1980         /* force reconfiguration of slave interfaces */
1981         _rte_eth_dev_reset(slave_eth_dev);
1982 }
1983
1984 static void
1985 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1986
1987 void
1988 slave_add(struct bond_dev_private *internals,
1989                 struct rte_eth_dev *slave_eth_dev)
1990 {
1991         struct bond_slave_details *slave_details =
1992                         &internals->slaves[internals->slave_count];
1993
1994         slave_details->port_id = slave_eth_dev->data->port_id;
1995         slave_details->last_link_status = 0;
1996
1997         /* Mark slave devices that don't support interrupts so we can
1998          * compensate when we start the bond
1999          */
2000         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2001                 slave_details->link_status_poll_enabled = 1;
2002         }
2003
2004         slave_details->link_status_wait_to_complete = 0;
2005         /* save the slave's current MAC address so it can be restored later */
2006         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2007                         sizeof(struct ether_addr));
2008 }
2009
2010 void
2011 bond_ethdev_primary_set(struct bond_dev_private *internals,
2012                 uint16_t slave_port_id)
2013 {
2014         int i;
2015
2016         if (internals->active_slave_count < 1)
2017                 internals->current_primary_port = slave_port_id;
2018         else
2019                 /* Search bonded device slave ports for new proposed primary port */
2020                 for (i = 0; i < internals->active_slave_count; i++) {
2021                         if (internals->active_slaves[i] == slave_port_id)
2022                                 internals->current_primary_port = slave_port_id;
2023                 }
2024 }
2025
2026 static void
2027 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2028
2029 static int
2030 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2031 {
2032         struct bond_dev_private *internals;
2033         int i;
2034
2035         /* slave eth dev will be started by bonded device */
2036         if (check_for_bonded_ethdev(eth_dev)) {
2037                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2038                                 eth_dev->data->port_id);
2039                 return -1;
2040         }
2041
2042         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2043         eth_dev->data->dev_started = 1;
2044
2045         internals = eth_dev->data->dev_private;
2046
2047         if (internals->slave_count == 0) {
2048                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2049                 goto out_err;
2050         }
2051
2052         if (internals->user_defined_mac == 0) {
2053                 struct ether_addr *new_mac_addr = NULL;
2054
2055                 for (i = 0; i < internals->slave_count; i++)
2056                         if (internals->slaves[i].port_id == internals->primary_port)
2057                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2058
2059                 if (new_mac_addr == NULL)
2060                         goto out_err;
2061
2062                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2063                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2064                                         eth_dev->data->port_id);
2065                         goto out_err;
2066                 }
2067         }
2068
2069         /* If bonded device is configured in promiscuous mode then re-apply config */
2070         if (internals->promiscuous_en)
2071                 bond_ethdev_promiscuous_enable(eth_dev);
2072
2073         if (internals->mode == BONDING_MODE_8023AD) {
2074                 if (internals->mode4.dedicated_queues.enabled == 1) {
2075                         internals->mode4.dedicated_queues.rx_qid =
2076                                         eth_dev->data->nb_rx_queues;
2077                         internals->mode4.dedicated_queues.tx_qid =
2078                                         eth_dev->data->nb_tx_queues;
2079                 }
2080         }
2081
2082
2083         /* Reconfigure each slave device if starting bonded device */
2084         for (i = 0; i < internals->slave_count; i++) {
2085                 struct rte_eth_dev *slave_ethdev =
2086                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2087                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2088                         RTE_BOND_LOG(ERR,
2089                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2090                                 eth_dev->data->port_id,
2091                                 internals->slaves[i].port_id);
2092                         goto out_err;
2093                 }
2094                 /* We will need to poll for link status if any slave doesn't
2095                  * support interrupts
2096                  */
2097                 if (internals->slaves[i].link_status_poll_enabled)
2098                         internals->link_status_polling_enabled = 1;
2099         }
2100
2101         /* start polling if needed */
2102         if (internals->link_status_polling_enabled) {
2103                 rte_eal_alarm_set(
2104                         internals->link_status_polling_interval_ms * 1000,
2105                         bond_ethdev_slave_link_status_change_monitor,
2106                         (void *)&rte_eth_devices[internals->port_id]);
2107         }
2108
2109         /* Update all slave devices' MACs */
2110         if (mac_address_slaves_update(eth_dev) != 0)
2111                 goto out_err;
2112
2113         if (internals->user_defined_primary_port)
2114                 bond_ethdev_primary_set(internals, internals->primary_port);
2115
2116         if (internals->mode == BONDING_MODE_8023AD)
2117                 bond_mode_8023ad_start(eth_dev);
2118
2119         if (internals->mode == BONDING_MODE_TLB ||
2120                         internals->mode == BONDING_MODE_ALB)
2121                 bond_tlb_enable(internals);
2122
2123         return 0;
2124
2125 out_err:
2126         eth_dev->data->dev_started = 0;
2127         return -1;
2128 }
2129
2130 static void
2131 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2132 {
2133         uint16_t i;
2134
2135         if (dev->data->rx_queues != NULL) {
2136                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2137                         rte_free(dev->data->rx_queues[i]);
2138                         dev->data->rx_queues[i] = NULL;
2139                 }
2140                 dev->data->nb_rx_queues = 0;
2141         }
2142
2143         if (dev->data->tx_queues != NULL) {
2144                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2145                         rte_free(dev->data->tx_queues[i]);
2146                         dev->data->tx_queues[i] = NULL;
2147                 }
2148                 dev->data->nb_tx_queues = 0;
2149         }
2150 }
2151
2152 void
2153 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2154 {
2155         struct bond_dev_private *internals = eth_dev->data->dev_private;
2156         uint16_t i;
2157
2158         if (internals->mode == BONDING_MODE_8023AD) {
2159                 struct port *port;
2160                 void *pkt = NULL;
2161
2162                 bond_mode_8023ad_stop(eth_dev);
2163
2164                 /* Discard all messages to/from mode 4 state machines */
2165                 for (i = 0; i < internals->active_slave_count; i++) {
2166                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2167
2168                         RTE_ASSERT(port->rx_ring != NULL);
2169                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2170                                 rte_pktmbuf_free(pkt);
2171
2172                         RTE_ASSERT(port->tx_ring != NULL);
2173                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2174                                 rte_pktmbuf_free(pkt);
2175                 }
2176         }
2177
2178         if (internals->mode == BONDING_MODE_TLB ||
2179                         internals->mode == BONDING_MODE_ALB) {
2180                 bond_tlb_disable(internals);
2181                 for (i = 0; i < internals->active_slave_count; i++)
2182                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2183         }
2184
2185         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2186         eth_dev->data->dev_started = 0;
2187
2188         internals->link_status_polling_enabled = 0;
2189         for (i = 0; i < internals->slave_count; i++) {
2190                 uint16_t slave_id = internals->slaves[i].port_id;
2191                 if (find_slave_by_id(internals->active_slaves,
2192                                 internals->active_slave_count, slave_id) !=
2193                                                 internals->active_slave_count) {
2194                         internals->slaves[i].last_link_status = 0;
2195                         rte_eth_dev_stop(slave_id);
2196                         deactivate_slave(eth_dev, slave_id);
2197                 }
2198         }
2199 }
2200
2201 void
2202 bond_ethdev_close(struct rte_eth_dev *dev)
2203 {
2204         struct bond_dev_private *internals = dev->data->dev_private;
2205         uint16_t bond_port_id = internals->port_id;
2206         int skipped = 0;
2207         struct rte_flow_error ferror;
2208
2209         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2210         while (internals->slave_count != skipped) {
2211                 uint16_t port_id = internals->slaves[skipped].port_id;
2212
2213                 rte_eth_dev_stop(port_id);
2214
2215                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2216                         RTE_BOND_LOG(ERR,
2217                                      "Failed to remove port %d from bonded device %s",
2218                                      port_id, dev->device->name);
2219                         skipped++;
2220                 }
2221         }
2222         bond_flow_ops.flush(dev, &ferror);
2223         bond_ethdev_free_queues(dev);
2224         rte_bitmap_reset(internals->vlan_filter_bmp);
2225 }
2226
2227 /* forward declaration */
2228 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2229
2230 static void
2231 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2232 {
2233         struct bond_dev_private *internals = dev->data->dev_private;
2234
2235         uint16_t max_nb_rx_queues = UINT16_MAX;
2236         uint16_t max_nb_tx_queues = UINT16_MAX;
2237         uint16_t max_rx_desc_lim = UINT16_MAX;
2238         uint16_t max_tx_desc_lim = UINT16_MAX;
2239
2240         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2241
2242         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2243                         internals->candidate_max_rx_pktlen :
2244                         ETHER_MAX_JUMBO_FRAME_LEN;
2245
2246         /* Max number of tx/rx queues that the bonded device can support is the
2247          * minimum values of the bonded slaves, as all slaves must be capable
2248          * of supporting the same number of tx/rx queues.
2249          */
2250         if (internals->slave_count > 0) {
2251                 struct rte_eth_dev_info slave_info;
2252                 uint16_t idx;
2253
2254                 for (idx = 0; idx < internals->slave_count; idx++) {
2255                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2256                                         &slave_info);
2257
2258                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2259                                 max_nb_rx_queues = slave_info.max_rx_queues;
2260
2261                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2262                                 max_nb_tx_queues = slave_info.max_tx_queues;
2263
2264                         if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2265                                 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2266
2267                         if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2268                                 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2269                 }
2270         }
2271
2272         dev_info->max_rx_queues = max_nb_rx_queues;
2273         dev_info->max_tx_queues = max_nb_tx_queues;
2274
2275         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2276                sizeof(dev_info->default_rxconf));
2277         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2278                sizeof(dev_info->default_txconf));
2279
2280         dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2281         dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2282
2283         /**
2284          * If dedicated hw queues enabled for link bonding device in LACP mode
2285          * then we need to reduce the maximum number of data path queues by 1.
2286          */
2287         if (internals->mode == BONDING_MODE_8023AD &&
2288                 internals->mode4.dedicated_queues.enabled == 1) {
2289                 dev_info->max_rx_queues--;
2290                 dev_info->max_tx_queues--;
2291         }
2292
2293         dev_info->min_rx_bufsize = 0;
2294
2295         dev_info->rx_offload_capa = internals->rx_offload_capa;
2296         dev_info->tx_offload_capa = internals->tx_offload_capa;
2297         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2298         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2299         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2300
2301         dev_info->reta_size = internals->reta_size;
2302 }
2303
2304 static int
2305 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2306 {
2307         int res;
2308         uint16_t i;
2309         struct bond_dev_private *internals = dev->data->dev_private;
2310
2311         /* don't do this while a slave is being added */
2312         rte_spinlock_lock(&internals->lock);
2313
2314         if (on)
2315                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2316         else
2317                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2318
2319         for (i = 0; i < internals->slave_count; i++) {
2320                 uint16_t port_id = internals->slaves[i].port_id;
2321
2322                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2323                 if (res == -ENOTSUP)
2324                         RTE_BOND_LOG(WARNING,
2325                                      "Setting VLAN filter on slave port %u not supported.",
2326                                      port_id);
2327         }
2328
2329         rte_spinlock_unlock(&internals->lock);
2330         return 0;
2331 }
2332
2333 static int
2334 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2335                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2336                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2337 {
2338         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2339                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2340                                         0, dev->data->numa_node);
2341         if (bd_rx_q == NULL)
2342                 return -1;
2343
2344         bd_rx_q->queue_id = rx_queue_id;
2345         bd_rx_q->dev_private = dev->data->dev_private;
2346
2347         bd_rx_q->nb_rx_desc = nb_rx_desc;
2348
2349         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2350         bd_rx_q->mb_pool = mb_pool;
2351
2352         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2353
2354         return 0;
2355 }
2356
2357 static int
2358 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2359                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2360                 const struct rte_eth_txconf *tx_conf)
2361 {
2362         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2363                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2364                                         0, dev->data->numa_node);
2365
2366         if (bd_tx_q == NULL)
2367                 return -1;
2368
2369         bd_tx_q->queue_id = tx_queue_id;
2370         bd_tx_q->dev_private = dev->data->dev_private;
2371
2372         bd_tx_q->nb_tx_desc = nb_tx_desc;
2373         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2374
2375         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2376
2377         return 0;
2378 }
2379
2380 static void
2381 bond_ethdev_rx_queue_release(void *queue)
2382 {
2383         if (queue == NULL)
2384                 return;
2385
2386         rte_free(queue);
2387 }
2388
2389 static void
2390 bond_ethdev_tx_queue_release(void *queue)
2391 {
2392         if (queue == NULL)
2393                 return;
2394
2395         rte_free(queue);
2396 }
2397
2398 static void
2399 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2400 {
2401         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2402         struct bond_dev_private *internals;
2403
2404         /* Default value for polling slave found is true as we don't want to
2405          * disable the polling thread if we cannot get the lock */
2406         int i, polling_slave_found = 1;
2407
2408         if (cb_arg == NULL)
2409                 return;
2410
2411         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2412         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2413
2414         if (!bonded_ethdev->data->dev_started ||
2415                 !internals->link_status_polling_enabled)
2416                 return;
2417
2418         /* If device is currently being configured then don't check slaves link
2419          * status, wait until next period */
2420         if (rte_spinlock_trylock(&internals->lock)) {
2421                 if (internals->slave_count > 0)
2422                         polling_slave_found = 0;
2423
2424                 for (i = 0; i < internals->slave_count; i++) {
2425                         if (!internals->slaves[i].link_status_poll_enabled)
2426                                 continue;
2427
2428                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2429                         polling_slave_found = 1;
2430
2431                         /* Update slave link status */
2432                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2433                                         internals->slaves[i].link_status_wait_to_complete);
2434
2435                         /* if link status has changed since last checked then call lsc
2436                          * event callback */
2437                         if (slave_ethdev->data->dev_link.link_status !=
2438                                         internals->slaves[i].last_link_status) {
2439                                 internals->slaves[i].last_link_status =
2440                                                 slave_ethdev->data->dev_link.link_status;
2441
2442                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2443                                                 RTE_ETH_EVENT_INTR_LSC,
2444                                                 &bonded_ethdev->data->port_id,
2445                                                 NULL);
2446                         }
2447                 }
2448                 rte_spinlock_unlock(&internals->lock);
2449         }
2450
2451         if (polling_slave_found)
2452                 /* Set alarm to continue monitoring link status of slave ethdev's */
2453                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2454                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2455 }
2456
2457 static int
2458 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2459 {
2460         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2461
2462         struct bond_dev_private *bond_ctx;
2463         struct rte_eth_link slave_link;
2464
2465         uint32_t idx;
2466
2467         bond_ctx = ethdev->data->dev_private;
2468
2469         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2470
2471         if (ethdev->data->dev_started == 0 ||
2472                         bond_ctx->active_slave_count == 0) {
2473                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2474                 return 0;
2475         }
2476
2477         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2478
2479         if (wait_to_complete)
2480                 link_update = rte_eth_link_get;
2481         else
2482                 link_update = rte_eth_link_get_nowait;
2483
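
             /* Aggregate link speed depends on the mode: broadcast reports
              * the minimum of the active slaves, active-backup the current
              * primary slave's speed, and all other modes the sum of the
              * active slaves' speeds. */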
2484         switch (bond_ctx->mode) {
2485         case BONDING_MODE_BROADCAST:
2486                 /**
2487                  * Setting link speed to UINT32_MAX to ensure we pick up the
2488                  * value of the first active slave
2489                  */
2490                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2491
2492                 /**
2493                  * Link speed is the minimum of all the slaves' link speeds,
2494                  * as packet loss would occur on the slowest slave if
2495                  * transmission at a higher rate were attempted
2496                  */
2497                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2498                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2499
2500                         if (slave_link.link_speed <
2501                                         ethdev->data->dev_link.link_speed)
2502                                 ethdev->data->dev_link.link_speed =
2503                                                 slave_link.link_speed;
2504                 }
2505                 break;
2506         case BONDING_MODE_ACTIVE_BACKUP:
2507                 /* Current primary slave */
2508                 link_update(bond_ctx->current_primary_port, &slave_link);
2509
2510                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2511                 break;
2512         case BONDING_MODE_8023AD:
2513                 ethdev->data->dev_link.link_autoneg =
2514                                 bond_ctx->mode4.slave_link.link_autoneg;
2515                 ethdev->data->dev_link.link_duplex =
2516                                 bond_ctx->mode4.slave_link.link_duplex;
2517                 /* fall through to update link speed */
2518         case BONDING_MODE_ROUND_ROBIN:
2519         case BONDING_MODE_BALANCE:
2520         case BONDING_MODE_TLB:
2521         case BONDING_MODE_ALB:
2522         default:
2523                 /**
2524                  * In these modes the maximum theoretical link speed is the
2525                  * sum of all the slaves' link speeds
2526                  */
2527                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2528
2529                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2530                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2531
2532                         ethdev->data->dev_link.link_speed +=
2533                                         slave_link.link_speed;
2534                 }
2535         }
2536
2537
2538         return 0;
2539 }
2540
2541
2542 static int
2543 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2544 {
2545         struct bond_dev_private *internals = dev->data->dev_private;
2546         struct rte_eth_stats slave_stats;
2547         int i, j;
2548
2549         for (i = 0; i < internals->slave_count; i++) {
2550                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2551
2552                 stats->ipackets += slave_stats.ipackets;
2553                 stats->opackets += slave_stats.opackets;
2554                 stats->ibytes += slave_stats.ibytes;
2555                 stats->obytes += slave_stats.obytes;
2556                 stats->imissed += slave_stats.imissed;
2557                 stats->ierrors += slave_stats.ierrors;
2558                 stats->oerrors += slave_stats.oerrors;
2559                 stats->rx_nombuf += slave_stats.rx_nombuf;
2560
2561                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2562                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2563                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2564                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2565                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2566                         stats->q_errors[j] += slave_stats.q_errors[j];
2567                 }
2568
2569         }
2570
2571         return 0;
2572 }
2573
2574 static void
2575 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2576 {
2577         struct bond_dev_private *internals = dev->data->dev_private;
2578         int i;
2579
2580         for (i = 0; i < internals->slave_count; i++)
2581                 rte_eth_stats_reset(internals->slaves[i].port_id);
2582 }
2583
2584 static void
2585 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2586 {
2587         struct bond_dev_private *internals = eth_dev->data->dev_private;
2588         int i;
2589
2590         internals->promiscuous_en = 1;
2591
2592         switch (internals->mode) {
2593         /* Promiscuous mode is propagated to all slaves */
2594         case BONDING_MODE_ROUND_ROBIN:
2595         case BONDING_MODE_BALANCE:
2596         case BONDING_MODE_BROADCAST:
2597                 for (i = 0; i < internals->slave_count; i++)
2598                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2599                 break;
2600         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2601         case BONDING_MODE_8023AD:
2602                 break;
2603         /* Promiscuous mode is propagated only to primary slave */
2604         case BONDING_MODE_ACTIVE_BACKUP:
2605         case BONDING_MODE_TLB:
2606         case BONDING_MODE_ALB:
2607         default:
2608                 /* Do not touch promisc when there cannot be primary ports */
2609                 if (internals->slave_count == 0)
2610                         break;
2611                 rte_eth_promiscuous_enable(internals->current_primary_port);
2612         }
2613 }
2614
2615 static void
2616 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2617 {
2618         struct bond_dev_private *internals = dev->data->dev_private;
2619         int i;
2620
2621         internals->promiscuous_en = 0;
2622
2623         switch (internals->mode) {
2624         /* Promiscuous mode is propagated to all slaves */
2625         case BONDING_MODE_ROUND_ROBIN:
2626         case BONDING_MODE_BALANCE:
2627         case BONDING_MODE_BROADCAST:
2628                 for (i = 0; i < internals->slave_count; i++)
2629                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2630                 break;
2631         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2632         case BONDING_MODE_8023AD:
2633                 break;
2634         /* Promiscuous mode is propagated only to primary slave */
2635         case BONDING_MODE_ACTIVE_BACKUP:
2636         case BONDING_MODE_TLB:
2637         case BONDING_MODE_ALB:
2638         default:
2639                 /* Do not touch promisc when there cannot be primary ports */
2640                 if (internals->slave_count == 0)
2641                         break;
2642                 rte_eth_promiscuous_disable(internals->current_primary_port);
2643         }
2644 }
2645
2646 static void
2647 bond_ethdev_delayed_lsc_propagation(void *arg)
2648 {
2649         if (arg == NULL)
2650                 return;
2651
2652         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2653                         RTE_ETH_EVENT_INTR_LSC, NULL);
2654 }
2655
2656 int
2657 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2658                 void *param, void *ret_param __rte_unused)
2659 {
2660         struct rte_eth_dev *bonded_eth_dev;
2661         struct bond_dev_private *internals;
2662         struct rte_eth_link link;
2663         int rc = -1;
2664
2665         uint8_t lsc_flag = 0;
2666         int valid_slave = 0;
2667         uint16_t active_pos;
2668         uint16_t i;
2669
2670         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2671                 return rc;
2672
2673         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2674
2675         if (check_for_bonded_ethdev(bonded_eth_dev))
2676                 return rc;
2677
2678         internals = bonded_eth_dev->data->dev_private;
2679
2680         /* If the device isn't started don't handle interrupts */
2681         if (!bonded_eth_dev->data->dev_started)
2682                 return rc;
2683
2684         /* verify that port_id is a valid slave of bonded port */
2685         for (i = 0; i < internals->slave_count; i++) {
2686                 if (internals->slaves[i].port_id == port_id) {
2687                         valid_slave = 1;
2688                         break;
2689                 }
2690         }
2691
2692         if (!valid_slave)
2693                 return rc;
2694
2695         /* Serialize concurrent lsc callback invocations, whether triggered by a
2696          * real link event from the slave PMDs or by the bonding PMD itself.
2697          */
2698         rte_spinlock_lock(&internals->lsc_lock);
2699
2700         /* Search for port in active port list */
2701         active_pos = find_slave_by_id(internals->active_slaves,
2702                         internals->active_slave_count, port_id);
2703
2704         rte_eth_link_get_nowait(port_id, &link);
2705         if (link.link_status) {
2706                 if (active_pos < internals->active_slave_count)
2707                         goto link_update;
2708
2709                 /* Check link state properties if the bonded link is up */
2710                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2711                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2712                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2713                                              "for slave %d in bonding mode %d",
2714                                              port_id, internals->mode);
2715                 } else {
2716                         /* inherit slave link properties */
2717                         link_properties_set(bonded_eth_dev, &link);
2718                 }
2719
2720                 /* If there are no active slave ports then set this port to be
2721                  * the primary port.
2722                  */
2723                 if (internals->active_slave_count < 1) {
2724                         /* If first active slave, then change link status */
2725                         bonded_eth_dev->data->dev_link.link_status =
2726                                                                 ETH_LINK_UP;
2727                         internals->current_primary_port = port_id;
2728                         lsc_flag = 1;
2729
2730                         mac_address_slaves_update(bonded_eth_dev);
2731                 }
2732
2733                 activate_slave(bonded_eth_dev, port_id);
2734
2735                 /* If the user has defined the primary port then default to
2736                  * using it.
2737                  */
2738                 if (internals->user_defined_primary_port &&
2739                                 internals->primary_port == port_id)
2740                         bond_ethdev_primary_set(internals, port_id);
2741         } else {
2742                 if (active_pos == internals->active_slave_count)
2743                         goto link_update;
2744
2745                 /* Remove from active slave list */
2746                 deactivate_slave(bonded_eth_dev, port_id);
2747
2748                 if (internals->active_slave_count < 1)
2749                         lsc_flag = 1;
2750
2751                 /* Update primary id: take the first active slave from the list,
2752                  * or fall back to the configured primary port if none is available */
2753                 if (port_id == internals->current_primary_port) {
2754                         if (internals->active_slave_count > 0)
2755                                 bond_ethdev_primary_set(internals,
2756                                                 internals->active_slaves[0]);
2757                         else
2758                                 internals->current_primary_port = internals->primary_port;
2759                 }
2760         }
2761
2762 link_update:
2763         /**
2764          * Update bonded device link properties after any change to active
2765          * slaves
2766          */
2767         bond_ethdev_link_update(bonded_eth_dev, 0);
2768
2769         if (lsc_flag) {
2770                 /* Cancel any possible outstanding interrupts if delays are enabled */
2771                 if (internals->link_up_delay_ms > 0 ||
2772                         internals->link_down_delay_ms > 0)
2773                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2774                                         bonded_eth_dev);
2775
2776                 if (bonded_eth_dev->data->dev_link.link_status) {
2777                         if (internals->link_up_delay_ms > 0)
2778                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2779                                                 bond_ethdev_delayed_lsc_propagation,
2780                                                 (void *)bonded_eth_dev);
2781                         else
2782                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2783                                                 RTE_ETH_EVENT_INTR_LSC,
2784                                                 NULL);
2785
2786                 } else {
2787                         if (internals->link_down_delay_ms > 0)
2788                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2789                                                 bond_ethdev_delayed_lsc_propagation,
2790                                                 (void *)bonded_eth_dev);
2791                         else
2792                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2793                                                 RTE_ETH_EVENT_INTR_LSC,
2794                                                 NULL);
2795                 }
2796         }
2797
2798         rte_spinlock_unlock(&internals->lsc_lock);
2799
2800         return rc;
2801 }
2802
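/*
 * Example (illustrative sketch, not part of the driver): an application
 * may register its own LSC callback on the bonded port; the bonding PMD
 * re-raises RTE_ETH_EVENT_INTR_LSC (optionally delayed) as shown above.
 * The callback body and helper names are assumptions.
 */
static int
example_lsc_cb(uint16_t port_id, enum rte_eth_event_type type,
                void *cb_arg __rte_unused, void *ret_param __rte_unused)
{
        struct rte_eth_link link;

        if (type != RTE_ETH_EVENT_INTR_LSC)
                return 0;

        rte_eth_link_get_nowait(port_id, &link);
        RTE_BOND_LOG(INFO, "Bonded port %u link is %s", port_id,
                     link.link_status ? "up" : "down");
        return 0;
}

static __rte_unused void
example_register_lsc(uint16_t bonded_port_id)
{
        rte_eth_dev_callback_register(bonded_port_id, RTE_ETH_EVENT_INTR_LSC,
                        example_lsc_cb, NULL);
}
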
2803 static int
2804 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2805                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2806 {
2807         unsigned i, j;
2808         int result = 0;
2809         int slave_reta_size;
2810         unsigned reta_count;
2811         struct bond_dev_private *internals = dev->data->dev_private;
2812
2813         if (reta_size != internals->reta_size)
2814                 return -EINVAL;
2815
2816         /* Copy RETA table */
2817         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2818
2819         for (i = 0; i < reta_count; i++) {
2820                 internals->reta_conf[i].mask = reta_conf[i].mask;
2821                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2822                         if ((reta_conf[i].mask >> j) & 0x01)
2823                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2824         }
2825
2826         /* Fill rest of array */
2827         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2828                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2829                                 sizeof(internals->reta_conf[0]) * reta_count);
2830
2831         /* Propagate RETA over slaves */
2832         for (i = 0; i < internals->slave_count; i++) {
2833                 slave_reta_size = internals->slaves[i].reta_size;
2834                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2835                                 &internals->reta_conf[0], slave_reta_size);
2836                 if (result < 0)
2837                         return result;
2838         }
2839
2840         return 0;
2841 }
2842
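/*
 * Example (illustrative sketch, not part of the driver): filling a RETA,
 * here assumed to have 512 entries to match internals->reta_size,
 * round-robin over the Rx queues and pushing it to the bonded port, which
 * propagates it to every slave as implemented above.
 */
static __rte_unused int
example_bond_fill_reta(uint16_t bonded_port_id, uint16_t nb_rx_queues)
{
        struct rte_eth_rss_reta_entry64 reta_conf[512 / RTE_RETA_GROUP_SIZE];
        unsigned int i, j;

        for (i = 0; i < RTE_DIM(reta_conf); i++) {
                reta_conf[i].mask = ~0ULL;
                for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
                        reta_conf[i].reta[j] =
                                (i * RTE_RETA_GROUP_SIZE + j) % nb_rx_queues;
        }

        return rte_eth_dev_rss_reta_update(bonded_port_id, reta_conf, 512);
}
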
2843 static int
2844 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2845                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2846 {
2847         int i, j;
2848         struct bond_dev_private *internals = dev->data->dev_private;
2849
2850         if (reta_size != internals->reta_size)
2851                 return -EINVAL;
2852
2853         /* Copy RETA table */
2854         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2855                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2856                         if ((reta_conf[i].mask >> j) & 0x01)
2857                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2858
2859         return 0;
2860 }
2861
2862 static int
2863 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2864                 struct rte_eth_rss_conf *rss_conf)
2865 {
2866         int i, result = 0;
2867         struct bond_dev_private *internals = dev->data->dev_private;
2868         struct rte_eth_rss_conf bond_rss_conf;
2869
2870         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2871
2872         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2873
2874         if (bond_rss_conf.rss_hf != 0)
2875                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2876
2877         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2878                         sizeof(internals->rss_key)) {
2879                 if (bond_rss_conf.rss_key_len == 0)
2880                         bond_rss_conf.rss_key_len = 40;
2881                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2882                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2883                                 internals->rss_key_len);
2884         }
2885
2886         for (i = 0; i < internals->slave_count; i++) {
2887                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2888                                 &bond_rss_conf);
2889                 if (result < 0)
2890                         return result;
2891         }
2892
2893         return 0;
2894 }
2895
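/*
 * Example (illustrative sketch, not part of the driver): restricting the
 * bonded port's RSS hash to IPv4 and IPv4/TCP traffic. Bits outside
 * internals->flow_type_rss_offloads are masked off by the handler above,
 * and a NULL key keeps the current RSS key. The helper name is an
 * assumption.
 */
static __rte_unused int
example_bond_set_rss_hash(uint16_t bonded_port_id)
{
        struct rte_eth_rss_conf rss_conf = {
                .rss_key = NULL,        /* keep the current key */
                .rss_key_len = 0,
                .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
        };

        return rte_eth_dev_rss_hash_update(bonded_port_id, &rss_conf);
}
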
2896 static int
2897 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2898                 struct rte_eth_rss_conf *rss_conf)
2899 {
2900         struct bond_dev_private *internals = dev->data->dev_private;
2901
2902         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2903         rss_conf->rss_key_len = internals->rss_key_len;
2904         if (rss_conf->rss_key)
2905                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2906
2907         return 0;
2908 }
2909
2910 static int
2911 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2912 {
2913         struct rte_eth_dev *slave_eth_dev;
2914         struct bond_dev_private *internals = dev->data->dev_private;
2915         int ret, i;
2916
2917         rte_spinlock_lock(&internals->lock);
2918
2919         for (i = 0; i < internals->slave_count; i++) {
2920                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2921                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2922                         rte_spinlock_unlock(&internals->lock);
2923                         return -ENOTSUP;
2924                 }
2925         }
2926         for (i = 0; i < internals->slave_count; i++) {
2927                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2928                 if (ret < 0) {
2929                         rte_spinlock_unlock(&internals->lock);
2930                         return ret;
2931                 }
2932         }
2933
2934         rte_spinlock_unlock(&internals->lock);
2935         return 0;
2936 }
2937
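/*
 * Example (illustrative sketch, not part of the driver): setting a jumbo
 * MTU on the bonded port. The handler above first checks that every slave
 * implements mtu_set and then applies the value to all of them; the 9000
 * byte value is an assumption.
 */
static __rte_unused int
example_bond_set_jumbo_mtu(uint16_t bonded_port_id)
{
        return rte_eth_dev_set_mtu(bonded_port_id, 9000);
}
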
2938 static int
2939 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2940 {
2941         if (mac_address_set(dev, addr)) {
2942                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2943                 return -EINVAL;
2944         }
2945
2946         return 0;
2947 }
2948
2949 static int
2950 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2951                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2952 {
2953         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2954                 *(const void **)arg = &bond_flow_ops;
2955                 return 0;
2956         }
2957         return -ENOTSUP;
2958 }
2959
2960 static int
2961 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2962                                 __rte_unused uint32_t index, uint32_t vmdq)
2963 {
2964         struct rte_eth_dev *slave_eth_dev;
2965         struct bond_dev_private *internals = dev->data->dev_private;
2966         int ret, i;
2967
2968         rte_spinlock_lock(&internals->lock);
2969
2970         for (i = 0; i < internals->slave_count; i++) {
2971                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2972                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2973                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2974                         ret = -ENOTSUP;
2975                         goto end;
2976                 }
2977         }
2978
2979         for (i = 0; i < internals->slave_count; i++) {
2980                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2981                                 mac_addr, vmdq);
2982                 if (ret < 0) {
2983                         /* rollback */
2984                         for (i--; i >= 0; i--)
2985                                 rte_eth_dev_mac_addr_remove(
2986                                         internals->slaves[i].port_id, mac_addr);
2987                         goto end;
2988                 }
2989         }
2990
2991         ret = 0;
2992 end:
2993         rte_spinlock_unlock(&internals->lock);
2994         return ret;
2995 }
2996
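/*
 * Example (illustrative sketch, not part of the driver): adding a
 * secondary, locally administered unicast MAC to the bonded port. The
 * handler above mirrors the address to every slave and rolls back on
 * failure; the address bytes and VMDq pool 0 are assumptions.
 */
static __rte_unused int
example_bond_add_mac(uint16_t bonded_port_id)
{
        struct ether_addr mac = {
                .addr_bytes = { 0x02, 0x09, 0xc0, 0xde, 0x00, 0x01 }
        };

        return rte_eth_dev_mac_addr_add(bonded_port_id, &mac, 0);
}
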
2997 static void
2998 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2999 {
3000         struct rte_eth_dev *slave_eth_dev;
3001         struct bond_dev_private *internals = dev->data->dev_private;
3002         int i;
3003
3004         rte_spinlock_lock(&internals->lock);
3005
3006         for (i = 0; i < internals->slave_count; i++) {
3007                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3008                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3009                         goto end;
3010         }
3011
3012         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
3013
3014         for (i = 0; i < internals->slave_count; i++)
3015                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3016                                 mac_addr);
3017
3018 end:
3019         rte_spinlock_unlock(&internals->lock);
3020 }
3021
3022 const struct eth_dev_ops default_dev_ops = {
3023         .dev_start            = bond_ethdev_start,
3024         .dev_stop             = bond_ethdev_stop,
3025         .dev_close            = bond_ethdev_close,
3026         .dev_configure        = bond_ethdev_configure,
3027         .dev_infos_get        = bond_ethdev_info,
3028         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3029         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3030         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3031         .rx_queue_release     = bond_ethdev_rx_queue_release,
3032         .tx_queue_release     = bond_ethdev_tx_queue_release,
3033         .link_update          = bond_ethdev_link_update,
3034         .stats_get            = bond_ethdev_stats_get,
3035         .stats_reset          = bond_ethdev_stats_reset,
3036         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3037         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3038         .reta_update          = bond_ethdev_rss_reta_update,
3039         .reta_query           = bond_ethdev_rss_reta_query,
3040         .rss_hash_update      = bond_ethdev_rss_hash_update,
3041         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3042         .mtu_set              = bond_ethdev_mtu_set,
3043         .mac_addr_set         = bond_ethdev_mac_address_set,
3044         .mac_addr_add         = bond_ethdev_mac_addr_add,
3045         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3046         .filter_ctrl          = bond_filter_ctrl
3047 };
3048
3049 static int
3050 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3051 {
3052         const char *name = rte_vdev_device_name(dev);
3053         uint8_t socket_id = dev->device.numa_node;
3054         struct bond_dev_private *internals = NULL;
3055         struct rte_eth_dev *eth_dev = NULL;
3056         uint32_t vlan_filter_bmp_size;
3057
3058         /* now do all data allocation - for eth_dev structure, dummy pci driver
3059          * and internal (private) data
3060          */
3061
3062         /* reserve an ethdev entry */
3063         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3064         if (eth_dev == NULL) {
3065                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3066                 goto err;
3067         }
3068
3069         internals = eth_dev->data->dev_private;
3070         eth_dev->data->nb_rx_queues = (uint16_t)1;
3071         eth_dev->data->nb_tx_queues = (uint16_t)1;
3072
3073         /* Allocate memory for storing MAC addresses */
3074         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3075                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3076         if (eth_dev->data->mac_addrs == NULL) {
3077                 RTE_BOND_LOG(ERR,
3078                              "Failed to allocate %u bytes needed to store MAC addresses",
3079                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3080                 goto err;
3081         }
3082
3083         eth_dev->dev_ops = &default_dev_ops;
3084         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3085
3086         rte_spinlock_init(&internals->lock);
3087         rte_spinlock_init(&internals->lsc_lock);
3088
3089         internals->port_id = eth_dev->data->port_id;
3090         internals->mode = BONDING_MODE_INVALID;
3091         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3092         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3093         internals->burst_xmit_hash = burst_xmit_l2_hash;
3094         internals->user_defined_mac = 0;
3095
3096         internals->link_status_polling_enabled = 0;
3097
3098         internals->link_status_polling_interval_ms =
3099                 DEFAULT_POLLING_INTERVAL_10_MS;
3100         internals->link_down_delay_ms = 0;
3101         internals->link_up_delay_ms = 0;
3102
3103         internals->slave_count = 0;
3104         internals->active_slave_count = 0;
3105         internals->rx_offload_capa = 0;
3106         internals->tx_offload_capa = 0;
3107         internals->rx_queue_offload_capa = 0;
3108         internals->tx_queue_offload_capa = 0;
3109         internals->candidate_max_rx_pktlen = 0;
3110         internals->max_rx_pktlen = 0;
3111
3112         /* Initially allow any RSS offload type to be chosen */
3113         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3114
3115         memset(&internals->default_rxconf, 0,
3116                sizeof(internals->default_rxconf));
3117         memset(&internals->default_txconf, 0,
3118                sizeof(internals->default_txconf));
3119
3120         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3121         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3122
3123         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3124         memset(internals->slaves, 0, sizeof(internals->slaves));
3125
3126         TAILQ_INIT(&internals->flow_list);
3127         internals->flow_isolated_valid = 0;
3128
3129         /* Set mode 4 default configuration */
3130         bond_mode_8023ad_setup(eth_dev, NULL);
3131         if (bond_ethdev_mode_set(eth_dev, mode)) {
3132                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3133                                  eth_dev->data->port_id, mode);
3134                 goto err;
3135         }
3136
3137         vlan_filter_bmp_size =
3138                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3139         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3140                                                    RTE_CACHE_LINE_SIZE);
3141         if (internals->vlan_filter_bmpmem == NULL) {
3142                 RTE_BOND_LOG(ERR,
3143                              "Failed to allocate vlan bitmap for bonded device %u",
3144                              eth_dev->data->port_id);
3145                 goto err;
3146         }
3147
3148         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3149                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3150         if (internals->vlan_filter_bmp == NULL) {
3151                 RTE_BOND_LOG(ERR,
3152                              "Failed to init vlan bitmap for bonded device %u",
3153                              eth_dev->data->port_id);
3154                 rte_free(internals->vlan_filter_bmpmem);
3155                 goto err;
3156         }
3157
3158         return eth_dev->data->port_id;
3159
3160 err:
3161         rte_free(internals);
3162         if (eth_dev != NULL)
3163                 eth_dev->data->dev_private = NULL;
3164         rte_eth_dev_release_port(eth_dev);
3165         return -1;
3166 }
3167
3168 static int
3169 bond_probe(struct rte_vdev_device *dev)
3170 {
3171         const char *name;
3172         struct bond_dev_private *internals;
3173         struct rte_kvargs *kvlist;
3174         uint8_t bonding_mode, socket_id;
3175         int  arg_count, port_id;
3176         uint8_t agg_mode;
3177         struct rte_eth_dev *eth_dev;
3178
3179         if (!dev)
3180                 return -EINVAL;
3181
3182         name = rte_vdev_device_name(dev);
3183         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3184
3185         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3186                 eth_dev = rte_eth_dev_attach_secondary(name);
3187                 if (!eth_dev) {
3188                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3189                         return -1;
3190                 }
3191                 /* TODO: request info from primary to set up Rx and Tx */
3192                 eth_dev->dev_ops = &default_dev_ops;
3193                 eth_dev->device = &dev->device;
3194                 rte_eth_dev_probing_finish(eth_dev);
3195                 return 0;
3196         }
3197
3198         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3199                 pmd_bond_init_valid_arguments);
3200         if (kvlist == NULL)
3201                 return -1;
3202
3203         /* Parse link bonding mode */
3204         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3205                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3206                                 &bond_ethdev_parse_slave_mode_kvarg,
3207                                 &bonding_mode) != 0) {
3208                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3209                                         name);
3210                         goto parse_error;
3211                 }
3212         } else {
3213                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3214                                 "device %s", name);
3215                 goto parse_error;
3216         }
3217
3218         /* Parse socket id to create bonding device on */
3219         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3220         if (arg_count == 1) {
3221                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3222                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3223                                 != 0) {
3224                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3225                                         "bonded device %s", name);
3226                         goto parse_error;
3227                 }
3228         } else if (arg_count > 1) {
3229                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3230                                 "bonded device %s", name);
3231                 goto parse_error;
3232         } else {
3233                 socket_id = rte_socket_id();
3234         }
3235
3236         dev->device.numa_node = socket_id;
3237
3238         /* Create link bonding eth device */
3239         port_id = bond_alloc(dev, bonding_mode);
3240         if (port_id < 0) {
3241                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3242                                 "socket %u.", name, bonding_mode, socket_id);
3243                 goto parse_error;
3244         }
3245         internals = rte_eth_devices[port_id].data->dev_private;
3246         internals->kvlist = kvlist;
3247
3248         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3249                 if (rte_kvargs_process(kvlist,
3250                                 PMD_BOND_AGG_MODE_KVARG,
3251                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3252                                 &agg_mode) != 0) {
3253                         RTE_BOND_LOG(ERR,
3254                                         "Failed to parse agg selection mode for bonded device %s",
3255                                         name);
3256                         goto parse_error;
3257                 }
3258
3259                 if (internals->mode == BONDING_MODE_8023AD)
3260                         internals->mode4.agg_selection = agg_mode;
3261         } else {
3262                 internals->mode4.agg_selection = AGG_STABLE;
3263         }
3264
3265         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3266         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3267                         "socket %u.", name, port_id, bonding_mode, socket_id);
3268         return 0;
3269
3270 parse_error:
3271         rte_kvargs_free(kvlist);
3272
3273         return -1;
3274 }
3275
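/*
 * Example (illustrative, all values are assumptions): probing this driver
 * from the EAL command line with the kvargs parsed above, e.g. a mode 2
 * (balance) device over two PCI ports:
 *
 *   --vdev 'net_bonding0,mode=2,slave=0000:01:00.0,slave=0000:01:00.1,
 *           xmit_policy=l34,socket_id=0'
 */
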
3276 static int
3277 bond_remove(struct rte_vdev_device *dev)
3278 {
3279         struct rte_eth_dev *eth_dev;
3280         struct bond_dev_private *internals;
3281         const char *name;
3282
3283         if (!dev)
3284                 return -EINVAL;
3285
3286         name = rte_vdev_device_name(dev);
3287         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3288
3289         /* now free all allocated data - the eth_dev structure,
3290          * dummy pci driver and internal (private) data
3291          */
3292
3293         /* find an ethdev entry */
3294         eth_dev = rte_eth_dev_allocated(name);
3295         if (eth_dev == NULL)
3296                 return -ENODEV;
3297
3298         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3299                 return rte_eth_dev_release_port(eth_dev);
3300
3301         RTE_ASSERT(eth_dev->device == &dev->device);
3302
3303         internals = eth_dev->data->dev_private;
3304         if (internals->slave_count != 0)
3305                 return -EBUSY;
3306
3307         if (eth_dev->data->dev_started == 1) {
3308                 bond_ethdev_stop(eth_dev);
3309                 bond_ethdev_close(eth_dev);
3310         }
3311
3312         eth_dev->dev_ops = NULL;
3313         eth_dev->rx_pkt_burst = NULL;
3314         eth_dev->tx_pkt_burst = NULL;
3315
3316         internals = eth_dev->data->dev_private;
3317         /* Try to release the mempool used in mode 6. If the bonded
3318          * device is not in mode 6, freeing a NULL pointer is harmless.
3319          */
3320         rte_mempool_free(internals->mode6.mempool);
3321         rte_bitmap_free(internals->vlan_filter_bmp);
3322         rte_free(internals->vlan_filter_bmpmem);
3323
3324         rte_eth_dev_release_port(eth_dev);
3325
3326         return 0;
3327 }
3328
3329 /* This part resolves the slave port ids after all the other pdevs and vdevs
3330  * have been allocated */
3331 static int
3332 bond_ethdev_configure(struct rte_eth_dev *dev)
3333 {
3334         const char *name = dev->device->name;
3335         struct bond_dev_private *internals = dev->data->dev_private;
3336         struct rte_kvargs *kvlist = internals->kvlist;
3337         int arg_count;
3338         uint16_t port_id = dev - rte_eth_devices;
3339         uint8_t agg_mode;
3340
3341         static const uint8_t default_rss_key[40] = {
3342                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3343                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3344                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3345                 0xBE, 0xAC, 0x01, 0xFA
3346         };
3347
3348         unsigned i, j;
3349
3350         /*
3351          * If RSS is enabled, fill the RETA table with default values and
3352          * set the key to the value specified in the port RSS configuration.
3353          * Fall back to the default RSS key if no key is specified.
3354          */
3355         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3356                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3357                         internals->rss_key_len =
3358                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3359                         memcpy(internals->rss_key,
3360                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3361                                internals->rss_key_len);
3362                 } else {
3363                         internals->rss_key_len = sizeof(default_rss_key);
3364                         memcpy(internals->rss_key, default_rss_key,
3365                                internals->rss_key_len);
3366                 }
3367
3368                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3369                         internals->reta_conf[i].mask = ~0LL;
3370                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3371                                 internals->reta_conf[i].reta[j] =
3372                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3373                                                 dev->data->nb_rx_queues;
3374                 }
3375         }
3376
3377         /* set the max_rx_pktlen */
3378         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3379
3380         /*
3381          * if no kvlist, it means that this bonded device has been created
3382          * through the bonding api.
3383          */
3384         if (!kvlist)
3385                 return 0;
3386
3387         /* Parse MAC address for bonded device */
3388         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3389         if (arg_count == 1) {
3390                 struct ether_addr bond_mac;
3391
3392                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3393                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3394                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3395                                      name);
3396                         return -1;
3397                 }
3398
3399                 /* Set MAC address */
3400                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3401                         RTE_BOND_LOG(ERR,
3402                                      "Failed to set mac address on bonded device %s",
3403                                      name);
3404                         return -1;
3405                 }
3406         } else if (arg_count > 1) {
3407                 RTE_BOND_LOG(ERR,
3408                              "MAC address can be specified only once for bonded device %s",
3409                              name);
3410                 return -1;
3411         }
3412
3413         /* Parse/set balance mode transmit policy */
3414         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3415         if (arg_count == 1) {
3416                 uint8_t xmit_policy;
3417
3418                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3419                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3420                     0) {
3421                         RTE_BOND_LOG(INFO,
3422                                      "Invalid xmit policy specified for bonded device %s",
3423                                      name);
3424                         return -1;
3425                 }
3426
3427                 /* Set balance mode transmit policy */
3428                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3429                         RTE_BOND_LOG(ERR,
3430                                      "Failed to set balance xmit policy on bonded device %s",
3431                                      name);
3432                         return -1;
3433                 }
3434         } else if (arg_count > 1) {
3435                 RTE_BOND_LOG(ERR,
3436                              "Transmit policy can be specified only once for bonded device %s",
3437                              name);
3438                 return -1;
3439         }
3440
3441         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3442                 if (rte_kvargs_process(kvlist,
3443                                        PMD_BOND_AGG_MODE_KVARG,
3444                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3445                                        &agg_mode) != 0) {
3446                         RTE_BOND_LOG(ERR,
3447                                      "Failed to parse agg selection mode for bonded device %s",
3448                                      name);
3449                 }
3450                 if (internals->mode == BONDING_MODE_8023AD) {
3451                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3452                                         agg_mode);
3453                         if (ret < 0) {
3454                                 RTE_BOND_LOG(ERR,
3455                                         "Invalid args for agg selection set for bonded device %s",
3456                                         name);
3457                                 return -1;
3458                         }
3459                 }
3460         }
3461
3462         /* Parse/add slave ports to bonded device */
3463         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3464                 struct bond_ethdev_slave_ports slave_ports;
3465                 unsigned i;
3466
3467                 memset(&slave_ports, 0, sizeof(slave_ports));
3468
3469                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3470                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3471                         RTE_BOND_LOG(ERR,
3472                                      "Failed to parse slave ports for bonded device %s",
3473                                      name);
3474                         return -1;
3475                 }
3476
3477                 for (i = 0; i < slave_ports.slave_count; i++) {
3478                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3479                                 RTE_BOND_LOG(ERR,
3480                                              "Failed to add port %d as slave to bonded device %s",
3481                                              slave_ports.slaves[i], name);
3482                         }
3483                 }
3484
3485         } else {
3486                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3487                 return -1;
3488         }
3489
3490         /* Parse/set primary slave port id*/
3491         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3492         if (arg_count == 1) {
3493                 uint16_t primary_slave_port_id;
3494
3495                 if (rte_kvargs_process(kvlist,
3496                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3497                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3498                                        &primary_slave_port_id) < 0) {
3499                         RTE_BOND_LOG(INFO,
3500                                      "Invalid primary slave port id specified for bonded device %s",
3501                                      name);
3502                         return -1;
3503                 }
3504
3505                 /* Set the primary slave port id */
3506                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3507                     != 0) {
3508                         RTE_BOND_LOG(ERR,
3509                                      "Failed to set primary slave port %d on bonded device %s",
3510                                      primary_slave_port_id, name);
3511                         return -1;
3512                 }
3513         } else if (arg_count > 1) {
3514                 RTE_BOND_LOG(INFO,
3515                              "Primary slave can be specified only once for bonded device %s",
3516                              name);
3517                 return -1;
3518         }
3519
3520         /* Parse link status monitor polling interval */
3521         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3522         if (arg_count == 1) {
3523                 uint32_t lsc_poll_interval_ms;
3524
3525                 if (rte_kvargs_process(kvlist,
3526                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3527                                        &bond_ethdev_parse_time_ms_kvarg,
3528                                        &lsc_poll_interval_ms) < 0) {
3529                         RTE_BOND_LOG(INFO,
3530                                      "Invalid lsc polling interval value specified for bonded"
3531                                      " device %s", name);
3532                         return -1;
3533                 }
3534
3535                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3536                     != 0) {
3537                         RTE_BOND_LOG(ERR,
3538                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3539                                      lsc_poll_interval_ms, name);
3540                         return -1;
3541                 }
3542         } else if (arg_count > 1) {
3543                 RTE_BOND_LOG(INFO,
3544                              "LSC polling interval can be specified only once for bonded"
3545                              " device %s", name);
3546                 return -1;
3547         }
3548
3549         /* Parse link up interrupt propagation delay */
3550         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3551         if (arg_count == 1) {
3552                 uint32_t link_up_delay_ms;
3553
3554                 if (rte_kvargs_process(kvlist,
3555                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3556                                        &bond_ethdev_parse_time_ms_kvarg,
3557                                        &link_up_delay_ms) < 0) {
3558                         RTE_BOND_LOG(INFO,
3559                                      "Invalid link up propagation delay value specified for"
3560                                      " bonded device %s", name);
3561                         return -1;
3562                 }
3563
3564                 /* Set link up propagation delay */
3565                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3566                     != 0) {
3567                         RTE_BOND_LOG(ERR,
3568                                      "Failed to set link up propagation delay (%u ms) on bonded"
3569                                      " device %s", link_up_delay_ms, name);
3570                         return -1;
3571                 }
3572         } else if (arg_count > 1) {
3573                 RTE_BOND_LOG(INFO,
3574                              "Link up propagation delay can be specified only once for"
3575                              " bonded device %s", name);
3576                 return -1;
3577         }
3578
3579         /* Parse link down interrupt propagation delay */
3580         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3581         if (arg_count == 1) {
3582                 uint32_t link_down_delay_ms;
3583
3584                 if (rte_kvargs_process(kvlist,
3585                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3586                                        &bond_ethdev_parse_time_ms_kvarg,
3587                                        &link_down_delay_ms) < 0) {
3588                         RTE_BOND_LOG(INFO,
3589                                      "Invalid link down propagation delay value specified for"
3590                                      " bonded device %s", name);
3591                         return -1;
3592                 }
3593
3594                 /* Set link down propagation delay */
3595                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3596                     != 0) {
3597                         RTE_BOND_LOG(ERR,
3598                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3599                                      link_down_delay_ms, name);
3600                         return -1;
3601                 }
3602         } else if (arg_count > 1) {
3603                 RTE_BOND_LOG(INFO,
3604                              "Link down propagation delay can be specified only once for bonded device %s",
3605                              name);
3606                 return -1;
3607         }
3608
3609         return 0;
3610 }
3611
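/*
 * Example (illustrative sketch, not part of the driver): creating an
 * equivalent device through the bonding API instead of kvargs; in that
 * case internals->kvlist is NULL and bond_ethdev_configure() above
 * returns early. The device name and slave port ids are assumptions.
 */
static __rte_unused int
example_bond_create(void)
{
        int bonded_port_id;

        bonded_port_id = rte_eth_bond_create("net_bonding0",
                        BONDING_MODE_ROUND_ROBIN, (uint8_t)rte_socket_id());
        if (bonded_port_id < 0)
                return bonded_port_id;

        if (rte_eth_bond_slave_add(bonded_port_id, 0) != 0 ||
                        rte_eth_bond_slave_add(bonded_port_id, 1) != 0)
                return -1;

        return bonded_port_id;
}
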
3612 struct rte_vdev_driver pmd_bond_drv = {
3613         .probe = bond_probe,
3614         .remove = bond_remove,
3615 };
3616
3617 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3618 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3619
3620 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3621         "slave=<ifc> "
3622         "primary=<ifc> "
3623         "mode=[0-6] "
3624         "xmit_policy=[l2 | l23 | l34] "
3625         "agg_mode=[count | stable | bandwidth] "
3626         "socket_id=<int> "
3627         "mac=<mac addr> "
3628         "lsc_poll_period_ms=<int> "
3629         "up_delay=<int> "
3630         "down_delay=<int>");
3631
3632 int bond_logtype;
3633
3634 RTE_INIT(bond_init_log)
3635 {
3636         bond_logtype = rte_log_register("pmd.net.bond");
3637         if (bond_logtype >= 0)
3638                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3639 }