net/bonding: fix reset active slave
[dpdk.git] drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}
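
/*
 * Usage sketch (illustrative only): callers below read the EtherType from
 * the Ethernet header, then use get_vlan_offset() to skip up to two VLAN
 * tags, after which *proto holds the inner EtherType:
 *
 *	uint16_t proto = eth_hdr->ether_type;
 *	size_t off = get_vlan_offset(eth_hdr, &proto);
 *	if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
 *		struct ipv4_hdr *ip = (struct ipv4_hdr *)
 *				((char *)(eth_hdr + 1) + off);
 *		...
 *	}
 *
 * The cast to ipv4_hdr is only valid after checking proto, as the callers
 * below do.
 */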

static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_total = 0;
        uint16_t slave_count;
        uint16_t active_slave;
        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;
        if (active_slave >= slave_count)
                active_slave = 0;

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave =
                        rte_eth_rx_burst(internals->active_slaves[active_slave],
                                         bd_rx_q->queue_id,
                                         bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}
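
/*
 * Note: is_lacp_packets() returns true only for untagged slow protocol
 * frames (EtherType ETHER_TYPE_SLOW, 0x8809) whose subtype is LACP or
 * Marker; the 802.3ad Rx path below uses it to divert such frames to the
 * mode 4 state machine instead of delivering them to the application.
 */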

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};
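
/*
 * Together these pieces form a complete rte_flow rule: flow_attr_8023ad
 * selects ingress traffic, flow_item_8023ad matches any Ethernet frame
 * whose EtherType is the slow protocols type (the mask keeps only the type
 * field significant), and the callers below attach a QUEUE action so that
 * matching LACP/Marker frames land on a dedicated Rx queue.
 */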

int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that every slave in the bonding device can install the flow
         * director rule for slow protocol frames */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}
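
/*
 * With this rule installed on every slave, slow protocol frames arrive only
 * on the dedicated queue (rx_qid), so the fast-queue burst functions below
 * can skip the per-packet LACP checks that bond_ethdev_rx_burst_8023ad()
 * has to perform.
 */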

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint16_t active_slave;
        uint16_t i;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;
        if (active_slave >= slave_count)
                active_slave = 0;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Read packets from this slave */
                num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
                                                bd_rx_q->queue_id,
                                                bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;

                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

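/*
 * Tx fast path for mode 4 with dedicated queues: packets are hashed onto
 * the slaves currently in DISTRIBUTING state; mbufs a slave fails to send
 * are moved to the tail of bufs so the caller can retry or free them, per
 * the usual tx burst contract.
 */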
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        /* 2-D array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave using a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint8_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < dist_slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

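/*
 * Mode 4 Rx path without dedicated queues: slow protocol frames may arrive
 * interleaved with data, so each received burst is scanned and LACP/Marker
 * frames (as well as frames a non-collecting slave, or a foreign
 * destination MAC, should not deliver) are filtered out of the array in
 * place.
 */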
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_eth_dev *bonded_eth_dev =
                                        &rte_eth_devices[internals->port_id];
        struct ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;
        uint8_t subtype;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* If the packet type is known and not pure L2, it
                         * cannot be a slow protocol frame; skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow
                         * packet, or the slave is not in collecting state, or
                         * the bonding interface is not in promiscuous mode and
                         * the destination address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting ||
                                (!promisc &&
                                 !is_multicast_ether_addr(&hdr->d_addr) &&
                                 !is_same_ether_addr(bond_mac,
                                                     &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

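/*
 * Per-client debug counters for mode 6: the table is scanned linearly for a
 * matching (IPv4 address, port) pair. The direction is inferred from which
 * burst counter's address is passed in, so callers hand in &burstnumberRX
 * or &burstnumberTX rather than a boolean flag.
 */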
static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++)     {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
                        /* Update the RX or TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update the stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
                src_ip,                                                 \
                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* Increment the current slave index so the next call to tx burst
         * starts on the next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                       &slave_bufs[i][num_tx_slave],
                                       tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

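/*
 * The hash helpers below fold source and destination addresses together
 * with XOR in word-sized chunks; XOR keeps the hash symmetric, so both
 * directions of a flow map to the same slave.
 */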
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

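/*
 * burst_xmit_l2_hash/l23_hash/l34_hash implement the three xmit policies
 * used by the balance and 802.3ad modes: hashing on MAC addresses only, on
 * MAC plus IP addresses, or on IP addresses plus TCP/UDP ports. Each writes
 * one slave index per packet into slaves[], which the tx burst functions
 * use to sort mbufs per slave.
 */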
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct udp_hdr *udp_hdr;
        struct tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in a fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

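/*
 * bandwidth_left() estimates how much of a slave's link capacity is still
 * unused: link_speed is reported in Mbps, so link_speed * 1000000 / 8 is
 * the capacity in bytes per second. Scaling that by the elapsed number of
 * REORDER_PERIOD_MS intervals and subtracting the observed load (times
 * 1000, matching the millisecond scale) appears to yield a fixed-point
 * "bandwidth left" fraction, whose quotient and remainder are stored
 * separately so that bandwidth_cmp() can order slaves without floating
 * point.
 */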
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint8_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint8_t i, slave_id;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

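/*
 * TLB transmit: slaves are tried in the bandwidth-sorted order maintained
 * by the alarm callback above, and the Ethernet source MAC of outgoing
 * frames still carrying the primary slave's address is rewritten to the
 * address of the slave actually used, so that each slave transmits with
 * its own address.
 */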
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

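/*
 * ALB transmit: ARP packets are assigned to slaves via
 * bond_mode_alb_arp_xmit() and get the chosen slave's MAC as source; all
 * other traffic falls through to the TLB policy. When
 * internals->mode6.ntt is set, ARP update packets are generated from the
 * client table so that peers relearn the current mapping.
 */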
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_BOND_LOG(ERR,
                                                     "Failed to allocate ARP packet from pool");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        /* Move unsent packets to the end of bufs */
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                /* Move unsent packets to the end of bufs */
                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS]
                                                [slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

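/*
 * Balance (mode 2) transmit: packets are distributed across all active
 * slaves according to the configured xmit policy hash; unsent packets are
 * moved to the tail of bufs so the caller sees the standard tx burst
 * return convention.
 */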
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave using a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint8_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

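/*
 * Mode 4 transmit: before hashing data packets to DISTRIBUTING slaves, any
 * pending LACP control frames queued on each slave's tx_ring are drained
 * and sent; a frame that cannot be sent is re-enqueued so the LACP state
 * machine does not lose it.
 */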
1276 static uint16_t
1277 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1278                 uint16_t nb_bufs)
1279 {
1280         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1281         struct bond_dev_private *internals = bd_tx_q->dev_private;
1282
1283         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1284         uint16_t slave_count;
1285
1286         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1287         uint16_t dist_slave_count;
1288
1289         /* 2-D array to sort mbufs for transmission on each slave into */
1290         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1291         /* Number of mbufs for transmission on each slave */
1292         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1293         /* Mapping array generated by hash function to map mbufs to slaves */
1294         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1295
1296         uint16_t slave_tx_count;
1297         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1298
1299         uint16_t i;
1300
1301         /* Copy slave list to protect against slave up/down changes during tx
1302          * bursting */
1303         slave_count = internals->active_slave_count;
1304         if (unlikely(slave_count < 1))
1305                 return 0;
1306
1307         memcpy(slave_port_ids, internals->active_slaves,
1308                         sizeof(slave_port_ids[0]) * slave_count);
1309
1310         /* Check for LACP control packets and send if available */
1311         for (i = 0; i < slave_count; i++) {
1312                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1313                 struct rte_mbuf *ctrl_pkt = NULL;
1314
1315                 if (likely(rte_ring_empty(port->tx_ring)))
1316                         continue;
1317
1318                 if (rte_ring_dequeue(port->tx_ring,
1319                                      (void **)&ctrl_pkt) != -ENOENT) {
1320                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1321                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1322                         /*
1323                          * re-enqueue LAG control plane packets to buffering
1324                          * ring if transmission fails so the packet isn't lost.
1325                          */
1326                         if (slave_tx_count != 1)
1327                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1328                 }
1329         }
1330
1331         if (unlikely(nb_bufs == 0))
1332                 return 0;
1333
1334         dist_slave_count = 0;
1335         for (i = 0; i < slave_count; i++) {
1336                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1337
1338                 if (ACTOR_STATE(port, DISTRIBUTING))
1339                         dist_slave_port_ids[dist_slave_count++] =
1340                                         slave_port_ids[i];
1341         }
1342
1343         if (likely(dist_slave_count > 0)) {
1344
1345                 /*
1346                  * Populate slaves mbuf with the packets which are to be sent
1347                  * on it, selecting output slave using hash based on xmit policy
1348                  */
1349                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1350                                 bufs_slave_port_idxs);
1351
1352                 for (i = 0; i < nb_bufs; i++) {
1353                         /*
1354                          * Populate slave mbuf arrays with mbufs for that
1355                          * slave
1356                          */
                        uint16_t slave_idx = bufs_slave_port_idxs[i];
1358
1359                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1360                                         bufs[i];
1361                 }
1362
1363
1364                 /* Send packet burst on each slave device */
1365                 for (i = 0; i < dist_slave_count; i++) {
1366                         if (slave_nb_bufs[i] == 0)
1367                                 continue;
1368
1369                         slave_tx_count = rte_eth_tx_burst(
1370                                         dist_slave_port_ids[i],
1371                                         bd_tx_q->queue_id, slave_bufs[i],
1372                                         slave_nb_bufs[i]);
1373
1374                         total_tx_count += slave_tx_count;
1375
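                        /*
                         * Illustrative example: if a slave accepts only 7 of
                         * 10 mbufs, the 3 stragglers are copied to the tail
                         * of bufs[] below so the caller can retry them.
                         */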
1376                         /* If tx burst fails move packets to end of bufs */
1377                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1378                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1379                                                 slave_tx_count;
1380                                 total_tx_fail_count += slave_tx_fail_count;
1381
1382                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1383                                        &slave_bufs[i][slave_tx_count],
1384                                        slave_tx_fail_count * sizeof(bufs[0]));
1385                         }
1386                 }
1387         }
1388
1389         return total_tx_count;
1390 }
1391
1392 static uint16_t
1393 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1394                 uint16_t nb_pkts)
1395 {
1396         struct bond_dev_private *internals;
1397         struct bond_tx_queue *bd_tx_q;
1398
        uint8_t tx_failed_flag = 0;
        uint16_t num_of_slaves;
1400         uint16_t slaves[RTE_MAX_ETHPORTS];
1401
1402         uint16_t max_nb_of_tx_pkts = 0;
1403
1404         int slave_tx_total[RTE_MAX_ETHPORTS];
1405         int i, most_successful_tx_slave = -1;
1406
1407         bd_tx_q = (struct bond_tx_queue *)queue;
1408         internals = bd_tx_q->dev_private;
1409
1410         /* Copy slave list to protect against slave up/down changes during tx
1411          * bursting */
1412         num_of_slaves = internals->active_slave_count;
1413         memcpy(slaves, internals->active_slaves,
1414                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1415
1416         if (num_of_slaves < 1)
1417                 return 0;
1418
1419         /* Increment reference count on mbufs */
1420         for (i = 0; i < nb_pkts; i++)
1421                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
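        /*
         * Worked example: with 3 active slaves each mbuf must be sent 3
         * times, so its refcnt is raised by num_of_slaves - 1 = 2 on top of
         * the caller's reference; each slave's transmit releases one.
         */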
1422
1423         /* Transmit burst on each active slave */
1424         for (i = 0; i < num_of_slaves; i++) {
1425                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1426                                         bufs, nb_pkts);
1427
1428                 if (unlikely(slave_tx_total[i] < nb_pkts))
1429                         tx_failed_flag = 1;
1430
1431                 /* record the value and slave index for the slave which transmits the
1432                  * maximum number of packets */
1433                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1434                         max_nb_of_tx_pkts = slave_tx_total[i];
1435                         most_successful_tx_slave = i;
1436                 }
1437         }
1438
1439         /* if slaves fail to transmit packets from burst, the calling application
1440          * is not expected to know about multiple references to packets so we must
1441          * handle failures of all packets except those of the most successful slave
1442          */
1443         if (unlikely(tx_failed_flag))
1444                 for (i = 0; i < num_of_slaves; i++)
1445                         if (i != most_successful_tx_slave)
1446                                 while (slave_tx_total[i] < nb_pkts)
1447                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1448
1449         return max_nb_of_tx_pkts;
1450 }
1451
1452 static void
1453 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1454 {
1455         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1456
1457         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1458                 /**
1459                  * If in mode 4 then save the link properties of the first
1460                  * slave, all subsequent slaves must match these properties
1461                  */
1462                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1463
1464                 bond_link->link_autoneg = slave_link->link_autoneg;
1465                 bond_link->link_duplex = slave_link->link_duplex;
1466                 bond_link->link_speed = slave_link->link_speed;
1467         } else {
1468                 /**
1469                  * In any other mode the link properties are set to default
1470                  * values of AUTONEG/DUPLEX
1471                  */
1472                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1473                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1474         }
1475 }
1476
1477 static int
1478 link_properties_valid(struct rte_eth_dev *ethdev,
1479                 struct rte_eth_link *slave_link)
1480 {
1481         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1482
1483         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1484                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1485
1486                 if (bond_link->link_duplex != slave_link->link_duplex ||
1487                         bond_link->link_autoneg != slave_link->link_autoneg ||
1488                         bond_link->link_speed != slave_link->link_speed)
1489                         return -1;
1490         }
1491
1492         return 0;
1493 }
1494
1495 int
1496 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1497 {
1498         struct ether_addr *mac_addr;
1499
1500         if (eth_dev == NULL) {
1501                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1502                 return -1;
1503         }
1504
1505         if (dst_mac_addr == NULL) {
1506                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1507                 return -1;
1508         }
1509
1510         mac_addr = eth_dev->data->mac_addrs;
1511
1512         ether_addr_copy(mac_addr, dst_mac_addr);
1513         return 0;
1514 }
1515
1516 int
1517 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1518 {
1519         struct ether_addr *mac_addr;
1520
1521         if (eth_dev == NULL) {
1522                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1523                 return -1;
1524         }
1525
1526         if (new_mac_addr == NULL) {
1527                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1528                 return -1;
1529         }
1530
1531         mac_addr = eth_dev->data->mac_addrs;
1532
1533         /* If new MAC is different to current MAC then update */
1534         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1535                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1536
1537         return 0;
1538 }
1539
1540 static const struct ether_addr null_mac_addr;
1541
1542 /*
1543  * Add additional MAC addresses to the slave
1544  */
1545 int
1546 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1547                 uint16_t slave_port_id)
1548 {
1549         int i, ret;
1550         struct ether_addr *mac_addr;
1551
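        /*
         * Entry 0 of mac_addrs is the bonded device's primary MAC address,
         * which is presumably programmed on the slave elsewhere (see
         * mac_address_slaves_update()); only the extra entries are added here.
         */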
1552         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1553                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1554                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1555                         break;
1556
1557                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1558                 if (ret < 0) {
1559                         /* rollback */
1560                         for (i--; i > 0; i--)
1561                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1562                                         &bonded_eth_dev->data->mac_addrs[i]);
1563                         return ret;
1564                 }
1565         }
1566
1567         return 0;
1568 }
1569
1570 /*
1571  * Remove additional MAC addresses from the slave
1572  */
1573 int
1574 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1575                 uint16_t slave_port_id)
1576 {
1577         int i, rc, ret;
1578         struct ether_addr *mac_addr;
1579
1580         rc = 0;
1581         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1582                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1583                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1584                         break;
1585
1586                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1587                 /* save only the first error */
1588                 if (ret < 0 && rc == 0)
1589                         rc = ret;
1590         }
1591
1592         return rc;
1593 }
1594
1595 int
1596 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1597 {
1598         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1599         int i;
1600
1601         /* Update slave devices MAC addresses */
1602         if (internals->slave_count < 1)
1603                 return -1;
1604
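        /*
         * Per-mode policy, as implemented below: round-robin, balance and
         * broadcast give every slave the bonded MAC; mode 4 delegates to its
         * own handler; the remaining modes give the bonded MAC to the current
         * primary only and restore every other slave's persisted MAC.
         */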
1605         switch (internals->mode) {
1606         case BONDING_MODE_ROUND_ROBIN:
1607         case BONDING_MODE_BALANCE:
1608         case BONDING_MODE_BROADCAST:
1609                 for (i = 0; i < internals->slave_count; i++) {
1610                         if (rte_eth_dev_default_mac_addr_set(
1611                                         internals->slaves[i].port_id,
1612                                         bonded_eth_dev->data->mac_addrs)) {
1613                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1614                                                 internals->slaves[i].port_id);
1615                                 return -1;
1616                         }
1617                 }
1618                 break;
1619         case BONDING_MODE_8023AD:
1620                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1621                 break;
1622         case BONDING_MODE_ACTIVE_BACKUP:
1623         case BONDING_MODE_TLB:
1624         case BONDING_MODE_ALB:
1625         default:
1626                 for (i = 0; i < internals->slave_count; i++) {
1627                         if (internals->slaves[i].port_id ==
1628                                         internals->current_primary_port) {
1629                                 if (rte_eth_dev_default_mac_addr_set(
                                                internals->current_primary_port,
1631                                                 bonded_eth_dev->data->mac_addrs)) {
1632                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1633                                                         internals->current_primary_port);
1634                                         return -1;
1635                                 }
1636                         } else {
1637                                 if (rte_eth_dev_default_mac_addr_set(
1638                                                 internals->slaves[i].port_id,
1639                                                 &internals->slaves[i].persisted_mac_addr)) {
1640                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1641                                                         internals->slaves[i].port_id);
1642                                         return -1;
1643                                 }
1644                         }
1645                 }
1646         }
1647
1648         return 0;
1649 }
1650
1651 int
1652 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1653 {
1654         struct bond_dev_private *internals;
1655
1656         internals = eth_dev->data->dev_private;
1657
1658         switch (mode) {
1659         case BONDING_MODE_ROUND_ROBIN:
1660                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1661                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1662                 break;
1663         case BONDING_MODE_ACTIVE_BACKUP:
1664                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1665                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1666                 break;
1667         case BONDING_MODE_BALANCE:
1668                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1669                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1670                 break;
1671         case BONDING_MODE_BROADCAST:
1672                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1673                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1674                 break;
1675         case BONDING_MODE_8023AD:
1676                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1677                         return -1;
1678
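                /*
                 * Without dedicated queues, the LACP control path is serviced
                 * from the data-path burst functions, hence the polling
                 * requirement warned about below.
                 */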
1679                 if (internals->mode4.dedicated_queues.enabled == 0) {
1680                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1681                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1682                         RTE_BOND_LOG(WARNING,
1683                                 "Using mode 4, it is necessary to do TX burst "
1684                                 "and RX burst at least every 100ms.");
1685                 } else {
1686                         /* Use flow director's optimization */
1687                         eth_dev->rx_pkt_burst =
1688                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1689                         eth_dev->tx_pkt_burst =
1690                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1691                 }
1692                 break;
1693         case BONDING_MODE_TLB:
1694                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1695                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1696                 break;
1697         case BONDING_MODE_ALB:
1698                 if (bond_mode_alb_enable(eth_dev) != 0)
1699                         return -1;
1700
1701                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1702                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1703                 break;
1704         default:
1705                 return -1;
1706         }
1707
1708         internals->mode = mode;
1709
1710         return 0;
1711 }
1712
1713
1714 static int
1715 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1716                 struct rte_eth_dev *slave_eth_dev)
1717 {
1718         int errval = 0;
1719         struct bond_dev_private *internals = (struct bond_dev_private *)
1720                 bonded_eth_dev->data->dev_private;
1721         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1722
1723         if (port->slow_pool == NULL) {
1724                 char mem_name[256];
1725                 int slave_id = slave_eth_dev->data->port_id;
1726
1727                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1728                                 slave_id);
1729                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1730                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1731                         slave_eth_dev->data->numa_node);
1732
                /* Any memory allocation failure in initialization is critical
                 * because resources can't be freed, so reinitialization is
                 * impossible. */
1735                 if (port->slow_pool == NULL) {
1736                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1737                                 slave_id, mem_name, rte_strerror(rte_errno));
1738                 }
1739         }
1740
1741         if (internals->mode4.dedicated_queues.enabled == 1) {
1742                 /* Configure slow Rx queue */
1743
1744                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1745                                 internals->mode4.dedicated_queues.rx_qid, 128,
1746                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1747                                 NULL, port->slow_pool);
1748                 if (errval != 0) {
1749                         RTE_BOND_LOG(ERR,
1750                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1751                                         slave_eth_dev->data->port_id,
1752                                         internals->mode4.dedicated_queues.rx_qid,
1753                                         errval);
1754                         return errval;
1755                 }
1756
1757                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1758                                 internals->mode4.dedicated_queues.tx_qid, 512,
1759                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1760                                 NULL);
1761                 if (errval != 0) {
1762                         RTE_BOND_LOG(ERR,
1763                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1764                                 slave_eth_dev->data->port_id,
1765                                 internals->mode4.dedicated_queues.tx_qid,
1766                                 errval);
1767                         return errval;
1768                 }
1769         }
1770         return 0;
1771 }
1772
1773 int
1774 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1775                 struct rte_eth_dev *slave_eth_dev)
1776 {
1777         struct bond_rx_queue *bd_rx_q;
1778         struct bond_tx_queue *bd_tx_q;
1779         uint16_t nb_rx_queues;
1780         uint16_t nb_tx_queues;
1781
1782         int errval;
1783         uint16_t q_id;
1784         struct rte_flow_error flow_error;
1785
1786         struct bond_dev_private *internals = (struct bond_dev_private *)
1787                 bonded_eth_dev->data->dev_private;
1788
1789         /* Stop slave */
1790         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1791
1792         /* Enable interrupts on slave device if supported */
1793         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1794                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1795
1796         /* If RSS is enabled for bonding, try to enable it for slaves  */
1797         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1798                 if (internals->rss_key_len != 0) {
1799                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1800                                         internals->rss_key_len;
1801                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1802                                         internals->rss_key;
1803                 } else {
1804                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1805                 }
1806
1807                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1808                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1809                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1810                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1811         }
1812
1813         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1814                         DEV_RX_OFFLOAD_VLAN_FILTER)
1815                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1816                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1817         else
1818                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1819                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1820
1821         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1822         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1823
1824         if (internals->mode == BONDING_MODE_8023AD) {
1825                 if (internals->mode4.dedicated_queues.enabled == 1) {
1826                         nb_rx_queues++;
1827                         nb_tx_queues++;
1828                 }
1829         }
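        /*
         * The extra queue pair reserved above is the 802.3ad slow-path queue
         * configured in slave_configure_slow_queue(); its ids are presumably
         * the first indexes past the data queues, as assigned when the bonded
         * device is started.
         */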
1830
1831         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1832                                      bonded_eth_dev->data->mtu);
1833         if (errval != 0 && errval != -ENOTSUP) {
1834                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1835                                 slave_eth_dev->data->port_id, errval);
1836                 return errval;
1837         }
1838
1839         /* Configure device */
1840         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1841                         nb_rx_queues, nb_tx_queues,
1842                         &(slave_eth_dev->data->dev_conf));
1843         if (errval != 0) {
1844                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1845                                 slave_eth_dev->data->port_id, errval);
1846                 return errval;
1847         }
1848
1849         /* Setup Rx Queues */
1850         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1851                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1852
1853                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1854                                 bd_rx_q->nb_rx_desc,
1855                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1856                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1857                 if (errval != 0) {
1858                         RTE_BOND_LOG(ERR,
1859                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1860                                         slave_eth_dev->data->port_id, q_id, errval);
1861                         return errval;
1862                 }
1863         }
1864
1865         /* Setup Tx Queues */
1866         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1867                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1868
1869                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1870                                 bd_tx_q->nb_tx_desc,
1871                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1872                                 &bd_tx_q->tx_conf);
1873                 if (errval != 0) {
1874                         RTE_BOND_LOG(ERR,
1875                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1876                                 slave_eth_dev->data->port_id, q_id, errval);
1877                         return errval;
1878                 }
1879         }
1880
1881         if (internals->mode == BONDING_MODE_8023AD &&
1882                         internals->mode4.dedicated_queues.enabled == 1) {
                errval = slave_configure_slow_queue(bonded_eth_dev,
                                slave_eth_dev);
                if (errval != 0)
                        return errval;

                errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
                                slave_eth_dev->data->port_id);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                        return errval;
                }
1894
1895                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1896                         rte_flow_destroy(slave_eth_dev->data->port_id,
1897                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1898                                         &flow_error);
1899
                errval = bond_ethdev_8023ad_flow_set(bonded_eth_dev,
                                slave_eth_dev->data->port_id);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                "bond_ethdev_8023ad_flow_set: port=%d, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                        return errval;
                }
1902         }
1903
1904         /* Start device */
1905         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1906         if (errval != 0) {
1907                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1908                                 slave_eth_dev->data->port_id, errval);
1909                 return -1;
1910         }
1911
1912         /* If RSS is enabled for bonding, synchronize RETA */
1913         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1914                 int i;
1919                 for (i = 0; i < internals->slave_count; i++) {
1920                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1921                                 errval = rte_eth_dev_rss_reta_update(
1922                                                 slave_eth_dev->data->port_id,
1923                                                 &internals->reta_conf[0],
1924                                                 internals->slaves[i].reta_size);
1925                                 if (errval != 0) {
1926                                         RTE_BOND_LOG(WARNING,
1927                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1928                                                      " RSS Configuration for bonding may be inconsistent.",
1929                                                      slave_eth_dev->data->port_id, errval);
1930                                 }
1931                                 break;
1932                         }
1933                 }
1934         }
1935
1936         /* If lsc interrupt is set, check initial slave's link status */
1937         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1938                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1939                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1940                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1941                         NULL);
1942         }
1943
1944         return 0;
1945 }
1946
1947 void
1948 slave_remove(struct bond_dev_private *internals,
1949                 struct rte_eth_dev *slave_eth_dev)
1950 {
        uint16_t i;
1952
1953         for (i = 0; i < internals->slave_count; i++)
1954                 if (internals->slaves[i].port_id ==
1955                                 slave_eth_dev->data->port_id)
1956                         break;
1957
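        /*
         * Compact the slave array and each flow's per-slave handle array,
         * e.g. removing index 1 of 4 shifts entries 2..3 down one slot.
         */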
1958         if (i < (internals->slave_count - 1)) {
1959                 struct rte_flow *flow;
1960
1961                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1962                                 sizeof(internals->slaves[0]) *
1963                                 (internals->slave_count - i - 1));
1964                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1965                         memmove(&flow->flows[i], &flow->flows[i + 1],
1966                                 sizeof(flow->flows[0]) *
1967                                 (internals->slave_count - i - 1));
1968                         flow->flows[internals->slave_count - 1] = NULL;
1969                 }
1970         }
1971
1972         internals->slave_count--;
1973
1974         /* force reconfiguration of slave interfaces */
1975         _rte_eth_dev_reset(slave_eth_dev);
1976 }
1977
1978 static void
1979 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1980
1981 void
1982 slave_add(struct bond_dev_private *internals,
1983                 struct rte_eth_dev *slave_eth_dev)
1984 {
1985         struct bond_slave_details *slave_details =
1986                         &internals->slaves[internals->slave_count];
1987
1988         slave_details->port_id = slave_eth_dev->data->port_id;
1989         slave_details->last_link_status = 0;
1990
1991         /* Mark slave devices that don't support interrupts so we can
1992          * compensate when we start the bond
1993          */
1994         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1995                 slave_details->link_status_poll_enabled = 1;
1996         }
1997
1998         slave_details->link_status_wait_to_complete = 0;
        /* Save the slave's current MAC so it can be restored if it leaves the bond */
2000         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2001                         sizeof(struct ether_addr));
2002 }
2003
2004 void
2005 bond_ethdev_primary_set(struct bond_dev_private *internals,
2006                 uint16_t slave_port_id)
2007 {
2008         int i;
2009
2010         if (internals->active_slave_count < 1)
2011                 internals->current_primary_port = slave_port_id;
2012         else
2013                 /* Search bonded device slave ports for new proposed primary port */
2014                 for (i = 0; i < internals->active_slave_count; i++) {
2015                         if (internals->active_slaves[i] == slave_port_id)
2016                                 internals->current_primary_port = slave_port_id;
2017                 }
2018 }
2019
2020 static void
2021 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2022
2023 static int
2024 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2025 {
2026         struct bond_dev_private *internals;
2027         int i;
2028
2029         /* slave eth dev will be started by bonded device */
2030         if (check_for_bonded_ethdev(eth_dev)) {
2031                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2032                                 eth_dev->data->port_id);
2033                 return -1;
2034         }
2035
2036         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2037         eth_dev->data->dev_started = 1;
2038
2039         internals = eth_dev->data->dev_private;
2040
2041         if (internals->slave_count == 0) {
2042                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2043                 goto out_err;
2044         }
2045
2046         if (internals->user_defined_mac == 0) {
2047                 struct ether_addr *new_mac_addr = NULL;
2048
2049                 for (i = 0; i < internals->slave_count; i++)
2050                         if (internals->slaves[i].port_id == internals->primary_port)
2051                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2052
2053                 if (new_mac_addr == NULL)
2054                         goto out_err;
2055
2056                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2057                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2058                                         eth_dev->data->port_id);
2059                         goto out_err;
2060                 }
2061         }
2062
        /* If bonded device is configured in promiscuous mode then re-apply config */
2064         if (internals->promiscuous_en)
2065                 bond_ethdev_promiscuous_enable(eth_dev);
2066
2067         if (internals->mode == BONDING_MODE_8023AD) {
2068                 if (internals->mode4.dedicated_queues.enabled == 1) {
2069                         internals->mode4.dedicated_queues.rx_qid =
2070                                         eth_dev->data->nb_rx_queues;
2071                         internals->mode4.dedicated_queues.tx_qid =
2072                                         eth_dev->data->nb_tx_queues;
2073                 }
2074         }
2075
2076
2077         /* Reconfigure each slave device if starting bonded device */
2078         for (i = 0; i < internals->slave_count; i++) {
2079                 struct rte_eth_dev *slave_ethdev =
2080                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2081                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2082                         RTE_BOND_LOG(ERR,
2083                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2084                                 eth_dev->data->port_id,
2085                                 internals->slaves[i].port_id);
2086                         goto out_err;
2087                 }
2088                 /* We will need to poll for link status if any slave doesn't
2089                  * support interrupts
2090                  */
2091                 if (internals->slaves[i].link_status_poll_enabled)
2092                         internals->link_status_polling_enabled = 1;
2093         }
2094
2095         /* start polling if needed */
2096         if (internals->link_status_polling_enabled) {
2097                 rte_eal_alarm_set(
2098                         internals->link_status_polling_interval_ms * 1000,
2099                         bond_ethdev_slave_link_status_change_monitor,
2100                         (void *)&rte_eth_devices[internals->port_id]);
2101         }
2102
        /* Update all slave devices' MAC addresses */
2104         if (mac_address_slaves_update(eth_dev) != 0)
2105                 goto out_err;
2106
2107         if (internals->user_defined_primary_port)
2108                 bond_ethdev_primary_set(internals, internals->primary_port);
2109
2110         if (internals->mode == BONDING_MODE_8023AD)
2111                 bond_mode_8023ad_start(eth_dev);
2112
2113         if (internals->mode == BONDING_MODE_TLB ||
2114                         internals->mode == BONDING_MODE_ALB)
2115                 bond_tlb_enable(internals);
2116
2117         return 0;
2118
2119 out_err:
2120         eth_dev->data->dev_started = 0;
2121         return -1;
2122 }
2123
2124 static void
2125 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2126 {
        uint16_t i;
2128
2129         if (dev->data->rx_queues != NULL) {
2130                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2131                         rte_free(dev->data->rx_queues[i]);
2132                         dev->data->rx_queues[i] = NULL;
2133                 }
2134                 dev->data->nb_rx_queues = 0;
2135         }
2136
2137         if (dev->data->tx_queues != NULL) {
2138                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2139                         rte_free(dev->data->tx_queues[i]);
2140                         dev->data->tx_queues[i] = NULL;
2141                 }
2142                 dev->data->nb_tx_queues = 0;
2143         }
2144 }
2145
2146 void
2147 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2148 {
2149         struct bond_dev_private *internals = eth_dev->data->dev_private;
        uint16_t i;
2151
2152         if (internals->mode == BONDING_MODE_8023AD) {
2153                 struct port *port;
2154                 void *pkt = NULL;
2155
2156                 bond_mode_8023ad_stop(eth_dev);
2157
2158                 /* Discard all messages to/from mode 4 state machines */
2159                 for (i = 0; i < internals->active_slave_count; i++) {
2160                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2161
2162                         RTE_ASSERT(port->rx_ring != NULL);
2163                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2164                                 rte_pktmbuf_free(pkt);
2165
2166                         RTE_ASSERT(port->tx_ring != NULL);
2167                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2168                                 rte_pktmbuf_free(pkt);
2169                 }
2170         }
2171
2172         if (internals->mode == BONDING_MODE_TLB ||
2173                         internals->mode == BONDING_MODE_ALB) {
2174                 bond_tlb_disable(internals);
2175                 for (i = 0; i < internals->active_slave_count; i++)
2176                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2177         }
2178
2179         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2180         eth_dev->data->dev_started = 0;
2181
2182         internals->link_status_polling_enabled = 0;
2183         for (i = 0; i < internals->slave_count; i++) {
2184                 uint16_t slave_id = internals->slaves[i].port_id;
2185                 if (find_slave_by_id(internals->active_slaves,
2186                                 internals->active_slave_count, slave_id) !=
2187                                                 internals->active_slave_count) {
2188                         internals->slaves[i].last_link_status = 0;
2189                         rte_eth_dev_stop(slave_id);
2190                         deactivate_slave(eth_dev, slave_id);
2191                 }
2192         }
2193 }
2194
2195 void
2196 bond_ethdev_close(struct rte_eth_dev *dev)
2197 {
2198         struct bond_dev_private *internals = dev->data->dev_private;
        uint16_t bond_port_id = internals->port_id;
2200         int skipped = 0;
2201         struct rte_flow_error ferror;
2202
2203         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2204         while (internals->slave_count != skipped) {
2205                 uint16_t port_id = internals->slaves[skipped].port_id;
2206
2207                 rte_eth_dev_stop(port_id);
2208
2209                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2210                         RTE_BOND_LOG(ERR,
2211                                      "Failed to remove port %d from bonded device %s",
2212                                      port_id, dev->device->name);
2213                         skipped++;
2214                 }
2215         }
2216         bond_flow_ops.flush(dev, &ferror);
2217         bond_ethdev_free_queues(dev);
2218         rte_bitmap_reset(internals->vlan_filter_bmp);
2219 }
2220
2221 /* forward declaration */
2222 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2223
2224 static void
2225 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2226 {
2227         struct bond_dev_private *internals = dev->data->dev_private;
2228
2229         uint16_t max_nb_rx_queues = UINT16_MAX;
2230         uint16_t max_nb_tx_queues = UINT16_MAX;
2231         uint16_t max_rx_desc_lim = UINT16_MAX;
2232         uint16_t max_tx_desc_lim = UINT16_MAX;
2233
2234         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2235
2236         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2237                         internals->candidate_max_rx_pktlen :
2238                         ETHER_MAX_JUMBO_FRAME_LEN;
2239
        /* The maximum number of tx/rx queues that the bonded device can
         * support is the minimum across all of its slaves, as every slave
         * must be capable of supporting the same number of tx/rx queues.
         */
2244         if (internals->slave_count > 0) {
2245                 struct rte_eth_dev_info slave_info;
                uint16_t idx;
2247
2248                 for (idx = 0; idx < internals->slave_count; idx++) {
2249                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2250                                         &slave_info);
2251
2252                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2253                                 max_nb_rx_queues = slave_info.max_rx_queues;
2254
2255                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2256                                 max_nb_tx_queues = slave_info.max_tx_queues;
2257
2258                         if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2259                                 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2260
2261                         if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2262                                 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2263                 }
2264         }
2265
2266         dev_info->max_rx_queues = max_nb_rx_queues;
2267         dev_info->max_tx_queues = max_nb_tx_queues;
2268
2269         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2270                sizeof(dev_info->default_rxconf));
2271         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2272                sizeof(dev_info->default_txconf));
2273
2274         dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2275         dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2276
2277         /**
2278          * If dedicated hw queues enabled for link bonding device in LACP mode
2279          * then we need to reduce the maximum number of data path queues by 1.
2280          */
2281         if (internals->mode == BONDING_MODE_8023AD &&
2282                 internals->mode4.dedicated_queues.enabled == 1) {
2283                 dev_info->max_rx_queues--;
2284                 dev_info->max_tx_queues--;
2285         }
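        /*
         * The queue pair hidden here is reserved for the 802.3ad slow path
         * (see slave_configure()), so applications cannot use it for data.
         */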
2286
2287         dev_info->min_rx_bufsize = 0;
2288
2289         dev_info->rx_offload_capa = internals->rx_offload_capa;
2290         dev_info->tx_offload_capa = internals->tx_offload_capa;
2291         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2292         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2293         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2294
2295         dev_info->reta_size = internals->reta_size;
2296 }
2297
2298 static int
2299 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2300 {
2301         int res;
2302         uint16_t i;
2303         struct bond_dev_private *internals = dev->data->dev_private;
2304
2305         /* don't do this while a slave is being added */
2306         rte_spinlock_lock(&internals->lock);
2307
2308         if (on)
2309                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2310         else
2311                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2312
2313         for (i = 0; i < internals->slave_count; i++) {
2314                 uint16_t port_id = internals->slaves[i].port_id;
2315
2316                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
                if (res == -ENOTSUP)
2318                         RTE_BOND_LOG(WARNING,
2319                                      "Setting VLAN filter on slave port %u not supported.",
2320                                      port_id);
2321         }
2322
2323         rte_spinlock_unlock(&internals->lock);
2324         return 0;
2325 }
2326
2327 static int
2328 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2329                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2330                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2331 {
2332         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2333                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2334                                         0, dev->data->numa_node);
2335         if (bd_rx_q == NULL)
2336                 return -1;
2337
2338         bd_rx_q->queue_id = rx_queue_id;
2339         bd_rx_q->dev_private = dev->data->dev_private;
2340
2341         bd_rx_q->nb_rx_desc = nb_rx_desc;
2342
2343         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2344         bd_rx_q->mb_pool = mb_pool;
2345
2346         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2347
2348         return 0;
2349 }
2350
2351 static int
2352 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2353                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2354                 const struct rte_eth_txconf *tx_conf)
2355 {
2356         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2357                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2358                                         0, dev->data->numa_node);
2359
2360         if (bd_tx_q == NULL)
2361                 return -1;
2362
2363         bd_tx_q->queue_id = tx_queue_id;
2364         bd_tx_q->dev_private = dev->data->dev_private;
2365
2366         bd_tx_q->nb_tx_desc = nb_tx_desc;
2367         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2368
2369         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2370
2371         return 0;
2372 }
2373
2374 static void
2375 bond_ethdev_rx_queue_release(void *queue)
2376 {
2377         if (queue == NULL)
2378                 return;
2379
2380         rte_free(queue);
2381 }
2382
2383 static void
2384 bond_ethdev_tx_queue_release(void *queue)
2385 {
2386         if (queue == NULL)
2387                 return;
2388
2389         rte_free(queue);
2390 }
2391
2392 static void
2393 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2394 {
2395         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2396         struct bond_dev_private *internals;
2397
2398         /* Default value for polling slave found is true as we don't want to
2399          * disable the polling thread if we cannot get the lock */
2400         int i, polling_slave_found = 1;
2401
2402         if (cb_arg == NULL)
2403                 return;
2404
2405         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2406         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2407
2408         if (!bonded_ethdev->data->dev_started ||
2409                 !internals->link_status_polling_enabled)
2410                 return;
2411
2412         /* If device is currently being configured then don't check slaves link
2413          * status, wait until next period */
2414         if (rte_spinlock_trylock(&internals->lock)) {
2415                 if (internals->slave_count > 0)
2416                         polling_slave_found = 0;
2417
2418                 for (i = 0; i < internals->slave_count; i++) {
2419                         if (!internals->slaves[i].link_status_poll_enabled)
2420                                 continue;
2421
2422                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2423                         polling_slave_found = 1;
2424
2425                         /* Update slave link status */
2426                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2427                                         internals->slaves[i].link_status_wait_to_complete);
2428
2429                         /* if link status has changed since last checked then call lsc
2430                          * event callback */
2431                         if (slave_ethdev->data->dev_link.link_status !=
2432                                         internals->slaves[i].last_link_status) {
2433                                 internals->slaves[i].last_link_status =
2434                                                 slave_ethdev->data->dev_link.link_status;
2435
2436                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2437                                                 RTE_ETH_EVENT_INTR_LSC,
2438                                                 &bonded_ethdev->data->port_id,
2439                                                 NULL);
2440                         }
2441                 }
2442                 rte_spinlock_unlock(&internals->lock);
2443         }
2444
2445         if (polling_slave_found)
                /* Set alarm to continue monitoring link status of slave ethdevs */
2447                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2448                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2449 }
2450
2451 static int
2452 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2453 {
2454         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2455
2456         struct bond_dev_private *bond_ctx;
2457         struct rte_eth_link slave_link;
2458
2459         uint32_t idx;
2460
2461         bond_ctx = ethdev->data->dev_private;
2462
2463         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2464
2465         if (ethdev->data->dev_started == 0 ||
2466                         bond_ctx->active_slave_count == 0) {
2467                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2468                 return 0;
2469         }
2470
2471         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2472
2473         if (wait_to_complete)
2474                 link_update = rte_eth_link_get;
2475         else
2476                 link_update = rte_eth_link_get_nowait;
2477
2478         switch (bond_ctx->mode) {
2479         case BONDING_MODE_BROADCAST:
                /**
                 * Initialize link speed to UINT32_MAX so the minimum speed
                 * across all active slaves is picked up below
                 */
                ethdev->data->dev_link.link_speed = UINT32_MAX;

                /**
                 * link speed is the minimum value of all the slaves' link
                 * speeds, as packet loss will occur on a slave if
                 * transmission at rates greater than its speed is attempted
                 */
                for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
                        link_update(bond_ctx->active_slaves[idx], &slave_link);
2493
2494                         if (slave_link.link_speed <
2495                                         ethdev->data->dev_link.link_speed)
2496                                 ethdev->data->dev_link.link_speed =
2497                                                 slave_link.link_speed;
2498                 }
2499                 break;
2500         case BONDING_MODE_ACTIVE_BACKUP:
2501                 /* Current primary slave */
2502                 link_update(bond_ctx->current_primary_port, &slave_link);
2503
2504                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2505                 break;
2506         case BONDING_MODE_8023AD:
2507                 ethdev->data->dev_link.link_autoneg =
2508                                 bond_ctx->mode4.slave_link.link_autoneg;
2509                 ethdev->data->dev_link.link_duplex =
2510                                 bond_ctx->mode4.slave_link.link_duplex;
2511                 /* fall through to update link speed */
2512         case BONDING_MODE_ROUND_ROBIN:
2513         case BONDING_MODE_BALANCE:
2514         case BONDING_MODE_TLB:
2515         case BONDING_MODE_ALB:
2516         default:
                /**
                 * In these modes the maximum theoretical link speed is the
                 * sum of all the slaves' link speeds
                 */
2521                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2522
2523                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2524                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2525
2526                         ethdev->data->dev_link.link_speed +=
2527                                         slave_link.link_speed;
2528                 }
2529         }
2530
2531
2532         return 0;
2533 }
2534
2535
2536 static int
2537 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2538 {
2539         struct bond_dev_private *internals = dev->data->dev_private;
2540         struct rte_eth_stats slave_stats;
2541         int i, j;
2542
2543         for (i = 0; i < internals->slave_count; i++) {
2544                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2545
2546                 stats->ipackets += slave_stats.ipackets;
2547                 stats->opackets += slave_stats.opackets;
2548                 stats->ibytes += slave_stats.ibytes;
2549                 stats->obytes += slave_stats.obytes;
2550                 stats->imissed += slave_stats.imissed;
2551                 stats->ierrors += slave_stats.ierrors;
2552                 stats->oerrors += slave_stats.oerrors;
2553                 stats->rx_nombuf += slave_stats.rx_nombuf;
2554
2555                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2556                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2557                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2558                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2559                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2560                         stats->q_errors[j] += slave_stats.q_errors[j];
2561                 }
2562
2563         }
2564
2565         return 0;
2566 }
2567
2568 static void
2569 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2570 {
2571         struct bond_dev_private *internals = dev->data->dev_private;
2572         int i;
2573
2574         for (i = 0; i < internals->slave_count; i++)
2575                 rte_eth_stats_reset(internals->slaves[i].port_id);
2576 }
2577
2578 static void
2579 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2580 {
2581         struct bond_dev_private *internals = eth_dev->data->dev_private;
2582         int i;
2583
2584         internals->promiscuous_en = 1;
2585
2586         switch (internals->mode) {
2587         /* Promiscuous mode is propagated to all slaves */
2588         case BONDING_MODE_ROUND_ROBIN:
2589         case BONDING_MODE_BALANCE:
2590         case BONDING_MODE_BROADCAST:
2591                 for (i = 0; i < internals->slave_count; i++)
2592                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2593                 break;
        /* In mode4 promiscuous mode is managed when a slave is added/removed */
2595         case BONDING_MODE_8023AD:
2596                 break;
2597         /* Promiscuous mode is propagated only to primary slave */
2598         case BONDING_MODE_ACTIVE_BACKUP:
2599         case BONDING_MODE_TLB:
2600         case BONDING_MODE_ALB:
2601         default:
2602                 /* Do not touch promisc when there cannot be primary ports */
2603                 if (internals->slave_count == 0)
2604                         break;
2605                 rte_eth_promiscuous_enable(internals->current_primary_port);
2606         }
2607 }
2608
2609 static void
2610 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2611 {
2612         struct bond_dev_private *internals = dev->data->dev_private;
2613         int i;
2614
2615         internals->promiscuous_en = 0;
2616
2617         switch (internals->mode) {
2618         /* Promiscuous mode is propagated to all slaves */
2619         case BONDING_MODE_ROUND_ROBIN:
2620         case BONDING_MODE_BALANCE:
2621         case BONDING_MODE_BROADCAST:
2622                 for (i = 0; i < internals->slave_count; i++)
2623                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2624                 break;
        /* In mode4 promiscuous mode is managed when a slave is added/removed */
2626         case BONDING_MODE_8023AD:
2627                 break;
2628         /* Promiscuous mode is propagated only to primary slave */
2629         case BONDING_MODE_ACTIVE_BACKUP:
2630         case BONDING_MODE_TLB:
2631         case BONDING_MODE_ALB:
2632         default:
2633                 /* Do not touch promisc when there cannot be primary ports */
2634                 if (internals->slave_count == 0)
2635                         break;
2636                 rte_eth_promiscuous_disable(internals->current_primary_port);
2637         }
2638 }
2639
2640 static void
2641 bond_ethdev_delayed_lsc_propagation(void *arg)
2642 {
2643         if (arg == NULL)
2644                 return;
2645
2646         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2647                         RTE_ETH_EVENT_INTR_LSC, NULL);
2648 }
2649
2650 int
2651 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2652                 void *param, void *ret_param __rte_unused)
2653 {
2654         struct rte_eth_dev *bonded_eth_dev;
2655         struct bond_dev_private *internals;
2656         struct rte_eth_link link;
2657         int rc = -1;
2658
2659         int i, valid_slave = 0;
        uint16_t active_pos;
2661         uint8_t lsc_flag = 0;
2662
2663         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2664                 return rc;
2665
2666         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2667
2668         if (check_for_bonded_ethdev(bonded_eth_dev))
2669                 return rc;
2670
2671         internals = bonded_eth_dev->data->dev_private;
2672
2673         /* If the device isn't started don't handle interrupts */
2674         if (!bonded_eth_dev->data->dev_started)
2675                 return rc;
2676
2677         /* verify that port_id is a valid slave of bonded port */
2678         for (i = 0; i < internals->slave_count; i++) {
2679                 if (internals->slaves[i].port_id == port_id) {
2680                         valid_slave = 1;
2681                         break;
2682                 }
2683         }
2684
2685         if (!valid_slave)
2686                 return rc;
2687
2688         /* Serialize parallel invocations of this LSC callback, triggered either
2689          * by a real link event from a slave PMD or by the bonding PMD itself.
2690          */
2691         rte_spinlock_lock(&internals->lsc_lock);
2692
2693         /* Search for port in active port list */
2694         active_pos = find_slave_by_id(internals->active_slaves,
2695                         internals->active_slave_count, port_id);
2696
2697         rte_eth_link_get_nowait(port_id, &link);
2698         if (link.link_status) {
2699                 if (active_pos < internals->active_slave_count)
2700                         goto link_update;
2701
2702                 /* Check link state properties if the bonded link is up */
2703                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2704                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2705                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2706                                              "for slave %d in bonding mode %d",
2707                                              port_id, internals->mode);
2708                 } else {
2709                         /* inherit slave link properties */
2710                         link_properties_set(bonded_eth_dev, &link);
2711                 }
2712
2713                 /* If no active slave ports then set this port to be
2714                  * the primary port.
2715                  */
2716                 if (internals->active_slave_count < 1) {
2717                         /* If first active slave, then change link status */
2718                         bonded_eth_dev->data->dev_link.link_status =
2719                                                                 ETH_LINK_UP;
2720                         internals->current_primary_port = port_id;
2721                         lsc_flag = 1;
2722
2723                         mac_address_slaves_update(bonded_eth_dev);
2724                 }
2725
2726                 activate_slave(bonded_eth_dev, port_id);
2727
2728                 /* If this slave is the user-defined primary port, restore
2729                  * it as the primary.
2730                  */
2731                 if (internals->user_defined_primary_port &&
2732                                 internals->primary_port == port_id)
2733                         bond_ethdev_primary_set(internals, port_id);
2734         } else {
2735                 if (active_pos == internals->active_slave_count)
2736                         goto link_update;
2737
2738                 /* Remove from active slave list */
2739                 deactivate_slave(bonded_eth_dev, port_id);
2740
2741                 if (internals->active_slave_count < 1)
2742                         lsc_flag = 1;
2743
2744                 /* Update primary id: take the first active slave from the list,
2745                  * or fall back to the configured primary port if none is active */
2746                 if (port_id == internals->current_primary_port) {
2747                         if (internals->active_slave_count > 0)
2748                                 bond_ethdev_primary_set(internals,
2749                                                 internals->active_slaves[0]);
2750                         else
2751                                 internals->current_primary_port = internals->primary_port;
2752                 }
2753         }
2754
2755 link_update:
2756         /**
2757          * Update bonded device link properties after any change to active
2758          * slaves
2759          */
2760         bond_ethdev_link_update(bonded_eth_dev, 0);
2761
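        /* Propagate the LSC event to the application. If propagation delays
         * are configured, any pending delayed event is cancelled first so
         * only the latest link state is reported. */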
2762         if (lsc_flag) {
2763                 /* Cancel any possible outstanding interrupts if delays are enabled */
2764                 if (internals->link_up_delay_ms > 0 ||
2765                         internals->link_down_delay_ms > 0)
2766                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2767                                         bonded_eth_dev);
2768
2769                 if (bonded_eth_dev->data->dev_link.link_status) {
2770                         if (internals->link_up_delay_ms > 0)
2771                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2772                                                 bond_ethdev_delayed_lsc_propagation,
2773                                                 (void *)bonded_eth_dev);
2774                         else
2775                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2776                                                 RTE_ETH_EVENT_INTR_LSC,
2777                                                 NULL);
2778
2779                 } else {
2780                         if (internals->link_down_delay_ms > 0)
2781                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2782                                                 bond_ethdev_delayed_lsc_propagation,
2783                                                 (void *)bonded_eth_dev);
2784                         else
2785                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2786                                                 RTE_ETH_EVENT_INTR_LSC,
2787                                                 NULL);
2788                 }
2789         }
2790
2791         rte_spinlock_unlock(&internals->lsc_lock);
2792
2793         return rc;
2794 }
2795
2796 static int
2797 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2798                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2799 {
2800         unsigned i, j;
2801         int result = 0;
2802         int slave_reta_size;
2803         unsigned reta_count;
2804         struct bond_dev_private *internals = dev->data->dev_private;
2805
2806         if (reta_size != internals->reta_size)
2807                 return -EINVAL;
2808
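        /* Each rte_eth_rss_reta_entry64 covers RTE_RETA_GROUP_SIZE (64)
         * redirection entries; per-entry validity is selected by its mask.
         * reta_size is expected to be a multiple of the group size. */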
2809          /* Copy RETA table */
2810         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2811
2812         for (i = 0; i < reta_count; i++) {
2813                 internals->reta_conf[i].mask = reta_conf[i].mask;
2814                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2815                         if ((reta_conf[i].mask >> j) & 0x01)
2816                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2817         }
2818
2819         /* Fill rest of array */
2820         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2821                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2822                                 sizeof(internals->reta_conf[0]) * reta_count);
2823
2824         /* Propagate RETA over slaves */
2825         for (i = 0; i < internals->slave_count; i++) {
2826                 slave_reta_size = internals->slaves[i].reta_size;
2827                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2828                                 &internals->reta_conf[0], slave_reta_size);
2829                 if (result < 0)
2830                         return result;
2831         }
2832
2833         return 0;
2834 }
2835
2836 static int
2837 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2838                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2839 {
2840         int i, j;
2841         struct bond_dev_private *internals = dev->data->dev_private;
2842
2843         if (reta_size != internals->reta_size)
2844                 return -EINVAL;
2845
2846          /* Copy RETA table */
2847         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2848                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2849                         if ((reta_conf[i].mask >> j) & 0x01)
2850                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2851
2852         return 0;
2853 }
2854
2855 static int
2856 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2857                 struct rte_eth_rss_conf *rss_conf)
2858 {
2859         int i, result = 0;
2860         struct bond_dev_private *internals = dev->data->dev_private;
2861         struct rte_eth_rss_conf bond_rss_conf;
2862
2863         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2864
2865         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2866
2867         if (bond_rss_conf.rss_hf != 0)
2868                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2869
2870         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2871                         sizeof(internals->rss_key)) {
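                /* A zero key length means "use the default length"; 40 bytes
                 * matches the default_rss_key defined in
                 * bond_ethdev_configure(). */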
2872                 if (bond_rss_conf.rss_key_len == 0)
2873                         bond_rss_conf.rss_key_len = 40;
2874                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2875                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2876                                 internals->rss_key_len);
2877         }
2878
2879         for (i = 0; i < internals->slave_count; i++) {
2880                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2881                                 &bond_rss_conf);
2882                 if (result < 0)
2883                         return result;
2884         }
2885
2886         return 0;
2887 }
2888
2889 static int
2890 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2891                 struct rte_eth_rss_conf *rss_conf)
2892 {
2893         struct bond_dev_private *internals = dev->data->dev_private;
2894
2895         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2896         rss_conf->rss_key_len = internals->rss_key_len;
2897         if (rss_conf->rss_key)
2898                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2899
2900         return 0;
2901 }
2902
2903 static int
2904 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2905 {
2906         struct rte_eth_dev *slave_eth_dev;
2907         struct bond_dev_private *internals = dev->data->dev_private;
2908         int ret, i;
2909
2910         rte_spinlock_lock(&internals->lock);
2911
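        /* First pass only checks that every slave implements mtu_set, so we
         * can fail with -ENOTSUP before modifying any slave's MTU. */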
2912         for (i = 0; i < internals->slave_count; i++) {
2913                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2914                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2915                         rte_spinlock_unlock(&internals->lock);
2916                         return -ENOTSUP;
2917                 }
2918         }
2919         for (i = 0; i < internals->slave_count; i++) {
2920                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2921                 if (ret < 0) {
2922                         rte_spinlock_unlock(&internals->lock);
2923                         return ret;
2924                 }
2925         }
2926
2927         rte_spinlock_unlock(&internals->lock);
2928         return 0;
2929 }
2930
2931 static int
2932 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2933 {
2934         if (mac_address_set(dev, addr)) {
2935                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2936                 return -EINVAL;
2937         }
2938
2939         return 0;
2940 }
2941
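/*
 * Only the generic flow API query is supported: it returns the bond's
 * rte_flow ops so that rte_flow calls on the bonded port are dispatched
 * through bond_flow_ops.
 */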
2942 static int
2943 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2944                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2945 {
2946         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2947                 *(const void **)arg = &bond_flow_ops;
2948                 return 0;
2949         }
2950         return -ENOTSUP;
2951 }
2952
2953 static int
2954 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2955                                 __rte_unused uint32_t index, uint32_t vmdq)
2956 {
2957         struct rte_eth_dev *slave_eth_dev;
2958         struct bond_dev_private *internals = dev->data->dev_private;
2959         int ret, i;
2960
2961         rte_spinlock_lock(&internals->lock);
2962
2963         for (i = 0; i < internals->slave_count; i++) {
2964                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2965                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2966                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2967                         ret = -ENOTSUP;
2968                         goto end;
2969                 }
2970         }
2971
2972         for (i = 0; i < internals->slave_count; i++) {
2973                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2974                                 mac_addr, vmdq);
2975                 if (ret < 0) {
2976                         /* rollback */
2977                         for (i--; i >= 0; i--)
2978                                 rte_eth_dev_mac_addr_remove(
2979                                         internals->slaves[i].port_id, mac_addr);
2980                         goto end;
2981                 }
2982         }
2983
2984         ret = 0;
2985 end:
2986         rte_spinlock_unlock(&internals->lock);
2987         return ret;
2988 }
2989
2990 static void
2991 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2992 {
2993         struct rte_eth_dev *slave_eth_dev;
2994         struct bond_dev_private *internals = dev->data->dev_private;
2995         int i;
2996
2997         rte_spinlock_lock(&internals->lock);
2998
2999         for (i = 0; i < internals->slave_count; i++) {
3000                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3001                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3002                         goto end;
3003         }
3004
3005         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
3006
3007         for (i = 0; i < internals->slave_count; i++)
3008                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3009                                 mac_addr);
3010
3011 end:
3012         rte_spinlock_unlock(&internals->lock);
3013 }
3014
3015 const struct eth_dev_ops default_dev_ops = {
3016         .dev_start            = bond_ethdev_start,
3017         .dev_stop             = bond_ethdev_stop,
3018         .dev_close            = bond_ethdev_close,
3019         .dev_configure        = bond_ethdev_configure,
3020         .dev_infos_get        = bond_ethdev_info,
3021         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3022         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3023         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3024         .rx_queue_release     = bond_ethdev_rx_queue_release,
3025         .tx_queue_release     = bond_ethdev_tx_queue_release,
3026         .link_update          = bond_ethdev_link_update,
3027         .stats_get            = bond_ethdev_stats_get,
3028         .stats_reset          = bond_ethdev_stats_reset,
3029         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3030         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3031         .reta_update          = bond_ethdev_rss_reta_update,
3032         .reta_query           = bond_ethdev_rss_reta_query,
3033         .rss_hash_update      = bond_ethdev_rss_hash_update,
3034         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3035         .mtu_set              = bond_ethdev_mtu_set,
3036         .mac_addr_set         = bond_ethdev_mac_address_set,
3037         .mac_addr_add         = bond_ethdev_mac_addr_add,
3038         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3039         .filter_ctrl          = bond_filter_ctrl
3040 };
3041
3042 static int
3043 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3044 {
3045         const char *name = rte_vdev_device_name(dev);
3046         uint8_t socket_id = dev->device.numa_node;
3047         struct bond_dev_private *internals = NULL;
3048         struct rte_eth_dev *eth_dev = NULL;
3049         uint32_t vlan_filter_bmp_size;
3050
3051         /* now do all data allocation - for eth_dev structure, dummy pci driver
3052          * and internal (private) data
3053          */
3054
3055         /* reserve an ethdev entry */
3056         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3057         if (eth_dev == NULL) {
3058                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3059                 goto err;
3060         }
3061
3062         internals = eth_dev->data->dev_private;
3063         eth_dev->data->nb_rx_queues = (uint16_t)1;
3064         eth_dev->data->nb_tx_queues = (uint16_t)1;
3065
3066         /* Allocate memory for storing MAC addresses */
3067         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3068                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3069         if (eth_dev->data->mac_addrs == NULL) {
3070                 RTE_BOND_LOG(ERR,
3071                              "Failed to allocate %u bytes needed to store MAC addresses",
3072                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3073                 goto err;
3074         }
3075
3076         eth_dev->dev_ops = &default_dev_ops;
3077         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3078
3079         rte_spinlock_init(&internals->lock);
3080         rte_spinlock_init(&internals->lsc_lock);
3081
3082         internals->port_id = eth_dev->data->port_id;
3083         internals->mode = BONDING_MODE_INVALID;
3084         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
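        /* The out-of-range port id above means "no primary elected yet" */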
3085         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3086         internals->burst_xmit_hash = burst_xmit_l2_hash;
3087         internals->user_defined_mac = 0;
3088
3089         internals->link_status_polling_enabled = 0;
3090
3091         internals->link_status_polling_interval_ms =
3092                 DEFAULT_POLLING_INTERVAL_10_MS;
3093         internals->link_down_delay_ms = 0;
3094         internals->link_up_delay_ms = 0;
3095
3096         internals->slave_count = 0;
3097         internals->active_slave_count = 0;
3098         internals->rx_offload_capa = 0;
3099         internals->tx_offload_capa = 0;
3100         internals->rx_queue_offload_capa = 0;
3101         internals->tx_queue_offload_capa = 0;
3102         internals->candidate_max_rx_pktlen = 0;
3103         internals->max_rx_pktlen = 0;
3104
3105         /* Initially allow to choose any offload type */
3106         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3107
3108         memset(&internals->default_rxconf, 0,
3109                sizeof(internals->default_rxconf));
3110         memset(&internals->default_txconf, 0,
3111                sizeof(internals->default_txconf));
3112
3113         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3114         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3115
3116         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3117         memset(internals->slaves, 0, sizeof(internals->slaves));
3118
3119         TAILQ_INIT(&internals->flow_list);
3120         internals->flow_isolated_valid = 0;
3121
3122         /* Set mode 4 default configuration */
3123         bond_mode_8023ad_setup(eth_dev, NULL);
3124         if (bond_ethdev_mode_set(eth_dev, mode)) {
3125                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3126                                  eth_dev->data->port_id, mode);
3127                 goto err;
3128         }
3129
3130         vlan_filter_bmp_size =
3131                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3132         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3133                                                    RTE_CACHE_LINE_SIZE);
3134         if (internals->vlan_filter_bmpmem == NULL) {
3135                 RTE_BOND_LOG(ERR,
3136                              "Failed to allocate vlan bitmap for bonded device %u",
3137                              eth_dev->data->port_id);
3138                 goto err;
3139         }
3140
3141         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3142                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3143         if (internals->vlan_filter_bmp == NULL) {
3144                 RTE_BOND_LOG(ERR,
3145                              "Failed to init vlan bitmap for bonded device %u",
3146                              eth_dev->data->port_id);
3147                 rte_free(internals->vlan_filter_bmpmem);
3148                 goto err;
3149         }
3150
3151         return eth_dev->data->port_id;
3152
3153 err:
3154         rte_free(internals);
3155         if (eth_dev != NULL)
3156                 eth_dev->data->dev_private = NULL;
3157         rte_eth_dev_release_port(eth_dev);
3158         return -1;
3159 }
3160
3161 static int
3162 bond_probe(struct rte_vdev_device *dev)
3163 {
3164         const char *name;
3165         struct bond_dev_private *internals;
3166         struct rte_kvargs *kvlist;
3167         uint8_t bonding_mode, socket_id;
3168         int arg_count, port_id;
3169         uint8_t agg_mode;
3170         struct rte_eth_dev *eth_dev;
3171
3172         if (!dev)
3173                 return -EINVAL;
3174
3175         name = rte_vdev_device_name(dev);
3176         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3177
3178         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3179                 eth_dev = rte_eth_dev_attach_secondary(name);
3180                 if (!eth_dev) {
3181                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3182                         return -1;
3183                 }
3184                 /* TODO: request info from primary to set up Rx and Tx */
3185                 eth_dev->dev_ops = &default_dev_ops;
3186                 eth_dev->device = &dev->device;
3187                 rte_eth_dev_probing_finish(eth_dev);
3188                 return 0;
3189         }
3190
3191         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3192                 pmd_bond_init_valid_arguments);
3193         if (kvlist == NULL)
3194                 return -1;
3195
3196         /* Parse link bonding mode */
3197         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3198                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3199                                 &bond_ethdev_parse_slave_mode_kvarg,
3200                                 &bonding_mode) != 0) {
3201                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3202                                         name);
3203                         goto parse_error;
3204                 }
3205         } else {
3206                 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3207                                 "device %s", name);
3208                 goto parse_error;
3209         }
3210
3211         /* Parse socket id to create bonding device on */
3212         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3213         if (arg_count == 1) {
3214                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3215                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3216                                 != 0) {
3217                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3218                                         "bonded device %s", name);
3219                         goto parse_error;
3220                 }
3221         } else if (arg_count > 1) {
3222                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3223                                 "bonded device %s", name);
3224                 goto parse_error;
3225         } else {
3226                 socket_id = rte_socket_id();
3227         }
3228
3229         dev->device.numa_node = socket_id;
3230
3231         /* Create link bonding eth device */
3232         port_id = bond_alloc(dev, bonding_mode);
3233         if (port_id < 0) {
3234                 RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on "
3235                                 "socket %u.",   name, bonding_mode, socket_id);
3236                 goto parse_error;
3237         }
3238         internals = rte_eth_devices[port_id].data->dev_private;
3239         internals->kvlist = kvlist;
3240
3241         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3242                 if (rte_kvargs_process(kvlist,
3243                                 PMD_BOND_AGG_MODE_KVARG,
3244                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3245                                 &agg_mode) != 0) {
3246                         RTE_BOND_LOG(ERR,
3247                                         "Failed to parse agg selection mode for bonded device %s",
3248                                         name);
3249                         goto parse_error;
3250                 }
3251
3252                 if (internals->mode == BONDING_MODE_8023AD)
3253                         internals->mode4.agg_selection = agg_mode;
3254         } else {
3255                 internals->mode4.agg_selection = AGG_STABLE;
3256         }
3257
3258         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3259         RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3260                         "socket %u.",   name, port_id, bonding_mode, socket_id);
3261         return 0;
3262
3263 parse_error:
3264         rte_kvargs_free(kvlist);
3265
3266         return -1;
3267 }
3268
3269 static int
3270 bond_remove(struct rte_vdev_device *dev)
3271 {
3272         struct rte_eth_dev *eth_dev;
3273         struct bond_dev_private *internals;
3274         const char *name;
3275
3276         if (!dev)
3277                 return -EINVAL;
3278
3279         name = rte_vdev_device_name(dev);
3280         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3281
3282         /* now free all data allocation - for eth_dev structure,
3283          * dummy pci driver and internal (private) data
3284          */
3285
3286         /* find an ethdev entry */
3287         eth_dev = rte_eth_dev_allocated(name);
3288         if (eth_dev == NULL)
3289                 return -ENODEV;
3290
3291         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3292                 return rte_eth_dev_release_port(eth_dev);
3293
3294         RTE_ASSERT(eth_dev->device == &dev->device);
3295
3296         internals = eth_dev->data->dev_private;
3297         if (internals->slave_count != 0)
3298                 return -EBUSY;
3299
3300         if (eth_dev->data->dev_started == 1) {
3301                 bond_ethdev_stop(eth_dev);
3302                 bond_ethdev_close(eth_dev);
3303         }
3304
3305         eth_dev->dev_ops = NULL;
3306         eth_dev->rx_pkt_burst = NULL;
3307         eth_dev->tx_pkt_burst = NULL;
3308
3309         /* Try to release the mempool used in mode 6. If the bonded
3310          * device is not in mode 6, freeing the NULL pointer is
3311          * harmless.
3312          */
3313         rte_mempool_free(internals->mode6.mempool);
3314         rte_bitmap_free(internals->vlan_filter_bmp);
3315         rte_free(internals->vlan_filter_bmpmem);
3316
3317         rte_eth_dev_release_port(eth_dev);
3318
3319         return 0;
3320 }
3321
3322 /* This part resolves the slave port ids after all the other pdevs and vdevs
3323  * have been allocated. */
3324 static int
3325 bond_ethdev_configure(struct rte_eth_dev *dev)
3326 {
3327         const char *name = dev->device->name;
3328         struct bond_dev_private *internals = dev->data->dev_private;
3329         struct rte_kvargs *kvlist = internals->kvlist;
3330         int arg_count;
3331         uint16_t port_id = dev - rte_eth_devices;
3332         uint8_t agg_mode;
3333
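        /* Default 40-byte Toeplitz hash key; the same well-known key is used
         * as the default by several NIC PMDs. */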
3334         static const uint8_t default_rss_key[40] = {
3335                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3336                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3337                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3338                 0xBE, 0xAC, 0x01, 0xFA
3339         };
3340
3341         unsigned i, j;
3342
3343         /*
3344          * If RSS is enabled, fill table with default values and
3345          * set key to the value specified in the port RSS configuration.
3346          * Fall back to default RSS key if the key is not specified
3347          */
3348         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3349                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3350                         internals->rss_key_len =
3351                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3352                         memcpy(internals->rss_key,
3353                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3354                                internals->rss_key_len);
3355                 } else {
3356                         internals->rss_key_len = sizeof(default_rss_key);
3357                         memcpy(internals->rss_key, default_rss_key,
3358                                internals->rss_key_len);
3359                 }
3360
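                /* Default RETA: spread redirection entries round-robin
                 * across all configured Rx queues. */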
3361                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3362                         internals->reta_conf[i].mask = ~0LL;
3363                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3364                                 internals->reta_conf[i].reta[j] =
3365                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3366                                                 dev->data->nb_rx_queues;
3367                 }
3368         }
3369
3370         /* set the max_rx_pktlen */
3371         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3372
3373         /*
3374          * if no kvlist, it means that this bonded device has been created
3375          * through the bonding api.
3376          */
3377         if (!kvlist)
3378                 return 0;
3379
3380         /* Parse MAC address for bonded device */
3381         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3382         if (arg_count == 1) {
3383                 struct ether_addr bond_mac;
3384
3385                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3386                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3387                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3388                                      name);
3389                         return -1;
3390                 }
3391
3392                 /* Set MAC address */
3393                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3394                         RTE_BOND_LOG(ERR,
3395                                      "Failed to set mac address on bonded device %s",
3396                                      name);
3397                         return -1;
3398                 }
3399         } else if (arg_count > 1) {
3400                 RTE_BOND_LOG(ERR,
3401                              "MAC address can be specified only once for bonded device %s",
3402                              name);
3403                 return -1;
3404         }
3405
3406         /* Parse/set balance mode transmit policy */
3407         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3408         if (arg_count == 1) {
3409                 uint8_t xmit_policy;
3410
3411                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3412                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3413                     0) {
3414                         RTE_BOND_LOG(INFO,
3415                                      "Invalid xmit policy specified for bonded device %s",
3416                                      name);
3417                         return -1;
3418                 }
3419
3420                 /* Set balance mode transmit policy */
3421                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3422                         RTE_BOND_LOG(ERR,
3423                                      "Failed to set balance xmit policy on bonded device %s",
3424                                      name);
3425                         return -1;
3426                 }
3427         } else if (arg_count > 1) {
3428                 RTE_BOND_LOG(ERR,
3429                              "Transmit policy can be specified only once for bonded device %s",
3430                              name);
3431                 return -1;
3432         }
3433
3434         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3435                 if (rte_kvargs_process(kvlist,
3436                                        PMD_BOND_AGG_MODE_KVARG,
3437                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3438                                        &agg_mode) != 0) {
3439                         RTE_BOND_LOG(ERR,
3440                                      "Failed to parse agg selection mode for bonded device %s",
3441                                      name);
3442                 }
3443                 if (internals->mode == BONDING_MODE_8023AD) {
3444                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3445                                         agg_mode);
3446                         if (ret < 0) {
3447                                 RTE_BOND_LOG(ERR,
3448                                         "Invalid args for agg selection set for bonded device %s",
3449                                         name);
3450                                 return -1;
3451                         }
3452                 }
3453         }
3454
3455         /* Parse/add slave ports to bonded device */
3456         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3457                 struct bond_ethdev_slave_ports slave_ports;
3458                 unsigned i;
3459
3460                 memset(&slave_ports, 0, sizeof(slave_ports));
3461
3462                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3463                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3464                         RTE_BOND_LOG(ERR,
3465                                      "Failed to parse slave ports for bonded device %s",
3466                                      name);
3467                         return -1;
3468                 }
3469
3470                 for (i = 0; i < slave_ports.slave_count; i++) {
3471                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3472                                 RTE_BOND_LOG(ERR,
3473                                              "Failed to add port %d as slave to bonded device %s",
3474                                              slave_ports.slaves[i], name);
3475                         }
3476                 }
3477
3478         } else {
3479                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3480                 return -1;
3481         }
3482
3483         /* Parse/set primary slave port id*/
3484         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3485         if (arg_count == 1) {
3486                 uint16_t primary_slave_port_id;
3487
3488                 if (rte_kvargs_process(kvlist,
3489                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3490                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3491                                        &primary_slave_port_id) < 0) {
3492                         RTE_BOND_LOG(INFO,
3493                                      "Invalid primary slave port id specified for bonded device %s",
3494                                      name);
3495                         return -1;
3496                 }
3497
3498                 /* Set primary slave port id */
3499                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3500                     != 0) {
3501                         RTE_BOND_LOG(ERR,
3502                                      "Failed to set primary slave port %d on bonded device %s",
3503                                      primary_slave_port_id, name);
3504                         return -1;
3505                 }
3506         } else if (arg_count > 1) {
3507                 RTE_BOND_LOG(INFO,
3508                              "Primary slave can be specified only once for bonded device %s",
3509                              name);
3510                 return -1;
3511         }
3512
3513         /* Parse link status monitor polling interval */
3514         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3515         if (arg_count == 1) {
3516                 uint32_t lsc_poll_interval_ms;
3517
3518                 if (rte_kvargs_process(kvlist,
3519                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3520                                        &bond_ethdev_parse_time_ms_kvarg,
3521                                        &lsc_poll_interval_ms) < 0) {
3522                         RTE_BOND_LOG(INFO,
3523                                      "Invalid lsc polling interval value specified for bonded"
3524                                      " device %s", name);
3525                         return -1;
3526                 }
3527
3528                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3529                     != 0) {
3530                         RTE_BOND_LOG(ERR,
3531                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3532                                      lsc_poll_interval_ms, name);
3533                         return -1;
3534                 }
3535         } else if (arg_count > 1) {
3536                 RTE_BOND_LOG(INFO,
3537                              "LSC polling interval can be specified only once for bonded"
3538                              " device %s", name);
3539                 return -1;
3540         }
3541
3542         /* Parse link up interrupt propagation delay */
3543         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3544         if (arg_count == 1) {
3545                 uint32_t link_up_delay_ms;
3546
3547                 if (rte_kvargs_process(kvlist,
3548                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3549                                        &bond_ethdev_parse_time_ms_kvarg,
3550                                        &link_up_delay_ms) < 0) {
3551                         RTE_BOND_LOG(INFO,
3552                                      "Invalid link up propagation delay value specified for"
3553                                      " bonded device %s", name);
3554                         return -1;
3555                 }
3556
3557                 /* Set link up propagation delay */
3558                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3559                     != 0) {
3560                         RTE_BOND_LOG(ERR,
3561                                      "Failed to set link up propagation delay (%u ms) on bonded"
3562                                      " device %s", link_up_delay_ms, name);
3563                         return -1;
3564                 }
3565         } else if (arg_count > 1) {
3566                 RTE_BOND_LOG(INFO,
3567                              "Link up propagation delay can be specified only once for"
3568                              " bonded device %s", name);
3569                 return -1;
3570         }
3571
3572         /* Parse link down interrupt propagation delay */
3573         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3574         if (arg_count == 1) {
3575                 uint32_t link_down_delay_ms;
3576
3577                 if (rte_kvargs_process(kvlist,
3578                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3579                                        &bond_ethdev_parse_time_ms_kvarg,
3580                                        &link_down_delay_ms) < 0) {
3581                         RTE_BOND_LOG(INFO,
3582                                      "Invalid link down propagation delay value specified for"
3583                                      " bonded device %s", name);
3584                         return -1;
3585                 }
3586
3587                 /* Set link down propagation delay */
3588                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3589                     != 0) {
3590                         RTE_BOND_LOG(ERR,
3591                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3592                                      link_down_delay_ms, name);
3593                         return -1;
3594                 }
3595         } else if (arg_count > 1) {
3596                 RTE_BOND_LOG(INFO,
3597                              "Link down propagation delay can be specified only once for  bonded device %s",
3598                              name);
3599                 return -1;
3600         }
3601
3602         return 0;
3603 }
3604
3605 struct rte_vdev_driver pmd_bond_drv = {
3606         .probe = bond_probe,
3607         .remove = bond_remove,
3608 };
3609
3610 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3611 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3612
3613 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3614         "slave=<ifc> "
3615         "primary=<ifc> "
3616         "mode=[0-6] "
3617         "xmit_policy=[l2 | l23 | l34] "
3618         "agg_mode=[count | stable | bandwidth] "
3619         "socket_id=<int> "
3620         "mac=<mac addr> "
3621         "lsc_poll_period_ms=<int> "
3622         "up_delay=<int> "
3623         "down_delay=<int>");
3624
3625 int bond_logtype;
3626
3627 RTE_INIT(bond_init_log)
3628 {
3629         bond_logtype = rte_log_register("pmd.net.bond");
3630         if (bond_logtype >= 0)
3631                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3632 }