net/bonding: fix packet count type for LACP
drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

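/*
 * Skip over up to two VLAN tags (a single VLAN tag or a QinQ pair) at the
 * start of the L2 payload. On return *proto holds the inner EtherType and
 * the returned offset is the number of VLAN header bytes past eth_hdr.
 */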
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
		rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}

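/*
 * Mode 0 (round-robin) receive: poll each active slave in turn, starting
 * from the slave recorded in internals->active_slave, until nb_pkts mbufs
 * have been gathered or every slave has been polled once. The starting
 * index persists across calls so load is spread over the slaves.
 */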
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_total = 0;
	uint16_t slave_count;
	uint16_t active_slave;
	int i;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	internals = bd_rx_q->dev_private;
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave =
			rte_eth_rx_burst(internals->active_slaves[active_slave],
					 bd_rx_q->queue_id,
					 bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;
	return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}

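/*
 * A frame is treated as a slow-protocol packet only if the NIC reported no
 * (non-zero) VLAN tag for it and its EtherType is ETHER_TYPE_SLOW with an
 * LACP or marker subtype. rte_be_to_cpu_16() is used on the constant
 * because a 16-bit byte swap is symmetric, so the comparison works the
 * same on either endianness.
 */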
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};

int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities don't allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify that all slaves in the bonding support flow director */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint16_t active_slave;
	uint16_t i;

	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Read packets from this slave */
		num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
						bd_rx_q->queue_id,
						bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;

		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	/* 2-D array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves;
	 * needs one entry per packet, not per port */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);


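	/* 802.3ad: only slaves whose actor state has DISTRIBUTING set may
	 * carry data traffic, so build the candidate list from those */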
	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (unlikely(dist_slave_count < 1))
		return 0;

	/*
	 * Select an output slave for each packet with a hash based on the
	 * xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}


	/* Send packet burst on each slave device */
	for (i = 0; i < dist_slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs, so the caller
		 * gets all unsent mbufs back in one contiguous tail */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
			       &slave_bufs[i][slave_tx_count],
			       slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}


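/*
 * Mode 4 receive without dedicated queues: poll slaves round-robin, then
 * weed out of the burst any frame that must not reach the application:
 * LACPDUs and markers (handed to the mode 4 state machine), frames from
 * slaves that are not COLLECTING, and unicast frames not addressed to the
 * bond while promiscuous mode is off.
 */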
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct rte_eth_dev *bonded_eth_dev =
					&rte_eth_devices[internals->port_id];
	struct ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;  /* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t subtype;
	uint16_t i;
	uint16_t j;
	uint16_t k;

	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
					 COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {

			/* If packet is not pure L2 and is known, skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove the packet from the array if it is a slow packet,
			 * if the slave is not in collecting state, or if the bonding
			 * interface is not in promiscuous mode and the destination
			 * address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
				!collecting ||
				(!promisc &&
				 !is_multicast_ether_addr(&hdr->d_addr) &&
				 !is_same_ether_addr(bond_mac,
						     &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
					    internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}
	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
	return;
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

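/*
 * The caller identifies the direction by passing a pointer to either
 * burstnumberRX or burstnumberTX; the counter is matched by address,
 * not by value.
 */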
static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update the packet count for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update its stats */
	if (active_clients == MAX_CLIENTS_NUMBER)
		return;	/* client table is full, drop the update */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;

}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
		info,							\
		port,							\
		eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
		src_ip,							\
		eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
		dst_ip,							\
		arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	strlcpy(buf, info, 16);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif

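/*
 * Mode 6 (ALB) receive: reuse the round-robin receive path, then hand any
 * ARP frame to the ALB logic so the client table stays current. The cast
 * of queue to bond_tx_queue appears to rely on bond_rx_queue and
 * bond_tx_queue sharing the same leading layout (queue_id, dev_private).
 */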
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

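	/* Note: the round-robin cursor is function-static, so it is shared by
	 * all queues (and all bonded devices) that use this tx path */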
	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Distribute the packets round-robin across the slave tx buffers */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
				       &slave_bufs[i][num_tx_slave],
				       tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}

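/*
 * XOR-fold hash helpers: each one XORs the relevant header words together.
 * The result is endian-dependent but stable for a given flow, which is all
 * the slave selection below requires.
 */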
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}


void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint32_t hash;
	int i;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

		hash = ether_hash(eth_hdr);

		slaves[i] = (hash ^= hash >> 8) % slave_count;
	}
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	uint16_t i;
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	uint32_t hash, l3hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		l3hash = 0;

		proto = eth_hdr->ether_type;
		hash = ether_hash(eth_hdr);

		vlan_offset = get_vlan_offset(eth_hdr, &proto);

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv4_hash(ipv4_hdr);

		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);
		}

		hash = hash ^ l3hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	int i;

	struct udp_hdr *udp_hdr;
	struct tcp_hdr *tcp_hdr;
	uint32_t hash, l3hash, l4hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		proto = eth_hdr->ether_type;
		vlan_offset = get_vlan_offset(eth_hdr, &proto);
		l3hash = 0;
		l4hash = 0;

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			size_t ip_hdr_offset;

			l3hash = ipv4_hash(ipv4_hdr);

			/* there is no L4 header in fragmented packet */
			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
								== 0)) {
				ip_hdr_offset = (ipv4_hdr->version_ihl
					& IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
					tcp_hdr = (struct tcp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					l4hash = HASH_L4_PORTS(tcp_hdr);
				} else if (ipv4_hdr->next_proto_id ==
								IPPROTO_UDP) {
					udp_hdr = (struct udp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					l4hash = HASH_L4_PORTS(udp_hdr);
				}
			}
		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);

			if (ipv6_hdr->proto == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}

		hash = l3hash ^ l4hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}

struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}

static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}

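/*
 * Estimate how much of the slave's link capacity is still unused over the
 * current update window; the integer part and remainder are kept separately
 * so that bandwidth_cmp() can sort slaves by descending headroom.
 */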
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint16_t slave_id;
	uint16_t i;

	internals->slave_update_idx++;


	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			internals);
}

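/*
 * Mode 5 (TLB) transmit: walk the slaves in tlb_slaves_order (most idle
 * first), rewriting the Ethernet source MAC of packets that still carry
 * the primary slave's address so each frame leaves with the MAC of the
 * slave that actually sends it.
 */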
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);


	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}

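/*
 * Mode 6 (ALB) transmit: ARP packets are steered by the ALB client table so
 * each client stays pinned to one slave; everything else goes through the
 * TLB path. ARP table update packets generated here are sent out of band
 * and are not counted in the returned total.
 */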
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_BOND_LOG(ERR,
						     "Failed to allocate ARP packet from pool");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			/* Move unsent packets to the end of bufs */
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
	/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		/* Move unsent packets to the end of bufs */
		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	/* 2-D array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	/*
	 * Select an output slave for each packet with a hash based on the
	 * xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
			       &slave_bufs[i][slave_tx_count],
			       slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}
1278
1279 static uint16_t
1280 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1281                 uint16_t nb_bufs)
1282 {
1283         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1284         struct bond_dev_private *internals = bd_tx_q->dev_private;
1285
1286         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1287         uint16_t slave_count;
1288
1289         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1290         uint16_t dist_slave_count;
1291
1292         /* 2-D array to sort mbufs for transmission on each slave into */
1293         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1294         /* Number of mbufs for transmission on each slave */
1295         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1296         /* Mapping array generated by hash function to map mbufs to slaves */
1297         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1298
1299         uint16_t slave_tx_count;
1300         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1301
1302         uint16_t i;
1303
1304         /* Copy slave list to protect against slave up/down changes during tx
1305          * bursting */
1306         slave_count = internals->active_slave_count;
1307         if (unlikely(slave_count < 1))
1308                 return 0;
1309
1310         memcpy(slave_port_ids, internals->active_slaves,
1311                         sizeof(slave_port_ids[0]) * slave_count);
1312
1313         /* Check for LACP control packets and send if available */
1314         for (i = 0; i < slave_count; i++) {
1315                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1316                 struct rte_mbuf *ctrl_pkt = NULL;
1317
1318                 if (likely(rte_ring_empty(port->tx_ring)))
1319                         continue;
1320
1321                 if (rte_ring_dequeue(port->tx_ring,
1322                                      (void **)&ctrl_pkt) != -ENOENT) {
1323                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1324                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1325                         /*
1326                          * re-enqueue LAG control plane packets to buffering
1327                          * ring if transmission fails so the packet isn't lost.
1328                          */
1329                         if (slave_tx_count != 1)
1330                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1331                 }
1332         }
1333
1334         if (unlikely(nb_bufs == 0))
1335                 return 0;
1336
1337         dist_slave_count = 0;
1338         for (i = 0; i < slave_count; i++) {
1339                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1340
1341                 if (ACTOR_STATE(port, DISTRIBUTING))
1342                         dist_slave_port_ids[dist_slave_count++] =
1343                                         slave_port_ids[i];
1344         }
1345
1346         if (likely(dist_slave_count > 0)) {
1347
1348                 /*
1349                  * Populate slaves mbuf with the packets which are to be sent
1350                  * on it, selecting output slave using hash based on xmit policy
1351                  */
1352                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1353                                 bufs_slave_port_idxs);
1354
1355                 for (i = 0; i < nb_bufs; i++) {
1356                         /*
1357                          * Populate slave mbuf arrays with mbufs for that
1358                          * slave
1359                          */
1360                         uint16_t slave_idx = bufs_slave_port_idxs[i];
1361
1362                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1363                                         bufs[i];
1364                 }
1365
1366
1367                 /* Send packet burst on each slave device */
1368                 for (i = 0; i < dist_slave_count; i++) {
1369                         if (slave_nb_bufs[i] == 0)
1370                                 continue;
1371
1372                         slave_tx_count = rte_eth_tx_burst(
1373                                         dist_slave_port_ids[i],
1374                                         bd_tx_q->queue_id, slave_bufs[i],
1375                                         slave_nb_bufs[i]);
1376
1377                         total_tx_count += slave_tx_count;
1378
1379                         /* If tx burst fails move unsent packets to end of bufs */
1380                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1381                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1382                                                 slave_tx_count;
1383                                 total_tx_fail_count += slave_tx_fail_count;
1384
1385                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1386                                        &slave_bufs[i][slave_tx_count],
1387                                        slave_tx_fail_count * sizeof(bufs[0]));
1388                         }
1389                 }
1390         }
1391
1392         return total_tx_count;
1393 }
1394
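/*
 * Illustrative sketch: since the burst function above compacts unsent
 * mbufs to the tail of bufs, a caller can retry them with the usual
 * rte_eth_tx_burst() pattern.  bond_port_id and queue_id are
 * hypothetical, application-defined values.
 *
 *      uint16_t sent = rte_eth_tx_burst(bond_port_id, queue_id,
 *                      bufs, nb_bufs);
 *      while (sent < nb_bufs)
 *              sent += rte_eth_tx_burst(bond_port_id, queue_id,
 *                              bufs + sent, nb_bufs - sent);
 */
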
1395 static uint16_t
1396 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1397                 uint16_t nb_pkts)
1398 {
1399         struct bond_dev_private *internals;
1400         struct bond_tx_queue *bd_tx_q;
1401
1402         uint16_t slaves[RTE_MAX_ETHPORTS];
1403         uint8_t tx_failed_flag = 0;
1404         uint16_t num_of_slaves;
1405
1406         uint16_t max_nb_of_tx_pkts = 0;
1407
1408         int slave_tx_total[RTE_MAX_ETHPORTS];
1409         int i, most_successful_tx_slave = -1;
1410
1411         bd_tx_q = (struct bond_tx_queue *)queue;
1412         internals = bd_tx_q->dev_private;
1413
1414         /* Copy slave list to protect against slave up/down changes during tx
1415          * bursting */
1416         num_of_slaves = internals->active_slave_count;
1417         memcpy(slaves, internals->active_slaves,
1418                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1419
1420         if (num_of_slaves < 1)
1421                 return 0;
1422
1423         /* Increment reference count on mbufs */
1424         for (i = 0; i < nb_pkts; i++)
1425                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1426
1427         /* Transmit burst on each active slave */
1428         for (i = 0; i < num_of_slaves; i++) {
1429                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1430                                         bufs, nb_pkts);
1431
1432                 if (unlikely(slave_tx_total[i] < nb_pkts))
1433                         tx_failed_flag = 1;
1434
1435                 /* record the value and slave index for the slave which transmits the
1436                  * maximum number of packets */
1437                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1438                         max_nb_of_tx_pkts = slave_tx_total[i];
1439                         most_successful_tx_slave = i;
1440                 }
1441         }
1442
1443         /* if slaves fail to transmit packets from burst, the calling application
1444          * is not expected to know about multiple references to packets so we must
1445          * handle failures of all packets except those of the most successful slave
1446          */
1447         if (unlikely(tx_failed_flag))
1448                 for (i = 0; i < num_of_slaves; i++)
1449                         if (i != most_successful_tx_slave)
1450                                 while (slave_tx_total[i] < nb_pkts)
1451                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1452
1453         return max_nb_of_tx_pkts;
1454 }
1455
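/*
 * Illustrative sketch of the reference counting used above: each mbuf
 * starts with refcnt == 1, and bumping it by (num_of_slaves - 1)
 * means every slave's eventual free drops exactly one reference, so
 * the packet data is released only after the last slave is done.
 *
 *      rte_mbuf_refcnt_update(m, num_of_slaves - 1);
 *      // refcnt == num_of_slaves; one reference per transmitting slave
 */
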
1456 static void
1457 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1458 {
1459         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1460
1461         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1462                 /**
1463                  * If in mode 4 then save the link properties of the first
1464                  * slave, all subsequent slaves must match these properties
1465                  */
1466                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1467
1468                 bond_link->link_autoneg = slave_link->link_autoneg;
1469                 bond_link->link_duplex = slave_link->link_duplex;
1470                 bond_link->link_speed = slave_link->link_speed;
1471         } else {
1472                 /**
1473                  * In any other mode the link properties are set to default
1474                  * values of AUTONEG/DUPLEX
1475                  */
1476                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1477                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1478         }
1479 }
1480
1481 static int
1482 link_properties_valid(struct rte_eth_dev *ethdev,
1483                 struct rte_eth_link *slave_link)
1484 {
1485         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1486
1487         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1488                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1489
1490                 if (bond_link->link_duplex != slave_link->link_duplex ||
1491                         bond_link->link_autoneg != slave_link->link_autoneg ||
1492                         bond_link->link_speed != slave_link->link_speed)
1493                         return -1;
1494         }
1495
1496         return 0;
1497 }
1498
1499 int
1500 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1501 {
1502         struct ether_addr *mac_addr;
1503
1504         if (eth_dev == NULL) {
1505                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1506                 return -1;
1507         }
1508
1509         if (dst_mac_addr == NULL) {
1510                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1511                 return -1;
1512         }
1513
1514         mac_addr = eth_dev->data->mac_addrs;
1515
1516         ether_addr_copy(mac_addr, dst_mac_addr);
1517         return 0;
1518 }
1519
1520 int
1521 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1522 {
1523         struct ether_addr *mac_addr;
1524
1525         if (eth_dev == NULL) {
1526                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1527                 return -1;
1528         }
1529
1530         if (new_mac_addr == NULL) {
1531                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1532                 return -1;
1533         }
1534
1535         mac_addr = eth_dev->data->mac_addrs;
1536
1537         /* If new MAC is different from current MAC then update */
1538         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1539                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1540
1541         return 0;
1542 }
1543
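/*
 * Illustrative sketch: an application changes the bonded port's MAC
 * through the public helper, which eventually reaches
 * mac_address_set() above.  bond_port_id is hypothetical.
 *
 *      struct ether_addr addr = { .addr_bytes =
 *                      { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 } };
 *      rte_eth_bond_mac_address_set(bond_port_id, &addr);
 */
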
1544 static const struct ether_addr null_mac_addr;
1545
1546 /*
1547  * Add additional MAC addresses to the slave
1548  */
1549 int
1550 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1551                 uint16_t slave_port_id)
1552 {
1553         int i, ret;
1554         struct ether_addr *mac_addr;
1555
1556         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1557                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1558                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1559                         break;
1560
1561                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1562                 if (ret < 0) {
1563                         /* rollback */
1564                         for (i--; i > 0; i--)
1565                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1566                                         &bonded_eth_dev->data->mac_addrs[i]);
1567                         return ret;
1568                 }
1569         }
1570
1571         return 0;
1572 }
1573
1574 /*
1575  * Remove additional MAC addresses from the slave
1576  */
1577 int
1578 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1579                 uint16_t slave_port_id)
1580 {
1581         int i, rc, ret;
1582         struct ether_addr *mac_addr;
1583
1584         rc = 0;
1585         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1586                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1587                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1588                         break;
1589
1590                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1591                 /* save only the first error */
1592                 if (ret < 0 && rc == 0)
1593                         rc = ret;
1594         }
1595
1596         return rc;
1597 }
1598
1599 int
1600 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1601 {
1602         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1603         int i;
1604
1605         /* Update slave devices MAC addresses */
1606         if (internals->slave_count < 1)
1607                 return -1;
1608
1609         switch (internals->mode) {
1610         case BONDING_MODE_ROUND_ROBIN:
1611         case BONDING_MODE_BALANCE:
1612         case BONDING_MODE_BROADCAST:
1613                 for (i = 0; i < internals->slave_count; i++) {
1614                         if (rte_eth_dev_default_mac_addr_set(
1615                                         internals->slaves[i].port_id,
1616                                         bonded_eth_dev->data->mac_addrs)) {
1617                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1618                                                 internals->slaves[i].port_id);
1619                                 return -1;
1620                         }
1621                 }
1622                 break;
1623         case BONDING_MODE_8023AD:
1624                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1625                 break;
1626         case BONDING_MODE_ACTIVE_BACKUP:
1627         case BONDING_MODE_TLB:
1628         case BONDING_MODE_ALB:
1629         default:
1630                 for (i = 0; i < internals->slave_count; i++) {
1631                         if (internals->slaves[i].port_id ==
1632                                         internals->current_primary_port) {
1633                                 if (rte_eth_dev_default_mac_addr_set(
1634                                                 internals->current_primary_port,
1635                                                 bonded_eth_dev->data->mac_addrs)) {
1636                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1637                                                         internals->current_primary_port);
1638                                         return -1;
1639                                 }
1640                         } else {
1641                                 if (rte_eth_dev_default_mac_addr_set(
1642                                                 internals->slaves[i].port_id,
1643                                                 &internals->slaves[i].persisted_mac_addr)) {
1644                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1645                                                         internals->slaves[i].port_id);
1646                                         return -1;
1647                                 }
1648                         }
1649                 }
1650         }
1651
1652         return 0;
1653 }
1654
1655 int
1656 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1657 {
1658         struct bond_dev_private *internals;
1659
1660         internals = eth_dev->data->dev_private;
1661
1662         switch (mode) {
1663         case BONDING_MODE_ROUND_ROBIN:
1664                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1665                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1666                 break;
1667         case BONDING_MODE_ACTIVE_BACKUP:
1668                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1669                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1670                 break;
1671         case BONDING_MODE_BALANCE:
1672                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1673                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1674                 break;
1675         case BONDING_MODE_BROADCAST:
1676                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1677                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1678                 break;
1679         case BONDING_MODE_8023AD:
1680                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1681                         return -1;
1682
1683                 if (internals->mode4.dedicated_queues.enabled == 0) {
1684                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1685                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1686                         RTE_BOND_LOG(WARNING,
1687                                 "Using mode 4, TX and RX bursts must be invoked "
1688                                 "at least once every 100ms.");
1689                 } else {
1690                         /* Use flow director's optimization */
1691                         eth_dev->rx_pkt_burst =
1692                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1693                         eth_dev->tx_pkt_burst =
1694                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1695                 }
1696                 break;
1697         case BONDING_MODE_TLB:
1698                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1699                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1700                 break;
1701         case BONDING_MODE_ALB:
1702                 if (bond_mode_alb_enable(eth_dev) != 0)
1703                         return -1;
1704
1705                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1706                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1707                 break;
1708         default:
1709                 return -1;
1710         }
1711
1712         internals->mode = mode;
1713
1714         return 0;
1715 }
1716
1717
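/*
 * Illustrative sketch: applications normally select the mode via the
 * public API, which calls bond_ethdev_mode_set() underneath.
 * bond_port_id is a hypothetical, application-defined port id.
 *
 *      if (rte_eth_bond_mode_set(bond_port_id, BONDING_MODE_8023AD) != 0)
 *              rte_exit(EXIT_FAILURE, "failed to set bonding mode\n");
 */
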
1718 static int
1719 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1720                 struct rte_eth_dev *slave_eth_dev)
1721 {
1722         int errval = 0;
1723         struct bond_dev_private *internals = (struct bond_dev_private *)
1724                 bonded_eth_dev->data->dev_private;
1725         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1726
1727         if (port->slow_pool == NULL) {
1728                 char mem_name[256];
1729                 int slave_id = slave_eth_dev->data->port_id;
1730
1731                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1732                                 slave_id);
1733                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1734                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1735                         slave_eth_dev->data->numa_node);
1736
1737                 /* Any memory allocation failure in initialization is critical because
1738                  * resources cannot be freed, so reinitialization is impossible. */
1739                 if (port->slow_pool == NULL) {
1740                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1741                                 slave_id, mem_name, rte_strerror(rte_errno));
1742                 }
1743         }
1744
1745         if (internals->mode4.dedicated_queues.enabled == 1) {
1746                 /* Configure slow Rx queue */
1747
1748                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1749                                 internals->mode4.dedicated_queues.rx_qid, 128,
1750                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1751                                 NULL, port->slow_pool);
1752                 if (errval != 0) {
1753                         RTE_BOND_LOG(ERR,
1754                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1755                                         slave_eth_dev->data->port_id,
1756                                         internals->mode4.dedicated_queues.rx_qid,
1757                                         errval);
1758                         return errval;
1759                 }
1760
1761                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1762                                 internals->mode4.dedicated_queues.tx_qid, 512,
1763                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1764                                 NULL);
1765                 if (errval != 0) {
1766                         RTE_BOND_LOG(ERR,
1767                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1768                                 slave_eth_dev->data->port_id,
1769                                 internals->mode4.dedicated_queues.tx_qid,
1770                                 errval);
1771                         return errval;
1772                 }
1773         }
1774         return 0;
1775 }
1776
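/*
 * Illustrative sketch: the slow queues configured above only exist
 * when the application enables dedicated hardware queues for LACP
 * before starting the bonded port:
 *
 *      rte_eth_bond_8023ad_dedicated_queues_enable(bond_port_id);
 */
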
1777 int
1778 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1779                 struct rte_eth_dev *slave_eth_dev)
1780 {
1781         struct bond_rx_queue *bd_rx_q;
1782         struct bond_tx_queue *bd_tx_q;
1783         uint16_t nb_rx_queues;
1784         uint16_t nb_tx_queues;
1785
1786         int errval;
1787         uint16_t q_id;
1788         struct rte_flow_error flow_error;
1789
1790         struct bond_dev_private *internals = (struct bond_dev_private *)
1791                 bonded_eth_dev->data->dev_private;
1792
1793         /* Stop slave */
1794         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1795
1796         /* Enable interrupts on slave device if supported */
1797         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1798                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1799
1800         /* If RSS is enabled for bonding, try to enable it for slaves  */
1801         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1802                 if (internals->rss_key_len != 0) {
1803                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1804                                         internals->rss_key_len;
1805                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1806                                         internals->rss_key;
1807                 } else {
1808                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1809                 }
1810
1811                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1812                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1813                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1814                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1815         }
1816
1817         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1818                         DEV_RX_OFFLOAD_VLAN_FILTER)
1819                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1820                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1821         else
1822                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1823                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1824
1825         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1826         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1827
1828         if (internals->mode == BONDING_MODE_8023AD) {
1829                 if (internals->mode4.dedicated_queues.enabled == 1) {
1830                         nb_rx_queues++;
1831                         nb_tx_queues++;
1832                 }
1833         }
1834
1835         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1836                                      bonded_eth_dev->data->mtu);
1837         if (errval != 0 && errval != -ENOTSUP) {
1838                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1839                                 slave_eth_dev->data->port_id, errval);
1840                 return errval;
1841         }
1842
1843         /* Configure device */
1844         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1845                         nb_rx_queues, nb_tx_queues,
1846                         &(slave_eth_dev->data->dev_conf));
1847         if (errval != 0) {
1848                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1849                                 slave_eth_dev->data->port_id, errval);
1850                 return errval;
1851         }
1852
1853         /* Setup Rx Queues */
1854         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1855                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1856
1857                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1858                                 bd_rx_q->nb_rx_desc,
1859                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1860                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1861                 if (errval != 0) {
1862                         RTE_BOND_LOG(ERR,
1863                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1864                                         slave_eth_dev->data->port_id, q_id, errval);
1865                         return errval;
1866                 }
1867         }
1868
1869         /* Setup Tx Queues */
1870         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1871                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1872
1873                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1874                                 bd_tx_q->nb_tx_desc,
1875                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1876                                 &bd_tx_q->tx_conf);
1877                 if (errval != 0) {
1878                         RTE_BOND_LOG(ERR,
1879                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1880                                 slave_eth_dev->data->port_id, q_id, errval);
1881                         return errval;
1882                 }
1883         }
1884
1885         if (internals->mode == BONDING_MODE_8023AD &&
1886                         internals->mode4.dedicated_queues.enabled == 1) {
1887                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1888                 if (errval != 0)
1889                         return errval;
1890
1891                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1892                                 slave_eth_dev->data->port_id) != 0) {
1893                         RTE_BOND_LOG(ERR,
1894                                 "bond_ethdev_8023ad_flow_verify: port=%d",
1895                                 slave_eth_dev->data->port_id);
1896                         return -1;
1897                 }
1898
1899                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1900                         rte_flow_destroy(slave_eth_dev->data->port_id,
1901                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1902                                         &flow_error);
1903
1904                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1905                                 slave_eth_dev->data->port_id);
1906         }
1907
1908         /* Start device */
1909         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1910         if (errval != 0) {
1911                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1912                                 slave_eth_dev->data->port_id, errval);
1913                 return -1;
1914         }
1915
1916         /* If RSS is enabled for bonding, synchronize RETA */
1917         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1918                 int i;
1919                 struct bond_dev_private *internals;
1920
1921                 internals = bonded_eth_dev->data->dev_private;
1922
1923                 for (i = 0; i < internals->slave_count; i++) {
1924                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1925                                 errval = rte_eth_dev_rss_reta_update(
1926                                                 slave_eth_dev->data->port_id,
1927                                                 &internals->reta_conf[0],
1928                                                 internals->slaves[i].reta_size);
1929                                 if (errval != 0) {
1930                                         RTE_BOND_LOG(WARNING,
1931                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1932                                                      " RSS Configuration for bonding may be inconsistent.",
1933                                                      slave_eth_dev->data->port_id, errval);
1934                                 }
1935                                 break;
1936                         }
1937                 }
1938         }
1939
1940         /* If lsc interrupt is set, check initial slave's link status */
1941         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1942                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1943                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1944                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1945                         NULL);
1946         }
1947
1948         return 0;
1949 }
1950
1951 void
1952 slave_remove(struct bond_dev_private *internals,
1953                 struct rte_eth_dev *slave_eth_dev)
1954 {
1955         uint16_t i;
1956
1957         for (i = 0; i < internals->slave_count; i++)
1958                 if (internals->slaves[i].port_id ==
1959                                 slave_eth_dev->data->port_id)
1960                         break;
1961
1962         if (i < (internals->slave_count - 1)) {
1963                 struct rte_flow *flow;
1964
1965                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1966                                 sizeof(internals->slaves[0]) *
1967                                 (internals->slave_count - i - 1));
1968                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1969                         memmove(&flow->flows[i], &flow->flows[i + 1],
1970                                 sizeof(flow->flows[0]) *
1971                                 (internals->slave_count - i - 1));
1972                         flow->flows[internals->slave_count - 1] = NULL;
1973                 }
1974         }
1975
1976         internals->slave_count--;
1977
1978         /* force reconfiguration of slave interfaces */
1979         _rte_eth_dev_reset(slave_eth_dev);
1980 }
1981
1982 static void
1983 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1984
1985 void
1986 slave_add(struct bond_dev_private *internals,
1987                 struct rte_eth_dev *slave_eth_dev)
1988 {
1989         struct bond_slave_details *slave_details =
1990                         &internals->slaves[internals->slave_count];
1991
1992         slave_details->port_id = slave_eth_dev->data->port_id;
1993         slave_details->last_link_status = 0;
1994
1995         /* Mark slave devices that don't support interrupts so we can
1996          * compensate when we start the bond
1997          */
1998         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1999                 slave_details->link_status_poll_enabled = 1;
2000         }
2001
2002         slave_details->link_status_wait_to_complete = 0;
2003         /* save slave's current MAC so it can be restored after removal */
2004         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2005                         sizeof(struct ether_addr));
2006 }
2007
2008 void
2009 bond_ethdev_primary_set(struct bond_dev_private *internals,
2010                 uint16_t slave_port_id)
2011 {
2012         int i;
2013
2014         if (internals->active_slave_count < 1)
2015                 internals->current_primary_port = slave_port_id;
2016         else
2017                 /* Search bonded device slave ports for new proposed primary port */
2018                 for (i = 0; i < internals->active_slave_count; i++) {
2019                         if (internals->active_slaves[i] == slave_port_id)
2020                                 internals->current_primary_port = slave_port_id;
2021                 }
2022 }
2023
2024 static void
2025 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2026
2027 static int
2028 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2029 {
2030         struct bond_dev_private *internals;
2031         int i;
2032
2033         /* slave eth dev will be started by bonded device */
2034         if (check_for_bonded_ethdev(eth_dev)) {
2035                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2036                                 eth_dev->data->port_id);
2037                 return -1;
2038         }
2039
2040         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2041         eth_dev->data->dev_started = 1;
2042
2043         internals = eth_dev->data->dev_private;
2044
2045         if (internals->slave_count == 0) {
2046                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2047                 goto out_err;
2048         }
2049
2050         if (internals->user_defined_mac == 0) {
2051                 struct ether_addr *new_mac_addr = NULL;
2052
2053                 for (i = 0; i < internals->slave_count; i++)
2054                         if (internals->slaves[i].port_id == internals->primary_port)
2055                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2056
2057                 if (new_mac_addr == NULL)
2058                         goto out_err;
2059
2060                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2061                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2062                                         eth_dev->data->port_id);
2063                         goto out_err;
2064                 }
2065         }
2066
2067         /* If bonded device is configured in promiscuous mode then re-apply config */
2068         if (internals->promiscuous_en)
2069                 bond_ethdev_promiscuous_enable(eth_dev);
2070
2071         if (internals->mode == BONDING_MODE_8023AD) {
2072                 if (internals->mode4.dedicated_queues.enabled == 1) {
2073                         internals->mode4.dedicated_queues.rx_qid =
2074                                         eth_dev->data->nb_rx_queues;
2075                         internals->mode4.dedicated_queues.tx_qid =
2076                                         eth_dev->data->nb_tx_queues;
2077                 }
2078         }
2079
2080
2081         /* Reconfigure each slave device if starting bonded device */
2082         for (i = 0; i < internals->slave_count; i++) {
2083                 struct rte_eth_dev *slave_ethdev =
2084                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2085                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2086                         RTE_BOND_LOG(ERR,
2087                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2088                                 eth_dev->data->port_id,
2089                                 internals->slaves[i].port_id);
2090                         goto out_err;
2091                 }
2092                 /* We will need to poll for link status if any slave doesn't
2093                  * support interrupts
2094                  */
2095                 if (internals->slaves[i].link_status_poll_enabled)
2096                         internals->link_status_polling_enabled = 1;
2097         }
2098
2099         /* start polling if needed */
2100         if (internals->link_status_polling_enabled) {
2101                 rte_eal_alarm_set(
2102                         internals->link_status_polling_interval_ms * 1000,
2103                         bond_ethdev_slave_link_status_change_monitor,
2104                         (void *)&rte_eth_devices[internals->port_id]);
2105         }
2106
2107         /* Update all slave devices MACs*/
2108         if (mac_address_slaves_update(eth_dev) != 0)
2109                 goto out_err;
2110
2111         if (internals->user_defined_primary_port)
2112                 bond_ethdev_primary_set(internals, internals->primary_port);
2113
2114         if (internals->mode == BONDING_MODE_8023AD)
2115                 bond_mode_8023ad_start(eth_dev);
2116
2117         if (internals->mode == BONDING_MODE_TLB ||
2118                         internals->mode == BONDING_MODE_ALB)
2119                 bond_tlb_enable(internals);
2120
2121         return 0;
2122
2123 out_err:
2124         eth_dev->data->dev_started = 0;
2125         return -1;
2126 }
2127
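/*
 * Illustrative sketch of the start-up sequence that reaches
 * bond_ethdev_start(); port ids and configuration are hypothetical.
 *
 *      rte_eth_bond_slave_add(bond_port_id, slave_port_id);
 *      rte_eth_dev_configure(bond_port_id, 1, 1, &port_conf);
 *      ... rx/tx queue setup on the bonded port ...
 *      rte_eth_dev_start(bond_port_id);
 */
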
2128 static void
2129 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2130 {
2131         uint16_t i;
2132
2133         if (dev->data->rx_queues != NULL) {
2134                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2135                         rte_free(dev->data->rx_queues[i]);
2136                         dev->data->rx_queues[i] = NULL;
2137                 }
2138                 dev->data->nb_rx_queues = 0;
2139         }
2140
2141         if (dev->data->tx_queues != NULL) {
2142                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2143                         rte_free(dev->data->tx_queues[i]);
2144                         dev->data->tx_queues[i] = NULL;
2145                 }
2146                 dev->data->nb_tx_queues = 0;
2147         }
2148 }
2149
2150 void
2151 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2152 {
2153         struct bond_dev_private *internals = eth_dev->data->dev_private;
2154         uint16_t i;
2155
2156         if (internals->mode == BONDING_MODE_8023AD) {
2157                 struct port *port;
2158                 void *pkt = NULL;
2159
2160                 bond_mode_8023ad_stop(eth_dev);
2161
2162                 /* Discard all messages to/from mode 4 state machines */
2163                 for (i = 0; i < internals->active_slave_count; i++) {
2164                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2165
2166                         RTE_ASSERT(port->rx_ring != NULL);
2167                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2168                                 rte_pktmbuf_free(pkt);
2169
2170                         RTE_ASSERT(port->tx_ring != NULL);
2171                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2172                                 rte_pktmbuf_free(pkt);
2173                 }
2174         }
2175
2176         if (internals->mode == BONDING_MODE_TLB ||
2177                         internals->mode == BONDING_MODE_ALB) {
2178                 bond_tlb_disable(internals);
2179                 for (i = 0; i < internals->active_slave_count; i++)
2180                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2181         }
2182
2183         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2184         eth_dev->data->dev_started = 0;
2185
2186         internals->link_status_polling_enabled = 0;
2187         for (i = 0; i < internals->slave_count; i++) {
2188                 uint16_t slave_id = internals->slaves[i].port_id;
2189                 if (find_slave_by_id(internals->active_slaves,
2190                                 internals->active_slave_count, slave_id) !=
2191                                                 internals->active_slave_count) {
2192                         internals->slaves[i].last_link_status = 0;
2193                         rte_eth_dev_stop(slave_id);
2194                         deactivate_slave(eth_dev, slave_id);
2195                 }
2196         }
2197 }
2198
2199 void
2200 bond_ethdev_close(struct rte_eth_dev *dev)
2201 {
2202         struct bond_dev_private *internals = dev->data->dev_private;
2203         uint16_t bond_port_id = internals->port_id;
2204         int skipped = 0;
2205         struct rte_flow_error ferror;
2206
2207         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2208         while (internals->slave_count != skipped) {
2209                 uint16_t port_id = internals->slaves[skipped].port_id;
2210
2211                 rte_eth_dev_stop(port_id);
2212
2213                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2214                         RTE_BOND_LOG(ERR,
2215                                      "Failed to remove port %d from bonded device %s",
2216                                      port_id, dev->device->name);
2217                         skipped++;
2218                 }
2219         }
2220         bond_flow_ops.flush(dev, &ferror);
2221         bond_ethdev_free_queues(dev);
2222         rte_bitmap_reset(internals->vlan_filter_bmp);
2223 }
2224
2225 /* forward declaration */
2226 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2227
2228 static void
2229 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2230 {
2231         struct bond_dev_private *internals = dev->data->dev_private;
2232
2233         uint16_t max_nb_rx_queues = UINT16_MAX;
2234         uint16_t max_nb_tx_queues = UINT16_MAX;
2235         uint16_t max_rx_desc_lim = UINT16_MAX;
2236         uint16_t max_tx_desc_lim = UINT16_MAX;
2237
2238         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2239
2240         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2241                         internals->candidate_max_rx_pktlen :
2242                         ETHER_MAX_JUMBO_FRAME_LEN;
2243
2244         /* The max number of tx/rx queues that the bonded device can support is
2245          * the minimum of the values reported by the bonded slaves, as all slaves
2246          * must be capable of supporting the same number of tx/rx queues.
2247          */
2248         if (internals->slave_count > 0) {
2249                 struct rte_eth_dev_info slave_info;
2250                 uint16_t idx;
2251
2252                 for (idx = 0; idx < internals->slave_count; idx++) {
2253                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2254                                         &slave_info);
2255
2256                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2257                                 max_nb_rx_queues = slave_info.max_rx_queues;
2258
2259                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2260                                 max_nb_tx_queues = slave_info.max_tx_queues;
2261
2262                         if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2263                                 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2264
2265                         if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2266                                 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2267                 }
2268         }
2269
2270         dev_info->max_rx_queues = max_nb_rx_queues;
2271         dev_info->max_tx_queues = max_nb_tx_queues;
2272
2273         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2274                sizeof(dev_info->default_rxconf));
2275         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2276                sizeof(dev_info->default_txconf));
2277
2278         dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2279         dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2280
2281         /**
2282          * If dedicated hw queues enabled for link bonding device in LACP mode
2283          * then we need to reduce the maximum number of data path queues by 1.
2284          */
2285         if (internals->mode == BONDING_MODE_8023AD &&
2286                 internals->mode4.dedicated_queues.enabled == 1) {
2287                 dev_info->max_rx_queues--;
2288                 dev_info->max_tx_queues--;
2289         }
2290
2291         dev_info->min_rx_bufsize = 0;
2292
2293         dev_info->rx_offload_capa = internals->rx_offload_capa;
2294         dev_info->tx_offload_capa = internals->tx_offload_capa;
2295         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2296         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2297         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2298
2299         dev_info->reta_size = internals->reta_size;
2300 }
2301
2302 static int
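/*
 * Illustrative sketch: the limits computed above are what an
 * application sees when querying the bonded port; with dedicated
 * 8023AD queues enabled, one rx/tx queue pair is reserved.
 *
 *      struct rte_eth_dev_info info;
 *      rte_eth_dev_info_get(bond_port_id, &info);
 *      // info.max_rx_queues == minimum over all slaves (possibly - 1)
 */
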
2303 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2304 {
2305         int res;
2306         uint16_t i;
2307         struct bond_dev_private *internals = dev->data->dev_private;
2308
2309         /* don't do this while a slave is being added */
2310         rte_spinlock_lock(&internals->lock);
2311
2312         if (on)
2313                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2314         else
2315                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2316
2317         for (i = 0; i < internals->slave_count; i++) {
2318                 uint16_t port_id = internals->slaves[i].port_id;
2319
2320                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2321                 if (res == ENOTSUP)
2322                         RTE_BOND_LOG(WARNING,
2323                                      "Setting VLAN filter on slave port %u not supported.",
2324                                      port_id);
2325         }
2326
2327         rte_spinlock_unlock(&internals->lock);
2328         return 0;
2329 }
2330
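/*
 * Illustrative sketch: setting a VLAN filter on the bonded port is
 * propagated to every slave by the callback above.
 *
 *      rte_eth_dev_vlan_filter(bond_port_id, vlan_id, 1);
 */
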
2331 static int
2332 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2333                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2334                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2335 {
2336         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2337                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2338                                         0, dev->data->numa_node);
2339         if (bd_rx_q == NULL)
2340                 return -1;
2341
2342         bd_rx_q->queue_id = rx_queue_id;
2343         bd_rx_q->dev_private = dev->data->dev_private;
2344
2345         bd_rx_q->nb_rx_desc = nb_rx_desc;
2346
2347         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2348         bd_rx_q->mb_pool = mb_pool;
2349
2350         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2351
2352         return 0;
2353 }
2354
2355 static int
2356 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2357                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2358                 const struct rte_eth_txconf *tx_conf)
2359 {
2360         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2361                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2362                                         0, dev->data->numa_node);
2363
2364         if (bd_tx_q == NULL)
2365                 return -1;
2366
2367         bd_tx_q->queue_id = tx_queue_id;
2368         bd_tx_q->dev_private = dev->data->dev_private;
2369
2370         bd_tx_q->nb_tx_desc = nb_tx_desc;
2371         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2372
2373         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2374
2375         return 0;
2376 }
2377
2378 static void
2379 bond_ethdev_rx_queue_release(void *queue)
2380 {
2381         if (queue == NULL)
2382                 return;
2383
2384         rte_free(queue);
2385 }
2386
2387 static void
2388 bond_ethdev_tx_queue_release(void *queue)
2389 {
2390         if (queue == NULL)
2391                 return;
2392
2393         rte_free(queue);
2394 }
2395
2396 static void
2397 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2398 {
2399         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2400         struct bond_dev_private *internals;
2401
2402         /* Default value for polling slave found is true as we don't want to
2403          * disable the polling thread if we cannot get the lock */
2404         int i, polling_slave_found = 1;
2405
2406         if (cb_arg == NULL)
2407                 return;
2408
2409         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2410         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2411
2412         if (!bonded_ethdev->data->dev_started ||
2413                 !internals->link_status_polling_enabled)
2414                 return;
2415
2416         /* If device is currently being configured then don't check slaves link
2417          * status, wait until next period */
2418         if (rte_spinlock_trylock(&internals->lock)) {
2419                 if (internals->slave_count > 0)
2420                         polling_slave_found = 0;
2421
2422                 for (i = 0; i < internals->slave_count; i++) {
2423                         if (!internals->slaves[i].link_status_poll_enabled)
2424                                 continue;
2425
2426                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2427                         polling_slave_found = 1;
2428
2429                         /* Update slave link status */
2430                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2431                                         internals->slaves[i].link_status_wait_to_complete);
2432
2433                         /* if link status has changed since last checked then call lsc
2434                          * event callback */
2435                         if (slave_ethdev->data->dev_link.link_status !=
2436                                         internals->slaves[i].last_link_status) {
2437                                 internals->slaves[i].last_link_status =
2438                                                 slave_ethdev->data->dev_link.link_status;
2439
2440                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2441                                                 RTE_ETH_EVENT_INTR_LSC,
2442                                                 &bonded_ethdev->data->port_id,
2443                                                 NULL);
2444                         }
2445                 }
2446                 rte_spinlock_unlock(&internals->lock);
2447         }
2448
2449         if (polling_slave_found)
2450                 /* Set alarm to continue monitoring link status of slave ethdevs */
2451                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2452                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2453 }
2454
2455 static int
2456 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2457 {
2458         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2459
2460         struct bond_dev_private *bond_ctx;
2461         struct rte_eth_link slave_link;
2462
2463         uint32_t idx;
2464
2465         bond_ctx = ethdev->data->dev_private;
2466
2467         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2468
2469         if (ethdev->data->dev_started == 0 ||
2470                         bond_ctx->active_slave_count == 0) {
2471                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2472                 return 0;
2473         }
2474
2475         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2476
2477         if (wait_to_complete)
2478                 link_update = rte_eth_link_get;
2479         else
2480                 link_update = rte_eth_link_get_nowait;
2481
2482         switch (bond_ctx->mode) {
2483         case BONDING_MODE_BROADCAST:
2484                 /**
2485                  * Setting link speed to UINT32_MAX to ensure we pick up the
2486                  * value of the first active slave
2487                  */
2488                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2489
2490                 /**
2491                  * link speed is the minimum of all the slaves' link speeds, as
2492                  * packet loss will occur on the slowest slave if transmission at
2493                  * a higher rate is attempted
2494                  */
2495                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2496                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2497
2498                         if (slave_link.link_speed <
2499                                         ethdev->data->dev_link.link_speed)
2500                                 ethdev->data->dev_link.link_speed =
2501                                                 slave_link.link_speed;
2502                 }
2503                 break;
2504         case BONDING_MODE_ACTIVE_BACKUP:
2505                 /* Current primary slave */
2506                 link_update(bond_ctx->current_primary_port, &slave_link);
2507
2508                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2509                 break;
2510         case BONDING_MODE_8023AD:
2511                 ethdev->data->dev_link.link_autoneg =
2512                                 bond_ctx->mode4.slave_link.link_autoneg;
2513                 ethdev->data->dev_link.link_duplex =
2514                                 bond_ctx->mode4.slave_link.link_duplex;
2515                 /* fall through to update link speed */
2516         case BONDING_MODE_ROUND_ROBIN:
2517         case BONDING_MODE_BALANCE:
2518         case BONDING_MODE_TLB:
2519         case BONDING_MODE_ALB:
2520         default:
2521                 /**
2522                  * In these modes the maximum theoretical link speed is the sum
2523                  * of all the slaves' link speeds
2524                  */
2525                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2526
2527                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2528                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2529
2530                         ethdev->data->dev_link.link_speed +=
2531                                         slave_link.link_speed;
2532                 }
2533         }
2534
2535
2536         return 0;
2537 }
2538
2539
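/*
 * Worked example of the speed aggregation above: with three active
 * 10G slaves, balance/round-robin/TLB/ALB modes report
 * 3 * ETH_SPEED_NUM_10G = 30000 Mbps, broadcast mode reports the
 * minimum slave speed (10000 Mbps), and active-backup reports the
 * primary slave's speed.
 */
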
2540 static int
2541 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2542 {
2543         struct bond_dev_private *internals = dev->data->dev_private;
2544         struct rte_eth_stats slave_stats;
2545         int i, j;
2546
2547         for (i = 0; i < internals->slave_count; i++) {
2548                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2549
2550                 stats->ipackets += slave_stats.ipackets;
2551                 stats->opackets += slave_stats.opackets;
2552                 stats->ibytes += slave_stats.ibytes;
2553                 stats->obytes += slave_stats.obytes;
2554                 stats->imissed += slave_stats.imissed;
2555                 stats->ierrors += slave_stats.ierrors;
2556                 stats->oerrors += slave_stats.oerrors;
2557                 stats->rx_nombuf += slave_stats.rx_nombuf;
2558
2559                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2560                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2561                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2562                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2563                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2564                         stats->q_errors[j] += slave_stats.q_errors[j];
2565                 }
2566
2567         }
2568
2569         return 0;
2570 }
2571
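/*
 * Illustrative sketch: because the callback sums slave counters on
 * each call, an application reads aggregate statistics as usual.
 * bond_port_id is hypothetical.
 *
 *      struct rte_eth_stats stats;
 *      rte_eth_stats_get(bond_port_id, &stats);
 *      printf("rx=%" PRIu64 " tx=%" PRIu64 "\n",
 *                      stats.ipackets, stats.opackets);
 */
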
2572 static void
2573 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2574 {
2575         struct bond_dev_private *internals = dev->data->dev_private;
2576         int i;
2577
2578         for (i = 0; i < internals->slave_count; i++)
2579                 rte_eth_stats_reset(internals->slaves[i].port_id);
2580 }
2581
2582 static void
2583 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2584 {
2585         struct bond_dev_private *internals = eth_dev->data->dev_private;
2586         int i;
2587
2588         internals->promiscuous_en = 1;
2589
2590         switch (internals->mode) {
2591         /* Promiscuous mode is propagated to all slaves */
2592         case BONDING_MODE_ROUND_ROBIN:
2593         case BONDING_MODE_BALANCE:
2594         case BONDING_MODE_BROADCAST:
2595                 for (i = 0; i < internals->slave_count; i++)
2596                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2597                 break;
2598         /* In mode4 promiscuous mode is managed when a slave is added/removed */
2599         case BONDING_MODE_8023AD:
2600                 break;
2601         /* Promiscuous mode is propagated only to primary slave */
2602         case BONDING_MODE_ACTIVE_BACKUP:
2603         case BONDING_MODE_TLB:
2604         case BONDING_MODE_ALB:
2605         default:
2606                 /* Do not touch promisc when there cannot be primary ports */
2607                 if (internals->slave_count == 0)
2608                         break;
2609                 rte_eth_promiscuous_enable(internals->current_primary_port);
2610         }
2611 }
2612
2613 static void
2614 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2615 {
2616         struct bond_dev_private *internals = dev->data->dev_private;
2617         int i;
2618
2619         internals->promiscuous_en = 0;
2620
2621         switch (internals->mode) {
2622         /* Promiscuous mode is propagated to all slaves */
2623         case BONDING_MODE_ROUND_ROBIN:
2624         case BONDING_MODE_BALANCE:
2625         case BONDING_MODE_BROADCAST:
2626                 for (i = 0; i < internals->slave_count; i++)
2627                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2628                 break;
2629         /* In mode4 promiscuous mode is managed when a slave is added/removed */
2630         case BONDING_MODE_8023AD:
2631                 break;
2632         /* Promiscuous mode is propagated only to primary slave */
2633         case BONDING_MODE_ACTIVE_BACKUP:
2634         case BONDING_MODE_TLB:
2635         case BONDING_MODE_ALB:
2636         default:
2637                 /* Do not touch promisc when there cannot be primary ports */
2638                 if (internals->slave_count == 0)
2639                         break;
2640                 rte_eth_promiscuous_disable(internals->current_primary_port);
2641         }
2642 }
2643
2644 static void
2645 bond_ethdev_delayed_lsc_propagation(void *arg)
2646 {
2647         if (arg == NULL)
2648                 return;
2649
2650         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2651                         RTE_ETH_EVENT_INTR_LSC, NULL);
2652 }
2653
2654 int
2655 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2656                 void *param, void *ret_param __rte_unused)
2657 {
2658         struct rte_eth_dev *bonded_eth_dev;
2659         struct bond_dev_private *internals;
2660         struct rte_eth_link link;
2661         int rc = -1;
2662
2663         uint8_t lsc_flag = 0;
2664         int valid_slave = 0;
2665         uint16_t active_pos;
2666         uint16_t i;
2667
2668         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2669                 return rc;
2670
2671         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2672
2673         if (check_for_bonded_ethdev(bonded_eth_dev))
2674                 return rc;
2675
2676         internals = bonded_eth_dev->data->dev_private;
2677
2678         /* If the device isn't started don't handle interrupts */
2679         if (!bonded_eth_dev->data->dev_started)
2680                 return rc;
2681
2682         /* verify that port_id is a valid slave of bonded port */
2683         for (i = 0; i < internals->slave_count; i++) {
2684                 if (internals->slaves[i].port_id == port_id) {
2685                         valid_slave = 1;
2686                         break;
2687                 }
2688         }
2689
2690         if (!valid_slave)
2691                 return rc;
2692
2693         /* Synchronize parallel LSC callback invocations, triggered either by
2694          * a real link event from the slave PMDs or by the bonding PMD itself.
2695          */
2696         rte_spinlock_lock(&internals->lsc_lock);
2697
2698         /* Search for port in active port list */
2699         active_pos = find_slave_by_id(internals->active_slaves,
2700                         internals->active_slave_count, port_id);
2701
2702         rte_eth_link_get_nowait(port_id, &link);
2703         if (link.link_status) {
2704                 if (active_pos < internals->active_slave_count)
2705                         goto link_update;
2706
2707                 /* check link state properties if bonded link is up */
2708                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2709                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2710                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2711                                              "for slave %d in bonding mode %d",
2712                                              port_id, internals->mode);
2713                 } else {
2714                         /* inherit slave link properties */
2715                         link_properties_set(bonded_eth_dev, &link);
2716                 }
2717
2718                 /* If no active slave ports then set this port to be
2719                  * the primary port.
2720                  */
2721                 if (internals->active_slave_count < 1) {
2722                         /* If first active slave, then change link status */
2723                         bonded_eth_dev->data->dev_link.link_status =
2724                                                                 ETH_LINK_UP;
2725                         internals->current_primary_port = port_id;
2726                         lsc_flag = 1;
2727
2728                         mac_address_slaves_update(bonded_eth_dev);
2729                 }
2730
2731                 activate_slave(bonded_eth_dev, port_id);
2732
2733                 /* If the user defined this port as the primary, restore it as
2734                  * the primary now that its link is up.
2735                  */
2736                 if (internals->user_defined_primary_port &&
2737                                 internals->primary_port == port_id)
2738                         bond_ethdev_primary_set(internals, port_id);
2739         } else {
2740                 if (active_pos == internals->active_slave_count)
2741                         goto link_update;
2742
2743                 /* Remove from active slave list */
2744                 deactivate_slave(bonded_eth_dev, port_id);
2745
2746                 if (internals->active_slave_count < 1)
2747                         lsc_flag = 1;
2748
2749                 /* Update primary id: take the first active slave from the list,
2750                  * or fall back to the configured primary port if none is active */
2751                 if (port_id == internals->current_primary_port) {
2752                         if (internals->active_slave_count > 0)
2753                                 bond_ethdev_primary_set(internals,
2754                                                 internals->active_slaves[0]);
2755                         else
2756                                 internals->current_primary_port = internals->primary_port;
2757                 }
2758         }
2759
2760 link_update:
2761         /**
2762          * Update bonded device link properties after any change to active
2763          * slaves
2764          */
2765         bond_ethdev_link_update(bonded_eth_dev, 0);
2766
2767         if (lsc_flag) {
2768                 /* Cancel any possible outstanding interrupts if delays are enabled */
2769                 if (internals->link_up_delay_ms > 0 ||
2770                         internals->link_down_delay_ms > 0)
2771                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2772                                         bonded_eth_dev);
2773
2774                 if (bonded_eth_dev->data->dev_link.link_status) {
2775                         if (internals->link_up_delay_ms > 0)
2776                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2777                                                 bond_ethdev_delayed_lsc_propagation,
2778                                                 (void *)bonded_eth_dev);
2779                         else
2780                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2781                                                 RTE_ETH_EVENT_INTR_LSC,
2782                                                 NULL);
2783
2784                 } else {
2785                         if (internals->link_down_delay_ms > 0)
2786                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2787                                                 bond_ethdev_delayed_lsc_propagation,
2788                                                 (void *)bonded_eth_dev);
2789                         else
2790                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2791                                                 RTE_ETH_EVENT_INTR_LSC,
2792                                                 NULL);
2793                 }
2794         }
2795
2796         rte_spinlock_unlock(&internals->lsc_lock);
2797
2798         return rc;
2799 }
2800
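     /*
      * Update the bonded device RETA and propagate it to every slave.
      * The updated groups are replicated across the rest of the internal
      * table so that slaves with a larger RETA receive a consistent view.
      */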
2801 static int
2802 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2803                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2804 {
2805         unsigned i, j;
2806         int result = 0;
2807         int slave_reta_size;
2808         unsigned reta_count;
2809         struct bond_dev_private *internals = dev->data->dev_private;
2810
2811         if (reta_size != internals->reta_size)
2812                 return -EINVAL;
2813
2814         /* Copy RETA table */
2815         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2816
2817         for (i = 0; i < reta_count; i++) {
2818                 internals->reta_conf[i].mask = reta_conf[i].mask;
2819                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2820                         if ((reta_conf[i].mask >> j) & 0x01)
2821                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2822         }
2823
2824         /* Fill rest of array */
2825         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2826                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2827                                 sizeof(internals->reta_conf[0]) * reta_count);
2828
2829         /* Propagate RETA over slaves */
2830         for (i = 0; i < internals->slave_count; i++) {
2831                 slave_reta_size = internals->slaves[i].reta_size;
2832                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2833                                 &internals->reta_conf[0], slave_reta_size);
2834                 if (result < 0)
2835                         return result;
2836         }
2837
2838         return 0;
2839 }
2840
2841 static int
2842 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2843                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2844 {
2845         int i, j;
2846         struct bond_dev_private *internals = dev->data->dev_private;
2847
2848         if (reta_size != internals->reta_size)
2849                 return -EINVAL;
2850
2851         /* Copy RETA table */
2852         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2853                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2854                         if ((reta_conf[i].mask >> j) & 0x01)
2855                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2856
2857         return 0;
2858 }
2859
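     /*
      * Apply a new RSS hash configuration and propagate it to all slaves.
      * The requested hash functions are first masked against what every
      * slave supports (flow_type_rss_offloads).
      */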
2860 static int
2861 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2862                 struct rte_eth_rss_conf *rss_conf)
2863 {
2864         int i, result = 0;
2865         struct bond_dev_private *internals = dev->data->dev_private;
2866         struct rte_eth_rss_conf bond_rss_conf;
2867
2868         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2869
2870         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2871
2872         if (bond_rss_conf.rss_hf != 0)
2873                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2874
2875         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2876                         sizeof(internals->rss_key)) {
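                     /* A key length of 0 selects the default 40-byte RSS key */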
2877                 if (bond_rss_conf.rss_key_len == 0)
2878                         bond_rss_conf.rss_key_len = 40;
2879                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2880                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2881                                 internals->rss_key_len);
2882         }
2883
2884         for (i = 0; i < internals->slave_count; i++) {
2885                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2886                                 &bond_rss_conf);
2887                 if (result < 0)
2888                         return result;
2889         }
2890
2891         return 0;
2892 }
2893
2894 static int
2895 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2896                 struct rte_eth_rss_conf *rss_conf)
2897 {
2898         struct bond_dev_private *internals = dev->data->dev_private;
2899
2900         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2901         rss_conf->rss_key_len = internals->rss_key_len;
2902         if (rss_conf->rss_key)
2903                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2904
2905         return 0;
2906 }
2907
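     /*
      * Set the MTU on all slaves. Support for the mtu_set op is checked
      * on every slave first; note that if a later slave then fails to
      * apply the new MTU, slaves already updated are not rolled back.
      */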
2908 static int
2909 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2910 {
2911         struct rte_eth_dev *slave_eth_dev;
2912         struct bond_dev_private *internals = dev->data->dev_private;
2913         int ret, i;
2914
2915         rte_spinlock_lock(&internals->lock);
2916
2917         for (i = 0; i < internals->slave_count; i++) {
2918                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2919                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2920                         rte_spinlock_unlock(&internals->lock);
2921                         return -ENOTSUP;
2922                 }
2923         }
2924         for (i = 0; i < internals->slave_count; i++) {
2925                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2926                 if (ret < 0) {
2927                         rte_spinlock_unlock(&internals->lock);
2928                         return ret;
2929                 }
2930         }
2931
2932         rte_spinlock_unlock(&internals->lock);
2933         return 0;
2934 }
2935
2936 static int
2937 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2938 {
2939         if (mac_address_set(dev, addr)) {
2940                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2941                 return -EINVAL;
2942         }
2943
2944         return 0;
2945 }
2946
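     /* Only the generic flow API is exposed; all other filter types are
      * unsupported.
      */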
2947 static int
2948 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2949                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2950 {
2951         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2952                 *(const void **)arg = &bond_flow_ops;
2953                 return 0;
2954         }
2955         return -ENOTSUP;
2956 }
2957
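     /*
      * Add a MAC address to every slave. If programming any slave fails,
      * the address is removed from the slaves already updated so the
      * bonded set stays consistent.
      */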
2958 static int
2959 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2960                                 __rte_unused uint32_t index, uint32_t vmdq)
2961 {
2962         struct rte_eth_dev *slave_eth_dev;
2963         struct bond_dev_private *internals = dev->data->dev_private;
2964         int ret, i;
2965
2966         rte_spinlock_lock(&internals->lock);
2967
2968         for (i = 0; i < internals->slave_count; i++) {
2969                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2970                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2971                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2972                         ret = -ENOTSUP;
2973                         goto end;
2974                 }
2975         }
2976
2977         for (i = 0; i < internals->slave_count; i++) {
2978                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2979                                 mac_addr, vmdq);
2980                 if (ret < 0) {
2981                         /* rollback */
2982                         for (i--; i >= 0; i--)
2983                                 rte_eth_dev_mac_addr_remove(
2984                                         internals->slaves[i].port_id, mac_addr);
2985                         goto end;
2986                 }
2987         }
2988
2989         ret = 0;
2990 end:
2991         rte_spinlock_unlock(&internals->lock);
2992         return ret;
2993 }
2994
2995 static void
2996 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2997 {
2998         struct rte_eth_dev *slave_eth_dev;
2999         struct bond_dev_private *internals = dev->data->dev_private;
3000         int i;
3001
3002         rte_spinlock_lock(&internals->lock);
3003
3004         for (i = 0; i < internals->slave_count; i++) {
3005                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3006                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3007                         goto end;
3008         }
3009
3010         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
3011
3012         for (i = 0; i < internals->slave_count; i++)
3013                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3014                                 mac_addr);
3015
3016 end:
3017         rte_spinlock_unlock(&internals->lock);
3018 }
3019
3020 const struct eth_dev_ops default_dev_ops = {
3021         .dev_start            = bond_ethdev_start,
3022         .dev_stop             = bond_ethdev_stop,
3023         .dev_close            = bond_ethdev_close,
3024         .dev_configure        = bond_ethdev_configure,
3025         .dev_infos_get        = bond_ethdev_info,
3026         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3027         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3028         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3029         .rx_queue_release     = bond_ethdev_rx_queue_release,
3030         .tx_queue_release     = bond_ethdev_tx_queue_release,
3031         .link_update          = bond_ethdev_link_update,
3032         .stats_get            = bond_ethdev_stats_get,
3033         .stats_reset          = bond_ethdev_stats_reset,
3034         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3035         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3036         .reta_update          = bond_ethdev_rss_reta_update,
3037         .reta_query           = bond_ethdev_rss_reta_query,
3038         .rss_hash_update      = bond_ethdev_rss_hash_update,
3039         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3040         .mtu_set              = bond_ethdev_mtu_set,
3041         .mac_addr_set         = bond_ethdev_mac_address_set,
3042         .mac_addr_add         = bond_ethdev_mac_addr_add,
3043         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3044         .filter_ctrl          = bond_filter_ctrl
3045 };
3046
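     /*
      * Allocate the bonded ethdev, its MAC address table and private data,
      * and initialise the defaults; the requested bonding mode is applied
      * through bond_ethdev_mode_set() below.
      */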
3047 static int
3048 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3049 {
3050         const char *name = rte_vdev_device_name(dev);
3051         uint8_t socket_id = dev->device.numa_node;
3052         struct bond_dev_private *internals = NULL;
3053         struct rte_eth_dev *eth_dev = NULL;
3054         uint32_t vlan_filter_bmp_size;
3055
3056         /* Now do all data allocation: the eth_dev structure, the dummy PCI
3057          * driver and the internal (private) data.
3058          */
3059
3060         /* reserve an ethdev entry */
3061         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3062         if (eth_dev == NULL) {
3063                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3064                 goto err;
3065         }
3066
3067         internals = eth_dev->data->dev_private;
3068         eth_dev->data->nb_rx_queues = (uint16_t)1;
3069         eth_dev->data->nb_tx_queues = (uint16_t)1;
3070
3071         /* Allocate memory for storing MAC addresses */
3072         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3073                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3074         if (eth_dev->data->mac_addrs == NULL) {
3075                 RTE_BOND_LOG(ERR,
3076                              "Failed to allocate %u bytes needed to store MAC addresses",
3077                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3078                 goto err;
3079         }
3080
3081         eth_dev->dev_ops = &default_dev_ops;
3082         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3083
3084         rte_spinlock_init(&internals->lock);
3085         rte_spinlock_init(&internals->lsc_lock);
3086
3087         internals->port_id = eth_dev->data->port_id;
3088         internals->mode = BONDING_MODE_INVALID;
3089         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3090         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3091         internals->burst_xmit_hash = burst_xmit_l2_hash;
3092         internals->user_defined_mac = 0;
3093
3094         internals->link_status_polling_enabled = 0;
3095
3096         internals->link_status_polling_interval_ms =
3097                 DEFAULT_POLLING_INTERVAL_10_MS;
3098         internals->link_down_delay_ms = 0;
3099         internals->link_up_delay_ms = 0;
3100
3101         internals->slave_count = 0;
3102         internals->active_slave_count = 0;
3103         internals->rx_offload_capa = 0;
3104         internals->tx_offload_capa = 0;
3105         internals->rx_queue_offload_capa = 0;
3106         internals->tx_queue_offload_capa = 0;
3107         internals->candidate_max_rx_pktlen = 0;
3108         internals->max_rx_pktlen = 0;
3109
3110         /* Initially allow any RSS hash protocol to be chosen */
3111         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3112
3113         memset(&internals->default_rxconf, 0,
3114                sizeof(internals->default_rxconf));
3115         memset(&internals->default_txconf, 0,
3116                sizeof(internals->default_txconf));
3117
3118         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3119         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3120
3121         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3122         memset(internals->slaves, 0, sizeof(internals->slaves));
3123
3124         TAILQ_INIT(&internals->flow_list);
3125         internals->flow_isolated_valid = 0;
3126
3127         /* Set mode 4 default configuration */
3128         bond_mode_8023ad_setup(eth_dev, NULL);
3129         if (bond_ethdev_mode_set(eth_dev, mode)) {
3130                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3131                                  eth_dev->data->port_id, mode);
3132                 goto err;
3133         }
3134
3135         vlan_filter_bmp_size =
3136                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3137         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3138                                                    RTE_CACHE_LINE_SIZE);
3139         if (internals->vlan_filter_bmpmem == NULL) {
3140                 RTE_BOND_LOG(ERR,
3141                              "Failed to allocate vlan bitmap for bonded device %u",
3142                              eth_dev->data->port_id);
3143                 goto err;
3144         }
3145
3146         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3147                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3148         if (internals->vlan_filter_bmp == NULL) {
3149                 RTE_BOND_LOG(ERR,
3150                              "Failed to init vlan bitmap for bonded device %u",
3151                              eth_dev->data->port_id);
3152                 rte_free(internals->vlan_filter_bmpmem);
3153                 goto err;
3154         }
3155
3156         return eth_dev->data->port_id;
3157
3158 err:
3159         rte_free(internals);
3160         if (eth_dev != NULL)
3161                 eth_dev->data->dev_private = NULL;
3162         rte_eth_dev_release_port(eth_dev);
3163         return -1;
3164 }
3165
3166 static int
3167 bond_probe(struct rte_vdev_device *dev)
3168 {
3169         const char *name;
3170         struct bond_dev_private *internals;
3171         struct rte_kvargs *kvlist;
3172         uint8_t bonding_mode, socket_id;
3173         int  arg_count, port_id;
3174         uint8_t agg_mode;
3175         struct rte_eth_dev *eth_dev;
3176
3177         if (!dev)
3178                 return -EINVAL;
3179
3180         name = rte_vdev_device_name(dev);
3181         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3182
3183         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3184                 eth_dev = rte_eth_dev_attach_secondary(name);
3185                 if (!eth_dev) {
3186                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3187                         return -1;
3188                 }
3189                 /* TODO: request info from primary to set up Rx and Tx */
3190                 eth_dev->dev_ops = &default_dev_ops;
3191                 eth_dev->device = &dev->device;
3192                 rte_eth_dev_probing_finish(eth_dev);
3193                 return 0;
3194         }
3195
3196         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3197                 pmd_bond_init_valid_arguments);
3198         if (kvlist == NULL)
3199                 return -1;
3200
3201         /* Parse link bonding mode */
3202         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3203                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3204                                 &bond_ethdev_parse_slave_mode_kvarg,
3205                                 &bonding_mode) != 0) {
3206                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3207                                         name);
3208                         goto parse_error;
3209                 }
3210         } else {
3211                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3212                                 "device %s", name);
3213                 goto parse_error;
3214         }
3215
3216         /* Parse socket id to create bonding device on */
3217         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3218         if (arg_count == 1) {
3219                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3220                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3221                                 != 0) {
3222                         RTE_BOND_LOG(ERR, "Invalid socket id specified for "
3223                                         "bonded device %s", name);
3224                         goto parse_error;
3225                 }
3226         } else if (arg_count > 1) {
3227                 RTE_BOND_LOG(ERR, "Socket id can be specified only once for "
3228                                 "bonded device %s", name);
3229                 goto parse_error;
3230         } else {
3231                 socket_id = rte_socket_id();
3232         }
3233
3234         dev->device.numa_node = socket_id;
3235
3236         /* Create link bonding eth device */
3237         port_id = bond_alloc(dev, bonding_mode);
3238         if (port_id < 0) {
3239                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3240                                 "socket %u.", name, bonding_mode, socket_id);
3241                 goto parse_error;
3242         }
3243         internals = rte_eth_devices[port_id].data->dev_private;
3244         internals->kvlist = kvlist;
3245
3246         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3247                 if (rte_kvargs_process(kvlist,
3248                                 PMD_BOND_AGG_MODE_KVARG,
3249                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3250                                 &agg_mode) != 0) {
3251                         RTE_BOND_LOG(ERR,
3252                                         "Failed to parse agg selection mode for bonded device %s",
3253                                         name);
3254                         goto parse_error;
3255                 }
3256
3257                 if (internals->mode == BONDING_MODE_8023AD)
3258                         internals->mode4.agg_selection = agg_mode;
3259         } else {
3260                 internals->mode4.agg_selection = AGG_STABLE;
3261         }
3262
3263         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3264         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3265                         "socket %u.", name, port_id, bonding_mode, socket_id);
3266         return 0;
3267
3268 parse_error:
3269         rte_kvargs_free(kvlist);
3270
3271         return -1;
3272 }
3273
3274 static int
3275 bond_remove(struct rte_vdev_device *dev)
3276 {
3277         struct rte_eth_dev *eth_dev;
3278         struct bond_dev_private *internals;
3279         const char *name;
3280
3281         if (!dev)
3282                 return -EINVAL;
3283
3284         name = rte_vdev_device_name(dev);
3285         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3286
3287         /* Now free all data allocations: the eth_dev structure, the dummy
3288          * PCI driver and the internal (private) data.
3289          */
3290
3291         /* find an ethdev entry */
3292         eth_dev = rte_eth_dev_allocated(name);
3293         if (eth_dev == NULL)
3294                 return -ENODEV;
3295
3296         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3297                 return rte_eth_dev_release_port(eth_dev);
3298
3299         RTE_ASSERT(eth_dev->device == &dev->device);
3300
3301         internals = eth_dev->data->dev_private;
3302         if (internals->slave_count != 0)
3303                 return -EBUSY;
3304
3305         if (eth_dev->data->dev_started == 1) {
3306                 bond_ethdev_stop(eth_dev);
3307                 bond_ethdev_close(eth_dev);
3308         }
3309
3310         eth_dev->dev_ops = NULL;
3311         eth_dev->rx_pkt_burst = NULL;
3312         eth_dev->tx_pkt_burst = NULL;
3313
3314         internals = eth_dev->data->dev_private;
3315         /* Try to release the mempool used in mode 6 (ALB). If the bonded
3316          * device is not in mode 6, freeing the NULL pointer is harmless.
3317          */
3318         rte_mempool_free(internals->mode6.mempool);
3319         rte_bitmap_free(internals->vlan_filter_bmp);
3320         rte_free(internals->vlan_filter_bmpmem);
3321
3322         rte_eth_dev_release_port(eth_dev);
3323
3324         return 0;
3325 }
3326
3327 /* This function resolves the slave port ids after all the other physical
3328  * and virtual devices have been allocated. */
3329 static int
3330 bond_ethdev_configure(struct rte_eth_dev *dev)
3331 {
3332         const char *name = dev->device->name;
3333         struct bond_dev_private *internals = dev->data->dev_private;
3334         struct rte_kvargs *kvlist = internals->kvlist;
3335         int arg_count;
3336         uint16_t port_id = dev - rte_eth_devices;
3337         uint8_t agg_mode;
3338
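             /* Default 40-byte RSS key used when the application supplies none */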
3339         static const uint8_t default_rss_key[40] = {
3340                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3341                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3342                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3343                 0xBE, 0xAC, 0x01, 0xFA
3344         };
3345
3346         unsigned i, j;
3347
3348         /*
3349          * If RSS is enabled, fill the RETA table with default values and
3350          * set the key to the value specified in the port RSS configuration.
3351          * Fall back to the default RSS key if no key is specified.
3352          */
3353         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3354                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3355                         internals->rss_key_len =
3356                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3357                         memcpy(internals->rss_key,
3358                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3359                                internals->rss_key_len);
3360                 } else {
3361                         internals->rss_key_len = sizeof(default_rss_key);
3362                         memcpy(internals->rss_key, default_rss_key,
3363                                internals->rss_key_len);
3364                 }
3365
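                     /* Spread the RETA entries round-robin across the Rx queues */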
3366                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3367                         internals->reta_conf[i].mask = ~0LL;
3368                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3369                                 internals->reta_conf[i].reta[j] =
3370                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3371                                                 dev->data->nb_rx_queues;
3372                 }
3373         }
3374
3375         /* set the max_rx_pktlen */
3376         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3377
3378         /*
3379          * If there is no kvlist, this bonded device was created through the
3380          * bonding API and there are no device arguments to parse.
3381          */
3382         if (!kvlist)
3383                 return 0;
3384
3385         /* Parse MAC address for bonded device */
3386         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3387         if (arg_count == 1) {
3388                 struct ether_addr bond_mac;
3389
3390                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3391                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3392                         RTE_BOND_LOG(INFO, "Invalid MAC address for bonded device %s",
3393                                      name);
3394                         return -1;
3395                 }
3396
3397                 /* Set MAC address */
3398                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3399                         RTE_BOND_LOG(ERR,
3400                                      "Failed to set MAC address on bonded device %s",
3401                                      name);
3402                         return -1;
3403                 }
3404         } else if (arg_count > 1) {
3405                 RTE_BOND_LOG(ERR,
3406                              "MAC address can be specified only once for bonded device %s",
3407                              name);
3408                 return -1;
3409         }
3410
3411         /* Parse/set balance mode transmit policy */
3412         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3413         if (arg_count == 1) {
3414                 uint8_t xmit_policy;
3415
3416                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3417                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3418                     0) {
3419                         RTE_BOND_LOG(INFO,
3420                                      "Invalid xmit policy specified for bonded device %s",
3421                                      name);
3422                         return -1;
3423                 }
3424
3425                 /* Set balance mode transmit policy */
3426                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3427                         RTE_BOND_LOG(ERR,
3428                                      "Failed to set balance xmit policy on bonded device %s",
3429                                      name);
3430                         return -1;
3431                 }
3432         } else if (arg_count > 1) {
3433                 RTE_BOND_LOG(ERR,
3434                              "Transmit policy can be specified only once for bonded device %s",
3435                              name);
3436                 return -1;
3437         }
3438
3439         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3440                 if (rte_kvargs_process(kvlist,
3441                                        PMD_BOND_AGG_MODE_KVARG,
3442                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3443                                        &agg_mode) != 0) {
3444                         RTE_BOND_LOG(ERR,
3445                                      "Failed to parse agg selection mode for bonded device %s",
3446                                      name);
3447                 }
3448                 if (internals->mode == BONDING_MODE_8023AD) {
3449                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3450                                         agg_mode);
3451                         if (ret < 0) {
3452                                 RTE_BOND_LOG(ERR,
3453                                         "Invalid args for agg selection set for bonded device %s",
3454                                         name);
3455                                 return -1;
3456                         }
3457                 }
3458         }
3459
3460         /* Parse/add slave ports to bonded device */
3461         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3462                 struct bond_ethdev_slave_ports slave_ports;
3463                 unsigned i;
3464
3465                 memset(&slave_ports, 0, sizeof(slave_ports));
3466
3467                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3468                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3469                         RTE_BOND_LOG(ERR,
3470                                      "Failed to parse slave ports for bonded device %s",
3471                                      name);
3472                         return -1;
3473                 }
3474
3475                 for (i = 0; i < slave_ports.slave_count; i++) {
3476                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3477                                 RTE_BOND_LOG(ERR,
3478                                              "Failed to add port %d as slave to bonded device %s",
3479                                              slave_ports.slaves[i], name);
3480                         }
3481                 }
3482
3483         } else {
3484                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3485                 return -1;
3486         }
3487
3488         /* Parse/set primary slave port id */
3489         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3490         if (arg_count == 1) {
3491                 uint16_t primary_slave_port_id;
3492
3493                 if (rte_kvargs_process(kvlist,
3494                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3495                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3496                                        &primary_slave_port_id) < 0) {
3497                         RTE_BOND_LOG(INFO,
3498                                      "Invalid primary slave port id specified for bonded device %s",
3499                                      name);
3500                         return -1;
3501                 }
3502
3503                 /* Set primary slave port id */
3504                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3505                     != 0) {
3506                         RTE_BOND_LOG(ERR,
3507                                      "Failed to set primary slave port %d on bonded device %s",
3508                                      primary_slave_port_id, name);
3509                         return -1;
3510                 }
3511         } else if (arg_count > 1) {
3512                 RTE_BOND_LOG(INFO,
3513                              "Primary slave can be specified only once for bonded device %s",
3514                              name);
3515                 return -1;
3516         }
3517
3518         /* Parse link status monitor polling interval */
3519         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3520         if (arg_count == 1) {
3521                 uint32_t lsc_poll_interval_ms;
3522
3523                 if (rte_kvargs_process(kvlist,
3524                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3525                                        &bond_ethdev_parse_time_ms_kvarg,
3526                                        &lsc_poll_interval_ms) < 0) {
3527                         RTE_BOND_LOG(INFO,
3528                                      "Invalid lsc polling interval value specified for bonded"
3529                                      " device %s", name);
3530                         return -1;
3531                 }
3532
3533                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3534                     != 0) {
3535                         RTE_BOND_LOG(ERR,
3536                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3537                                      lsc_poll_interval_ms, name);
3538                         return -1;
3539                 }
3540         } else if (arg_count > 1) {
3541                 RTE_BOND_LOG(INFO,
3542                              "LSC polling interval can be specified only once for bonded"
3543                              " device %s", name);
3544                 return -1;
3545         }
3546
3547         /* Parse link up interrupt propagation delay */
3548         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3549         if (arg_count == 1) {
3550                 uint32_t link_up_delay_ms;
3551
3552                 if (rte_kvargs_process(kvlist,
3553                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3554                                        &bond_ethdev_parse_time_ms_kvarg,
3555                                        &link_up_delay_ms) < 0) {
3556                         RTE_BOND_LOG(INFO,
3557                                      "Invalid link up propagation delay value specified for"
3558                                      " bonded device %s", name);
3559                         return -1;
3560                 }
3561
3562                 /* Set link up propagation delay */
3563                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3564                     != 0) {
3565                         RTE_BOND_LOG(ERR,
3566                                      "Failed to set link up propagation delay (%u ms) on bonded"
3567                                      " device %s", link_up_delay_ms, name);
3568                         return -1;
3569                 }
3570         } else if (arg_count > 1) {
3571                 RTE_BOND_LOG(INFO,
3572                              "Link up propagation delay can be specified only once for"
3573                              " bonded device %s", name);
3574                 return -1;
3575         }
3576
3577         /* Parse link down interrupt propagation delay */
3578         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3579         if (arg_count == 1) {
3580                 uint32_t link_down_delay_ms;
3581
3582                 if (rte_kvargs_process(kvlist,
3583                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3584                                        &bond_ethdev_parse_time_ms_kvarg,
3585                                        &link_down_delay_ms) < 0) {
3586                         RTE_BOND_LOG(INFO,
3587                                      "Invalid link down propagation delay value specified for"
3588                                      " bonded device %s", name);
3589                         return -1;
3590                 }
3591
3592                 /* Set link down propagation delay */
3593                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3594                     != 0) {
3595                         RTE_BOND_LOG(ERR,
3596                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3597                                      link_down_delay_ms, name);
3598                         return -1;
3599                 }
3600         } else if (arg_count > 1) {
3601                 RTE_BOND_LOG(INFO,
3602                              "Link down propagation delay can be specified only once for bonded device %s",
3603                              name);
3604                 return -1;
3605         }
3606
3607         return 0;
3608 }
3609
3610 struct rte_vdev_driver pmd_bond_drv = {
3611         .probe = bond_probe,
3612         .remove = bond_remove,
3613 };
3614
3615 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3616 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3617
3618 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3619         "slave=<ifc> "
3620         "primary=<ifc> "
3621         "mode=[0-6] "
3622         "xmit_policy=[l2 | l23 | l34] "
3623         "agg_mode=[count | stable | bandwidth] "
3624         "socket_id=<int> "
3625         "mac=<mac addr> "
3626         "lsc_poll_period_ms=<int> "
3627         "up_delay=<int> "
3628         "down_delay=<int>");
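
     /*
      * Usage sketch (hypothetical command line, for illustration only):
      * a bonded device can be created at EAL start-up, e.g.
      *
      *   --vdev 'net_bonding0,mode=1,slave=0000:04:00.0,slave=0000:04:00.1'
      *
      * or at run time through the rte_eth_bond_create() /
      * rte_eth_bond_slave_add() API, in which case bond_ethdev_configure()
      * has no kvargs to parse.
      */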
3629
3630 int bond_logtype;
3631
3632 RTE_INIT(bond_init_log)
3633 {
3634         bond_logtype = rte_log_register("pmd.net.bond");
3635         if (bond_logtype >= 0)
3636                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3637 }