/* drivers/net/bonding/rte_eth_bond_pmd.c */
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

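/*
 * Skip any VLAN headers in front of the payload: handles one VLAN tag
 * plus one inner tag for QinQ frames, updating *proto to the inner
 * EtherType and returning the byte offset of the payload relative to
 * the end of the Ethernet header (e.g. 8 bytes for a QinQ frame).
 */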
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}

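/*
 * Basic round-robin receive: poll each active slave in turn for the
 * remaining packet budget, starting after the slave polled first on
 * the previous call so no slave is systematically favoured. Note that
 * internals->active_slave is read and updated without locking, which
 * presumes a single polling context per device.
 */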
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_total = 0;
        uint16_t slave_count;
        uint16_t active_slave;
        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* The offset into *bufs advances as packets are received
                 * from successive slaves */
                num_rx_slave =
                        rte_eth_rx_burst(internals->active_slaves[active_slave],
                                         bd_rx_q->queue_id,
                                         bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

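/*
 * A frame counts as a LACP (slow protocol) packet only if it carries
 * no VLAN tag and its EtherType is ETHER_TYPE_SLOW with a LACP or
 * marker subtype.
 */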
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

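/*
 * The pattern below matches any Ethernet frame whose EtherType equals
 * ETHER_TYPE_SLOW (0x8809); the mask covers only the type field, so
 * source and destination MACs are ignored. Combined with a QUEUE
 * action it lets hardware steer LACP traffic to a dedicated rx queue.
 */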
static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

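/*
 * Check whether a slave can take the LACP steering rule: the rule must
 * pass rte_flow_validate() on the slave, and the slave must support at
 * least as many rx/tx queues as the bonded device, since the dedicated
 * LACP queue is allocated beyond the regular ones.
 */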
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that every slave in the bonding device can support the
         * flow rule used for hardware LACP filtering */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

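/*
 * Mode 4 receive for the dedicated-queue configuration: the hardware
 * flow rule already steers LACP frames to their own queue, so this is
 * a plain round-robin poll of the active slaves with no software
 * inspection of slow frames.
 */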
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint16_t active_slave;
        uint16_t i;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Read packets from this slave */
                num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
                                                bd_rx_q->queue_id,
                                                bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;

                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

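/*
 * Mode 4 transmit for the dedicated-queue configuration: the data
 * burst is hashed across the subset of active slaves currently in the
 * DISTRIBUTING state; LACP control frames travel separately via the
 * dedicated queue, so none are injected here.
 */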
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        /* 2-D array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves;
         * it needs one entry per mbuf, not per port */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);


        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave using a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }


        /* Send packet burst on each slave device */
        for (i = 0; i < dist_slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

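/*
 * Mode 4 receive without dedicated queues: poll the active slaves in
 * round-robin order, then filter the result in software. LACP/marker
 * frames are diverted to the mode 4 state machine, and frames arriving
 * on a slave that is not COLLECTING (or failing the destination MAC
 * check outside promiscuous mode) are freed; survivors are compacted
 * in place.
 */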
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_eth_dev *bonded_eth_dev =
                                        &rte_eth_devices[internals->port_id];
        struct ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t subtype;
        uint16_t i;
        uint16_t j;
        uint16_t k;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* A packet with a known higher-layer type cannot be
                         * a slow frame; skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow
                         * packet, if the slave is not in collecting state, or
                         * if the bonding interface is not in promiscuous mode
                         * and the destination address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting ||
                                (!promisc &&
                                 !is_multicast_ether_addr(&hdr->d_addr) &&
                                 !is_same_ether_addr(bond_mac,
                                                     &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
        switch (arp_op) {
        case RTE_ARP_OP_REQUEST:
                strlcpy(buf, "ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REPLY:
                strlcpy(buf, "ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_REVREQUEST:
                strlcpy(buf, "Reverse ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REVREPLY:
                strlcpy(buf, "Reverse ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_INVREQUEST:
                strlcpy(buf, "Peer Identify Request", buf_len);
                return;
        case RTE_ARP_OP_INVREPLY:
                strlcpy(buf, "Peer Identify Reply", buf_len);
                return;
        default:
                break;
        }
        strlcpy(buf, "Unknown", buf_len);
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Update the RX or TX packet count for this known client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* New client: insert it into the table and increment its stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;

}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
                src_ip,                                                 \
                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct rte_arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
                                ArpOp, sizeof(ArpOp));
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

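/*
 * Mode 6 (ALB) receive: reuse the round-robin receive path, then hand
 * every received ARP packet to bond_mode_alb_arp_recv() so the client
 * table stays in sync with the peers' view of our MAC addresses.
 */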
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        /* This is an rx path, so treat the queue as a bond_rx_queue */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

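/*
 * Mode 0 (round-robin) transmit: spread the burst over the active
 * slaves one packet at a time, resuming from where the previous call
 * stopped. Note slave_idx is function-static, so the rotation point is
 * shared by all bonded devices and tx queues using this path. Unsent
 * packets are moved to the tail of bufs.
 */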
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* Increment the current slave index so the next call to tx burst
         * starts on the next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                       &slave_bufs[i][num_tx_slave],
                                       tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

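/*
 * L2 hash: XOR the source and destination MAC addresses together one
 * 16-bit word at a time; callers fold the result further (hash >> 8)
 * before taking it modulo the slave count, so two flows differing in
 * any address word usually land on different slaves.
 */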
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}


void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct udp_hdr *udp_hdr;
        struct tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in a fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)udp_hdr + sizeof(*udp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

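/*
 * Mode 5 (TLB) bookkeeping: each active slave is graded by how much of
 * its bandwidth is left, based on link speed and the tx byte counters
 * sampled below; bond_ethdev_update_tlb_slave_cb() re-sorts
 * internals->tlb_slaves_order by that grade every REORDER_PERIOD_MS so
 * transmission prefers the least loaded slave.
 */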
struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

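/*
 * Estimate the spare capacity of a slave. link_bwg converts the link
 * speed (Mbps) to bytes per second, scaled by the number of elapsed
 * update periods in milliseconds; load (bytes sent since the last
 * stats snapshot) is apparently multiplied by 1000 to match that
 * millisecond scaling. Quotient and remainder are kept separately so
 * bandwidth_cmp() can order slaves without floating point.
 */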
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint16_t slave_id;
        uint16_t i;

        internals->slave_update_idx++;


        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

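/*
 * Mode 5 transmit: walk the slaves in tlb_slaves_order (least loaded
 * first), sending the remaining packets on each until the burst is
 * exhausted. Packets still carrying the primary slave's source MAC are
 * rewritten to the transmitting slave's address, presumably so the
 * attached switch never sees the same source MAC on two ports.
 */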
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);


        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

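/*
 * Mode 6 (ALB) transmit: ARP packets are steered by the client table
 * (bond_mode_alb_arp_xmit) with their source MAC rewritten to the
 * chosen slave, ARP update packets are generated for known clients
 * whenever mode6.ntt is set, and all non-ARP traffic falls through to
 * the TLB transmit policy above.
 */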
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to
         * send through tlb. In the worst case every packet will be sent on
         * one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they
         * won't be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_BOND_LOG(ERR,
                                                     "Failed to allocate ARP packet from pool");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) +
                                        sizeof(struct rte_arp_hdr) +
                                        client_info->vlan_count *
                                        sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        /* Move unsent packets to the tail of bufs; index into
                         * slave_bufs[i], which holds slave_bufs_pkts[i] entries */
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                /* Copy the TLB-policy packets back to the tail of bufs,
                 * again indexing within the packets actually buffered */
                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

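/*
 * Mode 2 (balance) transmit: hash every packet onto an active slave
 * using the configured xmit policy (l2, l23 or l34 above), burst each
 * slave's share, and compact any unsent packets to the tail of bufs so
 * the caller can retry or free them.
 */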
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave using a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

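/*
 * Mode 4 transmit without dedicated queues: first drain any pending
 * LACPDUs from each slave's control ring (re-enqueueing on tx failure
 * so control traffic is never lost), then hash the data burst across
 * the slaves currently in the DISTRIBUTING state.
 */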
1283 static uint16_t
1284 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1285                 uint16_t nb_bufs)
1286 {
1287         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1288         struct bond_dev_private *internals = bd_tx_q->dev_private;
1289
1290         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1291         uint16_t slave_count;
1292
1293         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1294         uint16_t dist_slave_count;
1295
1296         /* 2-D array to sort mbufs for transmission on each slave into */
1297         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1298         /* Number of mbufs for transmission on each slave */
1299         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1300         /* Mapping array generated by hash function to map mbufs to slaves */
1301         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1302
1303         uint16_t slave_tx_count;
1304         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1305
1306         uint16_t i;
1307
1308         /* Copy slave list to protect against slave up/down changes during tx
1309          * bursting */
1310         slave_count = internals->active_slave_count;
1311         if (unlikely(slave_count < 1))
1312                 return 0;
1313
1314         memcpy(slave_port_ids, internals->active_slaves,
1315                         sizeof(slave_port_ids[0]) * slave_count);
1316
1317         /* Check for LACP control packets and send if available */
1318         for (i = 0; i < slave_count; i++) {
1319                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1320                 struct rte_mbuf *ctrl_pkt = NULL;
1321
1322                 if (likely(rte_ring_empty(port->tx_ring)))
1323                         continue;
1324
1325                 if (rte_ring_dequeue(port->tx_ring,
1326                                      (void **)&ctrl_pkt) != -ENOENT) {
1327                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1328                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1329                         /*
1330                          * re-enqueue LAG control plane packets to buffering
1331                          * ring if transmission fails so the packet isn't lost.
1332                          */
1333                         if (slave_tx_count != 1)
1334                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1335                 }
1336         }
1337
1338         if (unlikely(nb_bufs == 0))
1339                 return 0;
1340
1341         dist_slave_count = 0;
1342         for (i = 0; i < slave_count; i++) {
1343                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1344
1345                 if (ACTOR_STATE(port, DISTRIBUTING))
1346                         dist_slave_port_ids[dist_slave_count++] =
1347                                         slave_port_ids[i];
1348         }
1349
1350         if (likely(dist_slave_count > 0)) {
1351
1352                 /*
1353                  * Populate slaves mbuf with the packets which are to be sent
1354                  * on it, selecting output slave using hash based on xmit policy
1355                  */
1356                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1357                                 bufs_slave_port_idxs);
1358
1359                 for (i = 0; i < nb_bufs; i++) {
1360                         /*
1361                          * Populate slave mbuf arrays with mbufs for that
1362                          * slave
1363                          */
1364                         uint16_t slave_idx = bufs_slave_port_idxs[i];
1365
1366                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1367                                         bufs[i];
1368                 }
1369
1371                 /* Send packet burst on each slave device */
1372                 for (i = 0; i < dist_slave_count; i++) {
1373                         if (slave_nb_bufs[i] == 0)
1374                                 continue;
1375
1376                         slave_tx_count = rte_eth_tx_burst(
1377                                         dist_slave_port_ids[i],
1378                                         bd_tx_q->queue_id, slave_bufs[i],
1379                                         slave_nb_bufs[i]);
1380
1381                         total_tx_count += slave_tx_count;
1382
1383                         /* If tx burst fails, move unsent packets to the end of bufs */
1384                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1385                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1386                                                 slave_tx_count;
1387                                 total_tx_fail_count += slave_tx_fail_count;
1388
1389                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1390                                        &slave_bufs[i][slave_tx_count],
1391                                        slave_tx_fail_count * sizeof(bufs[0]));
1392                         }
1393                 }
1394         }
1395
1396         return total_tx_count;
1397 }
1398
1399 static uint16_t
1400 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1401                 uint16_t nb_pkts)
1402 {
1403         struct bond_dev_private *internals;
1404         struct bond_tx_queue *bd_tx_q;
1405
1406         uint16_t slaves[RTE_MAX_ETHPORTS];
1407         uint8_t tx_failed_flag = 0;
1408         uint16_t num_of_slaves;
1409
1410         uint16_t max_nb_of_tx_pkts = 0;
1411
1412         int slave_tx_total[RTE_MAX_ETHPORTS];
        /* Default to slave 0 so that, if every slave sends nothing, the
         * application still keeps one reference to each mbuf */
1413         int i, most_successful_tx_slave = 0;
1414
1415         bd_tx_q = (struct bond_tx_queue *)queue;
1416         internals = bd_tx_q->dev_private;
1417
1418         /* Copy slave list to protect against slave up/down changes during tx
1419          * bursting */
1420         num_of_slaves = internals->active_slave_count;
1421         memcpy(slaves, internals->active_slaves,
1422                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1423
1424         if (num_of_slaves < 1)
1425                 return 0;
1426
1427         /* Bump each mbuf's refcnt by (num_of_slaves - 1): every slave transmits every mbuf */
1428         for (i = 0; i < nb_pkts; i++)
1429                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1430
1431         /* Transmit burst on each active slave */
1432         for (i = 0; i < num_of_slaves; i++) {
1433                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1434                                         bufs, nb_pkts);
1435
1436                 if (unlikely(slave_tx_total[i] < nb_pkts))
1437                         tx_failed_flag = 1;
1438
1439                 /* record the count and index of the slave that transmitted the
1440                  * most packets */
1441                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1442                         max_nb_of_tx_pkts = slave_tx_total[i];
1443                         most_successful_tx_slave = i;
1444                 }
1445         }
1446
1447         /* if slaves fail to transmit packets from the burst, the calling
1448          * application is not aware of the extra mbuf references, so we must
1449          * free the unsent packets of every slave except the most successful one
1450          */
1451         if (unlikely(tx_failed_flag))
1452                 for (i = 0; i < num_of_slaves; i++)
1453                         if (i != most_successful_tx_slave)
1454                                 while (slave_tx_total[i] < nb_pkts)
1455                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1456
1457         return max_nb_of_tx_pkts;
1458 }
1459
1460 static void
1461 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1462 {
1463         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1464
1465         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1466                 /**
1467                  * If in mode 4 then save the link properties of the first
1468                  * slave; all subsequent slaves must match these properties
1469                  */
1470                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1471
1472                 bond_link->link_autoneg = slave_link->link_autoneg;
1473                 bond_link->link_duplex = slave_link->link_duplex;
1474                 bond_link->link_speed = slave_link->link_speed;
1475         } else {
1476                 /**
1477                  * In any other mode the link properties are set to default
1478                  * values of AUTONEG/DUPLEX
1479                  */
1480                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1481                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1482         }
1483 }
1484
1485 static int
1486 link_properties_valid(struct rte_eth_dev *ethdev,
1487                 struct rte_eth_link *slave_link)
1488 {
1489         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1490
1491         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1492                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1493
1494                 if (bond_link->link_duplex != slave_link->link_duplex ||
1495                         bond_link->link_autoneg != slave_link->link_autoneg ||
1496                         bond_link->link_speed != slave_link->link_speed)
1497                         return -1;
1498         }
1499
1500         return 0;
1501 }
1502
1503 int
1504 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1505 {
1506         struct ether_addr *mac_addr;
1507
1508         if (eth_dev == NULL) {
1509                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1510                 return -1;
1511         }
1512
1513         if (dst_mac_addr == NULL) {
1514                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1515                 return -1;
1516         }
1517
1518         mac_addr = eth_dev->data->mac_addrs;
1519
1520         ether_addr_copy(mac_addr, dst_mac_addr);
1521         return 0;
1522 }
1523
1524 int
1525 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1526 {
1527         struct ether_addr *mac_addr;
1528
1529         if (eth_dev == NULL) {
1530                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1531                 return -1;
1532         }
1533
1534         if (new_mac_addr == NULL) {
1535                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1536                 return -1;
1537         }
1538
1539         mac_addr = eth_dev->data->mac_addrs;
1540
1541         /* If the new MAC is different from the current MAC then update */
1542         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1543                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1544
1545         return 0;
1546 }
1547
1548 static const struct ether_addr null_mac_addr;
1549
1550 /*
1551  * Add additional MAC addresses to the slave
1552  */
1553 int
1554 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1555                 uint16_t slave_port_id)
1556 {
1557         int i, ret;
1558         struct ether_addr *mac_addr;
1559
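        /* Slot 0 holds the primary MAC address, which is programmed separately
         * (see mac_address_slaves_update()), so start from slot 1 */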
1560         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1561                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1562                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1563                         break;
1564
1565                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1566                 if (ret < 0) {
1567                         /* rollback */
1568                         for (i--; i > 0; i--)
1569                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1570                                         &bonded_eth_dev->data->mac_addrs[i]);
1571                         return ret;
1572                 }
1573         }
1574
1575         return 0;
1576 }
1577
1578 /*
1579  * Remove additional MAC addresses from the slave
1580  */
1581 int
1582 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1583                 uint16_t slave_port_id)
1584 {
1585         int i, rc, ret;
1586         struct ether_addr *mac_addr;
1587
1588         rc = 0;
1589         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1590                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1591                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1592                         break;
1593
1594                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1595                 /* save only the first error */
1596                 if (ret < 0 && rc == 0)
1597                         rc = ret;
1598         }
1599
1600         return rc;
1601 }
1602
1603 int
1604 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1605 {
1606         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1607         int i;
1608
1609         /* Update slave devices' MAC addresses */
1610         if (internals->slave_count < 1)
1611                 return -1;
1612
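        /* MAC policy per mode: in round-robin, balance and broadcast every
         * slave carries the bond MAC; in mode 4 the 802.3ad logic handles it;
         * otherwise only the primary uses the bond MAC and the other slaves
         * keep their own persisted addresses */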
1613         switch (internals->mode) {
1614         case BONDING_MODE_ROUND_ROBIN:
1615         case BONDING_MODE_BALANCE:
1616         case BONDING_MODE_BROADCAST:
1617                 for (i = 0; i < internals->slave_count; i++) {
1618                         if (rte_eth_dev_default_mac_addr_set(
1619                                         internals->slaves[i].port_id,
1620                                         bonded_eth_dev->data->mac_addrs)) {
1621                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1622                                                 internals->slaves[i].port_id);
1623                                 return -1;
1624                         }
1625                 }
1626                 break;
1627         case BONDING_MODE_8023AD:
1628                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1629                 break;
1630         case BONDING_MODE_ACTIVE_BACKUP:
1631         case BONDING_MODE_TLB:
1632         case BONDING_MODE_ALB:
1633         default:
1634                 for (i = 0; i < internals->slave_count; i++) {
1635                         if (internals->slaves[i].port_id ==
1636                                         internals->current_primary_port) {
1637                                 if (rte_eth_dev_default_mac_addr_set(
1638                                                 internals->current_primary_port,
1639                                                 bonded_eth_dev->data->mac_addrs)) {
1640                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1641                                                         internals->current_primary_port);
1642                                         return -1;
1643                                 }
1644                         } else {
1645                                 if (rte_eth_dev_default_mac_addr_set(
1646                                                 internals->slaves[i].port_id,
1647                                                 &internals->slaves[i].persisted_mac_addr)) {
1648                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1649                                                         internals->slaves[i].port_id);
1650                                         return -1;
1651                                 }
1652                         }
1653                 }
1654         }
1655
1656         return 0;
1657 }
1658
1659 int
1660 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1661 {
1662         struct bond_dev_private *internals;
1663
1664         internals = eth_dev->data->dev_private;
1665
1666         switch (mode) {
1667         case BONDING_MODE_ROUND_ROBIN:
1668                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1669                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1670                 break;
1671         case BONDING_MODE_ACTIVE_BACKUP:
1672                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1673                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1674                 break;
1675         case BONDING_MODE_BALANCE:
1676                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1677                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1678                 break;
1679         case BONDING_MODE_BROADCAST:
1680                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1681                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1682                 break;
1683         case BONDING_MODE_8023AD:
1684                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1685                         return -1;
1686
1687                 if (internals->mode4.dedicated_queues.enabled == 0) {
1688                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1689                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1690                         RTE_BOND_LOG(WARNING,
1691                                 "Using mode 4, TX and RX bursts must be invoked "
1692                                 "at least every 100ms.");
1693                 } else {
1694                         /* Use flow director's optimization */
1695                         eth_dev->rx_pkt_burst =
1696                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1697                         eth_dev->tx_pkt_burst =
1698                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1699                 }
1700                 break;
1701         case BONDING_MODE_TLB:
1702                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1703                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1704                 break;
1705         case BONDING_MODE_ALB:
1706                 if (bond_mode_alb_enable(eth_dev) != 0)
1707                         return -1;
1708
1709                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1710                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1711                 break;
1712         default:
1713                 return -1;
1714         }
1715
1716         internals->mode = mode;
1717
1718         return 0;
1719 }
1720
1721
1722 static int
1723 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1724                 struct rte_eth_dev *slave_eth_dev)
1725 {
1726         int errval = 0;
1727         struct bond_dev_private *internals = (struct bond_dev_private *)
1728                 bonded_eth_dev->data->dev_private;
1729         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1730
1731         if (port->slow_pool == NULL) {
1732                 char mem_name[256];
1733                 int slave_id = slave_eth_dev->data->port_id;
1734
1735                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1736                                 slave_id);
1737                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1738                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1739                         slave_eth_dev->data->numa_node);
1740
1741                 /* Any memory allocation failure during initialization is critical
1742                  * because resources can't be freed, so reinitialization is impossible. */
1743                 if (port->slow_pool == NULL) {
1744                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1745                                 slave_id, mem_name, rte_strerror(rte_errno));
1746                 }
1747         }
1748
1749         if (internals->mode4.dedicated_queues.enabled == 1) {
1750                 /* Configure slow Rx queue */
1751
1752                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1753                                 internals->mode4.dedicated_queues.rx_qid, 128,
1754                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1755                                 NULL, port->slow_pool);
1756                 if (errval != 0) {
1757                         RTE_BOND_LOG(ERR,
1758                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1759                                         slave_eth_dev->data->port_id,
1760                                         internals->mode4.dedicated_queues.rx_qid,
1761                                         errval);
1762                         return errval;
1763                 }
1764
1765                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1766                                 internals->mode4.dedicated_queues.tx_qid, 512,
1767                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1768                                 NULL);
1769                 if (errval != 0) {
1770                         RTE_BOND_LOG(ERR,
1771                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1772                                 slave_eth_dev->data->port_id,
1773                                 internals->mode4.dedicated_queues.tx_qid,
1774                                 errval);
1775                         return errval;
1776                 }
1777         }
1778         return 0;
1779 }
1780
1781 int
1782 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1783                 struct rte_eth_dev *slave_eth_dev)
1784 {
1785         struct bond_rx_queue *bd_rx_q;
1786         struct bond_tx_queue *bd_tx_q;
1787         uint16_t nb_rx_queues;
1788         uint16_t nb_tx_queues;
1789
1790         int errval;
1791         uint16_t q_id;
1792         struct rte_flow_error flow_error;
1793
1794         struct bond_dev_private *internals = (struct bond_dev_private *)
1795                 bonded_eth_dev->data->dev_private;
1796
1797         /* Stop slave */
1798         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1799
1800         /* Enable interrupts on slave device if supported */
1801         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1802                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1803
1804         /* If RSS is enabled for bonding, try to enable it for slaves  */
1805         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1806                 if (internals->rss_key_len != 0) {
1807                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1808                                         internals->rss_key_len;
1809                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1810                                         internals->rss_key;
1811                 } else {
1812                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1813                 }
1814
1815                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1816                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1817                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1818                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1819         }
1820
1821         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1822                         DEV_RX_OFFLOAD_VLAN_FILTER)
1823                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1824                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1825         else
1826                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1827                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1828
1829         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1830         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1831
1832         if (internals->mode == BONDING_MODE_8023AD) {
1833                 if (internals->mode4.dedicated_queues.enabled == 1) {
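                        /* Reserve one extra queue pair on the slave for the
                         * dedicated LACP control-plane (slow) traffic */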
1834                         nb_rx_queues++;
1835                         nb_tx_queues++;
1836                 }
1837         }
1838
1839         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1840                                      bonded_eth_dev->data->mtu);
1841         if (errval != 0 && errval != -ENOTSUP) {
1842                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1843                                 slave_eth_dev->data->port_id, errval);
1844                 return errval;
1845         }
1846
1847         /* Configure device */
1848         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1849                         nb_rx_queues, nb_tx_queues,
1850                         &(slave_eth_dev->data->dev_conf));
1851         if (errval != 0) {
1852                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1853                                 slave_eth_dev->data->port_id, errval);
1854                 return errval;
1855         }
1856
1857         /* Setup Rx Queues */
1858         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1859                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1860
1861                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1862                                 bd_rx_q->nb_rx_desc,
1863                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1864                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1865                 if (errval != 0) {
1866                         RTE_BOND_LOG(ERR,
1867                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1868                                         slave_eth_dev->data->port_id, q_id, errval);
1869                         return errval;
1870                 }
1871         }
1872
1873         /* Setup Tx Queues */
1874         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1875                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1876
1877                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1878                                 bd_tx_q->nb_tx_desc,
1879                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1880                                 &bd_tx_q->tx_conf);
1881                 if (errval != 0) {
1882                         RTE_BOND_LOG(ERR,
1883                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1884                                 slave_eth_dev->data->port_id, q_id, errval);
1885                         return errval;
1886                 }
1887         }
1888
1889         if (internals->mode == BONDING_MODE_8023AD &&
1890                         internals->mode4.dedicated_queues.enabled == 1) {
1891                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1892                 if (errval != 0)
1893                         return errval;
1894
1895                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1896                                 slave_eth_dev->data->port_id) != 0) {
1897                         RTE_BOND_LOG(ERR,
1898                                 "bond_ethdev_8023ad_flow_verify failed: port=%d",
1899                                 slave_eth_dev->data->port_id);
1900                         return -1;
1901                 }
1902
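                /* Drop any stale dedicated-queue flow rule for this slave
                 * before installing a fresh one */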
1903                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1904                         rte_flow_destroy(slave_eth_dev->data->port_id,
1905                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1906                                         &flow_error);
1907
1908                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1909                                 slave_eth_dev->data->port_id);
1910         }
1911
1912         /* Start device */
1913         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1914         if (errval != 0) {
1915                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1916                                 slave_eth_dev->data->port_id, errval);
1917                 return -1;
1918         }
1919
1920         /* If RSS is enabled for bonding, synchronize RETA */
1921         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1922                 int i;
1923                 struct bond_dev_private *internals;
1924
1925                 internals = bonded_eth_dev->data->dev_private;
1926
1927                 for (i = 0; i < internals->slave_count; i++) {
1928                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1929                                 errval = rte_eth_dev_rss_reta_update(
1930                                                 slave_eth_dev->data->port_id,
1931                                                 &internals->reta_conf[0],
1932                                                 internals->slaves[i].reta_size);
1933                                 if (errval != 0) {
1934                                         RTE_BOND_LOG(WARNING,
1935                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1936                                                      " RSS Configuration for bonding may be inconsistent.",
1937                                                      slave_eth_dev->data->port_id, errval);
1938                                 }
1939                                 break;
1940                         }
1941                 }
1942         }
1943
1944         /* If lsc interrupt is set, check initial slave's link status */
1945         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1946                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1947                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1948                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1949                         NULL);
1950         }
1951
1952         return 0;
1953 }
1954
1955 void
1956 slave_remove(struct bond_dev_private *internals,
1957                 struct rte_eth_dev *slave_eth_dev)
1958 {
1959         uint16_t i;
1960
1961         for (i = 0; i < internals->slave_count; i++)
1962                 if (internals->slaves[i].port_id ==
1963                                 slave_eth_dev->data->port_id)
1964                         break;
1965
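        /* Compact the slave array and every flow's per-slave handle array so
         * the remaining entries stay contiguous */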
1966         if (i < (internals->slave_count - 1)) {
1967                 struct rte_flow *flow;
1968
1969                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1970                                 sizeof(internals->slaves[0]) *
1971                                 (internals->slave_count - i - 1));
1972                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1973                         memmove(&flow->flows[i], &flow->flows[i + 1],
1974                                 sizeof(flow->flows[0]) *
1975                                 (internals->slave_count - i - 1));
1976                         flow->flows[internals->slave_count - 1] = NULL;
1977                 }
1978         }
1979
1980         internals->slave_count--;
1981
1982         /* force reconfiguration of slave interfaces */
1983         _rte_eth_dev_reset(slave_eth_dev);
1984 }
1985
1986 static void
1987 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1988
1989 void
1990 slave_add(struct bond_dev_private *internals,
1991                 struct rte_eth_dev *slave_eth_dev)
1992 {
1993         struct bond_slave_details *slave_details =
1994                         &internals->slaves[internals->slave_count];
1995
1996         slave_details->port_id = slave_eth_dev->data->port_id;
1997         slave_details->last_link_status = 0;
1998
1999         /* Mark slave devices that don't support interrupts so we can
2000          * compensate when we start the bond
2001          */
2002         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2003                 slave_details->link_status_poll_enabled = 1;
2004         }
2005
2006         slave_details->link_status_wait_to_complete = 0;
2007         /* preserve the slave's original MAC address so it can be restored later */
2008         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2009                         sizeof(struct ether_addr));
2010 }
2011
2012 void
2013 bond_ethdev_primary_set(struct bond_dev_private *internals,
2014                 uint16_t slave_port_id)
2015 {
2016         int i;
2017
2018         if (internals->active_slave_count < 1)
2019                 internals->current_primary_port = slave_port_id;
2020         else
2021                 /* Search bonded device slave ports for new proposed primary port */
2022                 for (i = 0; i < internals->active_slave_count; i++) {
2023                         if (internals->active_slaves[i] == slave_port_id)
2024                                 internals->current_primary_port = slave_port_id;
2025                 }
2026 }
2027
2028 static void
2029 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2030
2031 static int
2032 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2033 {
2034         struct bond_dev_private *internals;
2035         int i;
2036
2037         /* slave eth dev will be started by bonded device */
2038         if (check_for_bonded_ethdev(eth_dev)) {
2039                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2040                                 eth_dev->data->port_id);
2041                 return -1;
2042         }
2043
2044         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2045         eth_dev->data->dev_started = 1;
2046
2047         internals = eth_dev->data->dev_private;
2048
2049         if (internals->slave_count == 0) {
2050                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2051                 goto out_err;
2052         }
2053
2054         if (internals->user_defined_mac == 0) {
2055                 struct ether_addr *new_mac_addr = NULL;
2056
2057                 for (i = 0; i < internals->slave_count; i++)
2058                         if (internals->slaves[i].port_id == internals->primary_port)
2059                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2060
2061                 if (new_mac_addr == NULL)
2062                         goto out_err;
2063
2064                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2065                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2066                                         eth_dev->data->port_id);
2067                         goto out_err;
2068                 }
2069         }
2070
2071         /* If bonded device is configured in promiscuous mode then re-apply config */
2072         if (internals->promiscuous_en)
2073                 bond_ethdev_promiscuous_enable(eth_dev);
2074
2075         if (internals->mode == BONDING_MODE_8023AD) {
2076                 if (internals->mode4.dedicated_queues.enabled == 1) {
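                        /* The dedicated LACP slow queue is the extra queue
                         * appended after the data queues on each slave (see
                         * slave_configure_slow_queue()) */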
2077                         internals->mode4.dedicated_queues.rx_qid =
2078                                         eth_dev->data->nb_rx_queues;
2079                         internals->mode4.dedicated_queues.tx_qid =
2080                                         eth_dev->data->nb_tx_queues;
2081                 }
2082         }
2083
2085         /* Reconfigure each slave device if starting bonded device */
2086         for (i = 0; i < internals->slave_count; i++) {
2087                 struct rte_eth_dev *slave_ethdev =
2088                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2089                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2090                         RTE_BOND_LOG(ERR,
2091                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2092                                 eth_dev->data->port_id,
2093                                 internals->slaves[i].port_id);
2094                         goto out_err;
2095                 }
2096                 /* We will need to poll for link status if any slave doesn't
2097                  * support interrupts
2098                  */
2099                 if (internals->slaves[i].link_status_poll_enabled)
2100                         internals->link_status_polling_enabled = 1;
2101         }
2102
2103         /* start polling if needed */
2104         if (internals->link_status_polling_enabled) {
2105                 rte_eal_alarm_set(
2106                         internals->link_status_polling_interval_ms * 1000,
2107                         bond_ethdev_slave_link_status_change_monitor,
2108                         (void *)&rte_eth_devices[internals->port_id]);
2109         }
2110
2111         /* Update all slave devices' MAC addresses */
2112         if (mac_address_slaves_update(eth_dev) != 0)
2113                 goto out_err;
2114
2115         if (internals->user_defined_primary_port)
2116                 bond_ethdev_primary_set(internals, internals->primary_port);
2117
2118         if (internals->mode == BONDING_MODE_8023AD)
2119                 bond_mode_8023ad_start(eth_dev);
2120
2121         if (internals->mode == BONDING_MODE_TLB ||
2122                         internals->mode == BONDING_MODE_ALB)
2123                 bond_tlb_enable(internals);
2124
2125         return 0;
2126
2127 out_err:
2128         eth_dev->data->dev_started = 0;
2129         return -1;
2130 }
2131
2132 static void
2133 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2134 {
2135         uint16_t i;
2136
2137         if (dev->data->rx_queues != NULL) {
2138                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2139                         rte_free(dev->data->rx_queues[i]);
2140                         dev->data->rx_queues[i] = NULL;
2141                 }
2142                 dev->data->nb_rx_queues = 0;
2143         }
2144
2145         if (dev->data->tx_queues != NULL) {
2146                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2147                         rte_free(dev->data->tx_queues[i]);
2148                         dev->data->tx_queues[i] = NULL;
2149                 }
2150                 dev->data->nb_tx_queues = 0;
2151         }
2152 }
2153
2154 void
2155 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2156 {
2157         struct bond_dev_private *internals = eth_dev->data->dev_private;
2158         uint16_t i;
2159
2160         if (internals->mode == BONDING_MODE_8023AD) {
2161                 struct port *port;
2162                 void *pkt = NULL;
2163
2164                 bond_mode_8023ad_stop(eth_dev);
2165
2166                 /* Discard all messages to/from mode 4 state machines */
2167                 for (i = 0; i < internals->active_slave_count; i++) {
2168                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2169
2170                         RTE_ASSERT(port->rx_ring != NULL);
2171                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2172                                 rte_pktmbuf_free(pkt);
2173
2174                         RTE_ASSERT(port->tx_ring != NULL);
2175                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2176                                 rte_pktmbuf_free(pkt);
2177                 }
2178         }
2179
2180         if (internals->mode == BONDING_MODE_TLB ||
2181                         internals->mode == BONDING_MODE_ALB) {
2182                 bond_tlb_disable(internals);
2183                 for (i = 0; i < internals->active_slave_count; i++)
2184                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2185         }
2186
2187         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2188         eth_dev->data->dev_started = 0;
2189
2190         internals->link_status_polling_enabled = 0;
2191         for (i = 0; i < internals->slave_count; i++) {
2192                 uint16_t slave_id = internals->slaves[i].port_id;
2193                 if (find_slave_by_id(internals->active_slaves,
2194                                 internals->active_slave_count, slave_id) !=
2195                                                 internals->active_slave_count) {
2196                         internals->slaves[i].last_link_status = 0;
2197                         rte_eth_dev_stop(slave_id);
2198                         deactivate_slave(eth_dev, slave_id);
2199                 }
2200         }
2201 }
2202
2203 void
2204 bond_ethdev_close(struct rte_eth_dev *dev)
2205 {
2206         struct bond_dev_private *internals = dev->data->dev_private;
2207         uint16_t bond_port_id = internals->port_id;
2208         int skipped = 0;
2209         struct rte_flow_error ferror;
2210
2211         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
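        /* "skipped" counts the slaves that could not be removed, so the loop
         * still terminates after visiting every remaining slave */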
2212         while (internals->slave_count != skipped) {
2213                 uint16_t port_id = internals->slaves[skipped].port_id;
2214
2215                 rte_eth_dev_stop(port_id);
2216
2217                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2218                         RTE_BOND_LOG(ERR,
2219                                      "Failed to remove port %d from bonded device %s",
2220                                      port_id, dev->device->name);
2221                         skipped++;
2222                 }
2223         }
2224         bond_flow_ops.flush(dev, &ferror);
2225         bond_ethdev_free_queues(dev);
2226         rte_bitmap_reset(internals->vlan_filter_bmp);
2227 }
2228
2229 /* forward declaration */
2230 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2231
2232 static void
2233 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2234 {
2235         struct bond_dev_private *internals = dev->data->dev_private;
2236
2237         uint16_t max_nb_rx_queues = UINT16_MAX;
2238         uint16_t max_nb_tx_queues = UINT16_MAX;
2239         uint16_t max_rx_desc_lim = UINT16_MAX;
2240         uint16_t max_tx_desc_lim = UINT16_MAX;
2241
2242         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2243
2244         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2245                         internals->candidate_max_rx_pktlen :
2246                         ETHER_MAX_JUMBO_FRAME_LEN;
2247
2248         /* The max number of tx/rx queues that the bonded device can support is
2249          * the minimum across all bonded slaves, as every slave must be capable
2250          * of supporting the same number of tx/rx queues.
2251          */
2252         if (internals->slave_count > 0) {
2253                 struct rte_eth_dev_info slave_info;
2254                 uint16_t idx;
2255
2256                 for (idx = 0; idx < internals->slave_count; idx++) {
2257                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2258                                         &slave_info);
2259
2260                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2261                                 max_nb_rx_queues = slave_info.max_rx_queues;
2262
2263                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2264                                 max_nb_tx_queues = slave_info.max_tx_queues;
2265
2266                         if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2267                                 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2268
2269                         if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2270                                 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2271                 }
2272         }
2273
2274         dev_info->max_rx_queues = max_nb_rx_queues;
2275         dev_info->max_tx_queues = max_nb_tx_queues;
2276
2277         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2278                sizeof(dev_info->default_rxconf));
2279         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2280                sizeof(dev_info->default_txconf));
2281
2282         dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2283         dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2284
2285         /**
2286          * If dedicated hw queues enabled for link bonding device in LACP mode
2287          * then we need to reduce the maximum number of data path queues by 1.
2288          */
2289         if (internals->mode == BONDING_MODE_8023AD &&
2290                 internals->mode4.dedicated_queues.enabled == 1) {
2291                 dev_info->max_rx_queues--;
2292                 dev_info->max_tx_queues--;
2293         }
2294
2295         dev_info->min_rx_bufsize = 0;
2296
2297         dev_info->rx_offload_capa = internals->rx_offload_capa;
2298         dev_info->tx_offload_capa = internals->tx_offload_capa;
2299         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2300         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2301         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2302
2303         dev_info->reta_size = internals->reta_size;
2304 }
2305
2306 static int
2307 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2308 {
2309         int res;
2310         uint16_t i;
2311         struct bond_dev_private *internals = dev->data->dev_private;
2312
2313         /* don't do this while a slave is being added */
2314         rte_spinlock_lock(&internals->lock);
2315
2316         if (on)
2317                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2318         else
2319                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2320
2321         for (i = 0; i < internals->slave_count; i++) {
2322                 uint16_t port_id = internals->slaves[i].port_id;
2323
2324                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2325                 if (res == -ENOTSUP)
2326                         RTE_BOND_LOG(WARNING,
2327                                      "Setting VLAN filter on slave port %u not supported.",
2328                                      port_id);
2329         }
2330
2331         rte_spinlock_unlock(&internals->lock);
2332         return 0;
2333 }
2334
2335 static int
2336 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2337                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2338                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2339 {
2340         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2341                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2342                                         0, dev->data->numa_node);
2343         if (bd_rx_q == NULL)
2344                 return -1;
2345
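        /* Only record the queue parameters here; they are replayed onto each
         * slave's real rx queue in slave_configure() */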
2346         bd_rx_q->queue_id = rx_queue_id;
2347         bd_rx_q->dev_private = dev->data->dev_private;
2348
2349         bd_rx_q->nb_rx_desc = nb_rx_desc;
2350
2351         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2352         bd_rx_q->mb_pool = mb_pool;
2353
2354         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2355
2356         return 0;
2357 }
2358
2359 static int
2360 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2361                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2362                 const struct rte_eth_txconf *tx_conf)
2363 {
2364         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2365                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2366                                         0, dev->data->numa_node);
2367
2368         if (bd_tx_q == NULL)
2369                 return -1;
2370
2371         bd_tx_q->queue_id = tx_queue_id;
2372         bd_tx_q->dev_private = dev->data->dev_private;
2373
2374         bd_tx_q->nb_tx_desc = nb_tx_desc;
2375         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2376
2377         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2378
2379         return 0;
2380 }
2381
2382 static void
2383 bond_ethdev_rx_queue_release(void *queue)
2384 {
2385         if (queue == NULL)
2386                 return;
2387
2388         rte_free(queue);
2389 }
2390
2391 static void
2392 bond_ethdev_tx_queue_release(void *queue)
2393 {
2394         if (queue == NULL)
2395                 return;
2396
2397         rte_free(queue);
2398 }
2399
2400 static void
2401 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2402 {
2403         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2404         struct bond_dev_private *internals;
2405
2406         /* Default value for polling slave found is true as we don't want to
2407          * disable the polling thread if we cannot get the lock */
2408         int i, polling_slave_found = 1;
2409
2410         if (cb_arg == NULL)
2411                 return;
2412
2413         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2414         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2415
2416         if (!bonded_ethdev->data->dev_started ||
2417                 !internals->link_status_polling_enabled)
2418                 return;
2419
2420         /* If the device is currently being configured then don't check slaves'
2421          * link status; wait until the next period */
2422         if (rte_spinlock_trylock(&internals->lock)) {
2423                 if (internals->slave_count > 0)
2424                         polling_slave_found = 0;
2425
2426                 for (i = 0; i < internals->slave_count; i++) {
2427                         if (!internals->slaves[i].link_status_poll_enabled)
2428                                 continue;
2429
2430                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2431                         polling_slave_found = 1;
2432
2433                         /* Update slave link status */
2434                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2435                                         internals->slaves[i].link_status_wait_to_complete);
2436
2437                         /* if link status has changed since last checked then call lsc
2438                          * event callback */
2439                         if (slave_ethdev->data->dev_link.link_status !=
2440                                         internals->slaves[i].last_link_status) {
2441                                 internals->slaves[i].last_link_status =
2442                                                 slave_ethdev->data->dev_link.link_status;
2443
2444                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2445                                                 RTE_ETH_EVENT_INTR_LSC,
2446                                                 &bonded_ethdev->data->port_id,
2447                                                 NULL);
2448                         }
2449                 }
2450                 rte_spinlock_unlock(&internals->lock);
2451         }
2452
2453         if (polling_slave_found)
2454                 /* Set alarm to continue monitoring link status of slave ethdevs */
2455                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2456                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2457 }
2458
2459 static int
2460 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2461 {
2462         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2463
2464         struct bond_dev_private *bond_ctx;
2465         struct rte_eth_link slave_link;
2466
2467         uint32_t idx;
2468
2469         bond_ctx = ethdev->data->dev_private;
2470
2471         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2472
2473         if (ethdev->data->dev_started == 0 ||
2474                         bond_ctx->active_slave_count == 0) {
2475                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2476                 return 0;
2477         }
2478
2479         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2480
2481         if (wait_to_complete)
2482                 link_update = rte_eth_link_get;
2483         else
2484                 link_update = rte_eth_link_get_nowait;
2485
2486         switch (bond_ctx->mode) {
2487         case BONDING_MODE_BROADCAST:
2488                 /**
2489                  * Setting link speed to UINT32_MAX to ensure we pick up the
2490                  * value of the first active slave
2491                  */
2492                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2493
2494                 /**
2495                  * link speed is minimum value of all the slaves link speed as
2496                  * packet loss will occur on this slave if transmission at rates
2497                  * greater than this are attempted
2498                  */
2499                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2500                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2501
2502                         if (slave_link.link_speed <
2503                                         ethdev->data->dev_link.link_speed)
2504                                 ethdev->data->dev_link.link_speed =
2505                                                 slave_link.link_speed;
2506                 }
2507                 break;
2508         case BONDING_MODE_ACTIVE_BACKUP:
2509                 /* Current primary slave */
2510                 link_update(bond_ctx->current_primary_port, &slave_link);
2511
2512                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2513                 break;
2514         case BONDING_MODE_8023AD:
2515                 ethdev->data->dev_link.link_autoneg =
2516                                 bond_ctx->mode4.slave_link.link_autoneg;
2517                 ethdev->data->dev_link.link_duplex =
2518                                 bond_ctx->mode4.slave_link.link_duplex;
2519                 /* fall through to update link speed */
2520         case BONDING_MODE_ROUND_ROBIN:
2521         case BONDING_MODE_BALANCE:
2522         case BONDING_MODE_TLB:
2523         case BONDING_MODE_ALB:
2524         default:
2525                 /**
2526                  * In these modes the maximum theoretical link speed is the sum
2527                  * of all the slaves' link speeds
2528                  */
2529                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2530
2531                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2532                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2533
2534                         ethdev->data->dev_link.link_speed +=
2535                                         slave_link.link_speed;
2536                 }
2537         }
2538
2540         return 0;
2541 }
2542
2543
2544 static int
2545 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2546 {
2547         struct bond_dev_private *internals = dev->data->dev_private;
2548         struct rte_eth_stats slave_stats;
2549         int i, j;
2550
2551         for (i = 0; i < internals->slave_count; i++) {
2552                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2553
2554                 stats->ipackets += slave_stats.ipackets;
2555                 stats->opackets += slave_stats.opackets;
2556                 stats->ibytes += slave_stats.ibytes;
2557                 stats->obytes += slave_stats.obytes;
2558                 stats->imissed += slave_stats.imissed;
2559                 stats->ierrors += slave_stats.ierrors;
2560                 stats->oerrors += slave_stats.oerrors;
2561                 stats->rx_nombuf += slave_stats.rx_nombuf;
2562
2563                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2564                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2565                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2566                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2567                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2568                         stats->q_errors[j] += slave_stats.q_errors[j];
2569                 }
2570
2571         }
2572
2573         return 0;
2574 }
2575
2576 static void
2577 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2578 {
2579         struct bond_dev_private *internals = dev->data->dev_private;
2580         int i;
2581
2582         for (i = 0; i < internals->slave_count; i++)
2583                 rte_eth_stats_reset(internals->slaves[i].port_id);
2584 }
2585
2586 static void
2587 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2588 {
2589         struct bond_dev_private *internals = eth_dev->data->dev_private;
2590         int i;
2591
2592         internals->promiscuous_en = 1;
2593
2594         switch (internals->mode) {
2595         /* Promiscuous mode is propagated to all slaves */
2596         case BONDING_MODE_ROUND_ROBIN:
2597         case BONDING_MODE_BALANCE:
2598         case BONDING_MODE_BROADCAST:
2599                 for (i = 0; i < internals->slave_count; i++)
2600                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2601                 break;
2602         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2603         case BONDING_MODE_8023AD:
2604                 break;
2605         /* Promiscuous mode is propagated only to primary slave */
2606         case BONDING_MODE_ACTIVE_BACKUP:
2607         case BONDING_MODE_TLB:
2608         case BONDING_MODE_ALB:
2609         default:
2610                 /* Do not touch promisc when there cannot be primary ports */
2611                 if (internals->slave_count == 0)
2612                         break;
2613                 rte_eth_promiscuous_enable(internals->current_primary_port);
2614         }
2615 }
2616
2617 static void
2618 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2619 {
2620         struct bond_dev_private *internals = dev->data->dev_private;
2621         int i;
2622
2623         internals->promiscuous_en = 0;
2624
2625         switch (internals->mode) {
2626         /* Promiscuous mode is propagated to all slaves */
2627         case BONDING_MODE_ROUND_ROBIN:
2628         case BONDING_MODE_BALANCE:
2629         case BONDING_MODE_BROADCAST:
2630                 for (i = 0; i < internals->slave_count; i++)
2631                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2632                 break;
2633         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2634         case BONDING_MODE_8023AD:
2635                 break;
2636         /* Promiscuous mode is propagated only to primary slave */
2637         case BONDING_MODE_ACTIVE_BACKUP:
2638         case BONDING_MODE_TLB:
2639         case BONDING_MODE_ALB:
2640         default:
2641                 /* Do not touch promisc when there cannot be primary ports */
2642                 if (internals->slave_count == 0)
2643                         break;
2644                 rte_eth_promiscuous_disable(internals->current_primary_port);
2645         }
2646 }
2647
2648 static void
2649 bond_ethdev_delayed_lsc_propagation(void *arg)
2650 {
2651         if (arg == NULL)
2652                 return;
2653
2654         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2655                         RTE_ETH_EVENT_INTR_LSC, NULL);
2656 }
2657
2658 int
2659 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2660                 void *param, void *ret_param __rte_unused)
2661 {
2662         struct rte_eth_dev *bonded_eth_dev;
2663         struct bond_dev_private *internals;
2664         struct rte_eth_link link;
2665         int rc = -1;
2666
2667         uint8_t lsc_flag = 0;
2668         int valid_slave = 0;
2669         uint16_t active_pos;
2670         uint16_t i;
2671
2672         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2673                 return rc;
2674
2675         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2676
2677         if (check_for_bonded_ethdev(bonded_eth_dev))
2678                 return rc;
2679
2680         internals = bonded_eth_dev->data->dev_private;
2681
2682         /* If the device isn't started don't handle interrupts */
2683         if (!bonded_eth_dev->data->dev_started)
2684                 return rc;
2685
2686         /* verify that port_id is a valid slave of bonded port */
2687         for (i = 0; i < internals->slave_count; i++) {
2688                 if (internals->slaves[i].port_id == port_id) {
2689                         valid_slave = 1;
2690                         break;
2691                 }
2692         }
2693
2694         if (!valid_slave)
2695                 return rc;
2696
2697         /* Synchronize parallel lsc callback calls, triggered either by real
2698          * link events from the slave PMDs or by the bonding PMD itself.
2699          */
2700         rte_spinlock_lock(&internals->lsc_lock);
2701
2702         /* Search for port in active port list */
2703         active_pos = find_slave_by_id(internals->active_slaves,
2704                         internals->active_slave_count, port_id);
2705
2706         rte_eth_link_get_nowait(port_id, &link);
2707         if (link.link_status) {
2708                 if (active_pos < internals->active_slave_count)
2709                         goto link_update;
2710
2711                 /* Check link state properties if bonded link is up */
2712                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2713                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2714                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2715                                              "for slave %d in bonding mode %d",
2716                                              port_id, internals->mode);
2717                 } else {
2718                         /* inherit slave link properties */
2719                         link_properties_set(bonded_eth_dev, &link);
2720                 }
2721
2722                 /* If no active slave ports then set this port to be
2723                  * the primary port.
2724                  */
2725                 if (internals->active_slave_count < 1) {
2726                         /* If first active slave, then change link status */
2727                         bonded_eth_dev->data->dev_link.link_status =
2728                                                                 ETH_LINK_UP;
2729                         internals->current_primary_port = port_id;
2730                         lsc_flag = 1;
2731
2732                         mac_address_slaves_update(bonded_eth_dev);
2733                 }
2734
2735                 activate_slave(bonded_eth_dev, port_id);
2736
2737                 /* If the user has defined this port as the primary port,
2738                  * make it primary again now that its link is up.
2739                  */
2740                 if (internals->user_defined_primary_port &&
2741                                 internals->primary_port == port_id)
2742                         bond_ethdev_primary_set(internals, port_id);
2743         } else {
2744                 if (active_pos == internals->active_slave_count)
2745                         goto link_update;
2746
2747                 /* Remove from active slave list */
2748                 deactivate_slave(bonded_eth_dev, port_id);
2749
2750                 if (internals->active_slave_count < 1)
2751                         lsc_flag = 1;
2752
2753         /* Update primary id: take the first active slave from the list,
2754          * or fall back to the configured primary port if none is active */
2755                 if (port_id == internals->current_primary_port) {
2756                         if (internals->active_slave_count > 0)
2757                                 bond_ethdev_primary_set(internals,
2758                                                 internals->active_slaves[0]);
2759                         else
2760                                 internals->current_primary_port = internals->primary_port;
2761                 }
2762         }
2763
2764 link_update:
2765         /**
2766          * Update bonded device link properties after any change to active
2767          * slaves
2768          */
2769         bond_ethdev_link_update(bonded_eth_dev, 0);
2770
2771         if (lsc_flag) {
2772                 /* Cancel any possible outstanding interrupts if delays are enabled */
2773                 if (internals->link_up_delay_ms > 0 ||
2774                         internals->link_down_delay_ms > 0)
2775                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2776                                         bonded_eth_dev);
2777
2778                 if (bonded_eth_dev->data->dev_link.link_status) {
2779                         if (internals->link_up_delay_ms > 0)
2780                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2781                                                 bond_ethdev_delayed_lsc_propagation,
2782                                                 (void *)bonded_eth_dev);
2783                         else
2784                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2785                                                 RTE_ETH_EVENT_INTR_LSC,
2786                                                 NULL);
2787
2788                 } else {
2789                         if (internals->link_down_delay_ms > 0)
2790                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2791                                                 bond_ethdev_delayed_lsc_propagation,
2792                                                 (void *)bonded_eth_dev);
2793                         else
2794                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2795                                                 RTE_ETH_EVENT_INTR_LSC,
2796                                                 NULL);
2797                 }
2798         }
2799
2800         rte_spinlock_unlock(&internals->lsc_lock);
2801
2802         return rc;
2803 }
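
/*
 * Illustrative sketch (not part of this driver): an application can observe
 * the LSC events that the callback above (re)emits for the bonded port.
 * `bond_port` and `on_bond_lsc` are names invented for this example.
 *
 *     static int
 *     on_bond_lsc(uint16_t port_id, enum rte_eth_event_type event,
 *                 void *cb_arg, void *ret_param)
 *     {
 *             struct rte_eth_link link;
 *
 *             RTE_SET_USED(event);
 *             RTE_SET_USED(cb_arg);
 *             RTE_SET_USED(ret_param);
 *             rte_eth_link_get_nowait(port_id, &link);
 *             printf("bond port %u link %s\n", port_id,
 *                    link.link_status ? "up" : "down");
 *             return 0;
 *     }
 *
 *     rte_eth_dev_callback_register(bond_port, RTE_ETH_EVENT_INTR_LSC,
 *                                   on_bond_lsc, NULL);
 */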
2804
2805 static int
2806 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2807                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2808 {
2809         unsigned i, j;
2810         int result = 0;
2811         int slave_reta_size;
2812         unsigned reta_count;
2813         struct bond_dev_private *internals = dev->data->dev_private;
2814
2815         if (reta_size != internals->reta_size)
2816                 return -EINVAL;
2817
2818         /* Copy RETA table */
2819         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2820
2821         for (i = 0; i < reta_count; i++) {
2822                 internals->reta_conf[i].mask = reta_conf[i].mask;
2823                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2824                         if ((reta_conf[i].mask >> j) & 0x01)
2825                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2826         }
2827
2828         /* Fill rest of array */
2829         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2830                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2831                                 sizeof(internals->reta_conf[0]) * reta_count);
2832
2833         /* Propagate RETA over slaves */
2834         for (i = 0; i < internals->slave_count; i++) {
2835                 slave_reta_size = internals->slaves[i].reta_size;
2836                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2837                                 &internals->reta_conf[0], slave_reta_size);
2838                 if (result < 0)
2839                         return result;
2840         }
2841
2842         return 0;
2843 }
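
/*
 * Illustrative sketch (not part of this driver): updating the RETA of the
 * bonded port, which the op above then mirrors to every slave. A RETA size
 * of 128 and the variables `bond_port` and `nb_rx_queues` are assumptions.
 *
 *     struct rte_eth_rss_reta_entry64 reta[128 / RTE_RETA_GROUP_SIZE];
 *     unsigned int i;
 *
 *     memset(reta, 0, sizeof(reta));
 *     for (i = 0; i < 128; i++) {
 *             reta[i / RTE_RETA_GROUP_SIZE].mask |=
 *                             1ULL << (i % RTE_RETA_GROUP_SIZE);
 *             reta[i / RTE_RETA_GROUP_SIZE].reta[i % RTE_RETA_GROUP_SIZE] =
 *                             i % nb_rx_queues;
 *     }
 *     rte_eth_dev_rss_reta_update(bond_port, reta, 128);
 */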
2844
2845 static int
2846 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2847                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2848 {
2849         int i, j;
2850         struct bond_dev_private *internals = dev->data->dev_private;
2851
2852         if (reta_size != internals->reta_size)
2853                 return -EINVAL;
2854
2855         /* Copy RETA table */
2856         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2857                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2858                         if ((reta_conf[i].mask >> j) & 0x01)
2859                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2860
2861         return 0;
2862 }
2863
2864 static int
2865 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2866                 struct rte_eth_rss_conf *rss_conf)
2867 {
2868         int i, result = 0;
2869         struct bond_dev_private *internals = dev->data->dev_private;
2870         struct rte_eth_rss_conf bond_rss_conf;
2871
2872         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2873
2874         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2875
2876         if (bond_rss_conf.rss_hf != 0)
2877                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2878
2879         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2880                         sizeof(internals->rss_key)) {
2881                 if (bond_rss_conf.rss_key_len == 0)
2882                         bond_rss_conf.rss_key_len = 40; /* default RSS key length */
2883                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2884                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2885                                 internals->rss_key_len);
2886         }
2887
2888         for (i = 0; i < internals->slave_count; i++) {
2889                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2890                                 &bond_rss_conf);
2891                 if (result < 0)
2892                         return result;
2893         }
2894
2895         return 0;
2896 }
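
/*
 * Illustrative sketch (not part of this driver): updating the RSS hash
 * configuration of the bonded port. The op above masks the requested hash
 * types against the slaves' common capabilities before pushing the result
 * to each slave. `bond_port` and the key contents are assumptions.
 *
 *     static uint8_t rss_key[40] = { 0x6D, 0x5A, ... };  // 40-byte key
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = rss_key,
 *             .rss_key_len = sizeof(rss_key),
 *             .rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *     };
 *
 *     rte_eth_dev_rss_hash_update(bond_port, &conf);
 */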
2897
2898 static int
2899 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2900                 struct rte_eth_rss_conf *rss_conf)
2901 {
2902         struct bond_dev_private *internals = dev->data->dev_private;
2903
2904         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2905         rss_conf->rss_key_len = internals->rss_key_len;
2906         if (rss_conf->rss_key)
2907                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2908
2909         return 0;
2910 }
2911
2912 static int
2913 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2914 {
2915         struct rte_eth_dev *slave_eth_dev;
2916         struct bond_dev_private *internals = dev->data->dev_private;
2917         int ret, i;
2918
2919         rte_spinlock_lock(&internals->lock);
2920
2921         for (i = 0; i < internals->slave_count; i++) {
2922                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2923                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2924                         rte_spinlock_unlock(&internals->lock);
2925                         return -ENOTSUP;
2926                 }
2927         }
2928         for (i = 0; i < internals->slave_count; i++) {
2929                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2930                 if (ret < 0) {
2931                         rte_spinlock_unlock(&internals->lock);
2932                         return ret;
2933                 }
2934         }
2935
2936         rte_spinlock_unlock(&internals->lock);
2937         return 0;
2938 }
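
/*
 * Illustrative sketch (not part of this driver): setting the MTU on the
 * bonded port succeeds only if every slave implements mtu_set, as checked
 * above, and every per-slave call succeeds. `bond_port` is an assumption.
 *
 *     if (rte_eth_dev_set_mtu(bond_port, 9000) != 0)
 *             RTE_LOG(ERR, USER1, "MTU rejected by at least one slave\n");
 */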
2939
2940 static int
2941 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2942 {
2943         if (mac_address_set(dev, addr)) {
2944                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2945                 return -EINVAL;
2946         }
2947
2948         return 0;
2949 }
2950
2951 static int
2952 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2953                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2954 {
2955         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2956                 *(const void **)arg = &bond_flow_ops;
2957                 return 0;
2958         }
2959         return -ENOTSUP;
2960 }
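
/*
 * Illustrative note (not part of this driver): the hook above is what lets
 * applications use the rte_flow API on the bonded port. rte_flow calls
 * fetch the flow ops through the same filter_ctrl path, roughly:
 *
 *     const struct rte_flow_ops *ops;
 *
 *     rte_eth_dev_filter_ctrl(bond_port, RTE_ETH_FILTER_GENERIC,
 *                             RTE_ETH_FILTER_GET, &ops);
 */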
2961
2962 static int
2963 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2964                                 __rte_unused uint32_t index, uint32_t vmdq)
2965 {
2966         struct rte_eth_dev *slave_eth_dev;
2967         struct bond_dev_private *internals = dev->data->dev_private;
2968         int ret, i;
2969
2970         rte_spinlock_lock(&internals->lock);
2971
2972         for (i = 0; i < internals->slave_count; i++) {
2973                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2974                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2975                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2976                         ret = -ENOTSUP;
2977                         goto end;
2978                 }
2979         }
2980
2981         for (i = 0; i < internals->slave_count; i++) {
2982                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2983                                 mac_addr, vmdq);
2984                 if (ret < 0) {
2985                         /* rollback */
2986                         for (i--; i >= 0; i--)
2987                                 rte_eth_dev_mac_addr_remove(
2988                                         internals->slaves[i].port_id, mac_addr);
2989                         goto end;
2990                 }
2991         }
2992
2993         ret = 0;
2994 end:
2995         rte_spinlock_unlock(&internals->lock);
2996         return ret;
2997 }
2998
2999 static void
3000 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3001 {
3002         struct rte_eth_dev *slave_eth_dev;
3003         struct bond_dev_private *internals = dev->data->dev_private;
3004         int i;
3005
3006         rte_spinlock_lock(&internals->lock);
3007
3008         for (i = 0; i < internals->slave_count; i++) {
3009                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3010                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3011                         goto end;
3012         }
3013
3014         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
3015
3016         for (i = 0; i < internals->slave_count; i++)
3017                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3018                                 mac_addr);
3019
3020 end:
3021         rte_spinlock_unlock(&internals->lock);
3022 }
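
/*
 * Illustrative sketch (not part of this driver): adding a secondary MAC
 * address to the bonded port installs it on every slave, with rollback on
 * failure, as implemented above. `bond_port` and the address value are
 * assumptions.
 *
 *     struct ether_addr extra = {
 *             .addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }
 *     };
 *
 *     rte_eth_dev_mac_addr_add(bond_port, &extra, 0);
 */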
3023
3024 const struct eth_dev_ops default_dev_ops = {
3025         .dev_start            = bond_ethdev_start,
3026         .dev_stop             = bond_ethdev_stop,
3027         .dev_close            = bond_ethdev_close,
3028         .dev_configure        = bond_ethdev_configure,
3029         .dev_infos_get        = bond_ethdev_info,
3030         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3031         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3032         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3033         .rx_queue_release     = bond_ethdev_rx_queue_release,
3034         .tx_queue_release     = bond_ethdev_tx_queue_release,
3035         .link_update          = bond_ethdev_link_update,
3036         .stats_get            = bond_ethdev_stats_get,
3037         .stats_reset          = bond_ethdev_stats_reset,
3038         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3039         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3040         .reta_update          = bond_ethdev_rss_reta_update,
3041         .reta_query           = bond_ethdev_rss_reta_query,
3042         .rss_hash_update      = bond_ethdev_rss_hash_update,
3043         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3044         .mtu_set              = bond_ethdev_mtu_set,
3045         .mac_addr_set         = bond_ethdev_mac_address_set,
3046         .mac_addr_add         = bond_ethdev_mac_addr_add,
3047         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3048         .filter_ctrl          = bond_filter_ctrl
3049 };
3050
3051 static int
3052 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3053 {
3054         const char *name = rte_vdev_device_name(dev);
3055         uint8_t socket_id = dev->device.numa_node;
3056         struct bond_dev_private *internals = NULL;
3057         struct rte_eth_dev *eth_dev = NULL;
3058         uint32_t vlan_filter_bmp_size;
3059
3060         /* Now do all data allocation - for the eth_dev structure
3061          * and internal (private) data
3062          */
3063
3064         /* reserve an ethdev entry */
3065         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3066         if (eth_dev == NULL) {
3067                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3068                 goto err;
3069         }
3070
3071         internals = eth_dev->data->dev_private;
3072         eth_dev->data->nb_rx_queues = (uint16_t)1;
3073         eth_dev->data->nb_tx_queues = (uint16_t)1;
3074
3075         /* Allocate memory for storing MAC addresses */
3076         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3077                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3078         if (eth_dev->data->mac_addrs == NULL) {
3079                 RTE_BOND_LOG(ERR,
3080                              "Failed to allocate %u bytes needed to store MAC addresses",
3081                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3082                 goto err;
3083         }
3084
3085         eth_dev->dev_ops = &default_dev_ops;
3086         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3087
3088         rte_spinlock_init(&internals->lock);
3089         rte_spinlock_init(&internals->lsc_lock);
3090
3091         internals->port_id = eth_dev->data->port_id;
3092         internals->mode = BONDING_MODE_INVALID;
3093         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3094         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3095         internals->burst_xmit_hash = burst_xmit_l2_hash;
3096         internals->user_defined_mac = 0;
3097
3098         internals->link_status_polling_enabled = 0;
3099
3100         internals->link_status_polling_interval_ms =
3101                 DEFAULT_POLLING_INTERVAL_10_MS;
3102         internals->link_down_delay_ms = 0;
3103         internals->link_up_delay_ms = 0;
3104
3105         internals->slave_count = 0;
3106         internals->active_slave_count = 0;
3107         internals->rx_offload_capa = 0;
3108         internals->tx_offload_capa = 0;
3109         internals->rx_queue_offload_capa = 0;
3110         internals->tx_queue_offload_capa = 0;
3111         internals->candidate_max_rx_pktlen = 0;
3112         internals->max_rx_pktlen = 0;
3113
3114         /* Initially allow any RSS offload type to be chosen */
3115         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3116
3117         memset(&internals->default_rxconf, 0,
3118                sizeof(internals->default_rxconf));
3119         memset(&internals->default_txconf, 0,
3120                sizeof(internals->default_txconf));
3121
3122         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3123         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3124
3125         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3126         memset(internals->slaves, 0, sizeof(internals->slaves));
3127
3128         TAILQ_INIT(&internals->flow_list);
3129         internals->flow_isolated_valid = 0;
3130
3131         /* Set mode 4 default configuration */
3132         bond_mode_8023ad_setup(eth_dev, NULL);
3133         if (bond_ethdev_mode_set(eth_dev, mode)) {
3134                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3135                                  eth_dev->data->port_id, mode);
3136                 goto err;
3137         }
3138
3139         vlan_filter_bmp_size =
3140                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3141         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3142                                                    RTE_CACHE_LINE_SIZE);
3143         if (internals->vlan_filter_bmpmem == NULL) {
3144                 RTE_BOND_LOG(ERR,
3145                              "Failed to allocate vlan bitmap for bonded device %u",
3146                              eth_dev->data->port_id);
3147                 goto err;
3148         }
3149
3150         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3151                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3152         if (internals->vlan_filter_bmp == NULL) {
3153                 RTE_BOND_LOG(ERR,
3154                              "Failed to init vlan bitmap for bonded device %u",
3155                              eth_dev->data->port_id);
3156                 rte_free(internals->vlan_filter_bmpmem);
3157                 goto err;
3158         }
3159
3160         return eth_dev->data->port_id;
3161
3162 err:
3163         rte_free(internals);
3164         if (eth_dev != NULL)
3165                 eth_dev->data->dev_private = NULL;
3166         rte_eth_dev_release_port(eth_dev);
3167         return -1;
3168 }
3169
3170 static int
3171 bond_probe(struct rte_vdev_device *dev)
3172 {
3173         const char *name;
3174         struct bond_dev_private *internals;
3175         struct rte_kvargs *kvlist;
3176         uint8_t bonding_mode, socket_id;
3177         int  arg_count, port_id;
3178         uint8_t agg_mode;
3179         struct rte_eth_dev *eth_dev;
3180
3181         if (!dev)
3182                 return -EINVAL;
3183
3184         name = rte_vdev_device_name(dev);
3185         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3186
3187         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3188                 eth_dev = rte_eth_dev_attach_secondary(name);
3189                 if (!eth_dev) {
3190                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3191                         return -1;
3192                 }
3193                 /* TODO: request info from primary to set up Rx and Tx */
3194                 eth_dev->dev_ops = &default_dev_ops;
3195                 eth_dev->device = &dev->device;
3196                 rte_eth_dev_probing_finish(eth_dev);
3197                 return 0;
3198         }
3199
3200         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3201                 pmd_bond_init_valid_arguments);
3202         if (kvlist == NULL)
3203                 return -1;
3204
3205         /* Parse link bonding mode */
3206         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3207                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3208                                 &bond_ethdev_parse_slave_mode_kvarg,
3209                                 &bonding_mode) != 0) {
3210                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3211                                         name);
3212                         goto parse_error;
3213                 }
3214         } else {
3215                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3216                                 "device %s", name);
3217                 goto parse_error;
3218         }
3219
3220         /* Parse socket id to create bonding device on */
3221         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3222         if (arg_count == 1) {
3223                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3224                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3225                                 != 0) {
3226                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3227                                         "bonded device %s", name);
3228                         goto parse_error;
3229                 }
3230         } else if (arg_count > 1) {
3231                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3232                                 "bonded device %s", name);
3233                 goto parse_error;
3234         } else {
3235                 socket_id = rte_socket_id();
3236         }
3237
3238         dev->device.numa_node = socket_id;
3239
3240         /* Create link bonding eth device */
3241         port_id = bond_alloc(dev, bonding_mode);
3242         if (port_id < 0) {
3243                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3244                                 "socket %u.", name, bonding_mode, socket_id);
3245                 goto parse_error;
3246         }
3247         internals = rte_eth_devices[port_id].data->dev_private;
3248         internals->kvlist = kvlist;
3249
3250         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3251                 if (rte_kvargs_process(kvlist,
3252                                 PMD_BOND_AGG_MODE_KVARG,
3253                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3254                                 &agg_mode) != 0) {
3255                         RTE_BOND_LOG(ERR,
3256                                         "Failed to parse agg selection mode for bonded device %s",
3257                                         name);
3258                         goto parse_error;
3259                 }
3260
3261                 if (internals->mode == BONDING_MODE_8023AD)
3262                         internals->mode4.agg_selection = agg_mode;
3263         } else {
3264                 internals->mode4.agg_selection = AGG_STABLE;
3265         }
3266
3267         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3268         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3269                         "socket %u.", name, port_id, bonding_mode, socket_id);
3270         return 0;
3271
3272 parse_error:
3273         rte_kvargs_free(kvlist);
3274
3275         return -1;
3276 }
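
/*
 * Illustrative sketch (not part of this driver): the probe path above is
 * reached either from the EAL --vdev option or via hotplug; the slave
 * device names below are assumptions.
 *
 *     testpmd -l 0-3 -n 4 \
 *         --vdev 'net_bonding0,mode=2,slave=0000:02:00.0,slave=0000:02:00.1'
 *
 * or, at runtime:
 *
 *     rte_eal_hotplug_add("vdev", "net_bonding0",
 *                         "mode=2,slave=0000:02:00.0,slave=0000:02:00.1");
 */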
3277
3278 static int
3279 bond_remove(struct rte_vdev_device *dev)
3280 {
3281         struct rte_eth_dev *eth_dev;
3282         struct bond_dev_private *internals;
3283         const char *name;
3284
3285         if (!dev)
3286                 return -EINVAL;
3287
3288         name = rte_vdev_device_name(dev);
3289         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3290
3291         /* Now free all data allocation - for the eth_dev structure
3292          * and internal (private) data
3293          */
3294
3295         /* find an ethdev entry */
3296         eth_dev = rte_eth_dev_allocated(name);
3297         if (eth_dev == NULL)
3298                 return -ENODEV;
3299
3300         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3301                 return rte_eth_dev_release_port(eth_dev);
3302
3303         RTE_ASSERT(eth_dev->device == &dev->device);
3304
3305         internals = eth_dev->data->dev_private;
3306         if (internals->slave_count != 0)
3307                 return -EBUSY;
3308
3309         if (eth_dev->data->dev_started == 1) {
3310                 bond_ethdev_stop(eth_dev);
3311                 bond_ethdev_close(eth_dev);
3312         }
3313
3314         eth_dev->dev_ops = NULL;
3315         eth_dev->rx_pkt_burst = NULL;
3316         eth_dev->tx_pkt_burst = NULL;
3317
3319         /* Try to release the mempool used in mode 6. If the bonded
3320          * device is not in mode 6, freeing a NULL pointer is not a problem.
3321          */
3322         rte_mempool_free(internals->mode6.mempool);
3323         rte_bitmap_free(internals->vlan_filter_bmp);
3324         rte_free(internals->vlan_filter_bmpmem);
3325
3326         rte_eth_dev_release_port(eth_dev);
3327
3328         return 0;
3329 }
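
/*
 * Illustrative sketch (not part of this driver): tearing the device down at
 * runtime. All slaves must be removed first, otherwise bond_remove() above
 * returns -EBUSY. `bond_port` and `slave_port` are assumptions.
 *
 *     rte_eth_bond_slave_remove(bond_port, slave_port);
 *     rte_eal_hotplug_remove("vdev", "net_bonding0");
 */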
3330
3331 /* This function resolves the slave port ids after all the other pdevs and
3332  * vdevs have been allocated. */
3333 static int
3334 bond_ethdev_configure(struct rte_eth_dev *dev)
3335 {
3336         const char *name = dev->device->name;
3337         struct bond_dev_private *internals = dev->data->dev_private;
3338         struct rte_kvargs *kvlist = internals->kvlist;
3339         int arg_count;
3340         uint16_t port_id = dev - rte_eth_devices;
3341         uint8_t agg_mode;
3342
3343         static const uint8_t default_rss_key[40] = {
3344                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3345                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3346                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3347                 0xBE, 0xAC, 0x01, 0xFA
3348         };
3349
3350         unsigned i, j;
3351
3352         /*
3353          * If RSS is enabled, fill table with default values and
3354          * set the key to the value specified in the port RSS configuration.
3355          * Fall back to default RSS key if the key is not specified
3356          */
3357         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3358                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3359                         internals->rss_key_len =
3360                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3361                         memcpy(internals->rss_key,
3362                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3363                                internals->rss_key_len);
3364                 } else {
3365                         internals->rss_key_len = sizeof(default_rss_key);
3366                         memcpy(internals->rss_key, default_rss_key,
3367                                internals->rss_key_len);
3368                 }
3369
3370                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3371                         internals->reta_conf[i].mask = ~0LL;
3372                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3373                                 internals->reta_conf[i].reta[j] =
3374                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3375                                                 dev->data->nb_rx_queues;
3376                 }
3377         }
3378
3379         /* set the max_rx_pktlen */
3380         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3381
3382         /*
3383          * if no kvlist, it means that this bonded device has been created
3384          * through the bonding API.
3385          */
3386         if (!kvlist)
3387                 return 0;
3388
3389         /* Parse MAC address for bonded device */
3390         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3391         if (arg_count == 1) {
3392                 struct ether_addr bond_mac;
3393
3394                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3395                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3396                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3397                                      name);
3398                         return -1;
3399                 }
3400
3401                 /* Set MAC address */
3402                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3403                         RTE_BOND_LOG(ERR,
3404                                      "Failed to set mac address on bonded device %s",
3405                                      name);
3406                         return -1;
3407                 }
3408         } else if (arg_count > 1) {
3409                 RTE_BOND_LOG(ERR,
3410                              "MAC address can be specified only once for bonded device %s",
3411                              name);
3412                 return -1;
3413         }
3414
3415         /* Parse/set balance mode transmit policy */
3416         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3417         if (arg_count == 1) {
3418                 uint8_t xmit_policy;
3419
3420                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3421                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3422                     0) {
3423                         RTE_BOND_LOG(INFO,
3424                                      "Invalid xmit policy specified for bonded device %s",
3425                                      name);
3426                         return -1;
3427                 }
3428
3429                 /* Set balance mode transmit policy */
3430                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3431                         RTE_BOND_LOG(ERR,
3432                                      "Failed to set balance xmit policy on bonded device %s",
3433                                      name);
3434                         return -1;
3435                 }
3436         } else if (arg_count > 1) {
3437                 RTE_BOND_LOG(ERR,
3438                              "Transmit policy can be specified only once for bonded device %s",
3439                              name);
3440                 return -1;
3441         }
3442
3443         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3444                 if (rte_kvargs_process(kvlist,
3445                                        PMD_BOND_AGG_MODE_KVARG,
3446                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3447                                        &agg_mode) != 0) {
3448                         RTE_BOND_LOG(ERR,
3449                                      "Failed to parse agg selection mode for bonded device %s",
3450                                      name);
3451                 }
3452                 if (internals->mode == BONDING_MODE_8023AD) {
3453                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3454                                         agg_mode);
3455                         if (ret < 0) {
3456                                 RTE_BOND_LOG(ERR,
3457                                         "Invalid args for agg selection set for bonded device %s",
3458                                         name);
3459                                 return -1;
3460                         }
3461                 }
3462         }
3463
3464         /* Parse/add slave ports to bonded device */
3465         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3466                 struct bond_ethdev_slave_ports slave_ports;
3467                 unsigned i;
3468
3469                 memset(&slave_ports, 0, sizeof(slave_ports));
3470
3471                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3472                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3473                         RTE_BOND_LOG(ERR,
3474                                      "Failed to parse slave ports for bonded device %s",
3475                                      name);
3476                         return -1;
3477                 }
3478
3479                 for (i = 0; i < slave_ports.slave_count; i++) {
3480                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3481                                 RTE_BOND_LOG(ERR,
3482                                              "Failed to add port %d as slave to bonded device %s",
3483                                              slave_ports.slaves[i], name);
3484                         }
3485                 }
3486
3487         } else {
3488                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3489                 return -1;
3490         }
3491
3492         /* Parse/set primary slave port id*/
3493         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3494         if (arg_count == 1) {
3495                 uint16_t primary_slave_port_id;
3496
3497                 if (rte_kvargs_process(kvlist,
3498                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3499                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3500                                        &primary_slave_port_id) < 0) {
3501                         RTE_BOND_LOG(INFO,
3502                                      "Invalid primary slave port id specified for bonded device %s",
3503                                      name);
3504                         return -1;
3505                 }
3506
3507                 /* Set primary slave port id */
3508                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3509                     != 0) {
3510                         RTE_BOND_LOG(ERR,
3511                                      "Failed to set primary slave port %d on bonded device %s",
3512                                      primary_slave_port_id, name);
3513                         return -1;
3514                 }
3515         } else if (arg_count > 1) {
3516                 RTE_BOND_LOG(INFO,
3517                              "Primary slave can be specified only once for bonded device %s",
3518                              name);
3519                 return -1;
3520         }
3521
3522         /* Parse link status monitor polling interval */
3523         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3524         if (arg_count == 1) {
3525                 uint32_t lsc_poll_interval_ms;
3526
3527                 if (rte_kvargs_process(kvlist,
3528                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3529                                        &bond_ethdev_parse_time_ms_kvarg,
3530                                        &lsc_poll_interval_ms) < 0) {
3531                         RTE_BOND_LOG(INFO,
3532                                      "Invalid lsc polling interval value specified for bonded"
3533                                      " device %s", name);
3534                         return -1;
3535                 }
3536
3537                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3538                     != 0) {
3539                         RTE_BOND_LOG(ERR,
3540                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3541                                      lsc_poll_interval_ms, name);
3542                         return -1;
3543                 }
3544         } else if (arg_count > 1) {
3545                 RTE_BOND_LOG(INFO,
3546                              "LSC polling interval can be specified only once for bonded"
3547                              " device %s", name);
3548                 return -1;
3549         }
3550
3551         /* Parse link up interrupt propagation delay */
3552         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3553         if (arg_count == 1) {
3554                 uint32_t link_up_delay_ms;
3555
3556                 if (rte_kvargs_process(kvlist,
3557                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3558                                        &bond_ethdev_parse_time_ms_kvarg,
3559                                        &link_up_delay_ms) < 0) {
3560                         RTE_BOND_LOG(INFO,
3561                                      "Invalid link up propagation delay value specified for"
3562                                      " bonded device %s", name);
3563                         return -1;
3564                 }
3565
3566                 /* Set link up propagation delay */
3567                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3568                     != 0) {
3569                         RTE_BOND_LOG(ERR,
3570                                      "Failed to set link up propagation delay (%u ms) on bonded"
3571                                      " device %s", link_up_delay_ms, name);
3572                         return -1;
3573                 }
3574         } else if (arg_count > 1) {
3575                 RTE_BOND_LOG(INFO,
3576                              "Link up propagation delay can be specified only once for"
3577                              " bonded device %s", name);
3578                 return -1;
3579         }
3580
3581         /* Parse link down interrupt propagation delay */
3582         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3583         if (arg_count == 1) {
3584                 uint32_t link_down_delay_ms;
3585
3586                 if (rte_kvargs_process(kvlist,
3587                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3588                                        &bond_ethdev_parse_time_ms_kvarg,
3589                                        &link_down_delay_ms) < 0) {
3590                         RTE_BOND_LOG(INFO,
3591                                      "Invalid link down propagation delay value specified for"
3592                                      " bonded device %s", name);
3593                         return -1;
3594                 }
3595
3596                 /* Set link down propagation delay */
3597                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3598                     != 0) {
3599                         RTE_BOND_LOG(ERR,
3600                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3601                                      link_down_delay_ms, name);
3602                         return -1;
3603                 }
3604         } else if (arg_count > 1) {
3605                 RTE_BOND_LOG(INFO,
3606                              "Link down propagation delay can be specified only once for bonded device %s",
3607                              name);
3608                 return -1;
3609         }
3610
3611         return 0;
3612 }
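
/*
 * Illustrative sketch (not part of this driver): the kvlist handling above
 * only runs for devices created through --vdev. A bonded device created via
 * the API skips it, and the application sets the equivalent properties
 * directly; slave port ids 0 and 1 are assumptions.
 *
 *     int bond_port = rte_eth_bond_create("net_bonding0",
 *                                         BONDING_MODE_ACTIVE_BACKUP, 0);
 *
 *     rte_eth_bond_slave_add(bond_port, 0);
 *     rte_eth_bond_slave_add(bond_port, 1);
 *     rte_eth_bond_primary_set(bond_port, 0);
 *     rte_eth_bond_link_monitoring_set(bond_port, 100);  // poll every 100 ms
 */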
3613
3614 struct rte_vdev_driver pmd_bond_drv = {
3615         .probe = bond_probe,
3616         .remove = bond_remove,
3617 };
3618
3619 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3620 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3621
3622 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3623         "slave=<ifc> "
3624         "primary=<ifc> "
3625         "mode=[0-6] "
3626         "xmit_policy=[l2 | l23 | l34] "
3627         "agg_mode=[count | stable | bandwidth] "
3628         "socket_id=<int> "
3629         "mac=<mac addr> "
3630         "lsc_poll_period_ms=<int> "
3631         "up_delay=<int> "
3632         "down_delay=<int>");
3633
3634 int bond_logtype;
3635
3636 RTE_INIT(bond_init_log)
3637 {
3638         bond_logtype = rte_log_register("pmd.net.bond");
3639         if (bond_logtype >= 0)
3640                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3641 }