net/bonding: fix buffer length when printing strings
[dpdk.git] / drivers / net / bonding / rte_eth_bond_pmd.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <netinet/in.h>
6
7 #include <rte_mbuf.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_ethdev_vdev.h>
11 #include <rte_tcp.h>
12 #include <rte_udp.h>
13 #include <rte_ip.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
20 #include <rte_string_fns.h>
21
22 #include "rte_eth_bond.h"
23 #include "rte_eth_bond_private.h"
24 #include "rte_eth_bond_8023ad_private.h"
25
26 #define REORDER_PERIOD_MS 10
27 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
28 #define BOND_MAX_MAC_ADDRS 16
29
30 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
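/* Folds the L4 source and destination ports into one value; e.g. ports
 * 8080 (0x1f90) and 80 (0x0050) give 0x1f90 ^ 0x0050 = 0x1fc0. XOR is
 * order-independent, so both directions of a connection hash alike. */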
31
32 /* Table for statistics in mode 5 TLB */
33 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
34
35 static inline size_t
36 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
37 {
38         size_t vlan_offset = 0;
39
40         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
41                 rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
42                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
43
44                 vlan_offset = sizeof(struct vlan_hdr);
45                 *proto = vlan_hdr->eth_proto;
46
47                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
48                         vlan_hdr = vlan_hdr + 1;
49                         *proto = vlan_hdr->eth_proto;
50                         vlan_offset += sizeof(struct vlan_hdr);
51                 }
52         }
53         return vlan_offset;
54 }
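/* Example usage (hypothetical header pointer):
 *
 *     uint16_t proto = eth_hdr->ether_type;
 *     size_t off = get_vlan_offset(eth_hdr, &proto);
 *
 * off is 0 for untagged frames, 4 for one 802.1Q tag and 8 for QinQ;
 * proto is left holding the inner (payload) EtherType in either case. */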
55
56 static uint16_t
57 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
58 {
59         struct bond_dev_private *internals;
60
61         uint16_t num_rx_total = 0;
62         uint16_t slave_count;
63         uint16_t active_slave;
64         int i;
65
66         /* Cast to structure containing the bonded device's port id and queue id */
67         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
68         internals = bd_rx_q->dev_private;
69         slave_count = internals->active_slave_count;
70         active_slave = internals->active_slave;
71
72         for (i = 0; i < slave_count && nb_pkts; i++) {
73                 uint16_t num_rx_slave;
74
75                 /* The offset into *bufs advances as packets are received
76                  * from the other slaves */
77                 num_rx_slave =
78                         rte_eth_rx_burst(internals->active_slaves[active_slave],
79                                          bd_rx_q->queue_id,
80                                          bufs + num_rx_total, nb_pkts);
81                 num_rx_total += num_rx_slave;
82                 nb_pkts -= num_rx_slave;
83                 if (++active_slave == slave_count)
84                         active_slave = 0;
85         }
86
87         if (++internals->active_slave >= slave_count)
88                 internals->active_slave = 0;
89         return num_rx_total;
90 }
91
92 static uint16_t
93 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
94                 uint16_t nb_pkts)
95 {
96         struct bond_dev_private *internals;
97
98         /* Cast to structure containing the bonded device's port id and queue id */
99         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
100
101         internals = bd_rx_q->dev_private;
102
103         return rte_eth_rx_burst(internals->current_primary_port,
104                         bd_rx_q->queue_id, bufs, nb_pkts);
105 }
106
107 static inline uint8_t
108 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
109 {
110         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
111
112         return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
113                 (ethertype == ether_type_slow_be &&
114                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
115 }
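/* True only for untagged frames whose EtherType is ETHER_TYPE_SLOW (0x8809)
 * and whose subtype is a LACPDU or marker PDU; the mode-4 RX path uses this
 * to divert 802.3ad control traffic away from the application. */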
116
117 /*****************************************************************************
118  * Flow director's setup for mode 4 optimization
119  */
120
121 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
122         .dst.addr_bytes = { 0 },
123         .src.addr_bytes = { 0 },
124         .type = RTE_BE16(ETHER_TYPE_SLOW),
125 };
126
127 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
128         .dst.addr_bytes = { 0 },
129         .src.addr_bytes = { 0 },
130         .type = 0xFFFF,
131 };
132
133 static struct rte_flow_item flow_item_8023ad[] = {
134         {
135                 .type = RTE_FLOW_ITEM_TYPE_ETH,
136                 .spec = &flow_item_eth_type_8023ad,
137                 .last = NULL,
138                 .mask = &flow_item_eth_mask_type_8023ad,
139         },
140         {
141                 .type = RTE_FLOW_ITEM_TYPE_END,
142                 .spec = NULL,
143                 .last = NULL,
144                 .mask = NULL,
145         }
146 };
147
148 const struct rte_flow_attr flow_attr_8023ad = {
149         .group = 0,
150         .priority = 0,
151         .ingress = 1,
152         .egress = 0,
153         .reserved = 0,
154 };
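/* Together, spec + mask + attr describe an ingress rule matching frames
 * whose EtherType equals ETHER_TYPE_SLOW exactly (mask 0xffff); a QUEUE
 * action added at flow-create time steers those LACP frames to the
 * dedicated RX queue on each slave. */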
155
156 int
157 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
158                 uint16_t slave_port) {
159         struct rte_eth_dev_info slave_info;
160         struct rte_flow_error error;
161         struct bond_dev_private *internals = (struct bond_dev_private *)
162                         (bond_dev->data->dev_private);
163
164         const struct rte_flow_action_queue lacp_queue_conf = {
165                 .index = 0,
166         };
167
168         const struct rte_flow_action actions[] = {
169                 {
170                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
171                         .conf = &lacp_queue_conf
172                 },
173                 {
174                         .type = RTE_FLOW_ACTION_TYPE_END,
175                 }
176         };
177
178         int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
179                         flow_item_8023ad, actions, &error);
180         if (ret < 0) {
181                 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
182                                 __func__, error.message, slave_port,
183                                 internals->mode4.dedicated_queues.rx_qid);
184                 return -1;
185         }
186
187         rte_eth_dev_info_get(slave_port, &slave_info);
188         if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
189                         slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
190                 RTE_BOND_LOG(ERR,
191                         "%s: Slave %d capabilities don't allow allocating additional queues",
192                         __func__, slave_port);
193                 return -1;
194         }
195
196         return 0;
197 }
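/* The validation above runs before the dedicated queue exists, so the rule
 * is checked with queue index 0 as a placeholder; the real rx_qid is used
 * when bond_ethdev_8023ad_flow_set() installs the rule. The queue-count
 * check confirms each slave can supply at least as many queues as the
 * bonded device is configured with. */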
198
199 int
200 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
201         struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
202         struct bond_dev_private *internals = (struct bond_dev_private *)
203                         (bond_dev->data->dev_private);
204         struct rte_eth_dev_info bond_info;
205         uint16_t idx;
206
207         /* Verify that all slaves in the bonding device support flow director */
208         if (internals->slave_count > 0) {
209                 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
210
211                 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
212                 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
213
214                 for (idx = 0; idx < internals->slave_count; idx++) {
215                         if (bond_ethdev_8023ad_flow_verify(bond_dev,
216                                         internals->slaves[idx].port_id) != 0)
217                                 return -1;
218                 }
219         }
220
221         return 0;
222 }
223
224 int
225 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
226
227         struct rte_flow_error error;
228         struct bond_dev_private *internals = (struct bond_dev_private *)
229                         (bond_dev->data->dev_private);
230
231         struct rte_flow_action_queue lacp_queue_conf = {
232                 .index = internals->mode4.dedicated_queues.rx_qid,
233         };
234
235         const struct rte_flow_action actions[] = {
236                 {
237                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
238                         .conf = &lacp_queue_conf
239                 },
240                 {
241                         .type = RTE_FLOW_ACTION_TYPE_END,
242                 }
243         };
244
245         internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
246                         &flow_attr_8023ad, flow_item_8023ad, actions, &error);
247         if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
248                 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
249                                 "(slave_port=%d queue_id=%d)",
250                                 error.message, slave_port,
251                                 internals->mode4.dedicated_queues.rx_qid);
252                 return -1;
253         }
254
255         return 0;
256 }
257
258 static uint16_t
259 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
260                 uint16_t nb_pkts)
261 {
262         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
263         struct bond_dev_private *internals = bd_rx_q->dev_private;
264         uint16_t num_rx_total = 0;      /* Total number of received packets */
265         uint16_t slaves[RTE_MAX_ETHPORTS];
266         uint16_t slave_count;
267         uint16_t active_slave;
268         uint16_t i;
269
270         /* Copy slave list to protect against slave up/down changes during rx
271          * bursting */
272         slave_count = internals->active_slave_count;
273         active_slave = internals->active_slave;
274         memcpy(slaves, internals->active_slaves,
275                         sizeof(internals->active_slaves[0]) * slave_count);
276
277         for (i = 0; i < slave_count && nb_pkts; i++) {
278                 uint16_t num_rx_slave;
279
280                 /* Read packets from this slave */
281                 num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
282                                                 bd_rx_q->queue_id,
283                                                 bufs + num_rx_total, nb_pkts);
284                 num_rx_total += num_rx_slave;
285                 nb_pkts -= num_rx_slave;
286
287                 if (++active_slave == slave_count)
288                         active_slave = 0;
289         }
290
291         if (++internals->active_slave >= slave_count)
292                 internals->active_slave = 0;
293
294         return num_rx_total;
295 }
296
297 static uint16_t
298 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
299                 uint16_t nb_bufs)
300 {
301         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
302         struct bond_dev_private *internals = bd_tx_q->dev_private;
303
304         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
305         uint16_t slave_count;
306
307         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
308         uint16_t dist_slave_count;
309
310         /* 2-D array into which mbufs are sorted for transmission on each slave */
311         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
312         /* Number of mbufs for transmission on each slave */
313         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
314         /* Mapping array generated by hash function to map mbufs to slaves */
315         uint16_t bufs_slave_port_idxs[nb_bufs];
316
317         uint16_t slave_tx_count;
318         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
319
320         uint16_t i;
321
322         if (unlikely(nb_bufs == 0))
323                 return 0;
324
325         /* Copy slave list to protect against slave up/down changes during tx
326          * bursting */
327         slave_count = internals->active_slave_count;
328         if (unlikely(slave_count < 1))
329                 return 0;
330
331         memcpy(slave_port_ids, internals->active_slaves,
332                         sizeof(slave_port_ids[0]) * slave_count);
333
334
335         dist_slave_count = 0;
336         for (i = 0; i < slave_count; i++) {
337                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
338
339                 if (ACTOR_STATE(port, DISTRIBUTING))
340                         dist_slave_port_ids[dist_slave_count++] =
341                                         slave_port_ids[i];
342         }
343
344         if (unlikely(dist_slave_count < 1))
345                 return 0;
346
347         /*
348          * Populate each slave's mbuf array with the packets to be sent on it,
349          * selecting the output slave with a hash based on the xmit policy
350          */
351         internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
352                         bufs_slave_port_idxs);
353
354         for (i = 0; i < nb_bufs; i++) {
355                 /* Populate slave mbuf arrays with mbufs for that slave. */
356                 uint16_t slave_idx = bufs_slave_port_idxs[i];
357
358                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
359         }
360
361
362         /* Send packet burst on each slave device */
363         for (i = 0; i < dist_slave_count; i++) {
364                 if (slave_nb_bufs[i] == 0)
365                         continue;
366
367                 slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
368                                 bd_tx_q->queue_id, slave_bufs[i],
369                                 slave_nb_bufs[i]);
370
371                 total_tx_count += slave_tx_count;
372
373                 /* If tx burst fails move packets to end of bufs */
374                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
375                         int slave_tx_fail_count = slave_nb_bufs[i] -
376                                         slave_tx_count;
377                         total_tx_fail_count += slave_tx_fail_count;
378                         memcpy(&bufs[nb_bufs - total_tx_fail_count],
379                                &slave_bufs[i][slave_tx_count],
380                                slave_tx_fail_count * sizeof(bufs[0]));
381                 }
382         }
383
384         return total_tx_count;
385 }
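/* As with rte_eth_tx_burst(), any mbufs no slave accepted are left at the
 * tail of bufs[], i.e. bufs[total_tx_count .. nb_bufs - 1], so the caller
 * can retry or free them. */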
386
387
388 static uint16_t
389 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
390                 uint16_t nb_pkts)
391 {
392         /* Cast to structure containing the bonded device's port id and queue id */
393         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
394         struct bond_dev_private *internals = bd_rx_q->dev_private;
395         struct rte_eth_dev *bonded_eth_dev =
396                                         &rte_eth_devices[internals->port_id];
397         struct ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
398         struct ether_hdr *hdr;
399
400         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
401         uint16_t num_rx_total = 0;      /* Total number of received packets */
402         uint16_t slaves[RTE_MAX_ETHPORTS];
403         uint16_t slave_count, idx;
404
405         uint8_t collecting;  /* current slave collecting status */
406         const uint8_t promisc = internals->promiscuous_en;
407         uint8_t subtype;
408         uint16_t i;
409         uint16_t j;
410         uint16_t k;
411
412         /* Copy slave list to protect against slave up/down changes during rx
413          * bursting */
414         slave_count = internals->active_slave_count;
415         memcpy(slaves, internals->active_slaves,
416                         sizeof(internals->active_slaves[0]) * slave_count);
417
418         idx = internals->active_slave;
419         if (idx >= slave_count) {
420                 internals->active_slave = 0;
421                 idx = 0;
422         }
423         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
424                 j = num_rx_total;
425                 collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
426                                          COLLECTING);
427
428                 /* Read packets from this slave */
429                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
430                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
431
432                 for (k = j; k < 2 && k < num_rx_total; k++)
433                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
434
435                 /* Handle slow protocol packets. */
436                 while (j < num_rx_total) {
437
438                         /* Known to be more than pure L2, so it cannot be a slow frame; skip it */
439                         if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
440                                 j++;
441                                 continue;
442                         }
443
444                         if (j + 3 < num_rx_total)
445                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
446
447                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
448                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
449
450                         /* Remove the packet from the array if it is a slow packet, if the
451                          * slave is not in collecting state, or if the bonding interface is
452                          * not in promiscuous mode and the destination MAC does not match. */
453                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
454                                 !collecting ||
455                                 (!promisc &&
456                                  !is_multicast_ether_addr(&hdr->d_addr) &&
457                                  !is_same_ether_addr(bond_mac,
458                                                      &hdr->d_addr)))) {
459
460                                 if (hdr->ether_type == ether_type_slow_be) {
461                                         bond_mode_8023ad_handle_slow_pkt(
462                                             internals, slaves[idx], bufs[j]);
463                                 } else
464                                         rte_pktmbuf_free(bufs[j]);
465
466                                 /* Packet is managed by mode 4 or dropped, shift the array */
467                                 num_rx_total--;
468                                 if (j < num_rx_total) {
469                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
470                                                 (num_rx_total - j));
471                                 }
472                         } else
473                                 j++;
474                 }
475                 if (unlikely(++idx == slave_count))
476                         idx = 0;
477         }
478
479         if (++internals->active_slave >= slave_count)
480                 internals->active_slave = 0;
481
482         return num_rx_total;
483 }
484
485 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
486 uint32_t burstnumberRX;
487 uint32_t burstnumberTX;
488
489 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
490
491 static void
492 arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
493 {
494         switch (arp_op) {
495         case ARP_OP_REQUEST:
496                 snprintf(buf, buf_len, "%s", "ARP Request");
497                 return;
498         case ARP_OP_REPLY:
499                 snprintf(buf, buf_len, "%s", "ARP Reply");
500                 return;
501         case ARP_OP_REVREQUEST:
502                 snprintf(buf, buf_len, "%s", "Reverse ARP Request");
503                 return;
504         case ARP_OP_REVREPLY:
505                 snprintf(buf, buf_len, "%s", "Reverse ARP Reply");
506                 return;
507         case ARP_OP_INVREQUEST:
508                 snprintf(buf, buf_len, "%s", "Peer Identify Request");
509                 return;
510         case ARP_OP_INVREPLY:
511                 snprintf(buf, buf_len, "%s", "Peer Identify Reply");
512                 return;
513         default:
514                 break;
515         }
516         snprintf(buf, buf_len, "%s", "Unknown");
517         return;
518 }
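/* Typical call, with the length of the destination buffer passed through
 * so every snprintf() above stays bounded (the point of this fix):
 *
 *     char op[24];
 *     arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), op, sizeof(op));
 */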
519 #endif
520 #define MaxIPv4String   16
521 static void
522 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
523 {
524         uint32_t ipv4_addr;
525
526         ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
527         snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
528                 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
529                 ipv4_addr & 0xFF);
530 }
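/* For example, the big-endian address 0xc0a80101 renders as "192.168.1.1".
 * MaxIPv4String is 16 because the longest rendering, "255.255.255.255",
 * needs 15 characters plus the terminating NUL. */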
531
532 #define MAX_CLIENTS_NUMBER      128
533 uint8_t active_clients;
534 struct client_stats_t {
535         uint16_t port;
536         uint32_t ipv4_addr;
537         uint32_t ipv4_rx_packets;
538         uint32_t ipv4_tx_packets;
539 };
540 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
541
542 static void
543 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
544 {
545         int i = 0;
546
547         for (; i < MAX_CLIENTS_NUMBER; i++)     {
548                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
549                         /* Existing client: just update its RX or TX packet count */
550                         if (TXorRXindicator == &burstnumberRX)
551                                 client_stats[i].ipv4_rx_packets++;
552                         else
553                                 client_stats[i].ipv4_tx_packets++;
554                         return;
555                 }
556         }
557         /* We have a new client. Insert it into the table and update its stats */
558         if (TXorRXindicator == &burstnumberRX)
559                 client_stats[active_clients].ipv4_rx_packets++;
560         else
561                 client_stats[active_clients].ipv4_tx_packets++;
562         client_stats[active_clients].ipv4_addr = addr;
563         client_stats[active_clients].port = port;
564         active_clients++;
565
566 }
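/* Note: the insert path does not bound-check active_clients against
 * MAX_CLIENTS_NUMBER, so more than 128 distinct clients would write past
 * client_stats[]; this table is only used by the ALB debug statistics. */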
567
568 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
569 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
570         rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
571                 "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
572                 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
573                 info,                                                   \
574                 port,                                                   \
575                 eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
576                 eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
577                 eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
578                 src_ip,                                                 \
579                 eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
580                 eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
581                 eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
582                 dst_ip,                                                 \
583                 arp_op, ++burstnumber)
584 #endif
585
586 static void
587 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
588                 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
589 {
590         struct ipv4_hdr *ipv4_h;
591 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
592         struct arp_hdr *arp_h;
593         char dst_ip[16];
594         char ArpOp[24];
595         char buf[16];
596 #endif
597         char src_ip[16];
598
599         uint16_t ether_type = eth_h->ether_type;
600         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
601
602 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
603         strlcpy(buf, info, sizeof(buf));
604 #endif
605
606         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
607                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
608                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
609 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
610                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
611                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
612 #endif
613                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
614         }
615 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
616         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
617                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
618                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
619                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
620                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op),
621                                 ArpOp, sizeof(ArpOp));
622                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
623         }
624 #endif
625 }
626 #endif
627
628 static uint16_t
629 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
630 {
631         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
632         struct bond_dev_private *internals = bd_tx_q->dev_private;
633         struct ether_hdr *eth_h;
634         uint16_t ether_type, offset;
635         uint16_t nb_recv_pkts;
636         int i;
637
638         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
639
640         for (i = 0; i < nb_recv_pkts; i++) {
641                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
642                 ether_type = eth_h->ether_type;
643                 offset = get_vlan_offset(eth_h, &ether_type);
644
645                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
646 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
647                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
648 #endif
649                         bond_mode_alb_arp_recv(eth_h, offset, internals);
650                 }
651 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
652                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
653                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
654 #endif
655         }
656
657         return nb_recv_pkts;
658 }
659
660 static uint16_t
661 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
662                 uint16_t nb_pkts)
663 {
664         struct bond_dev_private *internals;
665         struct bond_tx_queue *bd_tx_q;
666
667         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
668         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
669
670         uint16_t num_of_slaves;
671         uint16_t slaves[RTE_MAX_ETHPORTS];
672
673         uint16_t num_tx_total = 0, num_tx_slave;
674
675         static int slave_idx = 0;
676         int i, cslave_idx = 0, tx_fail_total = 0;
677
678         bd_tx_q = (struct bond_tx_queue *)queue;
679         internals = bd_tx_q->dev_private;
680
681         /* Copy slave list to protect against slave up/down changes during tx
682          * bursting */
683         num_of_slaves = internals->active_slave_count;
684         memcpy(slaves, internals->active_slaves,
685                         sizeof(internals->active_slaves[0]) * num_of_slaves);
686
687         if (num_of_slaves < 1)
688                 return num_tx_total;
689
690         /* Populate each slave's mbuf array with the packets to be sent on it */
691         for (i = 0; i < nb_pkts; i++) {
692                 cslave_idx = (slave_idx + i) % num_of_slaves;
693                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
694         }
695
696         /* increment current slave index so the next call to tx burst starts on the
697          * next slave */
698         slave_idx = ++cslave_idx;
699
700         /* Send packet burst on each slave device */
701         for (i = 0; i < num_of_slaves; i++) {
702                 if (slave_nb_pkts[i] > 0) {
703                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
704                                         slave_bufs[i], slave_nb_pkts[i]);
705
706                         /* if tx burst fails move packets to end of bufs */
707                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
708                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
709
710                                 tx_fail_total += tx_fail_slave;
711
712                                 memcpy(&bufs[nb_pkts - tx_fail_total],
713                                        &slave_bufs[i][num_tx_slave],
714                                        tx_fail_slave * sizeof(bufs[0]));
715                         }
716                         num_tx_total += num_tx_slave;
717                 }
718         }
719
720         return num_tx_total;
721 }
722
723 static uint16_t
724 bond_ethdev_tx_burst_active_backup(void *queue,
725                 struct rte_mbuf **bufs, uint16_t nb_pkts)
726 {
727         struct bond_dev_private *internals;
728         struct bond_tx_queue *bd_tx_q;
729
730         bd_tx_q = (struct bond_tx_queue *)queue;
731         internals = bd_tx_q->dev_private;
732
733         if (internals->active_slave_count < 1)
734                 return 0;
735
736         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
737                         bufs, nb_pkts);
738 }
739
740 static inline uint16_t
741 ether_hash(struct ether_hdr *eth_hdr)
742 {
743         unaligned_uint16_t *word_src_addr =
744                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
745         unaligned_uint16_t *word_dst_addr =
746                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
747
748         return (word_src_addr[0] ^ word_dst_addr[0]) ^
749                         (word_src_addr[1] ^ word_dst_addr[1]) ^
750                         (word_src_addr[2] ^ word_dst_addr[2]);
751 }
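/* XORs the six MAC-address bytes of src and dst as three 16-bit words; on a
 * little-endian host, e.g. src 00:11:22:33:44:55 and dst 66:77:88:99:aa:bb
 * (hypothetical addresses) give 0x6666 ^ 0xaaaa ^ 0xeeee = 0x2222. The
 * symmetric XOR hashes both directions of a flow identically. */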
752
753 static inline uint32_t
754 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
755 {
756         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
757 }
758
759 static inline uint32_t
760 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
761 {
762         unaligned_uint32_t *word_src_addr =
763                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
764         unaligned_uint32_t *word_dst_addr =
765                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
766
767         return (word_src_addr[0] ^ word_dst_addr[0]) ^
768                         (word_src_addr[1] ^ word_dst_addr[1]) ^
769                         (word_src_addr[2] ^ word_dst_addr[2]) ^
770                         (word_src_addr[3] ^ word_dst_addr[3]);
771 }
772
773
774 void
775 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
776                 uint16_t slave_count, uint16_t *slaves)
777 {
778         struct ether_hdr *eth_hdr;
779         uint32_t hash;
780         int i;
781
782         for (i = 0; i < nb_pkts; i++) {
783                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
784
785                 hash = ether_hash(eth_hdr);
786
787                 slaves[i] = (hash ^= hash >> 8) % slave_count;
788         }
789 }
790
791 void
792 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
793                 uint16_t slave_count, uint16_t *slaves)
794 {
795         uint16_t i;
796         struct ether_hdr *eth_hdr;
797         uint16_t proto;
798         size_t vlan_offset;
799         uint32_t hash, l3hash;
800
801         for (i = 0; i < nb_pkts; i++) {
802                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
803                 l3hash = 0;
804
805                 proto = eth_hdr->ether_type;
806                 hash = ether_hash(eth_hdr);
807
808                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
809
810                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
811                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
812                                         ((char *)(eth_hdr + 1) + vlan_offset);
813                         l3hash = ipv4_hash(ipv4_hdr);
814
815                 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
816                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
817                                         ((char *)(eth_hdr + 1) + vlan_offset);
818                         l3hash = ipv6_hash(ipv6_hdr);
819                 }
820
821                 hash = hash ^ l3hash;
822                 hash ^= hash >> 16;
823                 hash ^= hash >> 8;
824
825                 slaves[i] = hash % slave_count;
826         }
827 }
828
829 void
830 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
831                 uint16_t slave_count, uint16_t *slaves)
832 {
833         struct ether_hdr *eth_hdr;
834         uint16_t proto;
835         size_t vlan_offset;
836         int i;
837
838         struct udp_hdr *udp_hdr;
839         struct tcp_hdr *tcp_hdr;
840         uint32_t hash, l3hash, l4hash;
841
842         for (i = 0; i < nb_pkts; i++) {
843                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
844                 proto = eth_hdr->ether_type;
845                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
846                 l3hash = 0;
847                 l4hash = 0;
848
849                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
850                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
851                                         ((char *)(eth_hdr + 1) + vlan_offset);
852                         size_t ip_hdr_offset;
853
854                         l3hash = ipv4_hash(ipv4_hdr);
855
856                         /* there is no L4 header in fragmented packet */
857                         if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
858                                                                 == 0)) {
859                                 ip_hdr_offset = (ipv4_hdr->version_ihl
860                                         & IPV4_HDR_IHL_MASK) *
861                                         IPV4_IHL_MULTIPLIER;
862
863                                 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
864                                         tcp_hdr = (struct tcp_hdr *)
865                                                 ((char *)ipv4_hdr +
866                                                         ip_hdr_offset);
867                                         l4hash = HASH_L4_PORTS(tcp_hdr);
868                                 } else if (ipv4_hdr->next_proto_id ==
869                                                                 IPPROTO_UDP) {
870                                         udp_hdr = (struct udp_hdr *)
871                                                 ((char *)ipv4_hdr +
872                                                         ip_hdr_offset);
873                                         l4hash = HASH_L4_PORTS(udp_hdr);
874                                 }
875                         }
876                 } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
877                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
878                                         ((char *)(eth_hdr + 1) + vlan_offset);
879                         l3hash = ipv6_hash(ipv6_hdr);
880
881                         if (ipv6_hdr->proto == IPPROTO_TCP) {
882                                 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
883                                 l4hash = HASH_L4_PORTS(tcp_hdr);
884                         } else if (ipv6_hdr->proto == IPPROTO_UDP) {
885                                 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
886                                 l4hash = HASH_L4_PORTS(udp_hdr);
887                         }
888                 }
889
890                 hash = l3hash ^ l4hash;
891                 hash ^= hash >> 16;
892                 hash ^= hash >> 8;
893
894                 slaves[i] = hash % slave_count;
895         }
896 }
897
898 struct bwg_slave {
899         uint64_t bwg_left_int;
900         uint64_t bwg_left_remainder;
901         uint16_t slave;
902 };
903
904 void
905 bond_tlb_activate_slave(struct bond_dev_private *internals) {
906         int i;
907
908         for (i = 0; i < internals->active_slave_count; i++) {
909                 tlb_last_obytets[internals->active_slaves[i]] = 0;
910         }
911 }
912
913 static int
914 bandwidth_cmp(const void *a, const void *b)
915 {
916         const struct bwg_slave *bwg_a = a;
917         const struct bwg_slave *bwg_b = b;
918         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
919         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
920                         (int64_t)bwg_a->bwg_left_remainder;
921         if (diff > 0)
922                 return 1;
923         else if (diff < 0)
924                 return -1;
925         else if (diff2 > 0)
926                 return 1;
927         else if (diff2 < 0)
928                 return -1;
929         else
930                 return 0;
931 }
932
933 static void
934 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
935                 struct bwg_slave *bwg_slave)
936 {
937         struct rte_eth_link link_status;
938
939         rte_eth_link_get_nowait(port_id, &link_status);
940         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
941         if (link_bwg == 0)
942                 return;
943         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
944         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
945         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
946 }
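/* link_bwg scales the link speed (Mbps) to bytes/s and then by the elapsed
 * reorder periods; load is scaled by 1000 to match, so the division in
 * effect computes (window capacity - bytes sent) / capacity, split into an
 * integer part and a remainder so bandwidth_cmp() can rank slaves by spare
 * bandwidth without floating point. */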
947
948 static void
949 bond_ethdev_update_tlb_slave_cb(void *arg)
950 {
951         struct bond_dev_private *internals = arg;
952         struct rte_eth_stats slave_stats;
953         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
954         uint16_t slave_count;
955         uint64_t tx_bytes;
956
957         uint8_t update_stats = 0;
958         uint16_t slave_id;
959         uint16_t i;
960
961         internals->slave_update_idx++;
962
963
964         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
965                 update_stats = 1;
966
967         for (i = 0; i < internals->active_slave_count; i++) {
968                 slave_id = internals->active_slaves[i];
969                 rte_eth_stats_get(slave_id, &slave_stats);
970                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
971                 bandwidth_left(slave_id, tx_bytes,
972                                 internals->slave_update_idx, &bwg_array[i]);
973                 bwg_array[i].slave = slave_id;
974
975                 if (update_stats) {
976                         tlb_last_obytets[slave_id] = slave_stats.obytes;
977                 }
978         }
979
980         if (update_stats == 1)
981                 internals->slave_update_idx = 0;
982
983         slave_count = i;
984         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
985         for (i = 0; i < slave_count; i++)
986                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
987
988         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
989                         (struct bond_dev_private *)internals);
990 }
991
992 static uint16_t
993 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
994 {
995         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
996         struct bond_dev_private *internals = bd_tx_q->dev_private;
997
998         struct rte_eth_dev *primary_port =
999                         &rte_eth_devices[internals->primary_port];
1000         uint16_t num_tx_total = 0;
1001         uint16_t i, j;
1002
1003         uint16_t num_of_slaves = internals->active_slave_count;
1004         uint16_t slaves[RTE_MAX_ETHPORTS];
1005
1006         struct ether_hdr *ether_hdr;
1007         struct ether_addr primary_slave_addr;
1008         struct ether_addr active_slave_addr;
1009
1010         if (num_of_slaves < 1)
1011                 return num_tx_total;
1012
1013         memcpy(slaves, internals->tlb_slaves_order,
1014                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
1015
1016
1017         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
1018
1019         if (nb_pkts > 3) {
1020                 for (i = 0; i < 3; i++)
1021                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
1022         }
1023
1024         for (i = 0; i < num_of_slaves; i++) {
1025                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
1026                 for (j = num_tx_total; j < nb_pkts; j++) {
1027                         if (j + 3 < nb_pkts)
1028                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1029
1030                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1031                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
1032                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
1033 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1034                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1035 #endif
1036                 }
1037
1038                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1039                                 bufs + num_tx_total, nb_pkts - num_tx_total);
1040
1041                 if (num_tx_total == nb_pkts)
1042                         break;
1043         }
1044
1045         return num_tx_total;
1046 }
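/* TLB transmit rewrites the source MAC of frames still carrying the primary
 * slave's address to that of the slave actually sending them, so the peer
 * switch learns one station per slave; slaves are tried in tlb_slaves_order,
 * i.e. most spare bandwidth first (see bond_ethdev_update_tlb_slave_cb()). */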
1047
1048 void
1049 bond_tlb_disable(struct bond_dev_private *internals)
1050 {
1051         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1052 }
1053
1054 void
1055 bond_tlb_enable(struct bond_dev_private *internals)
1056 {
1057         bond_ethdev_update_tlb_slave_cb(internals);
1058 }
1059
1060 static uint16_t
1061 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1062 {
1063         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1064         struct bond_dev_private *internals = bd_tx_q->dev_private;
1065
1066         struct ether_hdr *eth_h;
1067         uint16_t ether_type, offset;
1068
1069         struct client_data *client_info;
1070
1071         /*
1072          * We create transmit buffers for every slave and one additional to send
1073          * through TLB. In the worst case every packet will be sent on one port.
1074          */
1075         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1076         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1077
1078         /*
1079          * We create separate transmit buffers for update packets as they won't
1080          * be counted in num_tx_total.
1081          */
1082         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1083         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1084
1085         struct rte_mbuf *upd_pkt;
1086         size_t pkt_size;
1087
1088         uint16_t num_send, num_not_send = 0;
1089         uint16_t num_tx_total = 0;
1090         uint16_t slave_idx;
1091
1092         int i, j;
1093
1094         /* Search tx buffer for ARP packets and forward them to alb */
1095         for (i = 0; i < nb_pkts; i++) {
1096                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1097                 ether_type = eth_h->ether_type;
1098                 offset = get_vlan_offset(eth_h, &ether_type);
1099
1100                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1101                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1102
1103                         /* Change src mac in eth header */
1104                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1105
1106                         /* Add packet to slave tx buffer */
1107                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1108                         slave_bufs_pkts[slave_idx]++;
1109                 } else {
1110                         /* If packet is not ARP, send it with TLB policy */
1111                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1112                                         bufs[i];
1113                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1114                 }
1115         }
1116
1117         /* Update connected client ARP tables */
1118         if (internals->mode6.ntt) {
1119                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1120                         client_info = &internals->mode6.client_table[i];
1121
1122                         if (client_info->in_use) {
1123                                 /* Allocate new packet to send ARP update on current slave */
1124                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1125                                 if (upd_pkt == NULL) {
1126                                         RTE_BOND_LOG(ERR,
1127                                                      "Failed to allocate ARP packet from pool");
1128                                         continue;
1129                                 }
1130                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1131                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
1132                                 upd_pkt->data_len = pkt_size;
1133                                 upd_pkt->pkt_len = pkt_size;
1134
1135                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1136                                                 internals);
1137
1138                                 /* Add packet to update tx buffer */
1139                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1140                                 update_bufs_pkts[slave_idx]++;
1141                         }
1142                 }
1143                 internals->mode6.ntt = 0;
1144         }
1145
1146         /* Send ARP packets on proper slaves */
1147         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1148                 if (slave_bufs_pkts[i] > 0) {
1149                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1150                                         slave_bufs[i], slave_bufs_pkts[i]);
1151                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1152                                 bufs[nb_pkts - 1 - num_not_send - j] =
1153                                                 slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
1154                         }
1155
1156                         num_tx_total += num_send;
1157                         num_not_send += slave_bufs_pkts[i] - num_send;
1158
1159 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1160                         /* Print TX stats including update packets */
1161                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
1162                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1163                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1164                         }
1165 #endif
1166                 }
1167         }
1168
1169         /* Send update packets on proper slaves */
1170         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1171                 if (update_bufs_pkts[i] > 0) {
1172                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1173                                         update_bufs_pkts[i]);
1174                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
1175                                 rte_pktmbuf_free(update_bufs[i][j]);
1176                         }
1177 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1178                         for (j = 0; j < update_bufs_pkts[i]; j++) {
1179                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1180                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1181                         }
1182 #endif
1183                 }
1184         }
1185
1186         /* Send non-ARP packets using tlb policy */
1187         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1188                 num_send = bond_ethdev_tx_burst_tlb(queue,
1189                                 slave_bufs[RTE_MAX_ETHPORTS],
1190                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1191
1192                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
1193                         bufs[nb_pkts - 1 - num_not_send - j] =
1194                                 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
1195                 }
1196
1197                 num_tx_total += num_send;
1198         }
1199
1200         return num_tx_total;
1201 }
1202
1203 static uint16_t
1204 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1205                 uint16_t nb_bufs)
1206 {
1207         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1208         struct bond_dev_private *internals = bd_tx_q->dev_private;
1209
1210         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1211         uint16_t slave_count;
1212
1213         /* 2-D array into which mbufs are sorted for transmission on each slave */
1214         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1215         /* Number of mbufs for transmission on each slave */
1216         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1217         /* Mapping array generated by hash function to map mbufs to slaves */
1218         uint16_t bufs_slave_port_idxs[nb_bufs];
1219
1220         uint16_t slave_tx_count;
1221         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1222
1223         uint16_t i;
1224
1225         if (unlikely(nb_bufs == 0))
1226                 return 0;
1227
1228         /* Copy slave list to protect against slave up/down changes during tx
1229          * bursting */
1230         slave_count = internals->active_slave_count;
1231         if (unlikely(slave_count < 1))
1232                 return 0;
1233
1234         memcpy(slave_port_ids, internals->active_slaves,
1235                         sizeof(slave_port_ids[0]) * slave_count);
1236
1237         /*
1238          * Populate each slave's mbuf array with the packets to be sent on it,
1239          * selecting the output slave with a hash based on the xmit policy
1240          */
1241         internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1242                         bufs_slave_port_idxs);
1243
1244         for (i = 0; i < nb_bufs; i++) {
1245                 /* Populate slave mbuf arrays with mbufs for that slave. */
1246                 uint16_t slave_idx = bufs_slave_port_idxs[i];
1247
1248                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1249         }
1250
1251         /* Send packet burst on each slave device */
1252         for (i = 0; i < slave_count; i++) {
1253                 if (slave_nb_bufs[i] == 0)
1254                         continue;
1255
1256                 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1257                                 bd_tx_q->queue_id, slave_bufs[i],
1258                                 slave_nb_bufs[i]);
1259
1260                 total_tx_count += slave_tx_count;
1261
1262                 /* If tx burst fails move packets to end of bufs */
1263                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1264                         int slave_tx_fail_count = slave_nb_bufs[i] -
1265                                         slave_tx_count;
1266                         total_tx_fail_count += slave_tx_fail_count;
1267                         memcpy(&bufs[nb_bufs - total_tx_fail_count],
1268                                &slave_bufs[i][slave_tx_count],
1269                                slave_tx_fail_count * sizeof(bufs[0]));
1270                 }
1271         }
1272
1273         return total_tx_count;
1274 }
1275
1276 static uint16_t
1277 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1278                 uint16_t nb_bufs)
1279 {
1280         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1281         struct bond_dev_private *internals = bd_tx_q->dev_private;
1282
1283         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1284         uint16_t slave_count;
1285
1286         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1287         uint16_t dist_slave_count;
1288
1289         /* 2-D array into which mbufs are sorted for transmission on each slave */
1290         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1291         /* Number of mbufs for transmission on each slave */
1292         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1293         /* Mapping array generated by hash function to map mbufs to slaves */
1294         uint16_t bufs_slave_port_idxs[nb_bufs];
1295
1296         uint16_t slave_tx_count;
1297         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1298
1299         uint16_t i;
1300
1301         /* Copy slave list to protect against slave up/down changes during tx
1302          * bursting */
1303         slave_count = internals->active_slave_count;
1304         if (unlikely(slave_count < 1))
1305                 return 0;
1306
1307         memcpy(slave_port_ids, internals->active_slaves,
1308                         sizeof(slave_port_ids[0]) * slave_count);
1309
1310         /* Check for LACP control packets and send if available */
1311         for (i = 0; i < slave_count; i++) {
1312                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1313                 struct rte_mbuf *ctrl_pkt = NULL;
1314
1315                 if (likely(rte_ring_empty(port->tx_ring)))
1316                         continue;
1317
1318                 if (rte_ring_dequeue(port->tx_ring,
1319                                      (void **)&ctrl_pkt) != -ENOENT) {
1320                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1321                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1322                         /*
1323                          * re-enqueue LAG control plane packets to buffering
1324                          * ring if transmission fails so the packet isn't lost.
1325                          */
1326                         if (slave_tx_count != 1)
1327                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1328                 }
1329         }
1330
1331         if (unlikely(nb_bufs == 0))
1332                 return 0;
1333
1334         dist_slave_count = 0;
1335         for (i = 0; i < slave_count; i++) {
1336                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1337
1338                 if (ACTOR_STATE(port, DISTRIBUTING))
1339                         dist_slave_port_ids[dist_slave_count++] =
1340                                         slave_port_ids[i];
1341         }
1342
1343         if (likely(dist_slave_count > 0)) {
1344
1345                 /*
1346                  * Populate each slave's mbuf array with the packets to be
1347                  * sent on it, selecting the output slave with a hash based
1348                  * on the xmit policy
1349                  */
1349                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1350                                 bufs_slave_port_idxs);
1351
1352                 for (i = 0; i < nb_bufs; i++) {
1353                         /*
1354                          * Populate slave mbuf arrays with mbufs for that
1355                          * slave
1356                          */
1357                         uint16_t slave_idx = bufs_slave_port_idxs[i];
1358
1359                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1360                                         bufs[i];
1361                 }
1362
1363
1364                 /* Send packet burst on each slave device */
1365                 for (i = 0; i < dist_slave_count; i++) {
1366                         if (slave_nb_bufs[i] == 0)
1367                                 continue;
1368
1369                         slave_tx_count = rte_eth_tx_burst(
1370                                         dist_slave_port_ids[i],
1371                                         bd_tx_q->queue_id, slave_bufs[i],
1372                                         slave_nb_bufs[i]);
1373
1374                         total_tx_count += slave_tx_count;
1375
1376                         /* If tx burst fails move unsent packets to the end of bufs so the caller can retry */
1377                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1378                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1379                                                 slave_tx_count;
1380                                 total_tx_fail_count += slave_tx_fail_count;
1381
1382                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1383                                        &slave_bufs[i][slave_tx_count],
1384                                        slave_tx_fail_count * sizeof(bufs[0]));
1385                         }
1386                 }
1387         }
1388
1389         return total_tx_count;
1390 }
1391
1392 static uint16_t
1393 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1394                 uint16_t nb_pkts)
1395 {
1396         struct bond_dev_private *internals;
1397         struct bond_tx_queue *bd_tx_q;
1398
1399         uint16_t slaves[RTE_MAX_ETHPORTS];
1400         uint8_t tx_failed_flag = 0;
1401         uint16_t num_of_slaves;
1402
1403         uint16_t max_nb_of_tx_pkts = 0;
1404
1405         int slave_tx_total[RTE_MAX_ETHPORTS];
1406         int i, most_successful_tx_slave = -1;
1407
1408         bd_tx_q = (struct bond_tx_queue *)queue;
1409         internals = bd_tx_q->dev_private;
1410
1411         /* Copy slave list to protect against slave up/down changes during tx
1412          * bursting */
1413         num_of_slaves = internals->active_slave_count;
1414         if (num_of_slaves < 1)
1415                 return 0;
1416
1417         memcpy(slaves, internals->active_slaves,
1418                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1419
1420         /* Each slave's tx burst frees one reference per sent mbuf, so take num_of_slaves - 1 extra references */
1421         for (i = 0; i < nb_pkts; i++)
1422                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1423
1424         /* Transmit burst on each active slave */
1425         for (i = 0; i < num_of_slaves; i++) {
1426                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1427                                         bufs, nb_pkts);
1428
1429                 if (unlikely(slave_tx_total[i] < nb_pkts))
1430                         tx_failed_flag = 1;
1431
1432                 /* record the value and slave index for the slave which transmits the
1433                  * maximum number of packets */
1434                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1435                         max_nb_of_tx_pkts = slave_tx_total[i];
1436                         most_successful_tx_slave = i;
1437                 }
1438         }
1439
1440         /* if slaves fail to transmit packets from burst, the calling application
1441          * is not expected to know about multiple references to packets so we must
1442          * handle failures of all packets except those of the most successful slave
1443          */
1444         if (unlikely(tx_failed_flag))
1445                 for (i = 0; i < num_of_slaves; i++)
1446                         if (i != most_successful_tx_slave)
1447                                 while (slave_tx_total[i] < nb_pkts)
1448                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1449
1450         return max_nb_of_tx_pkts;
1451 }
1452
1453 static void
1454 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1455 {
1456         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1457
1458         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1459                 /**
1460                  * If in mode 4 then save the link properties of the first
1461                  * slave; all subsequent slaves must match these properties
1462                  */
1463                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1464
1465                 bond_link->link_autoneg = slave_link->link_autoneg;
1466                 bond_link->link_duplex = slave_link->link_duplex;
1467                 bond_link->link_speed = slave_link->link_speed;
1468         } else {
1469                 /**
1470                  * In any other mode the link properties are set to default
1471                  * values of AUTONEG/DUPLEX
1472                  */
1473                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1474                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1475         }
1476 }
1477
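/*
 * Used by the LSC event callback: in 802.3ad mode every slave must report
 * the same speed/duplex/autoneg as the first slave recorded by
 * link_properties_set(); any mismatch makes the slave's link properties
 * invalid for the bond.
 */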
1478 static int
1479 link_properties_valid(struct rte_eth_dev *ethdev,
1480                 struct rte_eth_link *slave_link)
1481 {
1482         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1483
1484         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1485                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1486
1487                 if (bond_link->link_duplex != slave_link->link_duplex ||
1488                         bond_link->link_autoneg != slave_link->link_autoneg ||
1489                         bond_link->link_speed != slave_link->link_speed)
1490                         return -1;
1491         }
1492
1493         return 0;
1494 }
1495
1496 int
1497 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1498 {
1499         struct ether_addr *mac_addr;
1500
1501         if (eth_dev == NULL) {
1502                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1503                 return -1;
1504         }
1505
1506         if (dst_mac_addr == NULL) {
1507                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1508                 return -1;
1509         }
1510
1511         mac_addr = eth_dev->data->mac_addrs;
1512
1513         ether_addr_copy(mac_addr, dst_mac_addr);
1514         return 0;
1515 }
1516
1517 int
1518 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1519 {
1520         struct ether_addr *mac_addr;
1521
1522         if (eth_dev == NULL) {
1523                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1524                 return -1;
1525         }
1526
1527         if (new_mac_addr == NULL) {
1528                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1529                 return -1;
1530         }
1531
1532         mac_addr = eth_dev->data->mac_addrs;
1533
1534         /* If the new MAC differs from the current MAC then update */
1535         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1536                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1537
1538         return 0;
1539 }
1540
1541 static const struct ether_addr null_mac_addr;
1542
1543 /*
1544  * Add additional MAC addresses to the slave
1545  */
1546 int
1547 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1548                 uint16_t slave_port_id)
1549 {
1550         int i, ret;
1551         struct ether_addr *mac_addr;
1552
1553         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1554                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1555                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1556                         break;
1557
1558                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1559                 if (ret < 0) {
1560                         /* rollback */
1561                         for (i--; i > 0; i--)
1562                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1563                                         &bonded_eth_dev->data->mac_addrs[i]);
1564                         return ret;
1565                 }
1566         }
1567
1568         return 0;
1569 }
1570
1571 /*
1572  * Remove additional MAC addresses from the slave
1573  */
1574 int
1575 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1576                 uint16_t slave_port_id)
1577 {
1578         int i, rc, ret;
1579         struct ether_addr *mac_addr;
1580
1581         rc = 0;
1582         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1583                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1584                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1585                         break;
1586
1587                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1588                 /* save only the first error */
1589                 if (ret < 0 && rc == 0)
1590                         rc = ret;
1591         }
1592
1593         return rc;
1594 }
1595
1596 int
1597 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1598 {
1599         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1600         int i;
1601
1602         /* Update slave devices MAC addresses */
1603         if (internals->slave_count < 1)
1604                 return -1;
1605
1606         switch (internals->mode) {
1607         case BONDING_MODE_ROUND_ROBIN:
1608         case BONDING_MODE_BALANCE:
1609         case BONDING_MODE_BROADCAST:
1610                 for (i = 0; i < internals->slave_count; i++) {
1611                         if (rte_eth_dev_default_mac_addr_set(
1612                                         internals->slaves[i].port_id,
1613                                         bonded_eth_dev->data->mac_addrs)) {
1614                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1615                                                 internals->slaves[i].port_id);
1616                                 return -1;
1617                         }
1618                 }
1619                 break;
1620         case BONDING_MODE_8023AD:
1621                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1622                 break;
1623         case BONDING_MODE_ACTIVE_BACKUP:
1624         case BONDING_MODE_TLB:
1625         case BONDING_MODE_ALB:
1626         default:
1627                 for (i = 0; i < internals->slave_count; i++) {
1628                         if (internals->slaves[i].port_id ==
1629                                         internals->current_primary_port) {
1630                                 if (rte_eth_dev_default_mac_addr_set(
1631                                                 internals->current_primary_port,
1632                                                 bonded_eth_dev->data->mac_addrs)) {
1633                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1634                                                         internals->current_primary_port);
1635                                         return -1;
1636                                 }
1637                         } else {
1638                                 if (rte_eth_dev_default_mac_addr_set(
1639                                                 internals->slaves[i].port_id,
1640                                                 &internals->slaves[i].persisted_mac_addr)) {
1641                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1642                                                         internals->slaves[i].port_id);
1643                                         return -1;
1644                                 }
1645                         }
1646                 }
1647         }
1648
1649         return 0;
1650 }
1651
1652 int
1653 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1654 {
1655         struct bond_dev_private *internals;
1656
1657         internals = eth_dev->data->dev_private;
1658
1659         switch (mode) {
1660         case BONDING_MODE_ROUND_ROBIN:
1661                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1662                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1663                 break;
1664         case BONDING_MODE_ACTIVE_BACKUP:
1665                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1666                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1667                 break;
1668         case BONDING_MODE_BALANCE:
1669                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1670                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1671                 break;
1672         case BONDING_MODE_BROADCAST:
1673                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1674                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1675                 break;
1676         case BONDING_MODE_8023AD:
1677                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1678                         return -1;
1679
1680                 if (internals->mode4.dedicated_queues.enabled == 0) {
1681                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1682                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1683                         RTE_BOND_LOG(WARNING,
1684                                 "Using mode 4, it is necessary to do TX burst "
1685                                 "and RX burst at least every 100ms.");
1686                 } else {
1687                         /* Use flow director's optimization */
1688                         eth_dev->rx_pkt_burst =
1689                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1690                         eth_dev->tx_pkt_burst =
1691                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1692                 }
1693                 break;
1694         case BONDING_MODE_TLB:
1695                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1696                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1697                 break;
1698         case BONDING_MODE_ALB:
1699                 if (bond_mode_alb_enable(eth_dev) != 0)
1700                         return -1;
1701
1702                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1703                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1704                 break;
1705         default:
1706                 return -1;
1707         }
1708
1709         internals->mode = mode;
1710
1711         return 0;
1712 }
1713
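/*
 * Illustrative usage (a sketch, not part of this driver): applications
 * normally select the bonding mode through the public API, which in turn
 * calls bond_ethdev_mode_set() to install the mode-specific burst handlers:
 *
 *     int port_id = rte_eth_bond_create("net_bonding0",
 *                                       BONDING_MODE_ACTIVE_BACKUP, 0);
 *     if (port_id >= 0)
 *             rte_eth_bond_mode_set(port_id, BONDING_MODE_8023AD);
 */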
1714
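/*
 * When dedicated queues are enabled in mode 4, one extra rx/tx queue pair
 * (beyond those exposed to the application) is set up on each slave to carry
 * LACP control traffic, with flow rules steering LACPDUs to it; the slow
 * queue's mbufs come from a per-slave pool created below on first use.
 */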
1715 static int
1716 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1717                 struct rte_eth_dev *slave_eth_dev)
1718 {
1719         int errval = 0;
1720         struct bond_dev_private *internals = (struct bond_dev_private *)
1721                 bonded_eth_dev->data->dev_private;
1722         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1723
1724         if (port->slow_pool == NULL) {
1725                 char mem_name[256];
1726                 int slave_id = slave_eth_dev->data->port_id;
1727
1728                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1729                                 slave_id);
1730                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1731                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1732                         slave_eth_dev->data->numa_node);
1733
1734                 /* Any memory allocation failure during initialization is critical
1735                  * because resources can't be freed, so reinitialization is impossible. */
1736                 if (port->slow_pool == NULL) {
1737                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1738                                 slave_id, mem_name, rte_strerror(rte_errno));
1739                 }
1740         }
1741
1742         if (internals->mode4.dedicated_queues.enabled == 1) {
1743                 /* Configure slow Rx queue */
1744
1745                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1746                                 internals->mode4.dedicated_queues.rx_qid, 128,
1747                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1748                                 NULL, port->slow_pool);
1749                 if (errval != 0) {
1750                         RTE_BOND_LOG(ERR,
1751                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1752                                         slave_eth_dev->data->port_id,
1753                                         internals->mode4.dedicated_queues.rx_qid,
1754                                         errval);
1755                         return errval;
1756                 }
1757
1758                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1759                                 internals->mode4.dedicated_queues.tx_qid, 512,
1760                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1761                                 NULL);
1762                 if (errval != 0) {
1763                         RTE_BOND_LOG(ERR,
1764                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1765                                 slave_eth_dev->data->port_id,
1766                                 internals->mode4.dedicated_queues.tx_qid,
1767                                 errval);
1768                         return errval;
1769                 }
1770         }
1771         return 0;
1772 }
1773
1774 int
1775 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1776                 struct rte_eth_dev *slave_eth_dev)
1777 {
1778         struct bond_rx_queue *bd_rx_q;
1779         struct bond_tx_queue *bd_tx_q;
1780         uint16_t nb_rx_queues;
1781         uint16_t nb_tx_queues;
1782
1783         int errval;
1784         uint16_t q_id;
1785         struct rte_flow_error flow_error;
1786
1787         struct bond_dev_private *internals = (struct bond_dev_private *)
1788                 bonded_eth_dev->data->dev_private;
1789
1790         /* Stop slave */
1791         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1792
1793         /* Enable interrupts on slave device if supported */
1794         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1795                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1796
1797         /* If RSS is enabled for bonding, try to enable it for slaves  */
1798         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1799                 if (internals->rss_key_len != 0) {
1800                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1801                                         internals->rss_key_len;
1802                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1803                                         internals->rss_key;
1804                 } else {
1805                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1806                 }
1807
1808                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1809                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1810                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1811                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1812         }
1813
1814         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1815                         DEV_RX_OFFLOAD_VLAN_FILTER)
1816                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1817                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1818         else
1819                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1820                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1821
1822         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1823         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1824
1825         if (internals->mode == BONDING_MODE_8023AD) {
1826                 if (internals->mode4.dedicated_queues.enabled == 1) {
1827                         nb_rx_queues++;
1828                         nb_tx_queues++;
1829                 }
1830         }
1831
1832         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1833                                      bonded_eth_dev->data->mtu);
1834         if (errval != 0 && errval != -ENOTSUP) {
1835                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1836                                 slave_eth_dev->data->port_id, errval);
1837                 return errval;
1838         }
1839
1840         /* Configure device */
1841         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1842                         nb_rx_queues, nb_tx_queues,
1843                         &(slave_eth_dev->data->dev_conf));
1844         if (errval != 0) {
1845                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1846                                 slave_eth_dev->data->port_id, errval);
1847                 return errval;
1848         }
1849
1850         /* Setup Rx Queues */
1851         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1852                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1853
1854                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1855                                 bd_rx_q->nb_rx_desc,
1856                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1857                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1858                 if (errval != 0) {
1859                         RTE_BOND_LOG(ERR,
1860                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1861                                         slave_eth_dev->data->port_id, q_id, errval);
1862                         return errval;
1863                 }
1864         }
1865
1866         /* Setup Tx Queues */
1867         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1868                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1869
1870                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1871                                 bd_tx_q->nb_tx_desc,
1872                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1873                                 &bd_tx_q->tx_conf);
1874                 if (errval != 0) {
1875                         RTE_BOND_LOG(ERR,
1876                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1877                                 slave_eth_dev->data->port_id, q_id, errval);
1878                         return errval;
1879                 }
1880         }
1881
1882         if (internals->mode == BONDING_MODE_8023AD &&
1883                         internals->mode4.dedicated_queues.enabled == 1) {
1884                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1885                 if (errval != 0)
1886                         return errval;
1887
1888                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1889                                 slave_eth_dev->data->port_id) != 0) {
1890                         RTE_BOND_LOG(ERR,
1891                                 "bond_ethdev_8023ad_flow_verify failed: port=%d",
1892                                 slave_eth_dev->data->port_id);
1893                         return -1;
1894                 }
1895
1896                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1897                         rte_flow_destroy(slave_eth_dev->data->port_id,
1898                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1899                                         &flow_error);
1900
1901                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1902                                 slave_eth_dev->data->port_id);
1903         }
1904
1905         /* Start device */
1906         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1907         if (errval != 0) {
1908                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1909                                 slave_eth_dev->data->port_id, errval);
1910                 return -1;
1911         }
1912
1913         /* If RSS is enabled for bonding, synchronize RETA */
1914         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1915                 int i;
1916                 struct bond_dev_private *internals;
1917
1918                 internals = bonded_eth_dev->data->dev_private;
1919
1920                 for (i = 0; i < internals->slave_count; i++) {
1921                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1922                                 errval = rte_eth_dev_rss_reta_update(
1923                                                 slave_eth_dev->data->port_id,
1924                                                 &internals->reta_conf[0],
1925                                                 internals->slaves[i].reta_size);
1926                                 if (errval != 0) {
1927                                         RTE_BOND_LOG(WARNING,
1928                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1929                                                      " RSS Configuration for bonding may be inconsistent.",
1930                                                      slave_eth_dev->data->port_id, errval);
1931                                 }
1932                                 break;
1933                         }
1934                 }
1935         }
1936
1937         /* If lsc interrupt is set, check initial slave's link status */
1938         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1939                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1940                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1941                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1942                         NULL);
1943         }
1944
1945         return 0;
1946 }
1947
1948 void
1949 slave_remove(struct bond_dev_private *internals,
1950                 struct rte_eth_dev *slave_eth_dev)
1951 {
1952         uint16_t i;
1953
1954         for (i = 0; i < internals->slave_count; i++)
1955                 if (internals->slaves[i].port_id ==
1956                                 slave_eth_dev->data->port_id)
1957                         break;
1958
1959         if (i < (internals->slave_count - 1)) {
1960                 struct rte_flow *flow;
1961
1962                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1963                                 sizeof(internals->slaves[0]) *
1964                                 (internals->slave_count - i - 1));
1965                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1966                         memmove(&flow->flows[i], &flow->flows[i + 1],
1967                                 sizeof(flow->flows[0]) *
1968                                 (internals->slave_count - i - 1));
1969                         flow->flows[internals->slave_count - 1] = NULL;
1970                 }
1971         }
1972
1973         internals->slave_count--;
1974
1975         /* force reconfiguration of slave interfaces */
1976         _rte_eth_dev_reset(slave_eth_dev);
1977 }
1978
1979 static void
1980 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1981
1982 void
1983 slave_add(struct bond_dev_private *internals,
1984                 struct rte_eth_dev *slave_eth_dev)
1985 {
1986         struct bond_slave_details *slave_details =
1987                         &internals->slaves[internals->slave_count];
1988
1989         slave_details->port_id = slave_eth_dev->data->port_id;
1990         slave_details->last_link_status = 0;
1991
1992         /* Mark slave devices that don't support interrupts so we can
1993          * compensate when we start the bond
1994          */
1995         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1996                 slave_details->link_status_poll_enabled = 1;
1997         }
1998
1999         slave_details->link_status_wait_to_complete = 0;
2000         /* Save the slave's original MAC address so it can be restored when the slave is removed from the bond */
2001         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2002                         sizeof(struct ether_addr));
2003 }
2004
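/*
 * Reached via the public rte_eth_bond_primary_set() API, e.g.
 *
 *     rte_eth_bond_primary_set(bonded_port_id, slave_port_id);
 *
 * The proposed primary takes effect immediately only if it is already in
 * the active slave list, or if there are no active slaves yet.
 */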
2005 void
2006 bond_ethdev_primary_set(struct bond_dev_private *internals,
2007                 uint16_t slave_port_id)
2008 {
2009         int i;
2010
2011         if (internals->active_slave_count < 1)
2012                 internals->current_primary_port = slave_port_id;
2013         else
2014                 /* Search bonded device slave ports for new proposed primary port */
2015                 for (i = 0; i < internals->active_slave_count; i++) {
2016                         if (internals->active_slaves[i] == slave_port_id)
2017                                 internals->current_primary_port = slave_port_id;
2018                 }
2019 }
2020
2021 static void
2022 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2023
2024 static int
2025 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2026 {
2027         struct bond_dev_private *internals;
2028         int i;
2029
2030         /* slave eth dev will be started by bonded device */
2031         if (check_for_bonded_ethdev(eth_dev)) {
2032                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2033                                 eth_dev->data->port_id);
2034                 return -1;
2035         }
2036
2037         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2038         eth_dev->data->dev_started = 1;
2039
2040         internals = eth_dev->data->dev_private;
2041
2042         if (internals->slave_count == 0) {
2043                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2044                 goto out_err;
2045         }
2046
2047         if (internals->user_defined_mac == 0) {
2048                 struct ether_addr *new_mac_addr = NULL;
2049
2050                 for (i = 0; i < internals->slave_count; i++)
2051                         if (internals->slaves[i].port_id == internals->primary_port)
2052                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2053
2054                 if (new_mac_addr == NULL)
2055                         goto out_err;
2056
2057                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2058                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2059                                         eth_dev->data->port_id);
2060                         goto out_err;
2061                 }
2062         }
2063
2064         /* If bonded device is configured in promiscuous mode then re-apply config */
2065         if (internals->promiscuous_en)
2066                 bond_ethdev_promiscuous_enable(eth_dev);
2067
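        /* In mode 4 with dedicated queues, reserve the queue ids just past
         * the application-visible ones for the LACP control path on each
         * slave.
         */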
2068         if (internals->mode == BONDING_MODE_8023AD) {
2069                 if (internals->mode4.dedicated_queues.enabled == 1) {
2070                         internals->mode4.dedicated_queues.rx_qid =
2071                                         eth_dev->data->nb_rx_queues;
2072                         internals->mode4.dedicated_queues.tx_qid =
2073                                         eth_dev->data->nb_tx_queues;
2074                 }
2075         }
2076
2077
2078         /* Reconfigure each slave device if starting bonded device */
2079         for (i = 0; i < internals->slave_count; i++) {
2080                 struct rte_eth_dev *slave_ethdev =
2081                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2082                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2083                         RTE_BOND_LOG(ERR,
2084                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2085                                 eth_dev->data->port_id,
2086                                 internals->slaves[i].port_id);
2087                         goto out_err;
2088                 }
2089                 /* We will need to poll for link status if any slave doesn't
2090                  * support interrupts
2091                  */
2092                 if (internals->slaves[i].link_status_poll_enabled)
2093                         internals->link_status_polling_enabled = 1;
2094         }
2095
2096         /* start polling if needed */
2097         if (internals->link_status_polling_enabled) {
2098                 rte_eal_alarm_set(
2099                         internals->link_status_polling_interval_ms * 1000,
2100                         bond_ethdev_slave_link_status_change_monitor,
2101                         (void *)&rte_eth_devices[internals->port_id]);
2102         }
2103
2104         /* Update all slave devices' MAC addresses */
2105         if (mac_address_slaves_update(eth_dev) != 0)
2106                 goto out_err;
2107
2108         if (internals->user_defined_primary_port)
2109                 bond_ethdev_primary_set(internals, internals->primary_port);
2110
2111         if (internals->mode == BONDING_MODE_8023AD)
2112                 bond_mode_8023ad_start(eth_dev);
2113
2114         if (internals->mode == BONDING_MODE_TLB ||
2115                         internals->mode == BONDING_MODE_ALB)
2116                 bond_tlb_enable(internals);
2117
2118         return 0;
2119
2120 out_err:
2121         eth_dev->data->dev_started = 0;
2122         return -1;
2123 }
2124
2125 static void
2126 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2127 {
2128         uint16_t i;
2129
2130         if (dev->data->rx_queues != NULL) {
2131                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2132                         rte_free(dev->data->rx_queues[i]);
2133                         dev->data->rx_queues[i] = NULL;
2134                 }
2135                 dev->data->nb_rx_queues = 0;
2136         }
2137
2138         if (dev->data->tx_queues != NULL) {
2139                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2140                         rte_free(dev->data->tx_queues[i]);
2141                         dev->data->tx_queues[i] = NULL;
2142                 }
2143                 dev->data->nb_tx_queues = 0;
2144         }
2145 }
2146
2147 void
2148 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2149 {
2150         struct bond_dev_private *internals = eth_dev->data->dev_private;
2151         uint16_t i;
2152
2153         if (internals->mode == BONDING_MODE_8023AD) {
2154                 struct port *port;
2155                 void *pkt = NULL;
2156
2157                 bond_mode_8023ad_stop(eth_dev);
2158
2159                 /* Discard all messages to/from mode 4 state machines */
2160                 for (i = 0; i < internals->active_slave_count; i++) {
2161                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2162
2163                         RTE_ASSERT(port->rx_ring != NULL);
2164                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2165                                 rte_pktmbuf_free(pkt);
2166
2167                         RTE_ASSERT(port->tx_ring != NULL);
2168                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2169                                 rte_pktmbuf_free(pkt);
2170                 }
2171         }
2172
2173         if (internals->mode == BONDING_MODE_TLB ||
2174                         internals->mode == BONDING_MODE_ALB) {
2175                 bond_tlb_disable(internals);
2176                 for (i = 0; i < internals->active_slave_count; i++)
2177                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2178         }
2179
2180         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2181         eth_dev->data->dev_started = 0;
2182
2183         internals->link_status_polling_enabled = 0;
2184         for (i = 0; i < internals->slave_count; i++) {
2185                 uint16_t slave_id = internals->slaves[i].port_id;
2186                 if (find_slave_by_id(internals->active_slaves,
2187                                 internals->active_slave_count, slave_id) !=
2188                                                 internals->active_slave_count) {
2189                         internals->slaves[i].last_link_status = 0;
2190                         rte_eth_dev_stop(slave_id);
2191                         deactivate_slave(eth_dev, slave_id);
2192                 }
2193         }
2194 }
2195
2196 void
2197 bond_ethdev_close(struct rte_eth_dev *dev)
2198 {
2199         struct bond_dev_private *internals = dev->data->dev_private;
2200         uint16_t bond_port_id = internals->port_id;
2201         int skipped = 0;
2202         struct rte_flow_error ferror;
2203
2204         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2205         while (internals->slave_count != skipped) {
2206                 uint16_t port_id = internals->slaves[skipped].port_id;
2207
2208                 rte_eth_dev_stop(port_id);
2209
2210                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2211                         RTE_BOND_LOG(ERR,
2212                                      "Failed to remove port %d from bonded device %s",
2213                                      port_id, dev->device->name);
2214                         skipped++;
2215                 }
2216         }
2217         bond_flow_ops.flush(dev, &ferror);
2218         bond_ethdev_free_queues(dev);
2219         rte_bitmap_reset(internals->vlan_filter_bmp);
2220 }
2221
2222 /* forward declaration */
2223 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2224
2225 static void
2226 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2227 {
2228         struct bond_dev_private *internals = dev->data->dev_private;
2229
2230         uint16_t max_nb_rx_queues = UINT16_MAX;
2231         uint16_t max_nb_tx_queues = UINT16_MAX;
2232         uint16_t max_rx_desc_lim = UINT16_MAX;
2233         uint16_t max_tx_desc_lim = UINT16_MAX;
2234
2235         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2236
2237         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2238                         internals->candidate_max_rx_pktlen :
2239                         ETHER_MAX_JUMBO_FRAME_LEN;
2240
2241         /* The max number of tx/rx queues that the bonded device can support
2242          * is the minimum across all bonded slaves, as every slave must be
2243          * capable of supporting the same number of tx/rx queues.
2244          */
2245         if (internals->slave_count > 0) {
2246                 struct rte_eth_dev_info slave_info;
2247                 uint16_t idx;
2248
2249                 for (idx = 0; idx < internals->slave_count; idx++) {
2250                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2251                                         &slave_info);
2252
2253                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2254                                 max_nb_rx_queues = slave_info.max_rx_queues;
2255
2256                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2257                                 max_nb_tx_queues = slave_info.max_tx_queues;
2258
2259                         if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2260                                 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2261
2262                         if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2263                                 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2264                 }
2265         }
2266
2267         dev_info->max_rx_queues = max_nb_rx_queues;
2268         dev_info->max_tx_queues = max_nb_tx_queues;
2269
2270         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2271                sizeof(dev_info->default_rxconf));
2272         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2273                sizeof(dev_info->default_txconf));
2274
2275         dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2276         dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2277
2278         /**
2279          * If dedicated hw queues enabled for link bonding device in LACP mode
2280          * then we need to reduce the maximum number of data path queues by 1.
2281          */
2282         if (internals->mode == BONDING_MODE_8023AD &&
2283                 internals->mode4.dedicated_queues.enabled == 1) {
2284                 dev_info->max_rx_queues--;
2285                 dev_info->max_tx_queues--;
2286         }
2287
2288         dev_info->min_rx_bufsize = 0;
2289
2290         dev_info->rx_offload_capa = internals->rx_offload_capa;
2291         dev_info->tx_offload_capa = internals->tx_offload_capa;
2292         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2293         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2294         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2295
2296         dev_info->reta_size = internals->reta_size;
2297 }
2298
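/*
 * VLAN filters are recorded in a bitmap as well as being pushed to the
 * current slaves, so the same filters can be re-applied to any slave that
 * is added to the bond later, e.g. after the application calls
 *
 *     rte_eth_dev_vlan_filter(bonded_port_id, vlan_id, 1);
 */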
2299 static int
2300 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2301 {
2302         int res;
2303         uint16_t i;
2304         struct bond_dev_private *internals = dev->data->dev_private;
2305
2306         /* don't do this while a slave is being added */
2307         rte_spinlock_lock(&internals->lock);
2308
2309         if (on)
2310                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2311         else
2312                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2313
2314         for (i = 0; i < internals->slave_count; i++) {
2315                 uint16_t port_id = internals->slaves[i].port_id;
2316
2317                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2318                 if (res == -ENOTSUP)
2319                         RTE_BOND_LOG(WARNING,
2320                                      "Setting VLAN filter on slave port %u not supported.",
2321                                      port_id);
2322         }
2323
2324         rte_spinlock_unlock(&internals->lock);
2325         return 0;
2326 }
2327
2328 static int
2329 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2330                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2331                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2332 {
2333         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2334                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2335                                         0, dev->data->numa_node);
2336         if (bd_rx_q == NULL)
2337                 return -1;
2338
2339         bd_rx_q->queue_id = rx_queue_id;
2340         bd_rx_q->dev_private = dev->data->dev_private;
2341
2342         bd_rx_q->nb_rx_desc = nb_rx_desc;
2343
2344         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2345         bd_rx_q->mb_pool = mb_pool;
2346
2347         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2348
2349         return 0;
2350 }
2351
2352 static int
2353 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2354                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2355                 const struct rte_eth_txconf *tx_conf)
2356 {
2357         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2358                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2359                                         0, dev->data->numa_node);
2360
2361         if (bd_tx_q == NULL)
2362                 return -1;
2363
2364         bd_tx_q->queue_id = tx_queue_id;
2365         bd_tx_q->dev_private = dev->data->dev_private;
2366
2367         bd_tx_q->nb_tx_desc = nb_tx_desc;
2368         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2369
2370         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2371
2372         return 0;
2373 }
2374
2375 static void
2376 bond_ethdev_rx_queue_release(void *queue)
2377 {
2378         if (queue == NULL)
2379                 return;
2380
2381         rte_free(queue);
2382 }
2383
2384 static void
2385 bond_ethdev_tx_queue_release(void *queue)
2386 {
2387         if (queue == NULL)
2388                 return;
2389
2390         rte_free(queue);
2391 }
2392
2393 static void
2394 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2395 {
2396         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2397         struct bond_dev_private *internals;
2398
2399         /* Default value for polling slave found is true as we don't want to
2400          * disable the polling thread if we cannot get the lock */
2401         int i, polling_slave_found = 1;
2402
2403         if (cb_arg == NULL)
2404                 return;
2405
2406         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2407         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2408
2409         if (!bonded_ethdev->data->dev_started ||
2410                 !internals->link_status_polling_enabled)
2411                 return;
2412
2413         /* If device is currently being configured then don't check slaves' link
2414          * status; wait until next period */
2415         if (rte_spinlock_trylock(&internals->lock)) {
2416                 if (internals->slave_count > 0)
2417                         polling_slave_found = 0;
2418
2419                 for (i = 0; i < internals->slave_count; i++) {
2420                         if (!internals->slaves[i].link_status_poll_enabled)
2421                                 continue;
2422
2423                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2424                         polling_slave_found = 1;
2425
2426                         /* Update slave link status */
2427                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2428                                         internals->slaves[i].link_status_wait_to_complete);
2429
2430                         /* if link status has changed since last checked then call lsc
2431                          * event callback */
2432                         if (slave_ethdev->data->dev_link.link_status !=
2433                                         internals->slaves[i].last_link_status) {
2434                                 internals->slaves[i].last_link_status =
2435                                                 slave_ethdev->data->dev_link.link_status;
2436
2437                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2438                                                 RTE_ETH_EVENT_INTR_LSC,
2439                                                 &bonded_ethdev->data->port_id,
2440                                                 NULL);
2441                         }
2442                 }
2443                 rte_spinlock_unlock(&internals->lock);
2444         }
2445
2446         if (polling_slave_found)
2447                 /* Set alarm to continue monitoring link status of slave ethdev's */
2448                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2449                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2450 }
2451
2452 static int
2453 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2454 {
2455         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2456
2457         struct bond_dev_private *bond_ctx;
2458         struct rte_eth_link slave_link;
2459
2460         uint32_t idx;
2461
2462         bond_ctx = ethdev->data->dev_private;
2463
2464         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2465
2466         if (ethdev->data->dev_started == 0 ||
2467                         bond_ctx->active_slave_count == 0) {
2468                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2469                 return 0;
2470         }
2471
2472         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2473
2474         if (wait_to_complete)
2475                 link_update = rte_eth_link_get;
2476         else
2477                 link_update = rte_eth_link_get_nowait;
2478
2479         switch (bond_ctx->mode) {
2480         case BONDING_MODE_BROADCAST:
2481                 /**
2482                  * Setting link speed to UINT32_MAX to ensure we pick up the
2483                  * value of the first active slave
2484                  */
2485                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2486
2487                 /**
2488                  * The bond link speed is the minimum of all the slaves' link
2489                  * speeds, as packet loss will occur on the slowest slave if
2490                  * transmission at a higher rate is attempted
2491                  */
2492                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2493                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2494
2495                         if (slave_link.link_speed <
2496                                         ethdev->data->dev_link.link_speed)
2497                                 ethdev->data->dev_link.link_speed =
2498                                                 slave_link.link_speed;
2499                 }
2500                 break;
2501         case BONDING_MODE_ACTIVE_BACKUP:
2502                 /* Current primary slave */
2503                 link_update(bond_ctx->current_primary_port, &slave_link);
2504
2505                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2506                 break;
2507         case BONDING_MODE_8023AD:
2508                 ethdev->data->dev_link.link_autoneg =
2509                                 bond_ctx->mode4.slave_link.link_autoneg;
2510                 ethdev->data->dev_link.link_duplex =
2511                                 bond_ctx->mode4.slave_link.link_duplex;
2512                 /* fall through to update link speed */
2513         case BONDING_MODE_ROUND_ROBIN:
2514         case BONDING_MODE_BALANCE:
2515         case BONDING_MODE_TLB:
2516         case BONDING_MODE_ALB:
2517         default:
2518                 /**
2519                  * In these modes the maximum theoretical link speed is the
2520                  * sum of all the slaves' link speeds
2521                  */
2522                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2523
2524                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2525                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2526
2527                         ethdev->data->dev_link.link_speed +=
2528                                         slave_link.link_speed;
2529                 }
2530         }
2531
2532
2533         return 0;
2534 }
2535
2536
2537 static int
2538 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2539 {
2540         struct bond_dev_private *internals = dev->data->dev_private;
2541         struct rte_eth_stats slave_stats;
2542         int i, j;
2543
2544         for (i = 0; i < internals->slave_count; i++) {
2545                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2546
2547                 stats->ipackets += slave_stats.ipackets;
2548                 stats->opackets += slave_stats.opackets;
2549                 stats->ibytes += slave_stats.ibytes;
2550                 stats->obytes += slave_stats.obytes;
2551                 stats->imissed += slave_stats.imissed;
2552                 stats->ierrors += slave_stats.ierrors;
2553                 stats->oerrors += slave_stats.oerrors;
2554                 stats->rx_nombuf += slave_stats.rx_nombuf;
2555
2556                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2557                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2558                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2559                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2560                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2561                         stats->q_errors[j] += slave_stats.q_errors[j];
2562                 }
2563
2564         }
2565
2566         return 0;
2567 }
2568
2569 static void
2570 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2571 {
2572         struct bond_dev_private *internals = dev->data->dev_private;
2573         int i;
2574
2575         for (i = 0; i < internals->slave_count; i++)
2576                 rte_eth_stats_reset(internals->slaves[i].port_id);
2577 }
2578
2579 static void
2580 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2581 {
2582         struct bond_dev_private *internals = eth_dev->data->dev_private;
2583         int i;
2584
2585         internals->promiscuous_en = 1;
2586
2587         switch (internals->mode) {
2588         /* Promiscuous mode is propagated to all slaves */
2589         case BONDING_MODE_ROUND_ROBIN:
2590         case BONDING_MODE_BALANCE:
2591         case BONDING_MODE_BROADCAST:
2592                 for (i = 0; i < internals->slave_count; i++)
2593                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2594                 break;
2595         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2596         case BONDING_MODE_8023AD:
2597                 break;
2598         /* Promiscuous mode is propagated only to primary slave */
2599         case BONDING_MODE_ACTIVE_BACKUP:
2600         case BONDING_MODE_TLB:
2601         case BONDING_MODE_ALB:
2602         default:
2603                 /* Do not touch promisc when there cannot be primary ports */
2604                 if (internals->slave_count == 0)
2605                         break;
2606                 rte_eth_promiscuous_enable(internals->current_primary_port);
2607         }
2608 }
2609
2610 static void
2611 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2612 {
2613         struct bond_dev_private *internals = dev->data->dev_private;
2614         int i;
2615
2616         internals->promiscuous_en = 0;
2617
2618         switch (internals->mode) {
2619         /* Promiscuous mode is propagated to all slaves */
2620         case BONDING_MODE_ROUND_ROBIN:
2621         case BONDING_MODE_BALANCE:
2622         case BONDING_MODE_BROADCAST:
2623                 for (i = 0; i < internals->slave_count; i++)
2624                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2625                 break;
2626         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2627         case BONDING_MODE_8023AD:
2628                 break;
2629         /* Promiscuous mode is propagated only to primary slave */
2630         case BONDING_MODE_ACTIVE_BACKUP:
2631         case BONDING_MODE_TLB:
2632         case BONDING_MODE_ALB:
2633         default:
2634                 /* Do not touch promisc when there cannot be primary ports */
2635                 if (internals->slave_count == 0)
2636                         break;
2637                 rte_eth_promiscuous_disable(internals->current_primary_port);
2638         }
2639 }
2640
2641 static void
2642 bond_ethdev_delayed_lsc_propagation(void *arg)
2643 {
2644         if (arg == NULL)
2645                 return;
2646
2647         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2648                         RTE_ETH_EVENT_INTR_LSC, NULL);
2649 }
2650
2651 int
2652 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2653                 void *param, void *ret_param __rte_unused)
2654 {
2655         struct rte_eth_dev *bonded_eth_dev;
2656         struct bond_dev_private *internals;
2657         struct rte_eth_link link;
2658         int rc = -1;
2659
2660         uint8_t lsc_flag = 0;
2661         int valid_slave = 0;
2662         uint16_t active_pos;
2663         uint16_t i;
2664
2665         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2666                 return rc;
2667
2668         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2669
2670         if (check_for_bonded_ethdev(bonded_eth_dev))
2671                 return rc;
2672
2673         internals = bonded_eth_dev->data->dev_private;
2674
2675         /* If the device isn't started don't handle interrupts */
2676         if (!bonded_eth_dev->data->dev_started)
2677                 return rc;
2678
2679         /* verify that port_id is a valid slave of bonded port */
2680         for (i = 0; i < internals->slave_count; i++) {
2681                 if (internals->slaves[i].port_id == port_id) {
2682                         valid_slave = 1;
2683                         break;
2684                 }
2685         }
2686
2687         if (!valid_slave)
2688                 return rc;
2689
2690         /* Synchronize parallel lsc callback calls, triggered either by a real
2691          * link event from the slave PMDs or by the bonding PMD itself.
2692          */
2693         rte_spinlock_lock(&internals->lsc_lock);
2694
2695         /* Search for port in active port list */
2696         active_pos = find_slave_by_id(internals->active_slaves,
2697                         internals->active_slave_count, port_id);
2698
2699         rte_eth_link_get_nowait(port_id, &link);
2700         if (link.link_status) {
2701                 if (active_pos < internals->active_slave_count)
2702                         goto link_update;
2703
2704                 /* Check link state properties if the bonded link is up */
2705                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2706                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2707                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2708                                              "for slave %d in bonding mode %d",
2709                                              port_id, internals->mode);
2710                 } else {
2711                         /* inherit slave link properties */
2712                         link_properties_set(bonded_eth_dev, &link);
2713                 }
2714
2715                 /* If no active slave ports then set this port to be
2716                  * the primary port.
2717                  */
2718                 if (internals->active_slave_count < 1) {
2719                         /* If first active slave, then change link status */
2720                         bonded_eth_dev->data->dev_link.link_status =
2721                                                                 ETH_LINK_UP;
2722                         internals->current_primary_port = port_id;
2723                         lsc_flag = 1;
2724
2725                         mac_address_slaves_update(bonded_eth_dev);
2726                 }
2727
2728                 activate_slave(bonded_eth_dev, port_id);
2729
2730                 /* If the user has defined the primary port then default to
2731                  * using it.
2732                  */
2733                 if (internals->user_defined_primary_port &&
2734                                 internals->primary_port == port_id)
2735                         bond_ethdev_primary_set(internals, port_id);
2736         } else {
2737                 if (active_pos == internals->active_slave_count)
2738                         goto link_update;
2739
2740                 /* Remove from active slave list */
2741                 deactivate_slave(bonded_eth_dev, port_id);
2742
2743                 if (internals->active_slave_count < 1)
2744                         lsc_flag = 1;
2745
2746                 /* Update primary id: take the first active slave from the list,
2747                  * or if none is available fall back to the configured primary port */
2748                 if (port_id == internals->current_primary_port) {
2749                         if (internals->active_slave_count > 0)
2750                                 bond_ethdev_primary_set(internals,
2751                                                 internals->active_slaves[0]);
2752                         else
2753                                 internals->current_primary_port = internals->primary_port;
2754                 }
2755         }
2756
2757 link_update:
2758         /**
2759          * Update bonded device link properties after any change to active
2760          * slaves
2761          */
2762         bond_ethdev_link_update(bonded_eth_dev, 0);
2763
2764         if (lsc_flag) {
2765                 /* Cancel any possible outstanding interrupts if delays are enabled */
2766                 if (internals->link_up_delay_ms > 0 ||
2767                         internals->link_down_delay_ms > 0)
2768                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2769                                         bonded_eth_dev);
2770
2771                 if (bonded_eth_dev->data->dev_link.link_status) {
2772                         if (internals->link_up_delay_ms > 0)
2773                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2774                                                 bond_ethdev_delayed_lsc_propagation,
2775                                                 (void *)bonded_eth_dev);
2776                         else
2777                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2778                                                 RTE_ETH_EVENT_INTR_LSC,
2779                                                 NULL);
2780
2781                 } else {
2782                         if (internals->link_down_delay_ms > 0)
2783                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2784                                                 bond_ethdev_delayed_lsc_propagation,
2785                                                 (void *)bonded_eth_dev);
2786                         else
2787                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2788                                                 RTE_ETH_EVENT_INTR_LSC,
2789                                                 NULL);
2790                 }
2791         }
2792
2793         rte_spinlock_unlock(&internals->lsc_lock);
2794
2795         return rc;
2796 }
2797
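/*
 * Update the RSS redirection table of the bonded device and propagate it to
 * every slave, replicating the new entries to fill each slave's RETA.
 */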
2798 static int
2799 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2800                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2801 {
2802         unsigned i, j;
2803         int result = 0;
2804         int slave_reta_size;
2805         unsigned reta_count;
2806         struct bond_dev_private *internals = dev->data->dev_private;
2807
2808         if (reta_size != internals->reta_size)
2809                 return -EINVAL;
2810
2811         /* Copy RETA table */
2812         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2813
2814         for (i = 0; i < reta_count; i++) {
2815                 internals->reta_conf[i].mask = reta_conf[i].mask;
2816                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2817                         if ((reta_conf[i].mask >> j) & 0x01)
2818                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2819         }
2820
2821         /* Fill rest of array */
2822         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2823                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2824                                 sizeof(internals->reta_conf[0]) * reta_count);
2825
2826         /* Propagate RETA over slaves */
2827         for (i = 0; i < internals->slave_count; i++) {
2828                 slave_reta_size = internals->slaves[i].reta_size;
2829                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2830                                 &internals->reta_conf[0], slave_reta_size);
2831                 if (result < 0)
2832                         return result;
2833         }
2834
2835         return 0;
2836 }
2837
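/* Report the RSS redirection table stored in the bonded device. */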
2838 static int
2839 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2840                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2841 {
2842         int i, j;
2843         struct bond_dev_private *internals = dev->data->dev_private;
2844
2845         if (reta_size != internals->reta_size)
2846                 return -EINVAL;
2847
2848         /* Copy RETA table */
2849         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2850                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2851                         if ((reta_conf[i].mask >> j) & 0x01)
2852                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2853
2854         return 0;
2855 }
2856
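/*
 * Update the RSS hash configuration, masking out hash functions that the
 * bonded device cannot offload, and propagate the result to all slaves.
 */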
2857 static int
2858 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2859                 struct rte_eth_rss_conf *rss_conf)
2860 {
2861         int i, result = 0;
2862         struct bond_dev_private *internals = dev->data->dev_private;
2863         struct rte_eth_rss_conf bond_rss_conf;
2864
2865         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2866
2867         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2868
2869         if (bond_rss_conf.rss_hf != 0)
2870                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2871
2872         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2873                         sizeof(internals->rss_key)) {
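                /* A zero key length selects the default 40-byte RSS key */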
2874                 if (bond_rss_conf.rss_key_len == 0)
2875                         bond_rss_conf.rss_key_len = 40;
2876                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2877                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2878                                 internals->rss_key_len);
2879         }
2880
2881         for (i = 0; i < internals->slave_count; i++) {
2882                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2883                                 &bond_rss_conf);
2884                 if (result < 0)
2885                         return result;
2886         }
2887
2888         return 0;
2889 }
2890
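/* Report the RSS hash configuration stored in the bonded device. */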
2891 static int
2892 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2893                 struct rte_eth_rss_conf *rss_conf)
2894 {
2895         struct bond_dev_private *internals = dev->data->dev_private;
2896
2897         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2898         rss_conf->rss_key_len = internals->rss_key_len;
2899         if (rss_conf->rss_key)
2900                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2901
2902         return 0;
2903 }
2904
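/*
 * Set the MTU on every slave. Fails with -ENOTSUP unless all slaves
 * implement the mtu_set operation.
 */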
2905 static int
2906 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2907 {
2908         struct rte_eth_dev *slave_eth_dev;
2909         struct bond_dev_private *internals = dev->data->dev_private;
2910         int ret, i;
2911
2912         rte_spinlock_lock(&internals->lock);
2913
2914         for (i = 0; i < internals->slave_count; i++) {
2915                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2916                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2917                         rte_spinlock_unlock(&internals->lock);
2918                         return -ENOTSUP;
2919                 }
2920         }
2921         for (i = 0; i < internals->slave_count; i++) {
2922                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2923                 if (ret < 0) {
2924                         rte_spinlock_unlock(&internals->lock);
2925                         return ret;
2926                 }
2927         }
2928
2929         rte_spinlock_unlock(&internals->lock);
2930         return 0;
2931 }
2932
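/* Set the default MAC address of the bonded device. */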
2933 static int
2934 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2935 {
2936         if (mac_address_set(dev, addr)) {
2937                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2938                 return -EINVAL;
2939         }
2940
2941         return 0;
2942 }
2943
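/* Expose the generic flow (rte_flow) operations of the bonded device. */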
2944 static int
2945 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2946                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2947 {
2948         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2949                 *(const void **)arg = &bond_flow_ops;
2950                 return 0;
2951         }
2952         return -ENOTSUP;
2953 }
2954
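/*
 * Add a secondary MAC address on every slave, rolling back the slaves
 * already updated if any of them fails.
 */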
2955 static int
2956 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2957                                 __rte_unused uint32_t index, uint32_t vmdq)
2958 {
2959         struct rte_eth_dev *slave_eth_dev;
2960         struct bond_dev_private *internals = dev->data->dev_private;
2961         int ret, i;
2962
2963         rte_spinlock_lock(&internals->lock);
2964
2965         for (i = 0; i < internals->slave_count; i++) {
2966                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2967                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2968                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2969                         ret = -ENOTSUP;
2970                         goto end;
2971                 }
2972         }
2973
2974         for (i = 0; i < internals->slave_count; i++) {
2975                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2976                                 mac_addr, vmdq);
2977                 if (ret < 0) {
2978                         /* rollback */
2979                         for (i--; i >= 0; i--)
2980                                 rte_eth_dev_mac_addr_remove(
2981                                         internals->slaves[i].port_id, mac_addr);
2982                         goto end;
2983                 }
2984         }
2985
2986         ret = 0;
2987 end:
2988         rte_spinlock_unlock(&internals->lock);
2989         return ret;
2990 }
2991
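/* Remove a secondary MAC address from every slave. */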
2992 static void
2993 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2994 {
2995         struct rte_eth_dev *slave_eth_dev;
2996         struct bond_dev_private *internals = dev->data->dev_private;
2997         int i;
2998
2999         rte_spinlock_lock(&internals->lock);
3000
3001         for (i = 0; i < internals->slave_count; i++) {
3002                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3003                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3004                         goto end;
3005         }
3006
3007         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
3008
3009         for (i = 0; i < internals->slave_count; i++)
3010                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3011                                 mac_addr);
3012
3013 end:
3014         rte_spinlock_unlock(&internals->lock);
3015 }
3016
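/* Operations installed on every bonded ethdev */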
3017 const struct eth_dev_ops default_dev_ops = {
3018         .dev_start            = bond_ethdev_start,
3019         .dev_stop             = bond_ethdev_stop,
3020         .dev_close            = bond_ethdev_close,
3021         .dev_configure        = bond_ethdev_configure,
3022         .dev_infos_get        = bond_ethdev_info,
3023         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3024         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3025         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3026         .rx_queue_release     = bond_ethdev_rx_queue_release,
3027         .tx_queue_release     = bond_ethdev_tx_queue_release,
3028         .link_update          = bond_ethdev_link_update,
3029         .stats_get            = bond_ethdev_stats_get,
3030         .stats_reset          = bond_ethdev_stats_reset,
3031         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3032         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3033         .reta_update          = bond_ethdev_rss_reta_update,
3034         .reta_query           = bond_ethdev_rss_reta_query,
3035         .rss_hash_update      = bond_ethdev_rss_hash_update,
3036         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3037         .mtu_set              = bond_ethdev_mtu_set,
3038         .mac_addr_set         = bond_ethdev_mac_address_set,
3039         .mac_addr_add         = bond_ethdev_mac_addr_add,
3040         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3041         .filter_ctrl          = bond_filter_ctrl
3042 };
3043
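/*
 * Allocate and initialize a bonded ethdev: reserve the port, allocate the
 * MAC address storage and the VLAN filter bitmap, and apply the defaults
 * for the requested bonding mode.
 */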
3044 static int
3045 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3046 {
3047         const char *name = rte_vdev_device_name(dev);
3048         uint8_t socket_id = dev->device.numa_node;
3049         struct bond_dev_private *internals = NULL;
3050         struct rte_eth_dev *eth_dev = NULL;
3051         uint32_t vlan_filter_bmp_size;
3052
3053         /* now do all data allocation - for eth_dev structure, dummy pci driver
3054          * and internal (private) data
3055          */
3056
3057         /* reserve an ethdev entry */
3058         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3059         if (eth_dev == NULL) {
3060                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3061                 goto err;
3062         }
3063
3064         internals = eth_dev->data->dev_private;
3065         eth_dev->data->nb_rx_queues = (uint16_t)1;
3066         eth_dev->data->nb_tx_queues = (uint16_t)1;
3067
3068         /* Allocate memory for storing MAC addresses */
3069         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3070                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3071         if (eth_dev->data->mac_addrs == NULL) {
3072                 RTE_BOND_LOG(ERR,
3073                              "Failed to allocate %u bytes needed to store MAC addresses",
3074                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3075                 goto err;
3076         }
3077
3078         eth_dev->dev_ops = &default_dev_ops;
3079         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3080
3081         rte_spinlock_init(&internals->lock);
3082         rte_spinlock_init(&internals->lsc_lock);
3083
3084         internals->port_id = eth_dev->data->port_id;
3085         internals->mode = BONDING_MODE_INVALID;
3086         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3087         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3088         internals->burst_xmit_hash = burst_xmit_l2_hash;
3089         internals->user_defined_mac = 0;
3090
3091         internals->link_status_polling_enabled = 0;
3092
3093         internals->link_status_polling_interval_ms =
3094                 DEFAULT_POLLING_INTERVAL_10_MS;
3095         internals->link_down_delay_ms = 0;
3096         internals->link_up_delay_ms = 0;
3097
3098         internals->slave_count = 0;
3099         internals->active_slave_count = 0;
3100         internals->rx_offload_capa = 0;
3101         internals->tx_offload_capa = 0;
3102         internals->rx_queue_offload_capa = 0;
3103         internals->tx_queue_offload_capa = 0;
3104         internals->candidate_max_rx_pktlen = 0;
3105         internals->max_rx_pktlen = 0;
3106
3107         /* Initially allow any RSS offload type to be chosen */
3108         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3109
3110         memset(&internals->default_rxconf, 0,
3111                sizeof(internals->default_rxconf));
3112         memset(&internals->default_txconf, 0,
3113                sizeof(internals->default_txconf));
3114
3115         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3116         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3117
3118         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3119         memset(internals->slaves, 0, sizeof(internals->slaves));
3120
3121         TAILQ_INIT(&internals->flow_list);
3122         internals->flow_isolated_valid = 0;
3123
3124         /* Set mode 4 default configuration */
3125         bond_mode_8023ad_setup(eth_dev, NULL);
3126         if (bond_ethdev_mode_set(eth_dev, mode)) {
3127                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3128                                  eth_dev->data->port_id, mode);
3129                 goto err;
3130         }
3131
3132         vlan_filter_bmp_size =
3133                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3134         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3135                                                    RTE_CACHE_LINE_SIZE);
3136         if (internals->vlan_filter_bmpmem == NULL) {
3137                 RTE_BOND_LOG(ERR,
3138                              "Failed to allocate vlan bitmap for bonded device %u",
3139                              eth_dev->data->port_id);
3140                 goto err;
3141         }
3142
3143         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3144                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3145         if (internals->vlan_filter_bmp == NULL) {
3146                 RTE_BOND_LOG(ERR,
3147                              "Failed to init vlan bitmap for bonded device %u",
3148                              eth_dev->data->port_id);
3149                 rte_free(internals->vlan_filter_bmpmem);
3150                 goto err;
3151         }
3152
3153         return eth_dev->data->port_id;
3154
3155 err:
3156         rte_free(internals);
3157         if (eth_dev != NULL)
3158                 eth_dev->data->dev_private = NULL;
3159         rte_eth_dev_release_port(eth_dev);
3160         return -1;
3161 }
3162
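/*
 * vdev probe handler: parse the device kvargs (mode, socket id, aggregator
 * mode) and create the bonded device. Slaves are attached later, from
 * bond_ethdev_configure(), once all other devices have been probed.
 */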
3163 static int
3164 bond_probe(struct rte_vdev_device *dev)
3165 {
3166         const char *name;
3167         struct bond_dev_private *internals;
3168         struct rte_kvargs *kvlist;
3169         uint8_t bonding_mode, socket_id;
3170         int  arg_count, port_id;
3171         uint8_t agg_mode;
3172         struct rte_eth_dev *eth_dev;
3173
3174         if (!dev)
3175                 return -EINVAL;
3176
3177         name = rte_vdev_device_name(dev);
3178         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3179
3180         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3181                 eth_dev = rte_eth_dev_attach_secondary(name);
3182                 if (!eth_dev) {
3183                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3184                         return -1;
3185                 }
3186                 /* TODO: request info from primary to set up Rx and Tx */
3187                 eth_dev->dev_ops = &default_dev_ops;
3188                 eth_dev->device = &dev->device;
3189                 rte_eth_dev_probing_finish(eth_dev);
3190                 return 0;
3191         }
3192
3193         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3194                 pmd_bond_init_valid_arguments);
3195         if (kvlist == NULL)
3196                 return -1;
3197
3198         /* Parse link bonding mode */
3199         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3200                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3201                                 &bond_ethdev_parse_slave_mode_kvarg,
3202                                 &bonding_mode) != 0) {
3203                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3204                                         name);
3205                         goto parse_error;
3206                 }
3207         } else {
3208                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3209                                 "device %s", name);
3210                 goto parse_error;
3211         }
3212
3213         /* Parse socket id to create bonding device on */
3214         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3215         if (arg_count == 1) {
3216                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3217                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3218                                 != 0) {
3219                         RTE_BOND_LOG(ERR, "Invalid socket id specified for "
3220                                         "bonded device %s", name);
3221                         goto parse_error;
3222                 }
3223         } else if (arg_count > 1) {
3224                 RTE_BOND_LOG(ERR, "Socket id can be specified only once for "
3225                                 "bonded device %s", name);
3226                 goto parse_error;
3227         } else {
3228                 socket_id = rte_socket_id();
3229         }
3230
3231         dev->device.numa_node = socket_id;
3232
3233         /* Create link bonding eth device */
3234         port_id = bond_alloc(dev, bonding_mode);
3235         if (port_id < 0) {
3236                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3237                                 "socket %u.", name, bonding_mode, socket_id);
3238                 goto parse_error;
3239         }
3240         internals = rte_eth_devices[port_id].data->dev_private;
3241         internals->kvlist = kvlist;
3242
3243         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3244                 if (rte_kvargs_process(kvlist,
3245                                 PMD_BOND_AGG_MODE_KVARG,
3246                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3247                                 &agg_mode) != 0) {
3248                         RTE_BOND_LOG(ERR,
3249                                         "Failed to parse agg selection mode for bonded device %s",
3250                                         name);
3251                         goto parse_error;
3252                 }
3253
3254                 if (internals->mode == BONDING_MODE_8023AD)
3255                         internals->mode4.agg_selection = agg_mode;
3256         } else {
3257                 internals->mode4.agg_selection = AGG_STABLE;
3258         }
3259
3260         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3261         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3262                         "socket %u.", name, port_id, bonding_mode, socket_id);
3263         return 0;
3264
3265 parse_error:
3266         rte_kvargs_free(kvlist);
3267
3268         return -1;
3269 }
3270
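/*
 * vdev remove handler: refuse to remove a bonded device that still has
 * slaves; otherwise stop and close the device and release its resources.
 */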
3271 static int
3272 bond_remove(struct rte_vdev_device *dev)
3273 {
3274         struct rte_eth_dev *eth_dev;
3275         struct bond_dev_private *internals;
3276         const char *name;
3277
3278         if (!dev)
3279                 return -EINVAL;
3280
3281         name = rte_vdev_device_name(dev);
3282         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3283
3284         /* now free all data allocation - for eth_dev structure,
3285          * dummy pci driver and internal (private) data
3286          */
3287
3288         /* find an ethdev entry */
3289         eth_dev = rte_eth_dev_allocated(name);
3290         if (eth_dev == NULL)
3291                 return -ENODEV;
3292
3293         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3294                 return rte_eth_dev_release_port(eth_dev);
3295
3296         RTE_ASSERT(eth_dev->device == &dev->device);
3297
3298         internals = eth_dev->data->dev_private;
3299         if (internals->slave_count != 0)
3300                 return -EBUSY;
3301
3302         if (eth_dev->data->dev_started == 1) {
3303                 bond_ethdev_stop(eth_dev);
3304                 bond_ethdev_close(eth_dev);
3305         }
3306
3307         eth_dev->dev_ops = NULL;
3308         eth_dev->rx_pkt_burst = NULL;
3309         eth_dev->tx_pkt_burst = NULL;
3310
3311         internals = eth_dev->data->dev_private;
3312         /* Try to release the mempool used in mode 6. If the bonded
3313          * device is not in mode 6, freeing a NULL pointer is not a problem.
3314          */
3315         rte_mempool_free(internals->mode6.mempool);
3316         rte_bitmap_free(internals->vlan_filter_bmp);
3317         rte_free(internals->vlan_filter_bmpmem);
3318
3319         rte_eth_dev_release_port(eth_dev);
3320
3321         return 0;
3322 }
3323
3324 /* This part resolves the slave port ids after all the other pdevs and vdevs
3325  * have been allocated */
3326 static int
3327 bond_ethdev_configure(struct rte_eth_dev *dev)
3328 {
3329         const char *name = dev->device->name;
3330         struct bond_dev_private *internals = dev->data->dev_private;
3331         struct rte_kvargs *kvlist = internals->kvlist;
3332         int arg_count;
3333         uint16_t port_id = dev - rte_eth_devices;
3334         uint8_t agg_mode;
3335
3336         static const uint8_t default_rss_key[40] = {
3337                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3338                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3339                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3340                 0xBE, 0xAC, 0x01, 0xFA
3341         };
3342
3343         unsigned i, j;
3344
3345         /*
3346          * If RSS is enabled, fill table with default values and
3347          * set key to the value specified in port RSS configuration.
3348          * Fall back to default RSS key if the key is not specified
3349          */
3350         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3351                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3352                         internals->rss_key_len =
3353                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3354                         memcpy(internals->rss_key,
3355                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3356                                internals->rss_key_len);
3357                 } else {
3358                         internals->rss_key_len = sizeof(default_rss_key);
3359                         memcpy(internals->rss_key, default_rss_key,
3360                                internals->rss_key_len);
3361                 }
3362
3363                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3364                         internals->reta_conf[i].mask = ~0LL;
3365                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3366                                 internals->reta_conf[i].reta[j] =
3367                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3368                                                 dev->data->nb_rx_queues;
3369                 }
3370         }
3371
3372         /* set the max_rx_pktlen */
3373         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3374
3375         /*
3376          * If no kvlist, this bonded device has been created through the
3377          * bonding API.
3378          */
3379         if (!kvlist)
3380                 return 0;
3381
3382         /* Parse MAC address for bonded device */
3383         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3384         if (arg_count == 1) {
3385                 struct ether_addr bond_mac;
3386
3387                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3388                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3389                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3390                                      name);
3391                         return -1;
3392                 }
3393
3394                 /* Set MAC address */
3395                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3396                         RTE_BOND_LOG(ERR,
3397                                      "Failed to set mac address on bonded device %s",
3398                                      name);
3399                         return -1;
3400                 }
3401         } else if (arg_count > 1) {
3402                 RTE_BOND_LOG(ERR,
3403                              "MAC address can be specified only once for bonded device %s",
3404                              name);
3405                 return -1;
3406         }
3407
3408         /* Parse/set balance mode transmit policy */
3409         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3410         if (arg_count == 1) {
3411                 uint8_t xmit_policy;
3412
3413                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3414                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3415                     0) {
3416                         RTE_BOND_LOG(INFO,
3417                                      "Invalid xmit policy specified for bonded device %s",
3418                                      name);
3419                         return -1;
3420                 }
3421
3422                 /* Set balance mode transmit policy */
3423                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3424                         RTE_BOND_LOG(ERR,
3425                                      "Failed to set balance xmit policy on bonded device %s",
3426                                      name);
3427                         return -1;
3428                 }
3429         } else if (arg_count > 1) {
3430                 RTE_BOND_LOG(ERR,
3431                              "Transmit policy can be specified only once for bonded device %s",
3432                              name);
3433                 return -1;
3434         }
3435
3436         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3437                 if (rte_kvargs_process(kvlist,
3438                                        PMD_BOND_AGG_MODE_KVARG,
3439                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3440                                        &agg_mode) != 0) {
3441                         RTE_BOND_LOG(ERR,
3442                                      "Failed to parse agg selection mode for bonded device %s",
3443                                      name);
3444                 }
3445                 if (internals->mode == BONDING_MODE_8023AD) {
3446                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3447                                         agg_mode);
3448                         if (ret < 0) {
3449                                 RTE_BOND_LOG(ERR,
3450                                         "Invalid args for agg selection set for bonded device %s",
3451                                         name);
3452                                 return -1;
3453                         }
3454                 }
3455         }
3456
3457         /* Parse/add slave ports to bonded device */
3458         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3459                 struct bond_ethdev_slave_ports slave_ports;
3460                 unsigned i;
3461
3462                 memset(&slave_ports, 0, sizeof(slave_ports));
3463
3464                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3465                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3466                         RTE_BOND_LOG(ERR,
3467                                      "Failed to parse slave ports for bonded device %s",
3468                                      name);
3469                         return -1;
3470                 }
3471
3472                 for (i = 0; i < slave_ports.slave_count; i++) {
3473                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3474                                 RTE_BOND_LOG(ERR,
3475                                              "Failed to add port %d as slave to bonded device %s",
3476                                              slave_ports.slaves[i], name);
3477                         }
3478                 }
3479
3480         } else {
3481                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3482                 return -1;
3483         }
3484
3485         /* Parse/set primary slave port id */
3486         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3487         if (arg_count == 1) {
3488                 uint16_t primary_slave_port_id;
3489
3490                 if (rte_kvargs_process(kvlist,
3491                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3492                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3493                                        &primary_slave_port_id) < 0) {
3494                         RTE_BOND_LOG(INFO,
3495                                      "Invalid primary slave port id specified for bonded device %s",
3496                                      name);
3497                         return -1;
3498                 }
3499
3500                 /* Set the primary slave port id */
3501                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3502                     != 0) {
3503                         RTE_BOND_LOG(ERR,
3504                                      "Failed to set primary slave port %d on bonded device %s",
3505                                      primary_slave_port_id, name);
3506                         return -1;
3507                 }
3508         } else if (arg_count > 1) {
3509                 RTE_BOND_LOG(INFO,
3510                              "Primary slave can be specified only once for bonded device %s",
3511                              name);
3512                 return -1;
3513         }
3514
3515         /* Parse link status monitor polling interval */
3516         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3517         if (arg_count == 1) {
3518                 uint32_t lsc_poll_interval_ms;
3519
3520                 if (rte_kvargs_process(kvlist,
3521                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3522                                        &bond_ethdev_parse_time_ms_kvarg,
3523                                        &lsc_poll_interval_ms) < 0) {
3524                         RTE_BOND_LOG(INFO,
3525                                      "Invalid lsc polling interval value specified for bonded"
3526                                      " device %s", name);
3527                         return -1;
3528                 }
3529
3530                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3531                     != 0) {
3532                         RTE_BOND_LOG(ERR,
3533                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3534                                      lsc_poll_interval_ms, name);
3535                         return -1;
3536                 }
3537         } else if (arg_count > 1) {
3538                 RTE_BOND_LOG(INFO,
3539                              "LSC polling interval can be specified only once for bonded"
3540                              " device %s", name);
3541                 return -1;
3542         }
3543
3544         /* Parse link up interrupt propagation delay */
3545         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3546         if (arg_count == 1) {
3547                 uint32_t link_up_delay_ms;
3548
3549                 if (rte_kvargs_process(kvlist,
3550                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3551                                        &bond_ethdev_parse_time_ms_kvarg,
3552                                        &link_up_delay_ms) < 0) {
3553                         RTE_BOND_LOG(INFO,
3554                                      "Invalid link up propagation delay value specified for"
3555                                      " bonded device %s", name);
3556                         return -1;
3557                 }
3558
3559                 /* Set the link up propagation delay */
3560                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3561                     != 0) {
3562                         RTE_BOND_LOG(ERR,
3563                                      "Failed to set link up propagation delay (%u ms) on bonded"
3564                                      " device %s", link_up_delay_ms, name);
3565                         return -1;
3566                 }
3567         } else if (arg_count > 1) {
3568                 RTE_BOND_LOG(INFO,
3569                              "Link up propagation delay can be specified only once for"
3570                              " bonded device %s", name);
3571                 return -1;
3572         }
3573
3574         /* Parse link down interrupt propagation delay */
3575         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3576         if (arg_count == 1) {
3577                 uint32_t link_down_delay_ms;
3578
3579                 if (rte_kvargs_process(kvlist,
3580                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3581                                        &bond_ethdev_parse_time_ms_kvarg,
3582                                        &link_down_delay_ms) < 0) {
3583                         RTE_BOND_LOG(INFO,
3584                                      "Invalid link down propagation delay value specified for"
3585                                      " bonded device %s", name);
3586                         return -1;
3587                 }
3588
3589                 /* Set the link down propagation delay */
3590                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3591                     != 0) {
3592                         RTE_BOND_LOG(ERR,
3593                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3594                                      link_down_delay_ms, name);
3595                         return -1;
3596                 }
3597         } else if (arg_count > 1) {
3598                 RTE_BOND_LOG(INFO,
3599                              "Link down propagation delay can be specified only once for bonded device %s",
3600                              name);
3601                 return -1;
3602         }
3603
3604         return 0;
3605 }
3606
3607 struct rte_vdev_driver pmd_bond_drv = {
3608         .probe = bond_probe,
3609         .remove = bond_remove,
3610 };
3611
3612 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3613 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3614
3615 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3616         "slave=<ifc> "
3617         "primary=<ifc> "
3618         "mode=[0-6] "
3619         "xmit_policy=[l2 | l23 | l34] "
3620         "agg_mode=[count | stable | bandwidth] "
3621         "socket_id=<int> "
3622         "mac=<mac addr> "
3623         "lsc_poll_period_ms=<int> "
3624         "up_delay=<int> "
3625         "down_delay=<int>");
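
/*
 * Example EAL usage (hypothetical PCI addresses), creating a bonded device
 * in balance mode (2) with two slaves:
 *   --vdev 'net_bonding0,mode=2,slave=0000:00:04.0,slave=0000:00:05.0'
 */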
3626
3627 int bond_logtype;
3628
3629 RTE_INIT(bond_init_log)
3630 {
3631         bond_logtype = rte_log_register("pmd.net.bond");
3632         if (bond_logtype >= 0)
3633                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3634 }