/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

static inline size_t
get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
                struct rte_vlan_hdr *vlan_hdr =
                        (struct rte_vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct rte_vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct rte_vlan_hdr);
                }
        }
        return vlan_offset;
}

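/*
 * Illustrative sketch only (the helper name example_locate_ipv4_hdr is
 * hypothetical and not part of the driver): typical caller pattern for
 * get_vlan_offset(). The helper above peels up to two VLAN tags, updates
 * *proto to the inner EtherType and returns the byte offset of the L3
 * header relative to the end of the Ethernet header.
 */
static inline struct rte_ipv4_hdr *
example_locate_ipv4_hdr(struct rte_mbuf *m)
{
        struct rte_ether_hdr *eth_hdr =
                rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

        if (proto != rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
                return NULL;

        return (struct rte_ipv4_hdr *)((char *)(eth_hdr + 1) + vlan_offset);
}
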
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_total = 0;
        uint16_t slave_count;
        uint16_t active_slave;
        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* The offset into *bufs advances as packets are received
                 * from successive slaves */
                num_rx_slave =
                        rte_eth_rx_burst(internals->active_slaves[active_slave],
                                         bd_rx_q->queue_id,
                                         bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
        return num_rx_total;
}

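/*
 * Illustrative sketch only (hypothetical application-side helper, not part
 * of the driver): a plain rte_eth_rx_burst() on the bonded port id is
 * dispatched to bond_ethdev_rx_burst() above, which polls the active
 * slaves round-robin starting from the remembered position.
 */
static inline uint16_t
example_poll_bonded_port(uint16_t bonded_port_id, uint16_t queue_id,
                struct rte_mbuf **pkts, uint16_t max_pkts)
{
        /* Dispatches through the bonding PMD's rx_pkt_burst callback. */
        return rte_eth_rx_burst(bonded_port_id, queue_id, pkts, max_pkts);
}
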
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

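/*
 * Illustrative sketch only (hypothetical helper): in active-backup mode all
 * traffic flows through current_primary_port, which an application selects
 * with the rte_eth_bond_primary_set() API declared in rte_eth_bond.h.
 */
static inline int
example_select_primary(uint16_t bonded_port_id, uint16_t slave_port_id)
{
        return rte_eth_bond_primary_set(bonded_port_id, slave_port_id);
}
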
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be =
                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

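/*
 * Illustrative sketch only (hypothetical helper): how the arguments of
 * is_lacp_packets() are typically derived from a received mbuf, mirroring
 * the logic of bond_ethdev_rx_burst_8023ad() further below.
 */
static inline uint8_t
example_mbuf_is_lacp(struct rte_mbuf *m)
{
        struct rte_ether_hdr *hdr =
                rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
        uint8_t subtype =
                ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

        return is_lacp_packets(hdr->ether_type, subtype, m);
}
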
/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = bond_dev->data->dev_private;

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities do not allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = bond_dev->data->dev_private;
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that all slaves in the bonding device support flow director */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

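/*
 * Illustrative sketch only (hypothetical application-side helper): the
 * check above is exercised through the public API from
 * rte_eth_bond_8023ad.h, called on the stopped bonded device to request a
 * dedicated hardware queue pair for LACP control traffic.
 */
static inline int
example_enable_hw_lacp_filter(uint16_t bonded_port_id)
{
        /* Fails if any slave cannot validate the flow rule or lacks queues. */
        return rte_eth_bond_8023ad_dedicated_queues_enable(bonded_port_id);
}
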
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

        struct rte_flow_error error;
        struct bond_dev_private *internals = bond_dev->data->dev_private;
        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

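/*
 * Illustrative sketch only (hypothetical helper, not the driver's actual
 * teardown path): a flow created above would be released with the matching
 * rte_flow_destroy() call, e.g. when dedicated queues are disabled or the
 * slave is removed.
 */
static inline int
example_8023ad_flow_unset(struct bond_dev_private *internals,
                uint16_t slave_port)
{
        struct rte_flow_error error;
        struct rte_flow *flow =
                internals->mode4.dedicated_queues.flow[slave_port];

        if (flow == NULL)
                return 0;

        internals->mode4.dedicated_queues.flow[slave_port] = NULL;
        return rte_flow_destroy(slave_port, flow, &error);
}
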
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint16_t active_slave;
        uint16_t i;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Read packets from this slave */
                num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
                                                bd_rx_q->queue_id,
                                                bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;

                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        /* 2-D array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);


        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        /*
         * Populate the per-slave mbuf arrays with the packets to be sent,
         * selecting the output slave using a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }


        /* Send packet burst on each slave device */
        for (i = 0; i < dist_slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails, move packets to the end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

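/*
 * Illustrative sketch only (hypothetical application-side helper): because
 * the tx burst functions in this file move untransmitted mbufs to the tail
 * of bufs, a caller can free (or later retry) exactly the packets that did
 * not make it out.
 */
static inline void
example_tx_and_free_leftovers(uint16_t bonded_port_id, uint16_t queue_id,
                struct rte_mbuf **bufs, uint16_t nb_bufs)
{
        uint16_t sent = rte_eth_tx_burst(bonded_port_id, queue_id,
                        bufs, nb_bufs);

        /* bufs[sent..nb_bufs-1] now hold the untransmitted packets. */
        while (sent < nb_bufs)
                rte_pktmbuf_free(bufs[sent++]);
}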

static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_eth_dev *bonded_eth_dev =
                                        &rte_eth_devices[internals->port_id];
        struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
        struct rte_ether_hdr *hdr;

        const uint16_t ether_type_slow_be =
                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t subtype;
        uint16_t i;
        uint16_t j;
        uint16_t k;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* If the packet type is known and not pure L2, it
                         * cannot be a slow protocol frame, so skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow
                         * packet, if the slave is not in collecting state, or
                         * if the bonding interface is not in promiscuous mode
                         * and the destination address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting ||
                                (!promisc &&
                                 !rte_is_multicast_ether_addr(&hdr->d_addr) &&
                                 !rte_is_same_ether_addr(bond_mac,
                                                     &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
        switch (arp_op) {
        case RTE_ARP_OP_REQUEST:
                strlcpy(buf, "ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REPLY:
                strlcpy(buf, "ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_REVREQUEST:
                strlcpy(buf, "Reverse ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REVREPLY:
                strlcpy(buf, "Reverse ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_INVREQUEST:
                strlcpy(buf, "Peer Identify Request", buf_len);
                return;
        case RTE_ARP_OP_INVREPLY:
                strlcpy(buf, "Peer Identify Reply", buf_len);
                return;
        default:
                break;
        }
        strlcpy(buf, "Unknown", buf_len);
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Update the RX or TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update its stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;

}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
                src_ip,                                                 \
                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info,
        struct rte_ether_hdr *eth_h, uint16_t port,
        uint32_t __attribute__((unused)) *burstnumber)
{
        struct rte_ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct rte_arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
                ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
                arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
                                ArpOp, sizeof(ArpOp));
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;
        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Distribute the packets round-robin across the per-slave mbuf arrays */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* Increment the current slave index so the next call to tx burst
         * starts on the next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* If tx burst fails, move packets to the end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                       &slave_bufs[i][num_tx_slave],
                                       tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct rte_ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

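/*
 * Illustrative sketch only (hypothetical helper): how the helpers above are
 * combined by the burst_xmit_*_hash() functions below. The 32-bit hash is
 * folded down before the modulo so that high-order entropy (e.g. from the
 * IP addresses) still influences the selected slave index.
 */
static inline uint16_t
example_fold_hash_to_slave(uint32_t l3hash, uint32_t l4hash,
                uint16_t slave_count)
{
        uint32_t hash = l3hash ^ l4hash;

        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count; /* slave_count must be non-zero */
}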

void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct rte_udp_hdr *udp_hdr;
        struct rte_tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & RTE_IPV4_HDR_IHL_MASK) *
                                        RTE_IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct rte_tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct rte_udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)udp_hdr + sizeof(*udp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

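/*
 * Illustrative sketch only (hypothetical application-side helper): which of
 * the three burst_xmit_*_hash() callbacks above gets installed follows the
 * transmit policy chosen through the public API from rte_eth_bond.h.
 */
static inline int
example_use_l34_policy(uint16_t bonded_port_id)
{
        return rte_eth_bond_xmit_policy_set(bonded_port_id,
                        BALANCE_XMIT_POLICY_LAYER34);
}
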
struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

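/*
 * Worked example (illustrative): for a 10G slave, link_speed is 10000
 * (Mbps), so link_bwg = 10000 * 1000000 / 8 = 1.25e9 bytes/s, scaled by
 * (update_idx + 1) * REORDER_PERIOD_MS. With update_idx = 0 and
 * load = 1000000 bytes sent since the last update:
 *
 *   link_bwg         = 1.25e9 * 1 * 10            = 1.25e10
 *   bwg_left_int     = (1.25e10 - 1e9) / 1.25e10  = 0
 *   bwg_left_rem     = (1.25e10 - 1e9) % 1.25e10  = 1.15e10
 *
 * The integer part is usually 0, so bandwidth_cmp() effectively orders the
 * slaves by the remainder, i.e. by the unused share of each link.
 */
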
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint16_t slave_id;
        uint16_t i;

        internals->slave_update_idx++;


        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct rte_ether_hdr *ether_hdr;
        struct rte_ether_addr primary_slave_addr;
        struct rte_ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);


        rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j],
                                                struct rte_ether_hdr *);
                        if (rte_is_same_ether_addr(&ether_hdr->s_addr,
                                                        &primary_slave_addr))
                                rte_ether_addr_copy(&active_slave_addr,
                                                &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional
         * buffer for packets sent with the TLB policy. In the worst case
         * every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search the tx buffer for ARP packets and hand them to the ALB logic */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_BOND_LOG(ERR,
                                                     "Failed to allocate ARP packet from pool");
                                        continue;
                                }
                                pkt_size = sizeof(struct rte_ether_hdr) +
                                        sizeof(struct rte_arp_hdr) +
                                        client_info->vlan_count *
                                        sizeof(struct rte_vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        /*
         * Populate the per-slave mbuf arrays with the packets to be sent,
         * selecting the output slave using a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails, move packets to the end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

1288 static uint16_t
1289 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1290                 uint16_t nb_bufs)
1291 {
1292         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1293         struct bond_dev_private *internals = bd_tx_q->dev_private;
1294
1295         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1296         uint16_t slave_count;
1297
1298         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1299         uint16_t dist_slave_count;
1300
1301         /* 2-D array to sort mbufs for transmission on each slave into */
1302         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1303         /* Number of mbufs for transmission on each slave */
1304         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1305         /* Mapping array generated by hash function to map mbufs to slaves */
1306         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1307
1308         uint16_t slave_tx_count;
1309         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1310
1311         uint16_t i;
1312
1313         /* Copy slave list to protect against slave up/down changes during tx
1314          * bursting */
1315         slave_count = internals->active_slave_count;
1316         if (unlikely(slave_count < 1))
1317                 return 0;
1318
1319         memcpy(slave_port_ids, internals->active_slaves,
1320                         sizeof(slave_port_ids[0]) * slave_count);
1321
1322         /* Check for LACP control packets and send if available */
1323         for (i = 0; i < slave_count; i++) {
1324                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1325                 struct rte_mbuf *ctrl_pkt = NULL;
1326
1327                 if (likely(rte_ring_empty(port->tx_ring)))
1328                         continue;
1329
1330                 if (rte_ring_dequeue(port->tx_ring,
1331                                      (void **)&ctrl_pkt) != -ENOENT) {
1332                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1333                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1334                         /*
1335                          * re-enqueue LAG control plane packets to buffering
1336                          * ring if transmission fails so the packet isn't lost.
1337                          */
1338                         if (slave_tx_count != 1)
1339                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1340                 }
1341         }
1342
1343         if (unlikely(nb_bufs == 0))
1344                 return 0;
1345
1346         dist_slave_count = 0;
1347         for (i = 0; i < slave_count; i++) {
1348                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1349
1350                 if (ACTOR_STATE(port, DISTRIBUTING))
1351                         dist_slave_port_ids[dist_slave_count++] =
1352                                         slave_port_ids[i];
1353         }
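        /*
         * Per IEEE 802.1AX, only ports whose mux machine has reached the
         * DISTRIBUTING state may transmit data frames, so data packets are
         * hashed only across this subset of the active slaves.
         */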
1354
1355         if (likely(dist_slave_count > 0)) {
1356
1357                 /*
1358                  * Populate slaves mbuf with the packets which are to be sent
1359                  * on it, selecting output slave using hash based on xmit policy
1360                  */
1361                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1362                                 bufs_slave_port_idxs);
1363
1364                 for (i = 0; i < nb_bufs; i++) {
1365                         /*
1366                          * Populate slave mbuf arrays with mbufs for that
1367                          * slave
1368                          */
1369                         uint16_t slave_idx = bufs_slave_port_idxs[i];
1370
1371                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1372                                         bufs[i];
1373                 }
1374
1375
1376                 /* Send packet burst on each slave device */
1377                 for (i = 0; i < dist_slave_count; i++) {
1378                         if (slave_nb_bufs[i] == 0)
1379                                 continue;
1380
1381                         slave_tx_count = rte_eth_tx_burst(
1382                                         dist_slave_port_ids[i],
1383                                         bd_tx_q->queue_id, slave_bufs[i],
1384                                         slave_nb_bufs[i]);
1385
1386                         total_tx_count += slave_tx_count;
1387
1388                         /* If tx burst fails, move the unsent packets to the end of bufs */
1389                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1390                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1391                                                 slave_tx_count;
1392                                 total_tx_fail_count += slave_tx_fail_count;
1393
1394                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1395                                        &slave_bufs[i][slave_tx_count],
1396                                        slave_tx_fail_count * sizeof(bufs[0]));
1397                         }
1398                 }
1399         }
1400
1401         return total_tx_count;
1402 }
1403
1404 static uint16_t
1405 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1406                 uint16_t nb_pkts)
1407 {
1408         struct bond_dev_private *internals;
1409         struct bond_tx_queue *bd_tx_q;
1410
1411         uint16_t slaves[RTE_MAX_ETHPORTS];
1412         uint8_t tx_failed_flag = 0;
1413         uint16_t num_of_slaves;
1414
1415         uint16_t max_nb_of_tx_pkts = 0;
1416
1417         int slave_tx_total[RTE_MAX_ETHPORTS];
1418         int i, most_successful_tx_slave = -1;
1419
1420         bd_tx_q = (struct bond_tx_queue *)queue;
1421         internals = bd_tx_q->dev_private;
1422
1423         /* Copy slave list to protect against slave up/down changes during tx
1424          * bursting */
1425         num_of_slaves = internals->active_slave_count;
1426         memcpy(slaves, internals->active_slaves,
1427                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1428
1429         if (num_of_slaves < 1)
1430                 return 0;
1431
1432         /* Increment reference count on mbufs */
1433         for (i = 0; i < nb_pkts; i++)
1434                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1435
1436         /* Transmit burst on each active slave */
1437         for (i = 0; i < num_of_slaves; i++) {
1438                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1439                                         bufs, nb_pkts);
1440
1441                 if (unlikely(slave_tx_total[i] < nb_pkts))
1442                         tx_failed_flag = 1;
1443
1444                 /* record the count and index of the slave which transmitted
1445                  * the most packets */
1446                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1447                         max_nb_of_tx_pkts = slave_tx_total[i];
1448                         most_successful_tx_slave = i;
1449                 }
1450         }
1451
1452         /* if slaves fail to transmit packets from burst, the calling application
1453          * is not expected to know about multiple references to packets so we must
1454          * handle failures of all packets except those of the most successful slave
1455          */
1456         if (unlikely(tx_failed_flag))
1457                 for (i = 0; i < num_of_slaves; i++)
1458                         if (i != most_successful_tx_slave)
1459                                 while (slave_tx_total[i] < nb_pkts)
1460                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1461
1462         return max_nb_of_tx_pkts;
1463 }
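/*
 * Reference counting in the broadcast handler above: each mbuf refcnt is
 * bumped by (num_of_slaves - 1) so that the caller's single reference plus
 * the increments cover one free per transmitting slave.  For every slave
 * other than the most successful one the references of its unsent packets
 * are dropped explicitly; the unsent tail of the most successful slave is
 * reported back to the caller through the return value.
 */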
1464
1465 static void
1466 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1467 {
1468         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1469
1470         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1471                 /**
1472                  * If in mode 4 then save the link properties of the first
1473                  * slave; all subsequent slaves must match these properties
1474                  */
1475                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1476
1477                 bond_link->link_autoneg = slave_link->link_autoneg;
1478                 bond_link->link_duplex = slave_link->link_duplex;
1479                 bond_link->link_speed = slave_link->link_speed;
1480         } else {
1481                 /**
1482                  * In any other mode the link properties are reset to their
1483                  * default values: autonegotiation enabled and full duplex
1484                  */
1485                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1486                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1487         }
1488 }
1489
1490 static int
1491 link_properties_valid(struct rte_eth_dev *ethdev,
1492                 struct rte_eth_link *slave_link)
1493 {
1494         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1495
1496         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1497                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1498
1499                 if (bond_link->link_duplex != slave_link->link_duplex ||
1500                         bond_link->link_autoneg != slave_link->link_autoneg ||
1501                         bond_link->link_speed != slave_link->link_speed)
1502                         return -1;
1503         }
1504
1505         return 0;
1506 }
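/*
 * In 802.3ad mode all members of an aggregation group must share the same
 * speed and duplex, so link_properties_valid() compares a candidate
 * slave's link against the properties recorded from the first slave by
 * link_properties_set().
 */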
1507
1508 int
1509 mac_address_get(struct rte_eth_dev *eth_dev,
1510                 struct rte_ether_addr *dst_mac_addr)
1511 {
1512         struct rte_ether_addr *mac_addr;
1513
1514         if (eth_dev == NULL) {
1515                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1516                 return -1;
1517         }
1518
1519         if (dst_mac_addr == NULL) {
1520                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1521                 return -1;
1522         }
1523
1524         mac_addr = eth_dev->data->mac_addrs;
1525
1526         rte_ether_addr_copy(mac_addr, dst_mac_addr);
1527         return 0;
1528 }
1529
1530 int
1531 mac_address_set(struct rte_eth_dev *eth_dev,
1532                 struct rte_ether_addr *new_mac_addr)
1533 {
1534         struct rte_ether_addr *mac_addr;
1535
1536         if (eth_dev == NULL) {
1537                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1538                 return -1;
1539         }
1540
1541         if (new_mac_addr == NULL) {
1542                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1543                 return -1;
1544         }
1545
1546         mac_addr = eth_dev->data->mac_addrs;
1547
1548         /* If new MAC is different from current MAC then update */
1549         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1550                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1551
1552         return 0;
1553 }
1554
1555 static const struct rte_ether_addr null_mac_addr;
1556
1557 /*
1558  * Add additional MAC addresses to the slave
1559  */
1560 int
1561 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1562                 uint16_t slave_port_id)
1563 {
1564         int i, ret;
1565         struct rte_ether_addr *mac_addr;
1566
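        /*
         * Entry 0 of mac_addrs holds the bonded device's primary MAC address,
         * which is programmed via rte_eth_dev_default_mac_addr_set() elsewhere;
         * only the additional entries are replicated here.  A zeroed entry
         * marks the end of the configured addresses.
         */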
1567         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1568                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1569                 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1570                         break;
1571
1572                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1573                 if (ret < 0) {
1574                         /* rollback */
1575                         for (i--; i > 0; i--)
1576                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1577                                         &bonded_eth_dev->data->mac_addrs[i]);
1578                         return ret;
1579                 }
1580         }
1581
1582         return 0;
1583 }
1584
1585 /*
1586  * Remove additional MAC addresses from the slave
1587  */
1588 int
1589 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1590                 uint16_t slave_port_id)
1591 {
1592         int i, rc, ret;
1593         struct rte_ether_addr *mac_addr;
1594
1595         rc = 0;
1596         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1597                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1598                 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1599                         break;
1600
1601                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1602                 /* save only the first error */
1603                 if (ret < 0 && rc == 0)
1604                         rc = ret;
1605         }
1606
1607         return rc;
1608 }
1609
1610 int
1611 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1612 {
1613         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1614         int i;
1615
1616         /* Update slave devices MAC addresses */
1617         if (internals->slave_count < 1)
1618                 return -1;
1619
1620         switch (internals->mode) {
1621         case BONDING_MODE_ROUND_ROBIN:
1622         case BONDING_MODE_BALANCE:
1623         case BONDING_MODE_BROADCAST:
1624                 for (i = 0; i < internals->slave_count; i++) {
1625                         if (rte_eth_dev_default_mac_addr_set(
1626                                         internals->slaves[i].port_id,
1627                                         bonded_eth_dev->data->mac_addrs)) {
1628                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1629                                                 internals->slaves[i].port_id);
1630                                 return -1;
1631                         }
1632                 }
1633                 break;
1634         case BONDING_MODE_8023AD:
1635                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1636                 break;
1637         case BONDING_MODE_ACTIVE_BACKUP:
1638         case BONDING_MODE_TLB:
1639         case BONDING_MODE_ALB:
1640         default:
1641                 for (i = 0; i < internals->slave_count; i++) {
1642                         if (internals->slaves[i].port_id ==
1643                                         internals->current_primary_port) {
1644                                 if (rte_eth_dev_default_mac_addr_set(
1645                                                 internals->current_primary_port,
1646                                                 bonded_eth_dev->data->mac_addrs)) {
1647                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1648                                                         internals->current_primary_port);
1649                                         return -1;
1650                                 }
1651                         } else {
1652                                 if (rte_eth_dev_default_mac_addr_set(
1653                                                 internals->slaves[i].port_id,
1654                                                 &internals->slaves[i].persisted_mac_addr)) {
1655                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1656                                                         internals->slaves[i].port_id);
1657                                         return -1;
1658                                 }
1659                         }
1660                 }
1661         }
1662
1663         return 0;
1664 }
1665
1666 int
1667 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1668 {
1669         struct bond_dev_private *internals;
1670
1671         internals = eth_dev->data->dev_private;
1672
1673         switch (mode) {
1674         case BONDING_MODE_ROUND_ROBIN:
1675                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1676                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1677                 break;
1678         case BONDING_MODE_ACTIVE_BACKUP:
1679                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1680                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1681                 break;
1682         case BONDING_MODE_BALANCE:
1683                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1684                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1685                 break;
1686         case BONDING_MODE_BROADCAST:
1687                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1688                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1689                 break;
1690         case BONDING_MODE_8023AD:
1691                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1692                         return -1;
1693
1694                 if (internals->mode4.dedicated_queues.enabled == 0) {
1695                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1696                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1697                         RTE_BOND_LOG(WARNING,
1698                                 "Using mode 4, it is necessary to do TX burst "
1699                                 "and RX burst at least every 100ms.");
1700                 } else {
1701                         /* Use flow director's optimization */
1702                         eth_dev->rx_pkt_burst =
1703                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1704                         eth_dev->tx_pkt_burst =
1705                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1706                 }
1707                 break;
1708         case BONDING_MODE_TLB:
1709                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1710                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1711                 break;
1712         case BONDING_MODE_ALB:
1713                 if (bond_mode_alb_enable(eth_dev) != 0)
1714                         return -1;
1715
1716                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1717                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1718                 break;
1719         default:
1720                 return -1;
1721         }
1722
1723         internals->mode = mode;
1724
1725         return 0;
1726 }
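/*
 * Usage sketch (illustrative only): applications normally select the mode
 * through the public API, which validates the port and in turn calls
 * bond_ethdev_mode_set() to install the burst handlers above, e.g.:
 *
 *     if (rte_eth_bond_mode_set(bond_port_id, BONDING_MODE_8023AD) != 0)
 *         rte_exit(EXIT_FAILURE, "failed to set bonding mode\n");
 */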
1727
1728
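/*
 * Set up the dedicated "slow" Rx/Tx queue pair used for LACPDUs when
 * mode 4 dedicated queues are enabled.  These queues are appended after
 * the application's data queues: bond_ethdev_start() records their ids as
 * nb_rx_queues/nb_tx_queues before each slave is (re)configured with one
 * extra queue in each direction (see slave_configure()).
 */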
1729 static int
1730 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1731                 struct rte_eth_dev *slave_eth_dev)
1732 {
1733         int errval = 0;
1734         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1735         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1736
1737         if (port->slow_pool == NULL) {
1738                 char mem_name[256];
1739                 int slave_id = slave_eth_dev->data->port_id;
1740
1741                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1742                                 slave_id);
1743                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1744                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1745                         slave_eth_dev->data->numa_node);
1746
1747                 /* Any memory allocation failure in initialization is critical because
1748                  * the resources can't be freed, so reinitialization is impossible. */
1749                 if (port->slow_pool == NULL) {
1750                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1751                                 slave_id, mem_name, rte_strerror(rte_errno));
1752                 }
1753         }
1754
1755         if (internals->mode4.dedicated_queues.enabled == 1) {
1756                 /* Configure slow Rx queue */
1757
1758                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1759                                 internals->mode4.dedicated_queues.rx_qid, 128,
1760                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1761                                 NULL, port->slow_pool);
1762                 if (errval != 0) {
1763                         RTE_BOND_LOG(ERR,
1764                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1765                                         slave_eth_dev->data->port_id,
1766                                         internals->mode4.dedicated_queues.rx_qid,
1767                                         errval);
1768                         return errval;
1769                 }
1770
1771                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1772                                 internals->mode4.dedicated_queues.tx_qid, 512,
1773                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1774                                 NULL);
1775                 if (errval != 0) {
1776                         RTE_BOND_LOG(ERR,
1777                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1778                                 slave_eth_dev->data->port_id,
1779                                 internals->mode4.dedicated_queues.tx_qid,
1780                                 errval);
1781                         return errval;
1782                 }
1783         }
1784         return 0;
1785 }
1786
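/*
 * (Re)configure a slave port so that it mirrors the bonded device's
 * configuration.  The ethdev API requires a port to be stopped before
 * rte_eth_dev_configure() and queue setup may be called, hence the
 * stop/configure/queue-setup/start sequence below.
 */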
1787 int
1788 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1789                 struct rte_eth_dev *slave_eth_dev)
1790 {
1791         struct bond_rx_queue *bd_rx_q;
1792         struct bond_tx_queue *bd_tx_q;
1793         uint16_t nb_rx_queues;
1794         uint16_t nb_tx_queues;
1795
1796         int errval;
1797         uint16_t q_id;
1798         struct rte_flow_error flow_error;
1799
1800         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1801
1802         /* Stop slave */
1803         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1804
1805         /* Enable interrupts on slave device if supported */
1806         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1807                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1808
1809         /* If RSS is enabled for bonding, try to enable it for slaves  */
1810         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1811                 if (internals->rss_key_len != 0) {
1812                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1813                                         internals->rss_key_len;
1814                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1815                                         internals->rss_key;
1816                 } else {
1817                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1818                 }
1819
1820                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1821                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1822                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1823                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1824         }
1825
1826         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1827                         DEV_RX_OFFLOAD_VLAN_FILTER)
1828                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1829                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1830         else
1831                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1832                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1833
1834         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1835         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1836
1837         if (internals->mode == BONDING_MODE_8023AD) {
1838                 if (internals->mode4.dedicated_queues.enabled == 1) {
1839                         nb_rx_queues++;
1840                         nb_tx_queues++;
1841                 }
1842         }
1843
1844         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1845                                      bonded_eth_dev->data->mtu);
1846         if (errval != 0 && errval != -ENOTSUP) {
1847                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1848                                 slave_eth_dev->data->port_id, errval);
1849                 return errval;
1850         }
1851
1852         /* Configure device */
1853         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1854                         nb_rx_queues, nb_tx_queues,
1855                         &(slave_eth_dev->data->dev_conf));
1856         if (errval != 0) {
1857                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1858                                 slave_eth_dev->data->port_id, errval);
1859                 return errval;
1860         }
1861
1862         /* Setup Rx Queues */
1863         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1864                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1865
1866                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1867                                 bd_rx_q->nb_rx_desc,
1868                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1869                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1870                 if (errval != 0) {
1871                         RTE_BOND_LOG(ERR,
1872                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1873                                         slave_eth_dev->data->port_id, q_id, errval);
1874                         return errval;
1875                 }
1876         }
1877
1878         /* Setup Tx Queues */
1879         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1880                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1881
1882                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1883                                 bd_tx_q->nb_tx_desc,
1884                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1885                                 &bd_tx_q->tx_conf);
1886                 if (errval != 0) {
1887                         RTE_BOND_LOG(ERR,
1888                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1889                                 slave_eth_dev->data->port_id, q_id, errval);
1890                         return errval;
1891                 }
1892         }
1893
1894         if (internals->mode == BONDING_MODE_8023AD &&
1895                         internals->mode4.dedicated_queues.enabled == 1) {
1896                 errval = slave_configure_slow_queue(bonded_eth_dev,
1897                                 slave_eth_dev);
1898                 if (errval != 0)
1899                         return errval;
1899
1900                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1901                                 slave_eth_dev->data->port_id) != 0) {
1902                         RTE_BOND_LOG(ERR,
1903                                 "bond_ethdev_8023ad_flow_verify failed: port=%d",
1904                                 slave_eth_dev->data->port_id);
1905                         return -1;
1906                 }
1907
1908                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1909                         rte_flow_destroy(slave_eth_dev->data->port_id,
1910                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1911                                         &flow_error);
1912
1913                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1914                                 slave_eth_dev->data->port_id);
1915         }
1916
1917         /* Start device */
1918         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1919         if (errval != 0) {
1920                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1921                                 slave_eth_dev->data->port_id, errval);
1922                 return -1;
1923         }
1924
1925         /* If RSS is enabled for bonding, synchronize RETA */
1926         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1927                 int i;
1928                 struct bond_dev_private *internals;
1929
1930                 internals = bonded_eth_dev->data->dev_private;
1931
1932                 for (i = 0; i < internals->slave_count; i++) {
1933                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1934                                 errval = rte_eth_dev_rss_reta_update(
1935                                                 slave_eth_dev->data->port_id,
1936                                                 &internals->reta_conf[0],
1937                                                 internals->slaves[i].reta_size);
1938                                 if (errval != 0) {
1939                                         RTE_BOND_LOG(WARNING,
1940                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1941                                                      " RSS Configuration for bonding may be inconsistent.",
1942                                                      slave_eth_dev->data->port_id, errval);
1943                                 }
1944                                 break;
1945                         }
1946                 }
1947         }
1948
1949         /* If lsc interrupt is set, check initial slave's link status */
1950         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1951                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1952                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1953                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1954                         NULL);
1955         }
1956
1957         return 0;
1958 }
1959
1960 void
1961 slave_remove(struct bond_dev_private *internals,
1962                 struct rte_eth_dev *slave_eth_dev)
1963 {
1964         uint16_t i;
1965
1966         for (i = 0; i < internals->slave_count; i++)
1967                 if (internals->slaves[i].port_id ==
1968                                 slave_eth_dev->data->port_id)
1969                         break;
1970
1971         if (i < (internals->slave_count - 1)) {
1972                 struct rte_flow *flow;
1973
1974                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1975                                 sizeof(internals->slaves[0]) *
1976                                 (internals->slave_count - i - 1));
1977                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1978                         memmove(&flow->flows[i], &flow->flows[i + 1],
1979                                 sizeof(flow->flows[0]) *
1980                                 (internals->slave_count - i - 1));
1981                         flow->flows[internals->slave_count - 1] = NULL;
1982                 }
1983         }
1984
1985         internals->slave_count--;
1986
1987         /* force reconfiguration of slave interfaces */
1988         _rte_eth_dev_reset(slave_eth_dev);
1989 }
1990
1991 static void
1992 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1993
1994 void
1995 slave_add(struct bond_dev_private *internals,
1996                 struct rte_eth_dev *slave_eth_dev)
1997 {
1998         struct bond_slave_details *slave_details =
1999                         &internals->slaves[internals->slave_count];
2000
2001         slave_details->port_id = slave_eth_dev->data->port_id;
2002         slave_details->last_link_status = 0;
2003
2004         /* Mark slave devices that don't support interrupts so we can
2005          * compensate when we start the bond
2006          */
2007         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2008                 slave_details->link_status_poll_enabled = 1;
2009         }
2010
2011         slave_details->link_status_wait_to_complete = 0;
2012         /* save the slave's original MAC address so it can be restored later */
2013         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2014                         sizeof(struct rte_ether_addr));
2015 }
2016
2017 void
2018 bond_ethdev_primary_set(struct bond_dev_private *internals,
2019                 uint16_t slave_port_id)
2020 {
2021         int i;
2022
2023         if (internals->active_slave_count < 1)
2024                 internals->current_primary_port = slave_port_id;
2025         else
2026                 /* Search bonded device slave ports for new proposed primary port */
2027                 for (i = 0; i < internals->active_slave_count; i++) {
2028                         if (internals->active_slaves[i] == slave_port_id)
2029                                 internals->current_primary_port = slave_port_id;
2030                 }
2031 }
2032
2033 static void
2034 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2035
2036 static int
2037 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2038 {
2039         struct bond_dev_private *internals;
2040         int i;
2041
2042         /* slave eth dev will be started by bonded device */
2043         if (check_for_bonded_ethdev(eth_dev)) {
2044                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2045                                 eth_dev->data->port_id);
2046                 return -1;
2047         }
2048
2049         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2050         eth_dev->data->dev_started = 1;
2051
2052         internals = eth_dev->data->dev_private;
2053
2054         if (internals->slave_count == 0) {
2055                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2056                 goto out_err;
2057         }
2058
2059         if (internals->user_defined_mac == 0) {
2060                 struct rte_ether_addr *new_mac_addr = NULL;
2061
2062                 for (i = 0; i < internals->slave_count; i++)
2063                         if (internals->slaves[i].port_id == internals->primary_port)
2064                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2065
2066                 if (new_mac_addr == NULL)
2067                         goto out_err;
2068
2069                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2070                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2071                                         eth_dev->data->port_id);
2072                         goto out_err;
2073                 }
2074         }
2075
2076         /* If bonded device is configured in promiscuous mode then re-apply config */
2077         if (internals->promiscuous_en)
2078                 bond_ethdev_promiscuous_enable(eth_dev);
2079
2080         if (internals->mode == BONDING_MODE_8023AD) {
2081                 if (internals->mode4.dedicated_queues.enabled == 1) {
2082                         internals->mode4.dedicated_queues.rx_qid =
2083                                         eth_dev->data->nb_rx_queues;
2084                         internals->mode4.dedicated_queues.tx_qid =
2085                                         eth_dev->data->nb_tx_queues;
2086                 }
2087         }
2088
2089
2090         /* Reconfigure each slave device if starting bonded device */
2091         for (i = 0; i < internals->slave_count; i++) {
2092                 struct rte_eth_dev *slave_ethdev =
2093                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2094                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2095                         RTE_BOND_LOG(ERR,
2096                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2097                                 eth_dev->data->port_id,
2098                                 internals->slaves[i].port_id);
2099                         goto out_err;
2100                 }
2101                 /* We will need to poll for link status if any slave doesn't
2102                  * support interrupts
2103                  */
2104                 if (internals->slaves[i].link_status_poll_enabled)
2105                         internals->link_status_polling_enabled = 1;
2106         }
2107
2108         /* start polling if needed */
2109         if (internals->link_status_polling_enabled) {
2110                 rte_eal_alarm_set(
2111                         internals->link_status_polling_interval_ms * 1000,
2112                         bond_ethdev_slave_link_status_change_monitor,
2113                         (void *)&rte_eth_devices[internals->port_id]);
2114         }
2115
2116         /* Update all slave devices MACs*/
2117         if (mac_address_slaves_update(eth_dev) != 0)
2118                 goto out_err;
2119
2120         if (internals->user_defined_primary_port)
2121                 bond_ethdev_primary_set(internals, internals->primary_port);
2122
2123         if (internals->mode == BONDING_MODE_8023AD)
2124                 bond_mode_8023ad_start(eth_dev);
2125
2126         if (internals->mode == BONDING_MODE_TLB ||
2127                         internals->mode == BONDING_MODE_ALB)
2128                 bond_tlb_enable(internals);
2129
2130         return 0;
2131
2132 out_err:
2133         eth_dev->data->dev_started = 0;
2134         return -1;
2135 }
2136
2137 static void
2138 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2139 {
2140         uint16_t i;
2141
2142         if (dev->data->rx_queues != NULL) {
2143                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2144                         rte_free(dev->data->rx_queues[i]);
2145                         dev->data->rx_queues[i] = NULL;
2146                 }
2147                 dev->data->nb_rx_queues = 0;
2148         }
2149
2150         if (dev->data->tx_queues != NULL) {
2151                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2152                         rte_free(dev->data->tx_queues[i]);
2153                         dev->data->tx_queues[i] = NULL;
2154                 }
2155                 dev->data->nb_tx_queues = 0;
2156         }
2157 }
2158
2159 void
2160 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2161 {
2162         struct bond_dev_private *internals = eth_dev->data->dev_private;
2163         uint16_t i;
2164
2165         if (internals->mode == BONDING_MODE_8023AD) {
2166                 struct port *port;
2167                 void *pkt = NULL;
2168
2169                 bond_mode_8023ad_stop(eth_dev);
2170
2171                 /* Discard all messages to/from mode 4 state machines */
2172                 for (i = 0; i < internals->active_slave_count; i++) {
2173                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2174
2175                         RTE_ASSERT(port->rx_ring != NULL);
2176                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2177                                 rte_pktmbuf_free(pkt);
2178
2179                         RTE_ASSERT(port->tx_ring != NULL);
2180                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2181                                 rte_pktmbuf_free(pkt);
2182                 }
2183         }
2184
2185         if (internals->mode == BONDING_MODE_TLB ||
2186                         internals->mode == BONDING_MODE_ALB) {
2187                 bond_tlb_disable(internals);
2188                 for (i = 0; i < internals->active_slave_count; i++)
2189                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2190         }
2191
2192         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2193         eth_dev->data->dev_started = 0;
2194
2195         internals->link_status_polling_enabled = 0;
2196         for (i = 0; i < internals->slave_count; i++) {
2197                 uint16_t slave_id = internals->slaves[i].port_id;
2198                 if (find_slave_by_id(internals->active_slaves,
2199                                 internals->active_slave_count, slave_id) !=
2200                                                 internals->active_slave_count) {
2201                         internals->slaves[i].last_link_status = 0;
2202                         rte_eth_dev_stop(slave_id);
2203                         deactivate_slave(eth_dev, slave_id);
2204                 }
2205         }
2206 }
2207
2208 void
2209 bond_ethdev_close(struct rte_eth_dev *dev)
2210 {
2211         struct bond_dev_private *internals = dev->data->dev_private;
2212         uint16_t bond_port_id = internals->port_id;
2213         int skipped = 0;
2214         struct rte_flow_error ferror;
2215
2216         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2217         while (internals->slave_count != skipped) {
2218                 uint16_t port_id = internals->slaves[skipped].port_id;
2219
2220                 rte_eth_dev_stop(port_id);
2221
2222                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2223                         RTE_BOND_LOG(ERR,
2224                                      "Failed to remove port %d from bonded device %s",
2225                                      port_id, dev->device->name);
2226                         skipped++;
2227                 }
2228         }
2229         bond_flow_ops.flush(dev, &ferror);
2230         bond_ethdev_free_queues(dev);
2231         rte_bitmap_reset(internals->vlan_filter_bmp);
2232 }
2233
2234 /* forward declaration */
2235 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2236
2237 static void
2238 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2239 {
2240         struct bond_dev_private *internals = dev->data->dev_private;
2241
2242         uint16_t max_nb_rx_queues = UINT16_MAX;
2243         uint16_t max_nb_tx_queues = UINT16_MAX;
2244         uint16_t max_rx_desc_lim = UINT16_MAX;
2245         uint16_t max_tx_desc_lim = UINT16_MAX;
2246
2247         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2248
2249         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2250                         internals->candidate_max_rx_pktlen :
2251                         RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2252
2253         /* The maximum number of tx/rx queues the bonded device can support
2254          * is the minimum of the corresponding values of its slaves, as all
2255          * slaves must be capable of supporting the same number of queues.
2256          */
2257         if (internals->slave_count > 0) {
2258                 struct rte_eth_dev_info slave_info;
2259                 uint16_t idx;
2260
2261                 for (idx = 0; idx < internals->slave_count; idx++) {
2262                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2263                                         &slave_info);
2264
2265                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2266                                 max_nb_rx_queues = slave_info.max_rx_queues;
2267
2268                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2269                                 max_nb_tx_queues = slave_info.max_tx_queues;
2270
2271                         if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2272                                 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2273
2274                         if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2275                                 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2276                 }
2277         }
2278
2279         dev_info->max_rx_queues = max_nb_rx_queues;
2280         dev_info->max_tx_queues = max_nb_tx_queues;
2281
2282         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2283                sizeof(dev_info->default_rxconf));
2284         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2285                sizeof(dev_info->default_txconf));
2286
2287         dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2288         dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2289
2290         /**
2291          * If dedicated hw queues enabled for link bonding device in LACP mode
2292          * then we need to reduce the maximum number of data path queues by 1.
2293          */
2294         if (internals->mode == BONDING_MODE_8023AD &&
2295                 internals->mode4.dedicated_queues.enabled == 1) {
2296                 dev_info->max_rx_queues--;
2297                 dev_info->max_tx_queues--;
2298         }
2299
2300         dev_info->min_rx_bufsize = 0;
2301
2302         dev_info->rx_offload_capa = internals->rx_offload_capa;
2303         dev_info->tx_offload_capa = internals->tx_offload_capa;
2304         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2305         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2306         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2307
2308         dev_info->reta_size = internals->reta_size;
2309 }
2310
2311 static int
2312 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2313 {
2314         int res;
2315         uint16_t i;
2316         struct bond_dev_private *internals = dev->data->dev_private;
2317
2318         /* don't do this while a slave is being added */
2319         rte_spinlock_lock(&internals->lock);
2320
2321         if (on)
2322                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2323         else
2324                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2325
2326         for (i = 0; i < internals->slave_count; i++) {
2327                 uint16_t port_id = internals->slaves[i].port_id;
2328
2329                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2330                 if (res == -ENOTSUP)
2331                         RTE_BOND_LOG(WARNING,
2332                                      "Setting VLAN filter on slave port %u not supported.",
2333                                      port_id);
2334         }
2335
2336         rte_spinlock_unlock(&internals->lock);
2337         return 0;
2338 }
2339
2340 static int
2341 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2342                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2343                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2344 {
2345         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2346                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2347                                         0, dev->data->numa_node);
2348         if (bd_rx_q == NULL)
2349                 return -1;
2350
2351         bd_rx_q->queue_id = rx_queue_id;
2352         bd_rx_q->dev_private = dev->data->dev_private;
2353
2354         bd_rx_q->nb_rx_desc = nb_rx_desc;
2355
2356         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2357         bd_rx_q->mb_pool = mb_pool;
2358
2359         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2360
2361         return 0;
2362 }
2363
2364 static int
2365 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2366                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2367                 const struct rte_eth_txconf *tx_conf)
2368 {
2369         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2370                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2371                                         0, dev->data->numa_node);
2372
2373         if (bd_tx_q == NULL)
2374                 return -1;
2375
2376         bd_tx_q->queue_id = tx_queue_id;
2377         bd_tx_q->dev_private = dev->data->dev_private;
2378
2379         bd_tx_q->nb_tx_desc = nb_tx_desc;
2380         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2381
2382         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2383
2384         return 0;
2385 }
2386
2387 static void
2388 bond_ethdev_rx_queue_release(void *queue)
2389 {
2390         if (queue == NULL)
2391                 return;
2392
2393         rte_free(queue);
2394 }
2395
2396 static void
2397 bond_ethdev_tx_queue_release(void *queue)
2398 {
2399         if (queue == NULL)
2400                 return;
2401
2402         rte_free(queue);
2403 }
2404
2405 static void
2406 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2407 {
2408         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2409         struct bond_dev_private *internals;
2410
2411         /* Default value for polling slave found is true as we don't want to
2412          * disable the polling thread if we cannot get the lock */
2413         int i, polling_slave_found = 1;
2414
2415         if (cb_arg == NULL)
2416                 return;
2417
2418         bonded_ethdev = cb_arg;
2419         internals = bonded_ethdev->data->dev_private;
2420
2421         if (!bonded_ethdev->data->dev_started ||
2422                 !internals->link_status_polling_enabled)
2423                 return;
2424
2425         /* If the device is currently being configured then don't check the
2426          * slaves' link status; wait until the next period */
2427         if (rte_spinlock_trylock(&internals->lock)) {
2428                 if (internals->slave_count > 0)
2429                         polling_slave_found = 0;
2430
2431                 for (i = 0; i < internals->slave_count; i++) {
2432                         if (!internals->slaves[i].link_status_poll_enabled)
2433                                 continue;
2434
2435                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2436                         polling_slave_found = 1;
2437
2438                         /* Update slave link status */
2439                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2440                                         internals->slaves[i].link_status_wait_to_complete);
2441
2442                         /* if link status has changed since last checked then call lsc
2443                          * event callback */
2444                         if (slave_ethdev->data->dev_link.link_status !=
2445                                         internals->slaves[i].last_link_status) {
2446                                 internals->slaves[i].last_link_status =
2447                                                 slave_ethdev->data->dev_link.link_status;
2448
2449                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2450                                                 RTE_ETH_EVENT_INTR_LSC,
2451                                                 &bonded_ethdev->data->port_id,
2452                                                 NULL);
2453                         }
2454                 }
2455                 rte_spinlock_unlock(&internals->lock);
2456         }
2457
2458         if (polling_slave_found)
2459                 /* Set alarm to continue monitoring link status of slave ethdev's */
2460                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2461                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2462 }
2463
2464 static int
2465 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2466 {
2467         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2468
2469         struct bond_dev_private *bond_ctx;
2470         struct rte_eth_link slave_link;
2471
2472         uint32_t idx;
2473
2474         bond_ctx = ethdev->data->dev_private;
2475
2476         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2477
2478         if (ethdev->data->dev_started == 0 ||
2479                         bond_ctx->active_slave_count == 0) {
2480                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2481                 return 0;
2482         }
2483
2484         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2485
2486         if (wait_to_complete)
2487                 link_update = rte_eth_link_get;
2488         else
2489                 link_update = rte_eth_link_get_nowait;
2490
2491         switch (bond_ctx->mode) {
2492         case BONDING_MODE_BROADCAST:
2493                 /**
2494                  * Setting link speed to UINT32_MAX to ensure we pick up the
2495                  * value of the first active slave
2496                  */
2497                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2498
2499                 /**
2500                  * link speed is minimum value of all the slaves link speed as
2501                  * packet loss will occur on this slave if transmission at rates
2502                  * greater than this are attempted
2503                  */
2504                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2505                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2506
2507                         if (slave_link.link_speed <
2508                                         ethdev->data->dev_link.link_speed)
2509                                 ethdev->data->dev_link.link_speed =
2510                                                 slave_link.link_speed;
2511                 }
2512                 break;
2513         case BONDING_MODE_ACTIVE_BACKUP:
2514                 /* Current primary slave */
2515                 link_update(bond_ctx->current_primary_port, &slave_link);
2516
2517                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2518                 break;
2519         case BONDING_MODE_8023AD:
2520                 ethdev->data->dev_link.link_autoneg =
2521                                 bond_ctx->mode4.slave_link.link_autoneg;
2522                 ethdev->data->dev_link.link_duplex =
2523                                 bond_ctx->mode4.slave_link.link_duplex;
2524                 /* fall through to update link speed */
2525         case BONDING_MODE_ROUND_ROBIN:
2526         case BONDING_MODE_BALANCE:
2527         case BONDING_MODE_TLB:
2528         case BONDING_MODE_ALB:
2529         default:
2530                 /**
2531                  * In these modes the maximum theoretical link speed is the sum
2532                  * of all the slaves' link speeds
2533                  */
2534                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2535
2536                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2537                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2538
2539                         ethdev->data->dev_link.link_speed +=
2540                                         slave_link.link_speed;
2541                 }
2542         }
2543
2544
2545         return 0;
2546 }
2547
2548
2549 static int
2550 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2551 {
2552         struct bond_dev_private *internals = dev->data->dev_private;
2553         struct rte_eth_stats slave_stats;
2554         int i, j;
2555
2556         for (i = 0; i < internals->slave_count; i++) {
2557                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2558
2559                 stats->ipackets += slave_stats.ipackets;
2560                 stats->opackets += slave_stats.opackets;
2561                 stats->ibytes += slave_stats.ibytes;
2562                 stats->obytes += slave_stats.obytes;
2563                 stats->imissed += slave_stats.imissed;
2564                 stats->ierrors += slave_stats.ierrors;
2565                 stats->oerrors += slave_stats.oerrors;
2566                 stats->rx_nombuf += slave_stats.rx_nombuf;
2567
2568                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2569                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2570                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2571                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2572                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2573                         stats->q_errors[j] += slave_stats.q_errors[j];
2574                 }
2575
2576         }
2577
2578         return 0;
2579 }
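/*
 * Note: the per-queue counters above are summed by queue index, so queue j
 * of the bonded device aggregates queue j of every slave; the device-level
 * counters (ipackets, obytes, ...) are plain sums across all slaves.
 */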
2580
2581 static void
2582 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2583 {
2584         struct bond_dev_private *internals = dev->data->dev_private;
2585         int i;
2586
2587         for (i = 0; i < internals->slave_count; i++)
2588                 rte_eth_stats_reset(internals->slaves[i].port_id);
2589 }
2590
2591 static void
2592 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2593 {
2594         struct bond_dev_private *internals = eth_dev->data->dev_private;
2595         int i;
2596
2597         internals->promiscuous_en = 1;
2598
2599         switch (internals->mode) {
2600         /* Promiscuous mode is propagated to all slaves */
2601         case BONDING_MODE_ROUND_ROBIN:
2602         case BONDING_MODE_BALANCE:
2603         case BONDING_MODE_BROADCAST:
2604                 for (i = 0; i < internals->slave_count; i++)
2605                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2606                 break;
2607         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2608         case BONDING_MODE_8023AD:
2609                 break;
2610         /* Promiscuous mode is propagated only to primary slave */
2611         case BONDING_MODE_ACTIVE_BACKUP:
2612         case BONDING_MODE_TLB:
2613         case BONDING_MODE_ALB:
2614         default:
2615                 /* Do not touch promisc when there cannot be primary ports */
2616                 if (internals->slave_count == 0)
2617                         break;
2618                 rte_eth_promiscuous_enable(internals->current_primary_port);
2619         }
2620 }
2621
2622 static void
2623 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2624 {
2625         struct bond_dev_private *internals = dev->data->dev_private;
2626         int i;
2627
2628         internals->promiscuous_en = 0;
2629
2630         switch (internals->mode) {
2631         /* Promiscuous mode is propagated to all slaves */
2632         case BONDING_MODE_ROUND_ROBIN:
2633         case BONDING_MODE_BALANCE:
2634         case BONDING_MODE_BROADCAST:
2635                 for (i = 0; i < internals->slave_count; i++)
2636                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2637                 break;
2638         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2639         case BONDING_MODE_8023AD:
2640                 break;
2641         /* Promiscuous mode is propagated only to primary slave */
2642         case BONDING_MODE_ACTIVE_BACKUP:
2643         case BONDING_MODE_TLB:
2644         case BONDING_MODE_ALB:
2645         default:
2646                 /* Do not touch promisc when there cannot be primary ports */
2647                 if (internals->slave_count == 0)
2648                         break;
2649                 rte_eth_promiscuous_disable(internals->current_primary_port);
2650         }
2651 }
2652
2653 static void
2654 bond_ethdev_delayed_lsc_propagation(void *arg)
2655 {
2656         if (arg == NULL)
2657                 return;
2658
2659         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2660                         RTE_ETH_EVENT_INTR_LSC, NULL);
2661 }
2662
2663 int
2664 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2665                 void *param, void *ret_param __rte_unused)
2666 {
2667         struct rte_eth_dev *bonded_eth_dev;
2668         struct bond_dev_private *internals;
2669         struct rte_eth_link link;
2670         int rc = -1;
2671
2672         uint8_t lsc_flag = 0;
2673         int valid_slave = 0;
2674         uint16_t active_pos;
2675         uint16_t i;
2676
2677         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2678                 return rc;
2679
2680         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2681
2682         if (check_for_bonded_ethdev(bonded_eth_dev))
2683                 return rc;
2684
2685         internals = bonded_eth_dev->data->dev_private;
2686
2687         /* If the device isn't started, don't handle interrupts */
2688         if (!bonded_eth_dev->data->dev_started)
2689                 return rc;
2690
2691         /* verify that port_id is a valid slave of bonded port */
2692         for (i = 0; i < internals->slave_count; i++) {
2693                 if (internals->slaves[i].port_id == port_id) {
2694                         valid_slave = 1;
2695                         break;
2696                 }
2697         }
2698
2699         if (!valid_slave)
2700                 return rc;
2701
2702         /* Serialize parallel invocations of the LSC callback, triggered by
2703          * real link events from slave PMDs or by the bonding PMD itself.
2704          */
2705         rte_spinlock_lock(&internals->lsc_lock);
2706
2707         /* Search for port in active port list */
2708         active_pos = find_slave_by_id(internals->active_slaves,
2709                         internals->active_slave_count, port_id);
2710
2711         rte_eth_link_get_nowait(port_id, &link);
2712         if (link.link_status) {
2713                 if (active_pos < internals->active_slave_count)
2714                         goto link_update;
2715
2716                 /* Check link state properties if the bonded link is up */
2717                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2718                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2719                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2720                                              "for slave %d in bonding mode %d",
2721                                              port_id, internals->mode);
2722                 } else {
2723                         /* inherit slave link properties */
2724                         link_properties_set(bonded_eth_dev, &link);
2725                 }
2726
2727                 /* If no active slave ports then set this port to be
2728                  * the primary port.
2729                  */
2730                 if (internals->active_slave_count < 1) {
2731                         /* If first active slave, then change link status */
2732                         bonded_eth_dev->data->dev_link.link_status =
2733                                                                 ETH_LINK_UP;
2734                         internals->current_primary_port = port_id;
2735                         lsc_flag = 1;
2736
2737                         mac_address_slaves_update(bonded_eth_dev);
2738                 }
2739
2740                 activate_slave(bonded_eth_dev, port_id);
2741
2742                 /* If the user has defined the primary port then default to
2743                  * using it.
2744                  */
2745                 if (internals->user_defined_primary_port &&
2746                                 internals->primary_port == port_id)
2747                         bond_ethdev_primary_set(internals, port_id);
2748         } else {
2749                 if (active_pos == internals->active_slave_count)
2750                         goto link_update;
2751
2752                 /* Remove from active slave list */
2753                 deactivate_slave(bonded_eth_dev, port_id);
2754
2755                 if (internals->active_slave_count < 1)
2756                         lsc_flag = 1;
2757
2758                 /* Update the primary port id: take the first active slave from the
2759                  * list, or fall back to the configured primary port if none is active */
2760                 if (port_id == internals->current_primary_port) {
2761                         if (internals->active_slave_count > 0)
2762                                 bond_ethdev_primary_set(internals,
2763                                                 internals->active_slaves[0]);
2764                         else
2765                                 internals->current_primary_port = internals->primary_port;
2766                 }
2767         }
2768
2769 link_update:
2770         /**
2771          * Update bonded device link properties after any change to active
2772          * slaves
2773          */
2774         bond_ethdev_link_update(bonded_eth_dev, 0);
2775
2776         if (lsc_flag) {
2777                 /* Cancel any possible outstanding interrupts if delays are enabled */
2778                 if (internals->link_up_delay_ms > 0 ||
2779                         internals->link_down_delay_ms > 0)
2780                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2781                                         bonded_eth_dev);
2782
2783                 if (bonded_eth_dev->data->dev_link.link_status) {
2784                         if (internals->link_up_delay_ms > 0)
2785                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2786                                                 bond_ethdev_delayed_lsc_propagation,
2787                                                 (void *)bonded_eth_dev);
2788                         else
2789                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2790                                                 RTE_ETH_EVENT_INTR_LSC,
2791                                                 NULL);
2792
2793                 } else {
2794                         if (internals->link_down_delay_ms > 0)
2795                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2796                                                 bond_ethdev_delayed_lsc_propagation,
2797                                                 (void *)bonded_eth_dev);
2798                         else
2799                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2800                                                 RTE_ETH_EVENT_INTR_LSC,
2801                                                 NULL);
2802                 }
2803         }
2804
2805         rte_spinlock_unlock(&internals->lsc_lock);
2806
2807         return rc;
2808 }
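
/*
 * Usage sketch (hypothetical application code): the callback above runs on
 * slave link changes; an application that wants to observe the resulting
 * bonded-port link events registers its own handler for
 * RTE_ETH_EVENT_INTR_LSC on the bonded port id, e.g.:
 *
 *	rte_eth_dev_callback_register(bonded_port_id, RTE_ETH_EVENT_INTR_LSC,
 *			example_app_lsc_handler, NULL);
 */
static __rte_unused int
example_app_lsc_handler(uint16_t port_id, enum rte_eth_event_type type,
		void *cb_arg __rte_unused, void *ret_param __rte_unused)
{
	struct rte_eth_link link;

	if (type != RTE_ETH_EVENT_INTR_LSC)
		return 0;

	rte_eth_link_get_nowait(port_id, &link);
	RTE_BOND_LOG(INFO, "bonded port %u link is %s", port_id,
		     link.link_status == ETH_LINK_UP ? "up" : "down");
	return 0;
}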
2809
2810 static int
2811 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2812                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2813 {
2814         unsigned i, j;
2815         int result = 0;
2816         int slave_reta_size;
2817         unsigned reta_count;
2818         struct bond_dev_private *internals = dev->data->dev_private;
2819
2820         if (reta_size != internals->reta_size)
2821                 return -EINVAL;
2822
2823         /* Copy RETA table */
2824         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2825
2826         for (i = 0; i < reta_count; i++) {
2827                 internals->reta_conf[i].mask = reta_conf[i].mask;
2828                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2829                         if ((reta_conf[i].mask >> j) & 0x01)
2830                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2831         }
2832
2833         /* Fill rest of array */
2834         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2835                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2836                                 sizeof(internals->reta_conf[0]) * reta_count);
2837
2838         /* Propagate RETA over slaves */
2839         for (i = 0; i < internals->slave_count; i++) {
2840                 slave_reta_size = internals->slaves[i].reta_size;
2841                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2842                                 &internals->reta_conf[0], slave_reta_size);
2843                 if (result < 0)
2844                         return result;
2845         }
2846
2847         return 0;
2848 }
2849
2850 static int
2851 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2852                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2853 {
2854         int i, j;
2855         struct bond_dev_private *internals = dev->data->dev_private;
2856
2857         if (reta_size != internals->reta_size)
2858                 return -EINVAL;
2859
2860         /* Copy RETA table */
2861         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2862                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2863                         if ((reta_conf[i].mask >> j) & 0x01)
2864                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2865
2866         return 0;
2867 }
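
/*
 * Minimal sketch (assumed port id and queue count, RETA size capped at 512
 * entries for the local buffer): reprogramming the bonded port's RETA goes
 * through the generic ethdev call, lands in bond_ethdev_rss_reta_update()
 * above and is then replayed on every slave.
 */
static __rte_unused int
example_bond_reta_spread(uint16_t bonded_port_id, uint16_t nb_rx_queues)
{
	struct rte_eth_rss_reta_entry64 reta_conf[512 / RTE_RETA_GROUP_SIZE];
	struct rte_eth_dev_info dev_info;
	unsigned int i, j;

	rte_eth_dev_info_get(bonded_port_id, &dev_info);
	if (dev_info.reta_size > 512 || nb_rx_queues == 0)
		return -EINVAL;

	/* Spread the RETA entries round-robin over the Rx queues */
	for (i = 0; i < dev_info.reta_size / RTE_RETA_GROUP_SIZE; i++) {
		reta_conf[i].mask = ~0ULL;
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			reta_conf[i].reta[j] =
				(i * RTE_RETA_GROUP_SIZE + j) % nb_rx_queues;
	}

	return rte_eth_dev_rss_reta_update(bonded_port_id, reta_conf,
					   dev_info.reta_size);
}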
2868
2869 static int
2870 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2871                 struct rte_eth_rss_conf *rss_conf)
2872 {
2873         int i, result = 0;
2874         struct bond_dev_private *internals = dev->data->dev_private;
2875         struct rte_eth_rss_conf bond_rss_conf;
2876
2877         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2878
2879         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2880
2881         if (bond_rss_conf.rss_hf != 0)
2882                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2883
2884         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2885                         sizeof(internals->rss_key)) {
2886                 if (bond_rss_conf.rss_key_len == 0)
2887                         bond_rss_conf.rss_key_len = 40; /* default RSS key length */
2888                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2889                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2890                                 internals->rss_key_len);
2891         }
2892
2893         for (i = 0; i < internals->slave_count; i++) {
2894                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2895                                 &bond_rss_conf);
2896                 if (result < 0)
2897                         return result;
2898         }
2899
2900         return 0;
2901 }
2902
2903 static int
2904 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2905                 struct rte_eth_rss_conf *rss_conf)
2906 {
2907         struct bond_dev_private *internals = dev->data->dev_private;
2908
2909         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2910         rss_conf->rss_key_len = internals->rss_key_len;
2911         if (rss_conf->rss_key)
2912                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2913
2914         return 0;
2915 }
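
/*
 * Usage sketch (assumptions: bonded port id; a 40-byte key, matching the
 * default key length used by this driver): an RSS update on the bonded
 * port is clamped by bond_ethdev_rss_hash_update() above to the hash types
 * every slave supports and then pushed to each slave.
 */
static __rte_unused int
example_bond_rss_update(uint16_t bonded_port_id)
{
	static uint8_t app_rss_key[40] = { 0x6D, 0x5A /* remainder zero */ };
	struct rte_eth_rss_conf conf = {
		.rss_key = app_rss_key,
		.rss_key_len = sizeof(app_rss_key),
		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
	};

	return rte_eth_dev_rss_hash_update(bonded_port_id, &conf);
}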
2916
2917 static int
2918 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2919 {
2920         struct rte_eth_dev *slave_eth_dev;
2921         struct bond_dev_private *internals = dev->data->dev_private;
2922         int ret, i;
2923
2924         rte_spinlock_lock(&internals->lock);
2925
2926         for (i = 0; i < internals->slave_count; i++) {
2927                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2928                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2929                         rte_spinlock_unlock(&internals->lock);
2930                         return -ENOTSUP;
2931                 }
2932         }
2933         for (i = 0; i < internals->slave_count; i++) {
2934                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2935                 if (ret < 0) {
2936                         rte_spinlock_unlock(&internals->lock);
2937                         return ret;
2938                 }
2939         }
2940
2941         rte_spinlock_unlock(&internals->lock);
2942         return 0;
2943 }
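
/*
 * Sketch (assumed jumbo MTU value): the all-or-nothing capability check in
 * bond_ethdev_mtu_set() above means the bonded MTU only changes when every
 * slave implements mtu_set, so -ENOTSUP must be handled.
 */
static __rte_unused void
example_bond_set_mtu(uint16_t bonded_port_id)
{
	int ret = rte_eth_dev_set_mtu(bonded_port_id, 9000);

	if (ret == -ENOTSUP)
		RTE_BOND_LOG(WARNING,
			     "a slave of port %u cannot change its MTU",
			     bonded_port_id);
}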
2944
2945 static int
2946 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
2947                         struct rte_ether_addr *addr)
2948 {
2949         if (mac_address_set(dev, addr)) {
2950                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2951                 return -EINVAL;
2952         }
2953
2954         return 0;
2955 }
2956
2957 static int
2958 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2959                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2960 {
2961         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2962                 *(const void **)arg = &bond_flow_ops;
2963                 return 0;
2964         }
2965         return -ENOTSUP;
2966 }
2967
2968 static int
2969 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
2970                         struct rte_ether_addr *mac_addr,
2971                         __rte_unused uint32_t index, uint32_t vmdq)
2972 {
2973         struct rte_eth_dev *slave_eth_dev;
2974         struct bond_dev_private *internals = dev->data->dev_private;
2975         int ret, i;
2976
2977         rte_spinlock_lock(&internals->lock);
2978
2979         for (i = 0; i < internals->slave_count; i++) {
2980                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2981                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2982                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2983                         ret = -ENOTSUP;
2984                         goto end;
2985                 }
2986         }
2987
2988         for (i = 0; i < internals->slave_count; i++) {
2989                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2990                                 mac_addr, vmdq);
2991                 if (ret < 0) {
2992                         /* rollback */
2993                         for (i--; i >= 0; i--)
2994                                 rte_eth_dev_mac_addr_remove(
2995                                         internals->slaves[i].port_id, mac_addr);
2996                         goto end;
2997                 }
2998         }
2999
3000         ret = 0;
3001 end:
3002         rte_spinlock_unlock(&internals->lock);
3003         return ret;
3004 }
3005
3006 static void
3007 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3008 {
3009         struct rte_eth_dev *slave_eth_dev;
3010         struct bond_dev_private *internals = dev->data->dev_private;
3011         int i;
3012
3013         rte_spinlock_lock(&internals->lock);
3014
3015         for (i = 0; i < internals->slave_count; i++) {
3016                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3017                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3018                         goto end;
3019         }
3020
3021         struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3022
3023         for (i = 0; i < internals->slave_count; i++)
3024                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3025                                 mac_addr);
3026
3027 end:
3028         rte_spinlock_unlock(&internals->lock);
3029 }
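
/*
 * Sketch (locally administered MAC and pool index are assumed example
 * values): a secondary MAC added to the bonded port is mirrored to every
 * slave, with the rollback in bond_ethdev_mac_addr_add() undoing partial
 * programming if any slave rejects the address.
 */
static __rte_unused int
example_bond_add_secondary_mac(uint16_t bonded_port_id)
{
	struct rte_ether_addr addr = {
		.addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }
	};

	return rte_eth_dev_mac_addr_add(bonded_port_id, &addr, 0);
}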
3030
3031 const struct eth_dev_ops default_dev_ops = {
3032         .dev_start            = bond_ethdev_start,
3033         .dev_stop             = bond_ethdev_stop,
3034         .dev_close            = bond_ethdev_close,
3035         .dev_configure        = bond_ethdev_configure,
3036         .dev_infos_get        = bond_ethdev_info,
3037         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3038         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3039         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3040         .rx_queue_release     = bond_ethdev_rx_queue_release,
3041         .tx_queue_release     = bond_ethdev_tx_queue_release,
3042         .link_update          = bond_ethdev_link_update,
3043         .stats_get            = bond_ethdev_stats_get,
3044         .stats_reset          = bond_ethdev_stats_reset,
3045         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3046         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3047         .reta_update          = bond_ethdev_rss_reta_update,
3048         .reta_query           = bond_ethdev_rss_reta_query,
3049         .rss_hash_update      = bond_ethdev_rss_hash_update,
3050         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3051         .mtu_set              = bond_ethdev_mtu_set,
3052         .mac_addr_set         = bond_ethdev_mac_address_set,
3053         .mac_addr_add         = bond_ethdev_mac_addr_add,
3054         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3055         .filter_ctrl          = bond_filter_ctrl
3056 };
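
/*
 * The ethdev layer dispatches generic API calls through this table, so
 * operations on the bonded port id resolve to the handlers above, e.g.
 * (sketch, port id and configuration assumed):
 *
 *	rte_eth_dev_configure(bonded_port_id, 1, 1, &port_conf);
 *		-> bond_ethdev_configure()
 *	rte_eth_dev_start(bonded_port_id);
 *		-> bond_ethdev_start()
 */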
3057
3058 static int
3059 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3060 {
3061         const char *name = rte_vdev_device_name(dev);
3062         uint8_t socket_id = dev->device.numa_node;
3063         struct bond_dev_private *internals = NULL;
3064         struct rte_eth_dev *eth_dev = NULL;
3065         uint32_t vlan_filter_bmp_size;
3066
3067         /* now do all data allocation - for eth_dev structure, dummy pci driver
3068          * and internal (private) data
3069          */
3070
3071         /* reserve an ethdev entry */
3072         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3073         if (eth_dev == NULL) {
3074                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3075                 goto err;
3076         }
3077
3078         internals = eth_dev->data->dev_private;
3079         eth_dev->data->nb_rx_queues = (uint16_t)1;
3080         eth_dev->data->nb_tx_queues = (uint16_t)1;
3081
3082         /* Allocate memory for storing MAC addresses */
3083         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
3084                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3085         if (eth_dev->data->mac_addrs == NULL) {
3086                 RTE_BOND_LOG(ERR,
3087                              "Failed to allocate %u bytes needed to store MAC addresses",
3088                              RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3089                 goto err;
3090         }
3091
3092         eth_dev->dev_ops = &default_dev_ops;
3093         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3094
3095         rte_spinlock_init(&internals->lock);
3096         rte_spinlock_init(&internals->lsc_lock);
3097
3098         internals->port_id = eth_dev->data->port_id;
3099         internals->mode = BONDING_MODE_INVALID;
3100         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3101         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3102         internals->burst_xmit_hash = burst_xmit_l2_hash;
3103         internals->user_defined_mac = 0;
3104
3105         internals->link_status_polling_enabled = 0;
3106
3107         internals->link_status_polling_interval_ms =
3108                 DEFAULT_POLLING_INTERVAL_10_MS;
3109         internals->link_down_delay_ms = 0;
3110         internals->link_up_delay_ms = 0;
3111
3112         internals->slave_count = 0;
3113         internals->active_slave_count = 0;
3114         internals->rx_offload_capa = 0;
3115         internals->tx_offload_capa = 0;
3116         internals->rx_queue_offload_capa = 0;
3117         internals->tx_queue_offload_capa = 0;
3118         internals->candidate_max_rx_pktlen = 0;
3119         internals->max_rx_pktlen = 0;
3120
3121         /* Initially allow to choose any offload type */
3122         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3123
3124         memset(&internals->default_rxconf, 0,
3125                sizeof(internals->default_rxconf));
3126         memset(&internals->default_txconf, 0,
3127                sizeof(internals->default_txconf));
3128
3129         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3130         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3131
3132         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3133         memset(internals->slaves, 0, sizeof(internals->slaves));
3134
3135         TAILQ_INIT(&internals->flow_list);
3136         internals->flow_isolated_valid = 0;
3137
3138         /* Set mode 4 default configuration */
3139         bond_mode_8023ad_setup(eth_dev, NULL);
3140         if (bond_ethdev_mode_set(eth_dev, mode)) {
3141                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3142                                  eth_dev->data->port_id, mode);
3143                 goto err;
3144         }
3145
3146         vlan_filter_bmp_size =
3147                 rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3148         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3149                                                    RTE_CACHE_LINE_SIZE);
3150         if (internals->vlan_filter_bmpmem == NULL) {
3151                 RTE_BOND_LOG(ERR,
3152                              "Failed to allocate vlan bitmap for bonded device %u",
3153                              eth_dev->data->port_id);
3154                 goto err;
3155         }
3156
3157         internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3158                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3159         if (internals->vlan_filter_bmp == NULL) {
3160                 RTE_BOND_LOG(ERR,
3161                              "Failed to init vlan bitmap for bonded device %u",
3162                              eth_dev->data->port_id);
3163                 rte_free(internals->vlan_filter_bmpmem);
3164                 goto err;
3165         }
3166
3167         return eth_dev->data->port_id;
3168
3169 err:
3170         rte_free(internals);
3171         if (eth_dev != NULL)
3172                 eth_dev->data->dev_private = NULL;
3173         rte_eth_dev_release_port(eth_dev);
3174         return -1;
3175 }
3176
3177 static int
3178 bond_probe(struct rte_vdev_device *dev)
3179 {
3180         const char *name;
3181         struct bond_dev_private *internals;
3182         struct rte_kvargs *kvlist;
3183         uint8_t bonding_mode, socket_id;
3184         int  arg_count, port_id;
3185         uint8_t agg_mode;
3186         struct rte_eth_dev *eth_dev;
3187
3188         if (!dev)
3189                 return -EINVAL;
3190
3191         name = rte_vdev_device_name(dev);
3192         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3193
3194         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3195                 eth_dev = rte_eth_dev_attach_secondary(name);
3196                 if (!eth_dev) {
3197                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3198                         return -1;
3199                 }
3200                 /* TODO: request info from primary to set up Rx and Tx */
3201                 eth_dev->dev_ops = &default_dev_ops;
3202                 eth_dev->device = &dev->device;
3203                 rte_eth_dev_probing_finish(eth_dev);
3204                 return 0;
3205         }
3206
3207         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3208                 pmd_bond_init_valid_arguments);
3209         if (kvlist == NULL)
3210                 return -1;
3211
3212         /* Parse link bonding mode */
3213         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3214                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3215                                 &bond_ethdev_parse_slave_mode_kvarg,
3216                                 &bonding_mode) != 0) {
3217                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3218                                         name);
3219                         goto parse_error;
3220                 }
3221         } else {
3222                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3223                                 "device %s", name);
3224                 goto parse_error;
3225         }
3226
3227         /* Parse socket id to create bonding device on */
3228         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3229         if (arg_count == 1) {
3230                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3231                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3232                                 != 0) {
3233                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3234                                         "bonded device %s", name);
3235                         goto parse_error;
3236                 }
3237         } else if (arg_count > 1) {
3238                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3239                                 "bonded device %s", name);
3240                 goto parse_error;
3241         } else {
3242                 socket_id = rte_socket_id();
3243         }
3244
3245         dev->device.numa_node = socket_id;
3246
3247         /* Create link bonding eth device */
3248         port_id = bond_alloc(dev, bonding_mode);
3249         if (port_id < 0) {
3250                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u "
3251                                 "on socket %u.", name, bonding_mode, socket_id);
3252                 goto parse_error;
3253         }
3254         internals = rte_eth_devices[port_id].data->dev_private;
3255         internals->kvlist = kvlist;
3256
3257         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3258                 if (rte_kvargs_process(kvlist,
3259                                 PMD_BOND_AGG_MODE_KVARG,
3260                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3261                                 &agg_mode) != 0) {
3262                         RTE_BOND_LOG(ERR,
3263                                         "Failed to parse agg selection mode for bonded device %s",
3264                                         name);
3265                         goto parse_error;
3266                 }
3267
3268                 if (internals->mode == BONDING_MODE_8023AD)
3269                         internals->mode4.agg_selection = agg_mode;
3270         } else {
3271                 internals->mode4.agg_selection = AGG_STABLE;
3272         }
3273
3274         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3275         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3276                         "socket %u.", name, port_id, bonding_mode, socket_id);
3277         return 0;
3278
3279 parse_error:
3280         rte_kvargs_free(kvlist);
3281
3282         return -1;
3283 }
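
/*
 * Illustrative sketch: the probe path above is normally reached from an
 * EAL --vdev argument or programmatically through the vdev bus. The device
 * name, slave PCI address and kvargs below are assumed examples.
 */
static __rte_unused int
example_bond_create_vdev(void)
{
	/* Equivalent of: --vdev 'net_bonding0,mode=2,slave=0000:01:00.0' */
	return rte_vdev_init("net_bonding0",
			     "mode=2,slave=0000:01:00.0,socket_id=0");
}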
3284
3285 static int
3286 bond_remove(struct rte_vdev_device *dev)
3287 {
3288         struct rte_eth_dev *eth_dev;
3289         struct bond_dev_private *internals;
3290         const char *name;
3291
3292         if (!dev)
3293                 return -EINVAL;
3294
3295         name = rte_vdev_device_name(dev);
3296         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3297
3298         /* now free all data allocation - for eth_dev structure,
3299          * dummy pci driver and internal (private) data
3300          */
3301
3302         /* find an ethdev entry */
3303         eth_dev = rte_eth_dev_allocated(name);
3304         if (eth_dev == NULL)
3305                 return -ENODEV;
3306
3307         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3308                 return rte_eth_dev_release_port(eth_dev);
3309
3310         RTE_ASSERT(eth_dev->device == &dev->device);
3311
3312         internals = eth_dev->data->dev_private;
3313         if (internals->slave_count != 0)
3314                 return -EBUSY;
3315
3316         if (eth_dev->data->dev_started == 1) {
3317                 bond_ethdev_stop(eth_dev);
3318                 bond_ethdev_close(eth_dev);
3319         }
3320
3321         eth_dev->dev_ops = NULL;
3322         eth_dev->rx_pkt_burst = NULL;
3323         eth_dev->tx_pkt_burst = NULL;
3324
3326         /* Try to release the mempool used in mode 6. If the bonded
3327          * device is not in mode 6, freeing the NULL pointer is harmless.
3328          */
3329         rte_mempool_free(internals->mode6.mempool);
3330         rte_bitmap_free(internals->vlan_filter_bmp);
3331         rte_free(internals->vlan_filter_bmpmem);
3332
3333         rte_eth_dev_release_port(eth_dev);
3334
3335         return 0;
3336 }
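
/*
 * Sketch (device name assumed): teardown goes through the same bus and,
 * as the slave_count check above shows, fails with -EBUSY while slaves
 * are still attached.
 */
static __rte_unused int
example_bond_destroy_vdev(void)
{
	return rte_vdev_uninit("net_bonding0");
}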
3337
3338 /* This function resolves the slave port ids after all the other pdevs and
3339  * vdevs have been allocated. */
3340 static int
3341 bond_ethdev_configure(struct rte_eth_dev *dev)
3342 {
3343         const char *name = dev->device->name;
3344         struct bond_dev_private *internals = dev->data->dev_private;
3345         struct rte_kvargs *kvlist = internals->kvlist;
3346         int arg_count;
3347         uint16_t port_id = dev - rte_eth_devices;
3348         uint8_t agg_mode;
3349
3350         static const uint8_t default_rss_key[40] = {
3351                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3352                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3353                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3354                 0xBE, 0xAC, 0x01, 0xFA
3355         };
3356
3357         unsigned i, j;
3358
3359         /*
3360          * If RSS is enabled, fill table with default values and
3361          * set the key to the value specified in the port RSS configuration.
3362          * Fall back to the default RSS key if no key is specified.
3363          */
3364         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3365                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3366                         internals->rss_key_len =
3367                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3368                         memcpy(internals->rss_key,
3369                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3370                                internals->rss_key_len);
3371                 } else {
3372                         internals->rss_key_len = sizeof(default_rss_key);
3373                         memcpy(internals->rss_key, default_rss_key,
3374                                internals->rss_key_len);
3375                 }
3376
3377                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3378                         internals->reta_conf[i].mask = ~0LL;
3379                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3380                                 internals->reta_conf[i].reta[j] =
3381                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3382                                                 dev->data->nb_rx_queues;
3383                 }
3384         }
3385
3386         /* set the max_rx_pktlen */
3387         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3388
3389         /*
3390          * if no kvlist, it means that this bonded device has been created
3391          * through the bonding api.
3392          */
3393         if (!kvlist)
3394                 return 0;
3395
3396         /* Parse MAC address for bonded device */
3397         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3398         if (arg_count == 1) {
3399                 struct rte_ether_addr bond_mac;
3400
3401                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3402                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3403                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3404                                      name);
3405                         return -1;
3406                 }
3407
3408                 /* Set MAC address */
3409                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3410                         RTE_BOND_LOG(ERR,
3411                                      "Failed to set mac address on bonded device %s",
3412                                      name);
3413                         return -1;
3414                 }
3415         } else if (arg_count > 1) {
3416                 RTE_BOND_LOG(ERR,
3417                              "MAC address can be specified only once for bonded device %s",
3418                              name);
3419                 return -1;
3420         }
3421
3422         /* Parse/set balance mode transmit policy */
3423         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3424         if (arg_count == 1) {
3425                 uint8_t xmit_policy;
3426
3427                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3428                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3429                     0) {
3430                         RTE_BOND_LOG(INFO,
3431                                      "Invalid xmit policy specified for bonded device %s",
3432                                      name);
3433                         return -1;
3434                 }
3435
3436                 /* Set balance mode transmit policy */
3437                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3438                         RTE_BOND_LOG(ERR,
3439                                      "Failed to set balance xmit policy on bonded device %s",
3440                                      name);
3441                         return -1;
3442                 }
3443         } else if (arg_count > 1) {
3444                 RTE_BOND_LOG(ERR,
3445                              "Transmit policy can be specified only once for bonded device %s",
3446                              name);
3447                 return -1;
3448         }
3449
3450         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3451                 if (rte_kvargs_process(kvlist,
3452                                        PMD_BOND_AGG_MODE_KVARG,
3453                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3454                                        &agg_mode) != 0) {
3455                         RTE_BOND_LOG(ERR,
3456                                      "Failed to parse agg selection mode for bonded device %s",
3457                                      name);
                        return -1; /* agg_mode is not valid past this point */
3458                 }
3459                 if (internals->mode == BONDING_MODE_8023AD) {
3460                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3461                                         agg_mode);
3462                         if (ret < 0) {
3463                                 RTE_BOND_LOG(ERR,
3464                                         "Invalid args for agg selection set for bonded device %s",
3465                                         name);
3466                                 return -1;
3467                         }
3468                 }
3469         }
3470
3471         /* Parse/add slave ports to bonded device */
3472         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3473                 struct bond_ethdev_slave_ports slave_ports;
3474                 unsigned i;
3475
3476                 memset(&slave_ports, 0, sizeof(slave_ports));
3477
3478                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3479                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3480                         RTE_BOND_LOG(ERR,
3481                                      "Failed to parse slave ports for bonded device %s",
3482                                      name);
3483                         return -1;
3484                 }
3485
3486                 for (i = 0; i < slave_ports.slave_count; i++) {
3487                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3488                                 RTE_BOND_LOG(ERR,
3489                                              "Failed to add port %d as slave to bonded device %s",
3490                                              slave_ports.slaves[i], name);
3491                         }
3492                 }
3493
3494         } else {
3495                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3496                 return -1;
3497         }
3498
3499         /* Parse/set primary slave port id*/
3500         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3501         if (arg_count == 1) {
3502                 uint16_t primary_slave_port_id;
3503
3504                 if (rte_kvargs_process(kvlist,
3505                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3506                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3507                                        &primary_slave_port_id) < 0) {
3508                         RTE_BOND_LOG(INFO,
3509                                      "Invalid primary slave port id specified for bonded device %s",
3510                                      name);
3511                         return -1;
3512                 }
3513
3514                 /* Set the primary slave port id */
3515                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3516                     != 0) {
3517                         RTE_BOND_LOG(ERR,
3518                                      "Failed to set primary slave port %d on bonded device %s",
3519                                      primary_slave_port_id, name);
3520                         return -1;
3521                 }
3522         } else if (arg_count > 1) {
3523                 RTE_BOND_LOG(INFO,
3524                              "Primary slave can be specified only once for bonded device %s",
3525                              name);
3526                 return -1;
3527         }
3528
3529         /* Parse link status monitor polling interval */
3530         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3531         if (arg_count == 1) {
3532                 uint32_t lsc_poll_interval_ms;
3533
3534                 if (rte_kvargs_process(kvlist,
3535                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3536                                        &bond_ethdev_parse_time_ms_kvarg,
3537                                        &lsc_poll_interval_ms) < 0) {
3538                         RTE_BOND_LOG(INFO,
3539                                      "Invalid lsc polling interval value specified for bonded"
3540                                      " device %s", name);
3541                         return -1;
3542                 }
3543
3544                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3545                     != 0) {
3546                         RTE_BOND_LOG(ERR,
3547                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3548                                      lsc_poll_interval_ms, name);
3549                         return -1;
3550                 }
3551         } else if (arg_count > 1) {
3552                 RTE_BOND_LOG(INFO,
3553                              "LSC polling interval can be specified only once for bonded"
3554                              " device %s", name);
3555                 return -1;
3556         }
3557
3558         /* Parse link up interrupt propagation delay */
3559         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3560         if (arg_count == 1) {
3561                 uint32_t link_up_delay_ms;
3562
3563                 if (rte_kvargs_process(kvlist,
3564                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3565                                        &bond_ethdev_parse_time_ms_kvarg,
3566                                        &link_up_delay_ms) < 0) {
3567                         RTE_BOND_LOG(INFO,
3568                                      "Invalid link up propagation delay value specified for"
3569                                      " bonded device %s", name);
3570                         return -1;
3571                 }
3572
3573                 /* Set link up propagation delay */
3574                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3575                     != 0) {
3576                         RTE_BOND_LOG(ERR,
3577                                      "Failed to set link up propagation delay (%u ms) on bonded"
3578                                      " device %s", link_up_delay_ms, name);
3579                         return -1;
3580                 }
3581         } else if (arg_count > 1) {
3582                 RTE_BOND_LOG(INFO,
3583                              "Link up propagation delay can be specified only once for"
3584                              " bonded device %s", name);
3585                 return -1;
3586         }
3587
3588         /* Parse link down interrupt propagation delay */
3589         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3590         if (arg_count == 1) {
3591                 uint32_t link_down_delay_ms;
3592
3593                 if (rte_kvargs_process(kvlist,
3594                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3595                                        &bond_ethdev_parse_time_ms_kvarg,
3596                                        &link_down_delay_ms) < 0) {
3597                         RTE_BOND_LOG(INFO,
3598                                      "Invalid link down propagation delay value specified for"
3599                                      " bonded device %s", name);
3600                         return -1;
3601                 }
3602
3603                 /* Set link down propagation delay */
3604                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3605                     != 0) {
3606                         RTE_BOND_LOG(ERR,
3607                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3608                                      link_down_delay_ms, name);
3609                         return -1;
3610                 }
3611         } else if (arg_count > 1) {
3612                 RTE_BOND_LOG(INFO,
3613                              "Link down propagation delay can be specified only once for bonded device %s",
3614                              name);
3615                 return -1;
3616         }
3617
3618         return 0;
3619 }
3620
3621 struct rte_vdev_driver pmd_bond_drv = {
3622         .probe = bond_probe,
3623         .remove = bond_remove,
3624 };
3625
3626 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3627 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3628
3629 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3630         "slave=<ifc> "
3631         "primary=<ifc> "
3632         "mode=[0-6] "
3633         "xmit_policy=[l2 | l23 | l34] "
3634         "agg_mode=[count | stable | bandwidth] "
3635         "socket_id=<int> "
3636         "mac=<mac addr> "
3637         "lsc_poll_period_ms=<int> "
3638         "up_delay=<int> "
3639         "down_delay=<int>");
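
/*
 * Example invocation exercising the parameters registered above (PCI
 * addresses and values are placeholders):
 *
 *   testpmd -l 0-3 -n 4 \
 *     --vdev 'net_bonding0,mode=4,slave=0000:01:00.0,slave=0000:01:00.1,\
 *       agg_mode=stable,lsc_poll_period_ms=100,up_delay=10,down_delay=50' \
 *     -- --port-topology=chained
 */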
3640
3641 int bond_logtype;
3642
3643 RTE_INIT(bond_init_log)
3644 {
3645         bond_logtype = rte_log_register("pmd.net.bond");
3646         if (bond_logtype >= 0)
3647                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3648 }
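
/*
 * The driver logs under the "pmd.net.bond" type at NOTICE by default. A
 * debugging session can raise the level either from code (sketch below)
 * or with the EAL option --log-level=pmd.net.bond:debug.
 *
 *	rte_log_set_level(bond_logtype, RTE_LOG_DEBUG);
 */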