net/bonding: fix values of descriptor limits
drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}

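/*
 * Mode 0 (round-robin) Rx: poll each active slave in turn, starting after
 * the slave that headed the previous burst, so no single slave monopolises
 * the front of the array. Illustrative application-side usage (a sketch;
 * bond_port_id and MAX_PKT_BURST are placeholders, not names from this
 * file):
 *
 *	struct rte_mbuf *pkts[MAX_PKT_BURST];
 *	uint16_t nb_rx = rte_eth_rx_burst(bond_port_id, 0, pkts,
 *			MAX_PKT_BURST);
 *
 * The bonded device looks like any other ethdev port to the application;
 * the fan-out across slaves below is internal to the PMD.
 */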
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_total = 0;
        uint16_t slave_count;
        uint16_t active_slave;
        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;

        /* The stored index may point past the end of the array if slaves
         * were removed since the last poll; wrap it before use. */
        if (active_slave >= slave_count)
                active_slave = 0;

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave =
                        rte_eth_rx_burst(internals->active_slaves[active_slave],
                                         bd_rx_q->queue_id,
                                         bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

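/*
 * A frame is treated as a slow (LACP/marker) protocol frame only when it
 * is untagged and its EtherType is ETHER_TYPE_SLOW (0x8809) with the LACP
 * or marker subtype; everything else stays on the normal datapath.
 */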
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

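/*
 * The pattern above matches any Ethernet frame whose EtherType equals
 * ETHER_TYPE_SLOW; the verify/set helpers below pair it with a QUEUE
 * action so slaves can steer LACP control traffic to a dedicated Rx queue
 * in hardware. Roughly the same rule expressed in testpmd flow syntax
 * (illustrative only, assuming port 0 and queue 1):
 *
 *	flow create 0 ingress pattern eth type is 0x8809 / end
 *		actions queue index 1 / end
 */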
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that all slaves in the bonding support flow director */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

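/*
 * Fast-path Rx for mode 4 with dedicated queues: because LACP control
 * frames are steered to their own hardware queue by the flow rule above,
 * this handler can fan out across the active slaves round-robin style
 * without inspecting individual packets for slow-protocol frames.
 */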
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint16_t active_slave;
        uint16_t i;

        /* Copy slave list to protect against slave up/down changes during Rx
         * bursting */
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;
        if (active_slave >= slave_count)
                active_slave = 0;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Read packets from this slave */
                num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
                                                bd_rx_q->queue_id,
                                                bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;

                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        /* 2-D array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves.
         * The hash writes one entry per packet, so size it by the burst,
         * not by RTE_MAX_ETHPORTS. */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);


        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave using a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }


        /* Send packet burst on each slave device */
        for (i = 0; i < dist_slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}


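/*
 * Mode 4 Rx without dedicated queues: data and LACP frames arrive mixed on
 * the same queues, so every received packet has to be classified. Slow
 * protocol frames are diverted to the mode 4 state machine, and data
 * frames are dropped when the slave is not in the COLLECTING state or,
 * outside promiscuous mode, when they are not addressed to the bond.
 */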
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_eth_dev *bonded_eth_dev =
                                        &rte_eth_devices[internals->port_id];
        struct ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint16_t i, j, k;       /* wide enough for bursts of more than 255 packets */
        uint8_t subtype;

        /* Copy slave list to protect against slave up/down changes during Rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* If packet is not pure L2 and is known, skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove packet from array if it is slow packet or slave is not
                         * in collecting state or bonding interface is not in promiscuous
                         * mode and packet address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting ||
                                (!promisc &&
                                 !is_multicast_ether_addr(&hdr->d_addr) &&
                                 !is_same_ether_addr(bond_mac,
                                                     &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

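/*
 * Per-client Rx/Tx packet counters used by the mode 6 (ALB) debug output.
 * The table is keyed on (IPv4 address, slave port); lookup is a linear
 * scan, which is acceptable for a debug-only path over at most
 * MAX_CLIENTS_NUMBER entries.
 */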
#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Just update RX packets number for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Don't write past the end of a full table. */
        if (active_clients >= MAX_CLIENTS_NUMBER)
                return;
        /* Insert it into the table and increment its stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
                src_ip,                                                 \
                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        /* This is an Rx queue, so cast through the Rx queue structure */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* increment current slave index so the next call to tx burst starts on the
         * next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                       &slave_bufs[i][num_tx_slave],
                                       tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}


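/*
 * Transmit hash policies for balance and 802.3AD modes. Each helper folds
 * the selected header fields together with XOR and reduces the result
 * modulo the slave count, so all packets of one flow always map to the
 * same slave. A minimal worked example (illustrative values, not taken
 * from this file): with 3 slaves and an L2 hash of 0x1234,
 * 0x1234 ^ (0x1234 >> 8) = 0x1226, and 0x1226 % 3 = 2, so the packet is
 * queued on the third slave.
 */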
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct udp_hdr *udp_hdr;
        struct tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint16_t slave;         /* port id; does not fit in a uint8_t */
};

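/*
 * Mode 5 (TLB) keeps internals->tlb_slaves_order sorted by the bandwidth
 * each slave has left, so the transmit path can walk the list and drain
 * packets onto the least-loaded slave first. The ordering is refreshed
 * from an alarm callback every REORDER_PERIOD_MS milliseconds.
 */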
void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint16_t i, slave_id;   /* port ids do not fit in a uint8_t */

        internals->slave_update_idx++;


        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

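/*
 * TLB transmit: frames whose source MAC is the bond's (primary slave's)
 * address are rewritten to carry the transmitting slave's own MAC, so the
 * same address never appears on two switch ports at once. Slaves are
 * tried in tlb_slaves_order, i.e. least-loaded first.
 */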
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);


        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

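/*
 * Mode 6 (ALB) transmit: ARP frames are assigned to slaves via the ALB
 * client table so each peer keeps resolving the bond to one fixed slave
 * MAC, while all other traffic falls through to the TLB policy above.
 * ARP update packets generated here keep the peers' tables in sync and
 * are deliberately not counted in the returned burst size.
 */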
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_BOND_LOG(ERR,
                                                     "Failed to allocate ARP packet from pool");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        /* Copy failed packets back from the tail of the slave
                         * buffer, which may hold fewer than nb_pkts entries */
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                /* As above, only the unsent packets, which sit at the tail of
                 * the TLB buffer, are copied back to the tail of bufs */
                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

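/*
 * Mode 2 (balance) transmit. All the Tx handlers here share one contract
 * with the caller: the return value is the number of packets actually
 * sent, and any unsent mbufs are compacted to the tail of bufs so the
 * application can retry or free them. A sketch of the application side
 * (bond_port_id is a placeholder, not a name from this file):
 *
 *	uint16_t sent = rte_eth_tx_burst(bond_port_id, 0, pkts, nb_pkts);
 *	for (uint16_t k = sent; k < nb_pkts; k++)
 *		rte_pktmbuf_free(pkts[k]);
 */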
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave using a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

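/*
 * Mode 4 transmit without dedicated queues: the same as balance mode
 * except that only slaves whose LACP actor state is DISTRIBUTING receive
 * data packets, and buffered LACPDUs queued by the mode 4 state machine
 * are flushed from each slave's tx_ring at the end of every burst.
 */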
1276 static uint16_t
1277 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1278                 uint16_t nb_bufs)
1279 {
1280         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1281         struct bond_dev_private *internals = bd_tx_q->dev_private;
1282
1283         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1284         uint16_t slave_count;
1285
1286         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1287         uint16_t dist_slave_count;
1288
1289         /* 2-D array to sort mbufs for transmission on each slave into */
1290         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1291         /* Number of mbufs for transmission on each slave */
1292         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1293         /* Mapping array generated by hash function to map mbufs to slaves */
1294         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1295
1296         uint16_t slave_tx_count;
1297         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1298
1299         uint16_t i;
1300
1301         if (unlikely(nb_bufs == 0))
1302                 return 0;
1303
1304         /* Copy slave list to protect against slave up/down changes during tx
1305          * bursting */
1306         slave_count = internals->active_slave_count;
1307         if (unlikely(slave_count < 1))
1308                 return 0;
1309
1310         memcpy(slave_port_ids, internals->active_slaves,
1311                         sizeof(slave_port_ids[0]) * slave_count);
1312
1313         dist_slave_count = 0;
1314         for (i = 0; i < slave_count; i++) {
1315                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1316
1317                 if (ACTOR_STATE(port, DISTRIBUTING))
1318                         dist_slave_port_ids[dist_slave_count++] =
1319                                         slave_port_ids[i];
1320         }
1321
1322         if (likely(dist_slave_count > 1)) {
1323
1324                 /*
1325                  * Populate slaves mbuf with the packets which are to be sent
1326                  * on it, selecting output slave using hash based on xmit policy
1327                  */
1328                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1329                                 bufs_slave_port_idxs);
1330
1331                 for (i = 0; i < nb_bufs; i++) {
1332                         /*
1333                          * Populate slave mbuf arrays with mbufs for that
1334                          * slave
1335                          */
1336                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1337
1338                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1339                                         bufs[i];
1340                 }
1341
1342
1343                 /* Send packet burst on each slave device */
1344                 for (i = 0; i < dist_slave_count; i++) {
1345                         if (slave_nb_bufs[i] == 0)
1346                                 continue;
1347
1348                         slave_tx_count = rte_eth_tx_burst(
1349                                         dist_slave_port_ids[i],
1350                                         bd_tx_q->queue_id, slave_bufs[i],
1351                                         slave_nb_bufs[i]);
1352
1353                         total_tx_count += slave_tx_count;
1354
1355                         /* If tx burst fails move packets to end of bufs */
1356                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1357                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1358                                                 slave_tx_count;
1359                                 total_tx_fail_count += slave_tx_fail_count;
1360
1361                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1362                                        &slave_bufs[i][slave_tx_count],
1363                                        slave_tx_fail_count * sizeof(bufs[0]));
1364                         }
1365                 }
1366         }
1367
1368         /* Check for LACP control packets and send if available */
1369         for (i = 0; i < slave_count; i++) {
1370                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1371                 struct rte_mbuf *ctrl_pkt = NULL;
1372
1373                 if (likely(rte_ring_empty(port->tx_ring)))
1374                         continue;
1375
1376                 if (rte_ring_dequeue(port->tx_ring,
1377                                      (void **)&ctrl_pkt) != -ENOENT) {
1378                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1379                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1380                         /*
1381                          * re-enqueue LAG control plane packets to buffering
1382                          * ring if transmission fails so the packet isn't lost.
1383                          */
1384                         if (slave_tx_count != 1)
1385                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1386                 }
1387         }
1388
1389         return total_tx_count;
1390 }
1391
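/*
 * Broadcast mode: transmit each packet on every active slave. The mbuf
 * reference count is bumped by (num_of_slaves - 1) up front because each
 * slave's TX path consumes one reference per packet; if any slave sends
 * fewer packets than requested, the surplus references are freed for every
 * slave except the most successful one, whose count is returned to the
 * caller.
 */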
1392 static uint16_t
1393 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1394                 uint16_t nb_pkts)
1395 {
1396         struct bond_dev_private *internals;
1397         struct bond_tx_queue *bd_tx_q;
1398
1399         uint8_t tx_failed_flag = 0;
1400         uint16_t num_of_slaves, slaves[RTE_MAX_ETHPORTS];
1401
1402         uint16_t max_nb_of_tx_pkts = 0;
1403
1404         int slave_tx_total[RTE_MAX_ETHPORTS];
1405         int i, most_successful_tx_slave = -1;
1406
1407         bd_tx_q = (struct bond_tx_queue *)queue;
1408         internals = bd_tx_q->dev_private;
1409
1410         /* Copy slave list to protect against slave up/down changes during tx
1411          * bursting */
1412         num_of_slaves = internals->active_slave_count;
1413         memcpy(slaves, internals->active_slaves,
1414                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1415
1416         if (num_of_slaves < 1)
1417                 return 0;
1418
1419         /* Increment reference count on mbufs */
1420         for (i = 0; i < nb_pkts; i++)
1421                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1422
1423         /* Transmit burst on each active slave */
1424         for (i = 0; i < num_of_slaves; i++) {
1425                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1426                                         bufs, nb_pkts);
1427
1428                 if (unlikely(slave_tx_total[i] < nb_pkts))
1429                         tx_failed_flag = 1;
1430
1431                 /* record the value and slave index for the slave which transmits the
1432                  * maximum number of packets */
1433                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1434                         max_nb_of_tx_pkts = slave_tx_total[i];
1435                         most_successful_tx_slave = i;
1436                 }
1437         }
1438
1439         /* if slaves fail to transmit packets from burst, the calling application
1440          * is not expected to know about multiple references to packets so we must
1441          * handle failures of all packets except those of the most successful slave
1442          */
1443         if (unlikely(tx_failed_flag))
1444                 for (i = 0; i < num_of_slaves; i++)
1445                         if (i != most_successful_tx_slave)
1446                                 while (slave_tx_total[i] < nb_pkts)
1447                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1448
1449         return max_nb_of_tx_pkts;
1450 }
1451
1452 void
1453 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1454 {
1455         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1456
1457         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1458                 /**
1459                  * If in mode 4 then save the link properties of the first
1460                  * slave; all subsequent slaves must match these properties
1461                  */
1462                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1463
1464                 bond_link->link_autoneg = slave_link->link_autoneg;
1465                 bond_link->link_duplex = slave_link->link_duplex;
1466                 bond_link->link_speed = slave_link->link_speed;
1467         } else {
1468                 /**
1469                  * In any other mode the link properties are set to default
1470                  * values of AUTONEG/DUPLEX
1471                  */
1472                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1473                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1474         }
1475 }
1476
1477 int
1478 link_properties_valid(struct rte_eth_dev *ethdev,
1479                 struct rte_eth_link *slave_link)
1480 {
1481         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1482
1483         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1484                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1485
1486                 if (bond_link->link_duplex != slave_link->link_duplex ||
1487                         bond_link->link_autoneg != slave_link->link_autoneg ||
1488                         bond_link->link_speed != slave_link->link_speed)
1489                         return -1;
1490         }
1491
1492         return 0;
1493 }
1494
1495 int
1496 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1497 {
1498         struct ether_addr *mac_addr;
1499
1500         if (eth_dev == NULL) {
1501                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1502                 return -1;
1503         }
1504
1505         if (dst_mac_addr == NULL) {
1506                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1507                 return -1;
1508         }
1509
1510         mac_addr = eth_dev->data->mac_addrs;
1511
1512         ether_addr_copy(mac_addr, dst_mac_addr);
1513         return 0;
1514 }
1515
1516 int
1517 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1518 {
1519         struct ether_addr *mac_addr;
1520
1521         if (eth_dev == NULL) {
1522                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1523                 return -1;
1524         }
1525
1526         if (new_mac_addr == NULL) {
1527                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1528                 return -1;
1529         }
1530
1531         mac_addr = eth_dev->data->mac_addrs;
1532
1533         /* If the new MAC is different from the current MAC then update */
1534         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1535                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1536
1537         return 0;
1538 }
1539
1540 static const struct ether_addr null_mac_addr;
1541
1542 /*
1543  * Add additional MAC addresses to the slave
1544  */
1545 int
1546 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1547                 uint16_t slave_port_id)
1548 {
1549         int i, ret;
1550         struct ether_addr *mac_addr;
1551
1552         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1553                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1554                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1555                         break;
1556
1557                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1558                 if (ret < 0) {
1559                         /* rollback */
1560                         for (i--; i > 0; i--)
1561                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1562                                         &bonded_eth_dev->data->mac_addrs[i]);
1563                         return ret;
1564                 }
1565         }
1566
1567         return 0;
1568 }
1569
1570 /*
1571  * Remove additional MAC addresses from the slave
1572  */
1573 int
1574 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1575                 uint16_t slave_port_id)
1576 {
1577         int i, rc, ret;
1578         struct ether_addr *mac_addr;
1579
1580         rc = 0;
1581         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1582                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1583                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1584                         break;
1585
1586                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1587                 /* save only the first error */
1588                 if (ret < 0 && rc == 0)
1589                         rc = ret;
1590         }
1591
1592         return rc;
1593 }
1594
1595 int
1596 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1597 {
1598         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1599         int i;
1600
1601         /* Update slave devices MAC addresses */
1602         if (internals->slave_count < 1)
1603                 return -1;
1604
1605         switch (internals->mode) {
1606         case BONDING_MODE_ROUND_ROBIN:
1607         case BONDING_MODE_BALANCE:
1608         case BONDING_MODE_BROADCAST:
1609                 for (i = 0; i < internals->slave_count; i++) {
1610                         if (rte_eth_dev_default_mac_addr_set(
1611                                         internals->slaves[i].port_id,
1612                                         bonded_eth_dev->data->mac_addrs)) {
1613                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1614                                                 internals->slaves[i].port_id);
1615                                 return -1;
1616                         }
1617                 }
1618                 break;
1619         case BONDING_MODE_8023AD:
1620                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1621                 break;
1622         case BONDING_MODE_ACTIVE_BACKUP:
1623         case BONDING_MODE_TLB:
1624         case BONDING_MODE_ALB:
1625         default:
1626                 for (i = 0; i < internals->slave_count; i++) {
1627                         if (internals->slaves[i].port_id ==
1628                                         internals->current_primary_port) {
1629                                 if (rte_eth_dev_default_mac_addr_set(
1630                                                 internals->current_primary_port,
1631                                                 bonded_eth_dev->data->mac_addrs)) {
1632                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1633                                                         internals->current_primary_port);
1634                                         return -1;
1635                                 }
1636                         } else {
1637                                 if (rte_eth_dev_default_mac_addr_set(
1638                                                 internals->slaves[i].port_id,
1639                                                 &internals->slaves[i].persisted_mac_addr)) {
1640                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1641                                                         internals->slaves[i].port_id);
1642                                         return -1;
1643                                 }
1644                         }
1645                 }
1646         }
1647
1648         return 0;
1649 }
1650
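/*
 * Bind the RX/TX burst handlers that implement the requested bonding mode.
 * For context, a minimal application-side sketch (illustrative only; the
 * vdev name and port ids below are assumptions, not part of this driver):
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *	rte_eth_bond_slave_add(bond_port, 0);
 *	rte_eth_bond_slave_add(bond_port, 1);
 *	rte_eth_bond_primary_set(bond_port, 0);
 *
 * The mode can also be chosen at creation time via the mode= devarg.
 */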
1651 int
1652 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1653 {
1654         struct bond_dev_private *internals;
1655
1656         internals = eth_dev->data->dev_private;
1657
1658         switch (mode) {
1659         case BONDING_MODE_ROUND_ROBIN:
1660                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1661                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1662                 break;
1663         case BONDING_MODE_ACTIVE_BACKUP:
1664                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1665                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1666                 break;
1667         case BONDING_MODE_BALANCE:
1668                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1669                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1670                 break;
1671         case BONDING_MODE_BROADCAST:
1672                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1673                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1674                 break;
1675         case BONDING_MODE_8023AD:
1676                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1677                         return -1;
1678
1679                 if (internals->mode4.dedicated_queues.enabled == 0) {
1680                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1681                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1682                         RTE_BOND_LOG(WARNING,
1683                                 "Using mode 4, it is necessary to do TX burst "
1684                                 "and RX burst at least every 100ms.");
1685                 } else {
1686                         /* Use flow director's optimization */
1687                         eth_dev->rx_pkt_burst =
1688                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1689                         eth_dev->tx_pkt_burst =
1690                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1691                 }
1692                 break;
1693         case BONDING_MODE_TLB:
1694                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1695                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1696                 break;
1697         case BONDING_MODE_ALB:
1698                 if (bond_mode_alb_enable(eth_dev) != 0)
1699                         return -1;
1700
1701                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1702                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1703                 break;
1704         default:
1705                 return -1;
1706         }
1707
1708         internals->mode = mode;
1709
1710         return 0;
1711 }
1712
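/*
 * Prepare a slave for LACP "slow" (control) traffic: lazily create a
 * per-slave mempool for slow-protocol frames and, when dedicated queues are
 * enabled, set up one extra RX and one extra TX queue reserved for them.
 * Note the descriptor counts (128 RX / 512 TX) and the pool geometry below
 * are fixed in this driver rather than derived from slave capabilities.
 */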
1714 static int
1715 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1716                 struct rte_eth_dev *slave_eth_dev)
1717 {
1718         int errval = 0;
1719         struct bond_dev_private *internals = (struct bond_dev_private *)
1720                 bonded_eth_dev->data->dev_private;
1721         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1722
1723         if (port->slow_pool == NULL) {
1724                 char mem_name[256];
1725                 int slave_id = slave_eth_dev->data->port_id;
1726
1727                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1728                                 slave_id);
1729                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1730                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1731                         slave_eth_dev->data->numa_node);
1732
1733                 /* Any memory allocation failure in initialization is critical because
1734                  * resources can't be freed, so reinitialization is impossible. */
1735                 if (port->slow_pool == NULL) {
1736                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1737                                 slave_id, mem_name, rte_strerror(rte_errno));
1738                 }
1739         }
1740
1741         if (internals->mode4.dedicated_queues.enabled == 1) {
1742                 /* Configure slow Rx queue */
1743
1744                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1745                                 internals->mode4.dedicated_queues.rx_qid, 128,
1746                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1747                                 NULL, port->slow_pool);
1748                 if (errval != 0) {
1749                         RTE_BOND_LOG(ERR,
1750                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1751                                         slave_eth_dev->data->port_id,
1752                                         internals->mode4.dedicated_queues.rx_qid,
1753                                         errval);
1754                         return errval;
1755                 }
1756
1757                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1758                                 internals->mode4.dedicated_queues.tx_qid, 512,
1759                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1760                                 NULL);
1761                 if (errval != 0) {
1762                         RTE_BOND_LOG(ERR,
1763                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1764                                 slave_eth_dev->data->port_id,
1765                                 internals->mode4.dedicated_queues.tx_qid,
1766                                 errval);
1767                         return errval;
1768                 }
1769         }
1770         return 0;
1771 }
1772
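/*
 * (Re)configure a slave to mirror the bonded device: stop it, propagate the
 * RSS, VLAN-filter and MTU settings, set up one RX/TX queue pair per bonded
 * queue (plus the dedicated slow queues in mode 4), restart it, and finally
 * resynchronize the RSS RETA and the initial link status.
 */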
1773 int
1774 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1775                 struct rte_eth_dev *slave_eth_dev)
1776 {
1777         struct bond_rx_queue *bd_rx_q;
1778         struct bond_tx_queue *bd_tx_q;
1779         uint16_t nb_rx_queues;
1780         uint16_t nb_tx_queues;
1781
1782         int errval;
1783         uint16_t q_id;
1784         struct rte_flow_error flow_error;
1785
1786         struct bond_dev_private *internals = (struct bond_dev_private *)
1787                 bonded_eth_dev->data->dev_private;
1788
1789         /* Stop slave */
1790         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1791
1792         /* Enable interrupts on slave device if supported */
1793         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1794                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1795
1796         /* If RSS is enabled for bonding, try to enable it for slaves  */
1797         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1798                 if (internals->rss_key_len != 0) {
1799                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1800                                         internals->rss_key_len;
1801                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1802                                         internals->rss_key;
1803                 } else {
1804                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1805                 }
1806
1807                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1808                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1809                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1810                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1811         }
1812
1813         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1814                         DEV_RX_OFFLOAD_VLAN_FILTER)
1815                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1816                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1817         else
1818                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1819                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1820
1821         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1822         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1823
1824         if (internals->mode == BONDING_MODE_8023AD) {
1825                 if (internals->mode4.dedicated_queues.enabled == 1) {
1826                         nb_rx_queues++;
1827                         nb_tx_queues++;
1828                 }
1829         }
1830
1831         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1832                                      bonded_eth_dev->data->mtu);
1833         if (errval != 0 && errval != -ENOTSUP) {
1834                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1835                                 slave_eth_dev->data->port_id, errval);
1836                 return errval;
1837         }
1838
1839         /* Configure device */
1840         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1841                         nb_rx_queues, nb_tx_queues,
1842                         &(slave_eth_dev->data->dev_conf));
1843         if (errval != 0) {
1844                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1845                                 slave_eth_dev->data->port_id, errval);
1846                 return errval;
1847         }
1848
1849         /* Setup Rx Queues */
1850         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1851                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1852
1853                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1854                                 bd_rx_q->nb_rx_desc,
1855                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1856                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1857                 if (errval != 0) {
1858                         RTE_BOND_LOG(ERR,
1859                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1860                                         slave_eth_dev->data->port_id, q_id, errval);
1861                         return errval;
1862                 }
1863         }
1864
1865         /* Setup Tx Queues */
1866         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1867                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1868
1869                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1870                                 bd_tx_q->nb_tx_desc,
1871                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1872                                 &bd_tx_q->tx_conf);
1873                 if (errval != 0) {
1874                         RTE_BOND_LOG(ERR,
1875                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1876                                 slave_eth_dev->data->port_id, q_id, errval);
1877                         return errval;
1878                 }
1879         }
1880
1881         if (internals->mode == BONDING_MODE_8023AD &&
1882                         internals->mode4.dedicated_queues.enabled == 1) {
1883                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1884                 if (errval != 0)
1885                         return errval;
1886
1887                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1888                                 slave_eth_dev->data->port_id) != 0) {
1889                         RTE_BOND_LOG(ERR,
1890                                 "bond_ethdev_8023ad_flow_verify: port=%d",
1891                                 slave_eth_dev->data->port_id);
1892                         return -1;
1893                 }
1894
1895                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1896                         rte_flow_destroy(slave_eth_dev->data->port_id,
1897                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1898                                         &flow_error);
1899
1900                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1901                                 slave_eth_dev->data->port_id);
1902         }
1903
1904         /* Start device */
1905         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1906         if (errval != 0) {
1907                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1908                                 slave_eth_dev->data->port_id, errval);
1909                 return -1;
1910         }
1911
1912         /* If RSS is enabled for bonding, synchronize RETA */
1913         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1914                 int i;
1915                 struct bond_dev_private *internals;
1916
1917                 internals = bonded_eth_dev->data->dev_private;
1918
1919                 for (i = 0; i < internals->slave_count; i++) {
1920                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1921                                 errval = rte_eth_dev_rss_reta_update(
1922                                                 slave_eth_dev->data->port_id,
1923                                                 &internals->reta_conf[0],
1924                                                 internals->slaves[i].reta_size);
1925                                 if (errval != 0) {
1926                                         RTE_BOND_LOG(WARNING,
1927                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1928                                                      " RSS Configuration for bonding may be inconsistent.",
1929                                                      slave_eth_dev->data->port_id, errval);
1930                                 }
1931                                 break;
1932                         }
1933                 }
1934         }
1935
1936         /* If lsc interrupt is set, check initial slave's link status */
1937         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1938                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1939                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1940                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1941                         NULL);
1942         }
1943
1944         return 0;
1945 }
1946
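/*
 * Drop a slave from the configured slave list, compacting both the slave
 * array and the per-flow slave handles, then reset the slave's ethdev so
 * that it must be fully reconfigured before it can be reused.
 */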
1947 void
1948 slave_remove(struct bond_dev_private *internals,
1949                 struct rte_eth_dev *slave_eth_dev)
1950 {
1951         uint16_t i;
1952
1953         for (i = 0; i < internals->slave_count; i++)
1954                 if (internals->slaves[i].port_id ==
1955                                 slave_eth_dev->data->port_id)
1956                         break;
1957
1958         if (i < (internals->slave_count - 1)) {
1959                 struct rte_flow *flow;
1960
1961                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1962                                 sizeof(internals->slaves[0]) *
1963                                 (internals->slave_count - i - 1));
1964                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1965                         memmove(&flow->flows[i], &flow->flows[i + 1],
1966                                 sizeof(flow->flows[0]) *
1967                                 (internals->slave_count - i - 1));
1968                         flow->flows[internals->slave_count - 1] = NULL;
1969                 }
1970         }
1971
1972         internals->slave_count--;
1973
1974         /* force reconfiguration of slave interfaces */
1975         _rte_eth_dev_reset(slave_eth_dev);
1976 }
1977
1978 static void
1979 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1980
1981 void
1982 slave_add(struct bond_dev_private *internals,
1983                 struct rte_eth_dev *slave_eth_dev)
1984 {
1985         struct bond_slave_details *slave_details =
1986                         &internals->slaves[internals->slave_count];
1987
1988         slave_details->port_id = slave_eth_dev->data->port_id;
1989         slave_details->last_link_status = 0;
1990
1991         /* Mark slave devices that don't support interrupts so we can
1992          * compensate when we start the bond
1993          */
1994         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1995                 slave_details->link_status_poll_enabled = 1;
1996         }
1997
1998         slave_details->link_status_wait_to_complete = 0;
1999         /* Persist the slave's current MAC address so it can be restored later */
2000         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2001                         sizeof(struct ether_addr));
2002 }
2003
2004 void
2005 bond_ethdev_primary_set(struct bond_dev_private *internals,
2006                 uint16_t slave_port_id)
2007 {
2008         int i;
2009
2010         if (internals->active_slave_count < 1)
2011                 internals->current_primary_port = slave_port_id;
2012         else
2013                 /* Search bonded device slave ports for new proposed primary port */
2014                 for (i = 0; i < internals->active_slave_count; i++) {
2015                         if (internals->active_slaves[i] == slave_port_id)
2016                                 internals->current_primary_port = slave_port_id;
2017                 }
2018 }
2019
2020 static void
2021 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2022
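/*
 * Start the bonded device: resolve the MAC address to use (the primary
 * slave's persisted MAC unless the user defined one), re-apply promiscuous
 * mode, reserve the dedicated mode 4 queue ids, reconfigure and start every
 * slave, arm the link-status polling alarm if any slave lacks LSC
 * interrupts, and start the mode-specific machinery (802.3ad, TLB/ALB).
 */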
2023 static int
2024 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2025 {
2026         struct bond_dev_private *internals;
2027         int i;
2028
2029         /* slave eth dev will be started by bonded device */
2030         if (check_for_bonded_ethdev(eth_dev)) {
2031                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2032                                 eth_dev->data->port_id);
2033                 return -1;
2034         }
2035
2036         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2037         eth_dev->data->dev_started = 1;
2038
2039         internals = eth_dev->data->dev_private;
2040
2041         if (internals->slave_count == 0) {
2042                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2043                 goto out_err;
2044         }
2045
2046         if (internals->user_defined_mac == 0) {
2047                 struct ether_addr *new_mac_addr = NULL;
2048
2049                 for (i = 0; i < internals->slave_count; i++)
2050                         if (internals->slaves[i].port_id == internals->primary_port)
2051                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2052
2053                 if (new_mac_addr == NULL)
2054                         goto out_err;
2055
2056                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2057                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2058                                         eth_dev->data->port_id);
2059                         goto out_err;
2060                 }
2061         }
2062
2063         /* If bonded device is configured in promiscuous mode then re-apply config */
2064         if (internals->promiscuous_en)
2065                 bond_ethdev_promiscuous_enable(eth_dev);
2066
2067         if (internals->mode == BONDING_MODE_8023AD) {
2068                 if (internals->mode4.dedicated_queues.enabled == 1) {
2069                         internals->mode4.dedicated_queues.rx_qid =
2070                                         eth_dev->data->nb_rx_queues;
2071                         internals->mode4.dedicated_queues.tx_qid =
2072                                         eth_dev->data->nb_tx_queues;
2073                 }
2074         }
2075
2077         /* Reconfigure each slave device if starting bonded device */
2078         for (i = 0; i < internals->slave_count; i++) {
2079                 struct rte_eth_dev *slave_ethdev =
2080                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2081                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2082                         RTE_BOND_LOG(ERR,
2083                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2084                                 eth_dev->data->port_id,
2085                                 internals->slaves[i].port_id);
2086                         goto out_err;
2087                 }
2088                 /* We will need to poll for link status if any slave doesn't
2089                  * support interrupts
2090                  */
2091                 if (internals->slaves[i].link_status_poll_enabled)
2092                         internals->link_status_polling_enabled = 1;
2093         }
2094
2095         /* start polling if needed */
2096         if (internals->link_status_polling_enabled) {
2097                 rte_eal_alarm_set(
2098                         internals->link_status_polling_interval_ms * 1000,
2099                         bond_ethdev_slave_link_status_change_monitor,
2100                         (void *)&rte_eth_devices[internals->port_id]);
2101         }
2102
2103         /* Update all slave devices MACs*/
2104         if (mac_address_slaves_update(eth_dev) != 0)
2105                 goto out_err;
2106
2107         if (internals->user_defined_primary_port)
2108                 bond_ethdev_primary_set(internals, internals->primary_port);
2109
2110         if (internals->mode == BONDING_MODE_8023AD)
2111                 bond_mode_8023ad_start(eth_dev);
2112
2113         if (internals->mode == BONDING_MODE_TLB ||
2114                         internals->mode == BONDING_MODE_ALB)
2115                 bond_tlb_enable(internals);
2116
2117         return 0;
2118
2119 out_err:
2120         eth_dev->data->dev_started = 0;
2121         return -1;
2122 }
2123
2124 static void
2125 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2126 {
2127         uint16_t i;
2128
2129         if (dev->data->rx_queues != NULL) {
2130                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2131                         rte_free(dev->data->rx_queues[i]);
2132                         dev->data->rx_queues[i] = NULL;
2133                 }
2134                 dev->data->nb_rx_queues = 0;
2135         }
2136
2137         if (dev->data->tx_queues != NULL) {
2138                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2139                         rte_free(dev->data->tx_queues[i]);
2140                         dev->data->tx_queues[i] = NULL;
2141                 }
2142                 dev->data->nb_tx_queues = 0;
2143         }
2144 }
2145
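/*
 * Stop the bonded device: halt the mode-specific machinery (draining any
 * queued 802.3ad control frames, disabling TLB bookkeeping), mark the link
 * down, disable link-status polling and stop/deactivate all active slaves.
 */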
2146 void
2147 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2148 {
2149         struct bond_dev_private *internals = eth_dev->data->dev_private;
2150         uint16_t i;
2151
2152         if (internals->mode == BONDING_MODE_8023AD) {
2153                 struct port *port;
2154                 void *pkt = NULL;
2155
2156                 bond_mode_8023ad_stop(eth_dev);
2157
2158                 /* Discard all messages to/from mode 4 state machines */
2159                 for (i = 0; i < internals->active_slave_count; i++) {
2160                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2161
2162                         RTE_ASSERT(port->rx_ring != NULL);
2163                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2164                                 rte_pktmbuf_free(pkt);
2165
2166                         RTE_ASSERT(port->tx_ring != NULL);
2167                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2168                                 rte_pktmbuf_free(pkt);
2169                 }
2170         }
2171
2172         if (internals->mode == BONDING_MODE_TLB ||
2173                         internals->mode == BONDING_MODE_ALB) {
2174                 bond_tlb_disable(internals);
2175                 for (i = 0; i < internals->active_slave_count; i++)
2176                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2177         }
2178
2179         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2180         eth_dev->data->dev_started = 0;
2181
2182         internals->link_status_polling_enabled = 0;
2183         for (i = 0; i < internals->slave_count; i++) {
2184                 uint16_t slave_id = internals->slaves[i].port_id;
2185                 if (find_slave_by_id(internals->active_slaves,
2186                                 internals->active_slave_count, slave_id) !=
2187                                                 internals->active_slave_count) {
2188                         internals->slaves[i].last_link_status = 0;
2189                         rte_eth_dev_stop(slave_id);
2190                         deactivate_slave(eth_dev, slave_id);
2191                 }
2192         }
2193 }
2194
2195 void
2196 bond_ethdev_close(struct rte_eth_dev *dev)
2197 {
2198         struct bond_dev_private *internals = dev->data->dev_private;
2199         uint16_t bond_port_id = internals->port_id;
2200         int skipped = 0;
2201         struct rte_flow_error ferror;
2202
2203         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2204         while (internals->slave_count != skipped) {
2205                 uint16_t port_id = internals->slaves[skipped].port_id;
2206
2207                 rte_eth_dev_stop(port_id);
2208
2209                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2210                         RTE_BOND_LOG(ERR,
2211                                      "Failed to remove port %d from bonded device %s",
2212                                      port_id, dev->device->name);
2213                         skipped++;
2214                 }
2215         }
2216         bond_flow_ops.flush(dev, &ferror);
2217         bond_ethdev_free_queues(dev);
2218         rte_bitmap_reset(internals->vlan_filter_bmp);
2219 }
2220
2221 /* forward declaration */
2222 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2223
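/*
 * Report device capabilities. Queue counts and RX/TX descriptor limits are
 * advertised as the minimum across all slaves, since every slave must be
 * able to honour whatever the application configures on the bonded port.
 */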
2224 static void
2225 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2226 {
2227         struct bond_dev_private *internals = dev->data->dev_private;
2228
2229         uint16_t max_nb_rx_queues = UINT16_MAX;
2230         uint16_t max_nb_tx_queues = UINT16_MAX;
2231         uint16_t max_rx_desc_lim = UINT16_MAX;
2232         uint16_t max_tx_desc_lim = UINT16_MAX;
2233
2234         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2235
2236         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2237                         internals->candidate_max_rx_pktlen :
2238                         ETHER_MAX_JUMBO_FRAME_LEN;
2239
2240         /* The max number of tx/rx queues that the bonded device can support is
2241          * the minimum of the values reported by the bonded slaves, as all slaves
2242          * must be capable of supporting the same number of tx/rx queues.
2243          */
2244         if (internals->slave_count > 0) {
2245                 struct rte_eth_dev_info slave_info;
2246                 uint16_t idx;
2247
2248                 for (idx = 0; idx < internals->slave_count; idx++) {
2249                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2250                                         &slave_info);
2251
2252                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2253                                 max_nb_rx_queues = slave_info.max_rx_queues;
2254
2255                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2256                                 max_nb_tx_queues = slave_info.max_tx_queues;
2257
2258                         if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2259                                 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2260
2261                         if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2262                                 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2263                 }
2264         }
2265
2266         dev_info->max_rx_queues = max_nb_rx_queues;
2267         dev_info->max_tx_queues = max_nb_tx_queues;
2268
2269         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2270                sizeof(dev_info->default_rxconf));
2271         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2272                sizeof(dev_info->default_txconf));
2273
2274         dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2275         dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2276
2277         /**
2278          * If dedicated hw queues are enabled for the bonding device in LACP mode
2279          * then we need to reduce the maximum number of data path queues by 1.
2280          */
2281         if (internals->mode == BONDING_MODE_8023AD &&
2282                 internals->mode4.dedicated_queues.enabled == 1) {
2283                 dev_info->max_rx_queues--;
2284                 dev_info->max_tx_queues--;
2285         }
2286
2287         dev_info->min_rx_bufsize = 0;
2288
2289         dev_info->rx_offload_capa = internals->rx_offload_capa;
2290         dev_info->tx_offload_capa = internals->tx_offload_capa;
2291         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2292         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2293         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2294
2295         dev_info->reta_size = internals->reta_size;
2296 }
2297
2298 static int
2299 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2300 {
2301         int res;
2302         uint16_t i;
2303         struct bond_dev_private *internals = dev->data->dev_private;
2304
2305         /* don't do this while a slave is being added */
2306         rte_spinlock_lock(&internals->lock);
2307
2308         if (on)
2309                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2310         else
2311                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2312
2313         for (i = 0; i < internals->slave_count; i++) {
2314                 uint16_t port_id = internals->slaves[i].port_id;
2315
2316                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2317                 if (res == -ENOTSUP)
2318                         RTE_BOND_LOG(WARNING,
2319                                      "Setting VLAN filter on slave port %u not supported.",
2320                                      port_id);
2321         }
2322
2323         rte_spinlock_unlock(&internals->lock);
2324         return 0;
2325 }
2326
2327 static int
2328 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2329                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2330                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2331 {
2332         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2333                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2334                                         0, dev->data->numa_node);
2335         if (bd_rx_q == NULL)
2336                 return -1;
2337
2338         bd_rx_q->queue_id = rx_queue_id;
2339         bd_rx_q->dev_private = dev->data->dev_private;
2340
2341         bd_rx_q->nb_rx_desc = nb_rx_desc;
2342
2343         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2344         bd_rx_q->mb_pool = mb_pool;
2345
2346         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2347
2348         return 0;
2349 }
2350
2351 static int
2352 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2353                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2354                 const struct rte_eth_txconf *tx_conf)
2355 {
2356         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2357                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2358                                         0, dev->data->numa_node);
2359
2360         if (bd_tx_q == NULL)
2361                 return -1;
2362
2363         bd_tx_q->queue_id = tx_queue_id;
2364         bd_tx_q->dev_private = dev->data->dev_private;
2365
2366         bd_tx_q->nb_tx_desc = nb_tx_desc;
2367         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2368
2369         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2370
2371         return 0;
2372 }
2373
2374 static void
2375 bond_ethdev_rx_queue_release(void *queue)
2376 {
2377         if (queue == NULL)
2378                 return;
2379
2380         rte_free(queue);
2381 }
2382
2383 static void
2384 bond_ethdev_tx_queue_release(void *queue)
2385 {
2386         if (queue == NULL)
2387                 return;
2388
2389         rte_free(queue);
2390 }
2391
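/*
 * Alarm callback that polls the link status of slaves which cannot deliver
 * LSC interrupts. It re-arms itself for the next polling interval as long
 * as at least one such slave exists (or the device lock was contended).
 */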
2392 static void
2393 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2394 {
2395         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2396         struct bond_dev_private *internals;
2397
2398         /* Default value for polling slave found is true as we don't want to
2399          * disable the polling thread if we cannot get the lock */
2400         int i, polling_slave_found = 1;
2401
2402         if (cb_arg == NULL)
2403                 return;
2404
2405         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2406         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2407
2408         if (!bonded_ethdev->data->dev_started ||
2409                 !internals->link_status_polling_enabled)
2410                 return;
2411
2412         /* If device is currently being configured then don't check slaves' link
2413          * status; wait until the next period */
2414         if (rte_spinlock_trylock(&internals->lock)) {
2415                 if (internals->slave_count > 0)
2416                         polling_slave_found = 0;
2417
2418                 for (i = 0; i < internals->slave_count; i++) {
2419                         if (!internals->slaves[i].link_status_poll_enabled)
2420                                 continue;
2421
2422                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2423                         polling_slave_found = 1;
2424
2425                         /* Update slave link status */
2426                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2427                                         internals->slaves[i].link_status_wait_to_complete);
2428
2429                         /* if link status has changed since last checked then call lsc
2430                          * event callback */
2431                         if (slave_ethdev->data->dev_link.link_status !=
2432                                         internals->slaves[i].last_link_status) {
2433                                 internals->slaves[i].last_link_status =
2434                                                 slave_ethdev->data->dev_link.link_status;
2435
2436                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2437                                                 RTE_ETH_EVENT_INTR_LSC,
2438                                                 &bonded_ethdev->data->port_id,
2439                                                 NULL);
2440                         }
2441                 }
2442                 rte_spinlock_unlock(&internals->lock);
2443         }
2444
2445         if (polling_slave_found)
2446                 /* Set alarm to continue monitoring link status of slave ethdev's */
2447                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2448                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2449 }
2450
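/*
 * Aggregate the bonded link state from the active slaves. The reported
 * speed is mode dependent: the minimum slave speed in broadcast mode, the
 * current primary's speed in active-backup mode, and the sum of all active
 * slaves' speeds in the load-sharing modes.
 */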
2451 static int
2452 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2453 {
2454         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2455
2456         struct bond_dev_private *bond_ctx;
2457         struct rte_eth_link slave_link;
2458
2459         uint32_t idx;
2460
2461         bond_ctx = ethdev->data->dev_private;
2462
2463         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2464
2465         if (ethdev->data->dev_started == 0 ||
2466                         bond_ctx->active_slave_count == 0) {
2467                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2468                 return 0;
2469         }
2470
2471         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2472
2473         if (wait_to_complete)
2474                 link_update = rte_eth_link_get;
2475         else
2476                 link_update = rte_eth_link_get_nowait;
2477
2478         switch (bond_ctx->mode) {
2479         case BONDING_MODE_BROADCAST:
2480                 /**
2481                  * Setting link speed to UINT32_MAX to ensure we pick up the
2482                  * value of the first active slave
2483                  */
2484                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2485
2486                 /**
2487                  * The link speed is the minimum of all the slaves' link speeds,
2488                  * as packet loss will occur on the slowest slave if transmission
2489                  * at a higher rate is attempted
2490                  */
2491                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2492                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2493
2494                         if (slave_link.link_speed <
2495                                         ethdev->data->dev_link.link_speed)
2496                                 ethdev->data->dev_link.link_speed =
2497                                                 slave_link.link_speed;
2498                 }
2499                 break;
2500         case BONDING_MODE_ACTIVE_BACKUP:
2501                 /* Current primary slave */
2502                 link_update(bond_ctx->current_primary_port, &slave_link);
2503
2504                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2505                 break;
2506         case BONDING_MODE_8023AD:
2507                 ethdev->data->dev_link.link_autoneg =
2508                                 bond_ctx->mode4.slave_link.link_autoneg;
2509                 ethdev->data->dev_link.link_duplex =
2510                                 bond_ctx->mode4.slave_link.link_duplex;
2511                 /* fall through to update link speed */
2512         case BONDING_MODE_ROUND_ROBIN:
2513         case BONDING_MODE_BALANCE:
2514         case BONDING_MODE_TLB:
2515         case BONDING_MODE_ALB:
2516         default:
2517                 /**
2518                  * In these modes the maximum theoretical link speed is the sum
2519                  * of all the slaves' link speeds
2520                  */
2521                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2522
2523                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2524                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2525
2526                         ethdev->data->dev_link.link_speed +=
2527                                         slave_link.link_speed;
2528                 }
2529         }
2530
2531
2532         return 0;
2533 }
2534
2535
2536 static int
2537 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2538 {
2539         struct bond_dev_private *internals = dev->data->dev_private;
2540         struct rte_eth_stats slave_stats;
2541         int i, j;
2542
2543         for (i = 0; i < internals->slave_count; i++) {
2544                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2545
2546                 stats->ipackets += slave_stats.ipackets;
2547                 stats->opackets += slave_stats.opackets;
2548                 stats->ibytes += slave_stats.ibytes;
2549                 stats->obytes += slave_stats.obytes;
2550                 stats->imissed += slave_stats.imissed;
2551                 stats->ierrors += slave_stats.ierrors;
2552                 stats->oerrors += slave_stats.oerrors;
2553                 stats->rx_nombuf += slave_stats.rx_nombuf;
2554
2555                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2556                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2557                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2558                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2559                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2560                         stats->q_errors[j] += slave_stats.q_errors[j];
2561                 }
2562
2563         }
2564
2565         return 0;
2566 }
2567
2568 static void
2569 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2570 {
2571         struct bond_dev_private *internals = dev->data->dev_private;
2572         int i;
2573
2574         for (i = 0; i < internals->slave_count; i++)
2575                 rte_eth_stats_reset(internals->slaves[i].port_id);
2576 }
2577
2578 static void
2579 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2580 {
2581         struct bond_dev_private *internals = eth_dev->data->dev_private;
2582         int i;
2583
2584         internals->promiscuous_en = 1;
2585
2586         switch (internals->mode) {
2587         /* Promiscuous mode is propagated to all slaves */
2588         case BONDING_MODE_ROUND_ROBIN:
2589         case BONDING_MODE_BALANCE:
2590         case BONDING_MODE_BROADCAST:
2591                 for (i = 0; i < internals->slave_count; i++)
2592                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2593                 break;
2594         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2595         case BONDING_MODE_8023AD:
2596                 break;
2597         /* Promiscuous mode is propagated only to primary slave */
2598         case BONDING_MODE_ACTIVE_BACKUP:
2599         case BONDING_MODE_TLB:
2600         case BONDING_MODE_ALB:
2601         default:
2602                 rte_eth_promiscuous_enable(internals->current_primary_port);
2603         }
2604 }
2605
2606 static void
2607 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2608 {
2609         struct bond_dev_private *internals = dev->data->dev_private;
2610         int i;
2611
2612         internals->promiscuous_en = 0;
2613
2614         switch (internals->mode) {
2615         /* Promiscuous mode is propagated to all slaves */
2616         case BONDING_MODE_ROUND_ROBIN:
2617         case BONDING_MODE_BALANCE:
2618         case BONDING_MODE_BROADCAST:
2619                 for (i = 0; i < internals->slave_count; i++)
2620                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2621                 break;
2622         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2623         case BONDING_MODE_8023AD:
2624                 break;
2625         /* Promiscuous mode is propagated only to primary slave */
2626         case BONDING_MODE_ACTIVE_BACKUP:
2627         case BONDING_MODE_TLB:
2628         case BONDING_MODE_ALB:
2629         default:
2630                 rte_eth_promiscuous_disable(internals->current_primary_port);
2631         }
2632 }
2633
2634 static void
2635 bond_ethdev_delayed_lsc_propagation(void *arg)
2636 {
2637         if (arg == NULL)
2638                 return;
2639
2640         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2641                         RTE_ETH_EVENT_INTR_LSC, NULL);
2642 }
2643
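/*
 * LSC handler invoked for slave link changes, either from the slave PMDs or
 * from the polling alarm. On link up the slave is activated, inheriting or
 * validating link properties and becoming primary if no slave was active;
 * on link down it is deactivated and, when it was the last active slave or
 * the current primary, the bonded link status and primary port are updated.
 */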
2644 int
2645 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2646                 void *param, void *ret_param __rte_unused)
2647 {
2648         struct rte_eth_dev *bonded_eth_dev;
2649         struct bond_dev_private *internals;
2650         struct rte_eth_link link;
2651         int rc = -1;
2652
2653         int i, valid_slave = 0;
2654         uint16_t active_pos;
2655         uint8_t lsc_flag = 0;
2656
2657         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2658                 return rc;
2659
2660         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2661
2662         if (check_for_bonded_ethdev(bonded_eth_dev))
2663                 return rc;
2664
2665         internals = bonded_eth_dev->data->dev_private;
2666
2667         /* If the device isn't started don't handle interrupts */
2668         if (!bonded_eth_dev->data->dev_started)
2669                 return rc;
2670
2671         /* verify that port_id is a valid slave of bonded port */
2672         for (i = 0; i < internals->slave_count; i++) {
2673                 if (internals->slaves[i].port_id == port_id) {
2674                         valid_slave = 1;
2675                         break;
2676                 }
2677         }
2678
2679         if (!valid_slave)
2680                 return rc;
2681
2682         /* Synchronize parallel invocations of the LSC callback, triggered by
2683          * real link events from the slave PMDs or by the bonding PMD itself.
2684          */
2685         rte_spinlock_lock(&internals->lsc_lock);
2686
2687         /* Search for port in active port list */
2688         active_pos = find_slave_by_id(internals->active_slaves,
2689                         internals->active_slave_count, port_id);
2690
2691         rte_eth_link_get_nowait(port_id, &link);
2692         if (link.link_status) {
2693                 if (active_pos < internals->active_slave_count)
2694                         goto link_update;
2695
2696                 /* If there are no active slave ports, make this the primary port */
2697                 if (internals->active_slave_count < 1) {
2698                         /* If first active slave, then change link status */
2699                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2700                         internals->current_primary_port = port_id;
2701                         lsc_flag = 1;
2702
2703                         mac_address_slaves_update(bonded_eth_dev);
2704                 }
2705
2706                 /* Check link state properties if the bonded link is up */
2707                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2708                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2709                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2710                                              "for slave %d in bonding mode %d",
2711                                              port_id, internals->mode);
2712                 } else {
2713                         /* inherit slave link properties */
2714                         link_properties_set(bonded_eth_dev, &link);
2715                 }
2716
2717                 activate_slave(bonded_eth_dev, port_id);
2718
2719                 /* If user has defined the primary port then default to using it */
2720                 if (internals->user_defined_primary_port &&
2721                                 internals->primary_port == port_id)
2722                         bond_ethdev_primary_set(internals, port_id);
2723         } else {
2724                 if (active_pos == internals->active_slave_count)
2725                         goto link_update;
2726
2727                 /* Remove from active slave list */
2728                 deactivate_slave(bonded_eth_dev, port_id);
2729
2730                 if (internals->active_slave_count < 1)
2731                         lsc_flag = 1;
2732
2733                 /* Update the primary port id: take the first active slave from the
2734                  * list, or fall back to the configured primary port if none is active */
2735                 if (port_id == internals->current_primary_port) {
2736                         if (internals->active_slave_count > 0)
2737                                 bond_ethdev_primary_set(internals,
2738                                                 internals->active_slaves[0]);
2739                         else
2740                                 internals->current_primary_port = internals->primary_port;
2741                 }
2742         }
2743
2744 link_update:
2745         /**
2746          * Update bonded device link properties after any change to active
2747          * slaves
2748          */
2749         bond_ethdev_link_update(bonded_eth_dev, 0);
2750
2751         if (lsc_flag) {
2752                 /* Cancel any possible outstanding interrupts if delays are enabled */
2753                 if (internals->link_up_delay_ms > 0 ||
2754                         internals->link_down_delay_ms > 0)
2755                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2756                                         bonded_eth_dev);
2757
2758                 if (bonded_eth_dev->data->dev_link.link_status) {
2759                         if (internals->link_up_delay_ms > 0)
2760                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2761                                                 bond_ethdev_delayed_lsc_propagation,
2762                                                 (void *)bonded_eth_dev);
2763                         else
2764                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2765                                                 RTE_ETH_EVENT_INTR_LSC,
2766                                                 NULL);
2767
2768                 } else {
2769                         if (internals->link_down_delay_ms > 0)
2770                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2771                                                 bond_ethdev_delayed_lsc_propagation,
2772                                                 (void *)bonded_eth_dev);
2773                         else
2774                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2775                                                 RTE_ETH_EVENT_INTR_LSC,
2776                                                 NULL);
2777                 }
2778         }
2779
2780         rte_spinlock_unlock(&internals->lsc_lock);
2781
2782         return rc;
2783 }
2784
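/*
 * Propagate an RSS redirection table update to every slave. The masked
 * entries are copied into the bonded device's own copy of the table,
 * replicated to fill the whole internals->reta_conf array, and then pushed
 * to each slave using that slave's own reta_size.
 *
 * Illustrative application-side usage (a sketch, not part of this driver;
 * assumes a 128-entry table, i.e. two 64-entry groups):
 *
 *	struct rte_eth_rss_reta_entry64 reta_conf[2];
 *	memset(reta_conf, 0, sizeof(reta_conf));
 *	reta_conf[0].mask = 0x3;    // update entries 0 and 1 only
 *	reta_conf[0].reta[0] = 0;   // entry 0 -> queue 0
 *	reta_conf[0].reta[1] = 1;   // entry 1 -> queue 1
 *	rte_eth_dev_rss_reta_update(bonded_port_id, reta_conf, 128);
 */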
2785 static int
2786 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2787                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2788 {
2789         unsigned i, j;
2790         int result = 0;
2791         int slave_reta_size;
2792         unsigned reta_count;
2793         struct bond_dev_private *internals = dev->data->dev_private;
2794
2795         if (reta_size != internals->reta_size)
2796                 return -EINVAL;
2797
2798         /* Copy RETA table */
2799         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2800
2801         for (i = 0; i < reta_count; i++) {
2802                 internals->reta_conf[i].mask = reta_conf[i].mask;
2803                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2804                         if ((reta_conf[i].mask >> j) & 0x01)
2805                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2806         }
2807
2808         /* Fill rest of array */
2809         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2810                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2811                                 sizeof(internals->reta_conf[0]) * reta_count);
2812
2813         /* Propagate RETA over slaves */
2814         for (i = 0; i < internals->slave_count; i++) {
2815                 slave_reta_size = internals->slaves[i].reta_size;
2816                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2817                                 &internals->reta_conf[0], slave_reta_size);
2818                 if (result < 0)
2819                         return result;
2820         }
2821
2822         return 0;
2823 }
2824
2825 static int
2826 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2827                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2828 {
2829         int i, j;
2830         struct bond_dev_private *internals = dev->data->dev_private;
2831
2832         if (reta_size != internals->reta_size)
2833                 return -EINVAL;
2834
2835         /* Copy RETA table */
2836         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2837                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2838                         if ((reta_conf[i].mask >> j) & 0x01)
2839                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2840
2841         return 0;
2842 }
2843
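/*
 * Update the RSS hash configuration on the bonded device and on every
 * slave. The requested hash functions are masked with the offloads
 * supported by all slaves, and a caller-supplied key is stored only if it
 * fits in the bonded device's key buffer.
 */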
2844 static int
2845 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2846                 struct rte_eth_rss_conf *rss_conf)
2847 {
2848         int i, result = 0;
2849         struct bond_dev_private *internals = dev->data->dev_private;
2850         struct rte_eth_rss_conf bond_rss_conf;
2851
2852         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2853
2854         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2855
2856         if (bond_rss_conf.rss_hf != 0)
2857                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2858
2859         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2860                         sizeof(internals->rss_key)) {
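                /* A zero key length requests the default 40-byte RSS key length */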
2861                 if (bond_rss_conf.rss_key_len == 0)
2862                         bond_rss_conf.rss_key_len = 40;
2863                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2864                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2865                                 internals->rss_key_len);
2866         }
2867
2868         for (i = 0; i < internals->slave_count; i++) {
2869                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2870                                 &bond_rss_conf);
2871                 if (result < 0)
2872                         return result;
2873         }
2874
2875         return 0;
2876 }
2877
2878 static int
2879 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2880                 struct rte_eth_rss_conf *rss_conf)
2881 {
2882         struct bond_dev_private *internals = dev->data->dev_private;
2883
2884         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2885         rss_conf->rss_key_len = internals->rss_key_len;
2886         if (rss_conf->rss_key)
2887                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2888
2889         return 0;
2890 }
2891
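/*
 * Set the MTU on the bonded device by applying it to every slave; fails
 * with -ENOTSUP unless every slave implements the mtu_set operation.
 */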
2892 static int
2893 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2894 {
2895         struct rte_eth_dev *slave_eth_dev;
2896         struct bond_dev_private *internals = dev->data->dev_private;
2897         int ret, i;
2898
2899         rte_spinlock_lock(&internals->lock);
2900
2901         for (i = 0; i < internals->slave_count; i++) {
2902                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2903                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2904                         rte_spinlock_unlock(&internals->lock);
2905                         return -ENOTSUP;
2906                 }
2907         }
2908         for (i = 0; i < internals->slave_count; i++) {
2909                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2910                 if (ret < 0) {
2911                         rte_spinlock_unlock(&internals->lock);
2912                         return ret;
2913                 }
2914         }
2915
2916         rte_spinlock_unlock(&internals->lock);
2917         return 0;
2918 }
2919
2920 static int
2921 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2922 {
2923         if (mac_address_set(dev, addr)) {
2924                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2925                 return -EINVAL;
2926         }
2927
2928         return 0;
2929 }
2930
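/*
 * Only the generic flow API (rte_flow) filter type is supported; any other
 * filter type or operation is rejected with -ENOTSUP.
 */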
2931 static int
2932 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2933                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2934 {
2935         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2936                 *(const void **)arg = &bond_flow_ops;
2937                 return 0;
2938         }
2939         return -ENOTSUP;
2940 }
2941
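/*
 * Add a MAC address on the bonded device by programming it on every slave.
 * If any slave rejects the address, the slaves already updated are rolled
 * back so that all slaves stay consistent.
 */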
2942 static int
2943 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2944                                 __rte_unused uint32_t index, uint32_t vmdq)
2945 {
2946         struct rte_eth_dev *slave_eth_dev;
2947         struct bond_dev_private *internals = dev->data->dev_private;
2948         int ret, i;
2949
2950         rte_spinlock_lock(&internals->lock);
2951
2952         for (i = 0; i < internals->slave_count; i++) {
2953                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2954                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2955                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2956                         ret = -ENOTSUP;
2957                         goto end;
2958                 }
2959         }
2960
2961         for (i = 0; i < internals->slave_count; i++) {
2962                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2963                                 mac_addr, vmdq);
2964                 if (ret < 0) {
2965                         /* rollback */
2966                         for (i--; i >= 0; i--)
2967                                 rte_eth_dev_mac_addr_remove(
2968                                         internals->slaves[i].port_id, mac_addr);
2969                         goto end;
2970                 }
2971         }
2972
2973         ret = 0;
2974 end:
2975         rte_spinlock_unlock(&internals->lock);
2976         return ret;
2977 }
2978
2979 static void
2980 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2981 {
2982         struct rte_eth_dev *slave_eth_dev;
2983         struct bond_dev_private *internals = dev->data->dev_private;
2984         int i;
2985
2986         rte_spinlock_lock(&internals->lock);
2987
2988         for (i = 0; i < internals->slave_count; i++) {
2989                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2990                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2991                         goto end;
2992         }
2993
2994         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
2995
2996         for (i = 0; i < internals->slave_count; i++)
2997                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2998                                 mac_addr);
2999
3000 end:
3001         rte_spinlock_unlock(&internals->lock);
3002 }
3003
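/* ethdev operations installed on every bonded device created by this PMD */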
3004 const struct eth_dev_ops default_dev_ops = {
3005         .dev_start            = bond_ethdev_start,
3006         .dev_stop             = bond_ethdev_stop,
3007         .dev_close            = bond_ethdev_close,
3008         .dev_configure        = bond_ethdev_configure,
3009         .dev_infos_get        = bond_ethdev_info,
3010         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3011         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3012         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3013         .rx_queue_release     = bond_ethdev_rx_queue_release,
3014         .tx_queue_release     = bond_ethdev_tx_queue_release,
3015         .link_update          = bond_ethdev_link_update,
3016         .stats_get            = bond_ethdev_stats_get,
3017         .stats_reset          = bond_ethdev_stats_reset,
3018         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3019         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3020         .reta_update          = bond_ethdev_rss_reta_update,
3021         .reta_query           = bond_ethdev_rss_reta_query,
3022         .rss_hash_update      = bond_ethdev_rss_hash_update,
3023         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3024         .mtu_set              = bond_ethdev_mtu_set,
3025         .mac_addr_set         = bond_ethdev_mac_address_set,
3026         .mac_addr_add         = bond_ethdev_mac_addr_add,
3027         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3028         .filter_ctrl          = bond_filter_ctrl
3029 };
3030
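/*
 * Allocate the ethdev and private data for a new bonded device, initialize
 * the bonding defaults (primary port, xmit policy, polling intervals,
 * descriptor limits), apply the requested mode and set up the VLAN filter
 * bitmap. Returns the new port id on success or -1 on failure.
 */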
3031 static int
3032 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3033 {
3034         const char *name = rte_vdev_device_name(dev);
3035         uint8_t socket_id = dev->device.numa_node;
3036         struct bond_dev_private *internals = NULL;
3037         struct rte_eth_dev *eth_dev = NULL;
3038         uint32_t vlan_filter_bmp_size;
3039
3040         /* Now do all data allocation: the eth_dev structure, dummy PCI driver
3041          * and internal (private) data
3042          */
3043
3044         /* reserve an ethdev entry */
3045         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3046         if (eth_dev == NULL) {
3047                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3048                 goto err;
3049         }
3050
3051         internals = eth_dev->data->dev_private;
3052         eth_dev->data->nb_rx_queues = (uint16_t)1;
3053         eth_dev->data->nb_tx_queues = (uint16_t)1;
3054
3055         /* Allocate memory for storing MAC addresses */
3056         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3057                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3058         if (eth_dev->data->mac_addrs == NULL) {
3059                 RTE_BOND_LOG(ERR,
3060                              "Failed to allocate %u bytes needed to store MAC addresses",
3061                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3062                 goto err;
3063         }
3064
3065         eth_dev->dev_ops = &default_dev_ops;
3066         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3067
3068         rte_spinlock_init(&internals->lock);
3069         rte_spinlock_init(&internals->lsc_lock);
3070
3071         internals->port_id = eth_dev->data->port_id;
3072         internals->mode = BONDING_MODE_INVALID;
3073         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3074         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3075         internals->burst_xmit_hash = burst_xmit_l2_hash;
3076         internals->user_defined_mac = 0;
3077
3078         internals->link_status_polling_enabled = 0;
3079
3080         internals->link_status_polling_interval_ms =
3081                 DEFAULT_POLLING_INTERVAL_10_MS;
3082         internals->link_down_delay_ms = 0;
3083         internals->link_up_delay_ms = 0;
3084
3085         internals->slave_count = 0;
3086         internals->active_slave_count = 0;
3087         internals->rx_offload_capa = 0;
3088         internals->tx_offload_capa = 0;
3089         internals->rx_queue_offload_capa = 0;
3090         internals->tx_queue_offload_capa = 0;
3091         internals->candidate_max_rx_pktlen = 0;
3092         internals->max_rx_pktlen = 0;
3093
3094         /* Initially allow any RSS offload type to be chosen */
3095         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3096
3097         memset(&internals->default_rxconf, 0,
3098                sizeof(internals->default_rxconf));
3099         memset(&internals->default_txconf, 0,
3100                sizeof(internals->default_txconf));
3101
3102         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3103         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3104
3105         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3106         memset(internals->slaves, 0, sizeof(internals->slaves));
3107
3108         TAILQ_INIT(&internals->flow_list);
3109         internals->flow_isolated_valid = 0;
3110
3111         /* Set mode 4 default configuration */
3112         bond_mode_8023ad_setup(eth_dev, NULL);
3113         if (bond_ethdev_mode_set(eth_dev, mode)) {
3114                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3115                                  eth_dev->data->port_id, mode);
3116                 goto err;
3117         }
3118
3119         vlan_filter_bmp_size =
3120                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3121         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3122                                                    RTE_CACHE_LINE_SIZE);
3123         if (internals->vlan_filter_bmpmem == NULL) {
3124                 RTE_BOND_LOG(ERR,
3125                              "Failed to allocate vlan bitmap for bonded device %u",
3126                              eth_dev->data->port_id);
3127                 goto err;
3128         }
3129
3130         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3131                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3132         if (internals->vlan_filter_bmp == NULL) {
3133                 RTE_BOND_LOG(ERR,
3134                              "Failed to init vlan bitmap for bonded device %u",
3135                              eth_dev->data->port_id);
3136                 rte_free(internals->vlan_filter_bmpmem);
3137                 goto err;
3138         }
3139
3140         return eth_dev->data->port_id;
3141
3142 err:
3143         rte_free(internals);
3144         if (eth_dev != NULL)
3145                 eth_dev->data->dev_private = NULL;
3146         rte_eth_dev_release_port(eth_dev);
3147         return -1;
3148 }
3149
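/*
 * vdev probe entry point. For secondary processes only the ethdev is
 * attached; in the primary process the mode and socket_id kvargs are
 * parsed, the bonded device is allocated via bond_alloc() and the kvlist
 * is stored so that the remaining arguments (slaves, primary, delays) can
 * be resolved later in bond_ethdev_configure().
 */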
3150 static int
3151 bond_probe(struct rte_vdev_device *dev)
3152 {
3153         const char *name;
3154         struct bond_dev_private *internals;
3155         struct rte_kvargs *kvlist;
3156         uint8_t bonding_mode, socket_id;
3157         int arg_count, port_id;
3158         uint8_t agg_mode;
3159         struct rte_eth_dev *eth_dev;
3160
3161         if (!dev)
3162                 return -EINVAL;
3163
3164         name = rte_vdev_device_name(dev);
3165         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3166
3167         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3168                 eth_dev = rte_eth_dev_attach_secondary(name);
3169                 if (!eth_dev) {
3170                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3171                         return -1;
3172                 }
3173                 /* TODO: request info from primary to set up Rx and Tx */
3174                 eth_dev->dev_ops = &default_dev_ops;
3175                 eth_dev->device = &dev->device;
3176                 rte_eth_dev_probing_finish(eth_dev);
3177                 return 0;
3178         }
3179
3180         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3181                 pmd_bond_init_valid_arguments);
3182         if (kvlist == NULL)
3183                 return -1;
3184
3185         /* Parse link bonding mode */
3186         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3187                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3188                                 &bond_ethdev_parse_slave_mode_kvarg,
3189                                 &bonding_mode) != 0) {
3190                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3191                                         name);
3192                         goto parse_error;
3193                 }
3194         } else {
3195                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3196                                 "device %s", name);
3197                 goto parse_error;
3198         }
3199
3200         /* Parse socket id to create bonding device on */
3201         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3202         if (arg_count == 1) {
3203                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3204                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3205                                 != 0) {
3206                         RTE_BOND_LOG(ERR, "Invalid socket id specified for "
3207                                         "bonded device %s", name);
3208                         goto parse_error;
3209                 }
3210         } else if (arg_count > 1) {
3211                 RTE_BOND_LOG(ERR, "Socket id can be specified only once for "
3212                                 "bonded device %s", name);
3213                 goto parse_error;
3214         } else {
3215                 socket_id = rte_socket_id();
3216         }
3217
3218         dev->device.numa_node = socket_id;
3219
3220         /* Create link bonding eth device */
3221         port_id = bond_alloc(dev, bonding_mode);
3222         if (port_id < 0) {
3223                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3224                                 "socket %u.", name, bonding_mode, socket_id);
3225                 goto parse_error;
3226         }
3227         internals = rte_eth_devices[port_id].data->dev_private;
3228         internals->kvlist = kvlist;
3229
3230         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3231                 if (rte_kvargs_process(kvlist,
3232                                 PMD_BOND_AGG_MODE_KVARG,
3233                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3234                                 &agg_mode) != 0) {
3235                         RTE_BOND_LOG(ERR,
3236                                         "Failed to parse agg selection mode for bonded device %s",
3237                                         name);
3238                         goto parse_error;
3239                 }
3240
3241                 if (internals->mode == BONDING_MODE_8023AD)
3242                         internals->mode4.agg_selection = agg_mode;
3243         } else {
3244                 internals->mode4.agg_selection = AGG_STABLE;
3245         }
3246
3247         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3248         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3249                         "socket %u.", name, port_id, bonding_mode, socket_id);
3250         return 0;
3251
3252 parse_error:
3253         rte_kvargs_free(kvlist);
3254
3255         return -1;
3256 }
3257
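/*
 * vdev remove entry point. Removal is refused with -EBUSY while slaves are
 * still attached; otherwise the device is stopped and closed if it was
 * started, and the ethdev plus the private resources are released.
 */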
3258 static int
3259 bond_remove(struct rte_vdev_device *dev)
3260 {
3261         struct rte_eth_dev *eth_dev;
3262         struct bond_dev_private *internals;
3263         const char *name;
3264
3265         if (!dev)
3266                 return -EINVAL;
3267
3268         name = rte_vdev_device_name(dev);
3269         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3270
3271         /* Now free all allocated data: the eth_dev structure,
3272          * dummy PCI driver and internal (private) data
3273          */
3274
3275         /* find an ethdev entry */
3276         eth_dev = rte_eth_dev_allocated(name);
3277         if (eth_dev == NULL)
3278                 return -ENODEV;
3279
3280         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3281                 return rte_eth_dev_release_port(eth_dev);
3282
3283         RTE_ASSERT(eth_dev->device == &dev->device);
3284
3285         internals = eth_dev->data->dev_private;
3286         if (internals->slave_count != 0)
3287                 return -EBUSY;
3288
3289         if (eth_dev->data->dev_started == 1) {
3290                 bond_ethdev_stop(eth_dev);
3291                 bond_ethdev_close(eth_dev);
3292         }
3293
3294         eth_dev->dev_ops = NULL;
3295         eth_dev->rx_pkt_burst = NULL;
3296         eth_dev->tx_pkt_burst = NULL;
3297
3299         /* Try to release the mempool used in mode 6. If the bonded
3300          * device is not in mode 6, freeing a NULL pointer is harmless.
3301          */
3302         rte_mempool_free(internals->mode6.mempool);
3303         rte_bitmap_free(internals->vlan_filter_bmp);
3304         rte_free(internals->vlan_filter_bmpmem);
3305
3306         rte_eth_dev_release_port(eth_dev);
3307
3308         return 0;
3309 }
3310
3311 /* This function resolves the slave port ids after all the other pdev and
3312  * vdev devices have been allocated */
3313 static int
3314 bond_ethdev_configure(struct rte_eth_dev *dev)
3315 {
3316         const char *name = dev->device->name;
3317         struct bond_dev_private *internals = dev->data->dev_private;
3318         struct rte_kvargs *kvlist = internals->kvlist;
3319         int arg_count;
3320         uint16_t port_id = dev - rte_eth_devices;
3321         uint8_t agg_mode;
3322
3323         static const uint8_t default_rss_key[40] = {
3324                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3325                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3326                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3327                 0xBE, 0xAC, 0x01, 0xFA
3328         };
3329
3330         unsigned i, j;
3331
3332         /*
3333          * If RSS is enabled, fill the RETA table with default values and
3334          * set the key to the value specified in the port RSS configuration.
3335          * Fall back to the default RSS key if no key is specified.
3336          */
3337         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3338                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3339                         internals->rss_key_len =
3340                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3341                         memcpy(internals->rss_key,
3342                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3343                                internals->rss_key_len);
3344                 } else {
3345                         internals->rss_key_len = sizeof(default_rss_key);
3346                         memcpy(internals->rss_key, default_rss_key,
3347                                internals->rss_key_len);
3348                 }
3349
3350                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3351                         internals->reta_conf[i].mask = ~0LL;
3352                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3353                                 internals->reta_conf[i].reta[j] =
3354                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3355                                                 dev->data->nb_rx_queues;
3356                 }
3357         }
3358
3359         /* set the max_rx_pktlen */
3360         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3361
3362         /*
3363          * If there is no kvlist, this bonded device has been created
3364          * through the bonding API.
3365          */
3366         if (!kvlist)
3367                 return 0;
3368
3369         /* Parse MAC address for bonded device */
3370         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3371         if (arg_count == 1) {
3372                 struct ether_addr bond_mac;
3373
3374                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3375                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3376                         RTE_BOND_LOG(INFO, "Invalid MAC address for bonded device %s",
3377                                      name);
3378                         return -1;
3379                 }
3380
3381                 /* Set MAC address */
3382                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3383                         RTE_BOND_LOG(ERR,
3384                                      "Failed to set MAC address on bonded device %s",
3385                                      name);
3386                         return -1;
3387                 }
3388         } else if (arg_count > 1) {
3389                 RTE_BOND_LOG(ERR,
3390                              "MAC address can be specified only once for bonded device %s",
3391                              name);
3392                 return -1;
3393         }
3394
3395         /* Parse/set balance mode transmit policy */
3396         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3397         if (arg_count == 1) {
3398                 uint8_t xmit_policy;
3399
3400                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3401                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3402                     0) {
3403                         RTE_BOND_LOG(INFO,
3404                                      "Invalid xmit policy specified for bonded device %s",
3405                                      name);
3406                         return -1;
3407                 }
3408
3409                 /* Set balance mode transmit policy */
3410                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3411                         RTE_BOND_LOG(ERR,
3412                                      "Failed to set balance xmit policy on bonded device %s",
3413                                      name);
3414                         return -1;
3415                 }
3416         } else if (arg_count > 1) {
3417                 RTE_BOND_LOG(ERR,
3418                              "Transmit policy can be specified only once for bonded device %s",
3419                              name);
3420                 return -1;
3421         }
3422
3423         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3424                 if (rte_kvargs_process(kvlist,
3425                                        PMD_BOND_AGG_MODE_KVARG,
3426                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3427                                        &agg_mode) != 0) {
3428                         RTE_BOND_LOG(ERR,
3429                                      "Failed to parse agg selection mode for bonded device %s",
3430                                      name);
                        /* Bail out: agg_mode is uninitialized on parse failure */
                        return -1;
3431                 }
3432                 if (internals->mode == BONDING_MODE_8023AD) {
3433                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3434                                         agg_mode);
3435                         if (ret < 0) {
3436                                 RTE_BOND_LOG(ERR,
3437                                         "Invalid args for agg selection set for bonded device %s",
3438                                         name);
3439                                 return -1;
3440                         }
3441                 }
3442         }
3443
3444         /* Parse/add slave ports to bonded device */
3445         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3446                 struct bond_ethdev_slave_ports slave_ports;
3447                 unsigned i;
3448
3449                 memset(&slave_ports, 0, sizeof(slave_ports));
3450
3451                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3452                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3453                         RTE_BOND_LOG(ERR,
3454                                      "Failed to parse slave ports for bonded device %s",
3455                                      name);
3456                         return -1;
3457                 }
3458
3459                 for (i = 0; i < slave_ports.slave_count; i++) {
3460                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3461                                 RTE_BOND_LOG(ERR,
3462                                              "Failed to add port %d as slave to bonded device %s",
3463                                              slave_ports.slaves[i], name);
3464                         }
3465                 }
3466
3467         } else {
3468                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3469                 return -1;
3470         }
3471
3472         /* Parse/set primary slave port id */
3473         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3474         if (arg_count == 1) {
3475                 uint16_t primary_slave_port_id;
3476
3477                 if (rte_kvargs_process(kvlist,
3478                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3479                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3480                                        &primary_slave_port_id) < 0) {
3481                         RTE_BOND_LOG(INFO,
3482                                      "Invalid primary slave port id specified for bonded device %s",
3483                                      name);
3484                         return -1;
3485                 }
3486
3487                 /* Set the primary slave port id */
3488                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3489                     != 0) {
3490                         RTE_BOND_LOG(ERR,
3491                                      "Failed to set primary slave port %d on bonded device %s",
3492                                      primary_slave_port_id, name);
3493                         return -1;
3494                 }
3495         } else if (arg_count > 1) {
3496                 RTE_BOND_LOG(INFO,
3497                              "Primary slave can be specified only once for bonded device %s",
3498                              name);
3499                 return -1;
3500         }
3501
3502         /* Parse link status monitor polling interval */
3503         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3504         if (arg_count == 1) {
3505                 uint32_t lsc_poll_interval_ms;
3506
3507                 if (rte_kvargs_process(kvlist,
3508                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3509                                        &bond_ethdev_parse_time_ms_kvarg,
3510                                        &lsc_poll_interval_ms) < 0) {
3511                         RTE_BOND_LOG(INFO,
3512                                      "Invalid lsc polling interval value specified for bonded"
3513                                      " device %s", name);
3514                         return -1;
3515                 }
3516
3517                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3518                     != 0) {
3519                         RTE_BOND_LOG(ERR,
3520                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3521                                      lsc_poll_interval_ms, name);
3522                         return -1;
3523                 }
3524         } else if (arg_count > 1) {
3525                 RTE_BOND_LOG(INFO,
3526                              "LSC polling interval can be specified only once for bonded"
3527                              " device %s", name);
3528                 return -1;
3529         }
3530
3531         /* Parse link up interrupt propagation delay */
3532         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3533         if (arg_count == 1) {
3534                 uint32_t link_up_delay_ms;
3535
3536                 if (rte_kvargs_process(kvlist,
3537                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3538                                        &bond_ethdev_parse_time_ms_kvarg,
3539                                        &link_up_delay_ms) < 0) {
3540                         RTE_BOND_LOG(INFO,
3541                                      "Invalid link up propagation delay value specified for"
3542                                      " bonded device %s", name);
3543                         return -1;
3544                 }
3545
3546                 /* Set the link up propagation delay */
3547                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3548                     != 0) {
3549                         RTE_BOND_LOG(ERR,
3550                                      "Failed to set link up propagation delay (%u ms) on bonded"
3551                                      " device %s", link_up_delay_ms, name);
3552                         return -1;
3553                 }
3554         } else if (arg_count > 1) {
3555                 RTE_BOND_LOG(INFO,
3556                              "Link up propagation delay can be specified only once for"
3557                              " bonded device %s", name);
3558                 return -1;
3559         }
3560
3561         /* Parse link down interrupt propagation delay */
3562         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3563         if (arg_count == 1) {
3564                 uint32_t link_down_delay_ms;
3565
3566                 if (rte_kvargs_process(kvlist,
3567                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3568                                        &bond_ethdev_parse_time_ms_kvarg,
3569                                        &link_down_delay_ms) < 0) {
3570                         RTE_BOND_LOG(INFO,
3571                                      "Invalid link down propagation delay value specified for"
3572                                      " bonded device %s", name);
3573                         return -1;
3574                 }
3575
3576                 /* Set the link down propagation delay */
3577                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3578                     != 0) {
3579                         RTE_BOND_LOG(ERR,
3580                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3581                                      link_down_delay_ms, name);
3582                         return -1;
3583                 }
3584         } else if (arg_count > 1) {
3585                 RTE_BOND_LOG(INFO,
3586                              "Link down propagation delay can be specified only once for bonded device %s",
3587                              name);
3588                 return -1;
3589         }
3590
3591         return 0;
3592 }
3593
3594 struct rte_vdev_driver pmd_bond_drv = {
3595         .probe = bond_probe,
3596         .remove = bond_remove,
3597 };
3598
3599 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3600 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3601
3602 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3603         "slave=<ifc> "
3604         "primary=<ifc> "
3605         "mode=[0-6] "
3606         "xmit_policy=[l2 | l23 | l34] "
3607         "agg_mode=[count | stable | bandwidth] "
3608         "socket_id=<int> "
3609         "mac=<mac addr> "
3610         "lsc_poll_period_ms=<int> "
3611         "up_delay=<int> "
3612         "down_delay=<int>");
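/*
 * Example (illustrative, the device names are hypothetical): create a
 * bonded device in active-backup mode (mode=1) with two slaves from the
 * EAL command line:
 *
 *   --vdev 'net_bonding0,mode=1,slave=0000:02:00.0,slave=0000:02:00.1,primary=0000:02:00.0'
 */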
3613
3614 int bond_logtype;
3615
3616 RTE_INIT(bond_init_log)
3617 {
3618         bond_logtype = rte_log_register("pmd.net.bond");
3619         if (bond_logtype >= 0)
3620                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3621 }