net: add rte prefix to IP defines
[dpdk.git] drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

static inline size_t
get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
                struct rte_vlan_hdr *vlan_hdr =
                        (struct rte_vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct rte_vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct rte_vlan_hdr);
                }
        }
        return vlan_offset;
}

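/*
 * Illustration (not part of the driver): for a QinQ frame the headers are
 * laid out as [ether][outer vlan][inner vlan][payload], so a caller sees,
 * e.g. (hypothetical values):
 *
 *      uint16_t proto = eth_hdr->ether_type;          // 0x88A8 (QinQ), BE
 *      size_t off = get_vlan_offset(eth_hdr, &proto); // off == 8, proto ==
 *                                                     // encapsulated type
 *
 * The L3 header then starts at (char *)(eth_hdr + 1) + off, which is how
 * the hash and ALB paths below locate the IPv4/IPv6/ARP headers.
 */
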
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_total = 0;
        uint16_t slave_count;
        uint16_t active_slave;
        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Offset into *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave =
                        rte_eth_rx_burst(internals->active_slaves[active_slave],
                                         bd_rx_q->queue_id,
                                         bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
        return num_rx_total;
}

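/*
 * Note on the rotation above: internals->active_slave records which slave
 * the next burst should poll first, so successive calls spread the polling
 * (and therefore the RX load) across all active slaves instead of always
 * draining slave 0 first. With three active slaves, for example, successive
 * bursts start at slave 0, 1, 2, 0, ...
 */
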
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be =
                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

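/*
 * For reference: IEEE 802.3 slow protocol frames carry EtherType 0x8809
 * (RTE_ETHER_TYPE_SLOW) and a one-byte subtype right after the Ethernet
 * header (1 = LACP, 2 = marker). The helper above also rejects VLAN-tagged
 * frames, since slow protocol PDUs are never tagged.
 */
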
/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

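/*
 * How the spec/mask pair above matches: rte_flow ANDs each header field
 * with the mask and compares the result against the spec, so the zeroed
 * MAC addresses with an all-ones EtherType mask mean "any src/dst MAC,
 * EtherType exactly RTE_ETHER_TYPE_SLOW". Conceptually:
 *
 *      match = ((pkt.ether_type & 0xFFFF) == RTE_BE16(RTE_ETHER_TYPE_SLOW));
 *
 * which steers only LACP/marker PDUs to the dedicated queue.
 */
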
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that all slaves in the bonding device support flow
         * steering of slow packets and can spare a dedicated queue pair */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

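/*
 * Typical call sequence (sketch of how these helpers fit together when
 * dedicated queues are enabled): validate the rule on every slave first,
 * then install it so LACP PDUs land on the extra RX queue:
 *
 *      if (bond_8023ad_slow_pkt_hw_filter_supported(bond_port_id) == 0)
 *              bond_ethdev_8023ad_flow_set(bond_dev, slave_port);
 *
 * With the rule in place the data-path queues never see slow packets, so
 * bond_ethdev_rx_burst_8023ad_fast_queue() below can skip the per-packet
 * LACP inspection that bond_ethdev_rx_burst_8023ad() has to do.
 */
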
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint16_t active_slave;
        uint16_t i;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Read packets from this slave */
                num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
                                                bd_rx_q->queue_id,
                                                bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;

                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        /* 2-D array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves;
         * one entry per packet, so it must be sized by nb_bufs rather than
         * by the number of ports */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);


        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave with a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }


        /* Send packet burst on each slave device */
        for (i = 0; i < dist_slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}


static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_eth_dev *bonded_eth_dev =
                                        &rte_eth_devices[internals->port_id];
        struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
        struct rte_ether_hdr *hdr;

        const uint16_t ether_type_slow_be =
                rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t subtype;
        uint16_t i;
        uint16_t j;
        uint16_t k;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* If packet is not pure L2 and is known, skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow
                         * packet, if the slave is not in collecting state, or
                         * if the bonding interface is not in promiscuous mode
                         * and the destination address matches neither a
                         * multicast address nor the bonding MAC. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting ||
                                (!promisc &&
                                 !rte_is_multicast_ether_addr(&hdr->d_addr) &&
                                 !rte_is_same_ether_addr(bond_mac,
                                                     &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
        switch (arp_op) {
        case RTE_ARP_OP_REQUEST:
                strlcpy(buf, "ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REPLY:
                strlcpy(buf, "ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_REVREQUEST:
                strlcpy(buf, "Reverse ARP Request", buf_len);
                return;
        case RTE_ARP_OP_REVREPLY:
                strlcpy(buf, "Reverse ARP Reply", buf_len);
                return;
        case RTE_ARP_OP_INVREQUEST:
                strlcpy(buf, "Peer Identify Request", buf_len);
                return;
        case RTE_ARP_OP_INVREPLY:
                strlcpy(buf, "Peer Identify Reply", buf_len);
                return;
        default:
                break;
        }
        strlcpy(buf, "Unknown", buf_len);
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

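/*
 * Example (illustrative): an address stored in network byte order as the
 * bytes C0 A8 00 01 becomes 0xC0A80001 after rte_be_to_cpu_32(), and
 * printing its four bytes from most to least significant yields
 * "192.168.0.1". MaxIPv4String (16) covers the worst case,
 * "255.255.255.255" plus the terminating NUL.
 */
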
#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Update the RX or TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* New client: don't write past the table if it is already full */
        if (active_clients == MAX_CLIENTS_NUMBER)
                return;
        /* Insert the new client into the table and update its stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
                src_ip,                                                 \
                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info,
        struct rte_ether_hdr *eth_h, uint16_t port,
        uint32_t __attribute__((unused)) *burstnumber)
{
        struct rte_ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct rte_arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, sizeof(buf));
#endif

        if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv4)) {
                ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
                arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
                                ArpOp, sizeof(ArpOp));
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        /* This is an RX path, so use the RX queue structure; only the
         * dev_private back-pointer is needed here */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Distribute the packets among the slaves' buffers in round-robin order */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* increment current slave index so the next call to tx burst starts on the
         * next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                       &slave_bufs[i][num_tx_slave],
                                       tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct rte_ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}


void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

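/*
 * Worked example (hypothetical addresses): for src 00:11:22:33:44:55 and
 * dst 66:77:88:99:aa:bb, ether_hash() XORs the two addresses 16 bits at a
 * time, "hash ^= hash >> 8" folds the upper byte into the lower one so all
 * address bytes influence the result, and the modulo maps the packet to
 * one of slave_count slaves. Packets of the same L2 flow therefore always
 * pick the same slave, preserving per-flow ordering.
 */
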
void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv4) == proto) {
                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv6) == proto) {
                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint16_t slave_count, uint16_t *slaves)
{
        struct rte_ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct udp_hdr *udp_hdr;
        struct tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
                size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv4) == proto) {
                        struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in a fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & RTE_IPV4_HDR_IHL_MASK) *
                                        RTE_IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        if ((size_t)udp_hdr + sizeof(*udp_hdr)
                                                        < pkt_end)
                                                l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv6) == proto) {
                        struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

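/*
 * Worked example (hypothetical flow): for a TCP flow 10.0.0.1:1234 ->
 * 10.0.0.2:80, l3hash = src_addr ^ dst_addr and l4hash = src_port ^
 * dst_port (HASH_L4_PORTS). The function then folds and reduces:
 *
 *      hash = l3hash ^ l4hash;
 *      hash ^= hash >> 16;     // fold upper half into lower half
 *      hash ^= hash >> 8;      // fold again so every byte contributes
 *      slave = hash % slave_count;
 *
 * Because XOR is symmetric in src and dst, both directions of the same
 * connection produce the same hash value.
 */
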
struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

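/*
 * Reading the arithmetic above: link_speed is in Mbit/s, so
 * link_speed * 1000000 / 8 is the link capacity in bytes per second.
 * Multiplying by the elapsed (update_idx + 1) * REORDER_PERIOD_MS gives
 * the window capacity scaled by 1000, and 1000 * load puts the bytes
 * actually sent on the same scale. The quotient/remainder pair is kept
 * separately so bandwidth_cmp() can order slaves by spare capacity
 * without floating point.
 */
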
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint16_t slave_id;
        uint16_t i;

        internals->slave_update_idx++;


        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct rte_ether_hdr *ether_hdr;
        struct rte_ether_addr primary_slave_addr;
        struct rte_ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);


        rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j],
                                                struct rte_ether_hdr *);
                        if (rte_is_same_ether_addr(&ether_hdr->s_addr,
                                                        &primary_slave_addr))
                                rte_ether_addr_copy(&active_slave_addr,
                                                &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

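/*
 * TLB in brief: slaves are tried in tlb_slaves_order (most spare capacity
 * first, as computed by the alarm callback above), and frames carrying the
 * primary's source MAC are rewritten to the transmitting slave's MAC so
 * the peer's switch learns a consistent source per port. Only transmit
 * traffic is balanced in this mode; RX stays unbalanced.
 */
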
void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search the tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_BOND_LOG(ERR,
                                                     "Failed to allocate ARP packet from pool");
                                        continue;
                                }
                                pkt_size = sizeof(struct rte_ether_hdr) +
                                        sizeof(struct rte_arp_hdr) +
                                        client_info->vlan_count *
                                        sizeof(struct rte_vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        /* Move unsent packets to the tail of bufs; the source
                         * index must count back from the number of packets
                         * queued for this slave, not from nb_pkts */
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
                                                        struct rte_ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                /* Move only the unsent remainder back to the tail of bufs */
                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS]
                                                  [slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        /*
         * Populate each slave's mbuf array with the packets to be sent on it,
         * selecting the output slave with a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

1292 static uint16_t
1293 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1294                 uint16_t nb_bufs)
1295 {
1296         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1297         struct bond_dev_private *internals = bd_tx_q->dev_private;
1298
1299         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1300         uint16_t slave_count;
1301
1302         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1303         uint16_t dist_slave_count;
1304
1305         /* 2-D array to sort mbufs for transmission on each slave into */
1306         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1307         /* Number of mbufs for transmission on each slave */
1308         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1309         /* Mapping array generated by hash function to map mbufs to slaves */
1310         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1311
1312         uint16_t slave_tx_count;
1313         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1314
1315         uint16_t i;
1316
1317         /* Copy slave list to protect against slave up/down changes during tx
1318          * bursting */
1319         slave_count = internals->active_slave_count;
1320         if (unlikely(slave_count < 1))
1321                 return 0;
1322
1323         memcpy(slave_port_ids, internals->active_slaves,
1324                         sizeof(slave_port_ids[0]) * slave_count);
1325
1326         /* Check for LACP control packets and send if available */
1327         for (i = 0; i < slave_count; i++) {
1328                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1329                 struct rte_mbuf *ctrl_pkt = NULL;
1330
1331                 if (likely(rte_ring_empty(port->tx_ring)))
1332                         continue;
1333
1334                 if (rte_ring_dequeue(port->tx_ring,
1335                                      (void **)&ctrl_pkt) != -ENOENT) {
1336                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1337                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1338                         /*
1339                          * re-enqueue LAG control plane packets to buffering
1340                          * ring if transmission fails so the packet isn't lost.
1341                          */
1342                         if (slave_tx_count != 1)
1343                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1344                 }
1345         }
1346
1347         if (unlikely(nb_bufs == 0))
1348                 return 0;
1349
1350         dist_slave_count = 0;
1351         for (i = 0; i < slave_count; i++) {
1352                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1353
1354                 if (ACTOR_STATE(port, DISTRIBUTING))
1355                         dist_slave_port_ids[dist_slave_count++] =
1356                                         slave_port_ids[i];
1357         }
1358
1359         if (likely(dist_slave_count > 0)) {
1360
1361                 /*
1362                  * Populate each slave's mbuf array with the packets to be sent
1363                  * on it, selecting the output slave with a hash on the xmit policy
1364                  */
1365                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1366                                 bufs_slave_port_idxs);
1367
1368                 for (i = 0; i < nb_bufs; i++) {
1369                         /*
1370                          * Populate slave mbuf arrays with mbufs for that
1371                          * slave
1372                          */
1373                         uint16_t slave_idx = bufs_slave_port_idxs[i];
1374
1375                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1376                                         bufs[i];
1377                 }
1378
1380                 /* Send packet burst on each slave device */
1381                 for (i = 0; i < dist_slave_count; i++) {
1382                         if (slave_nb_bufs[i] == 0)
1383                                 continue;
1384
1385                         slave_tx_count = rte_eth_tx_burst(
1386                                         dist_slave_port_ids[i],
1387                                         bd_tx_q->queue_id, slave_bufs[i],
1388                                         slave_nb_bufs[i]);
1389
1390                         total_tx_count += slave_tx_count;
1391
1392                         /* If tx burst fails, move the unsent packets to the end of bufs */
1393                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1394                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1395                                                 slave_tx_count;
1396                                 total_tx_fail_count += slave_tx_fail_count;
1397
1398                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1399                                        &slave_bufs[i][slave_tx_count],
1400                                        slave_tx_fail_count * sizeof(bufs[0]));
1401                         }
1402                 }
1403         }
1404
1405         return total_tx_count;
1406 }
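
/*
 * Without dedicated queues, LACPDUs ride the data path above, so an
 * application in mode 4 must keep invoking rx/tx bursts even when it
 * has no traffic (see the warning logged in bond_ethdev_mode_set).
 * A minimal polling sketch, with hypothetical names:
 *
 *      struct rte_mbuf *pkts[32];
 *      while (run) {
 *              uint16_t n = rte_eth_rx_burst(bond_port, 0, pkts, 32);
 *              handle_pkts(pkts, n);   app-defined; keep the gap < 100ms
 *      }
 */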
1407
1408 static uint16_t
1409 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1410                 uint16_t nb_pkts)
1411 {
1412         struct bond_dev_private *internals;
1413         struct bond_tx_queue *bd_tx_q;
1414
1415         uint16_t slaves[RTE_MAX_ETHPORTS];
1416         uint8_t tx_failed_flag = 0;
1417         uint16_t num_of_slaves;
1418
1419         uint16_t max_nb_of_tx_pkts = 0;
1420
1421         int slave_tx_total[RTE_MAX_ETHPORTS];
1422         int i, most_successful_tx_slave = -1;
1423
1424         bd_tx_q = (struct bond_tx_queue *)queue;
1425         internals = bd_tx_q->dev_private;
1426
1427         /* Copy slave list to protect against slave up/down changes during tx
1428          * bursting */
1429         num_of_slaves = internals->active_slave_count;
1430         memcpy(slaves, internals->active_slaves,
1431                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1432
1433         if (num_of_slaves < 1)
1434                 return 0;
1435
1436         /* Increment reference count on mbufs */
1437         for (i = 0; i < nb_pkts; i++)
1438                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1439
1440         /* Transmit burst on each active slave */
1441         for (i = 0; i < num_of_slaves; i++) {
1442                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1443                                         bufs, nb_pkts);
1444
1445                 if (unlikely(slave_tx_total[i] < nb_pkts))
1446                         tx_failed_flag = 1;
1447
1448                 /* record the count and index of the slave which transmits the
1449                  * maximum number of packets */
1450                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1451                         max_nb_of_tx_pkts = slave_tx_total[i];
1452                         most_successful_tx_slave = i;
1453                 }
1454         }
1455
1456         /* if slaves fail to transmit packets from burst, the calling application
1457          * is not expected to know about multiple references to packets so we must
1458          * handle failures of all packets except those of the most successful slave
1459          */
1460         if (unlikely(tx_failed_flag))
1461                 for (i = 0; i < num_of_slaves; i++)
1462                         if (i != most_successful_tx_slave)
1463                                 while (slave_tx_total[i] < nb_pkts)
1464                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1465
1466         return max_nb_of_tx_pkts;
1467 }
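
/*
 * Worked example of the refcount scheme above, assuming 3 active
 * slaves: each mbuf enters with refcnt 1 and is bumped to 3, one
 * reference per slave transmit. If the most successful slave sent k
 * packets, every other slave's unsent references are freed here, so
 * packets below index k end up fully consumed while each packet at or
 * beyond k keeps exactly one reference for the caller, matching the
 * returned count of k.
 */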
1468
1469 static void
1470 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1471 {
1472         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1473
1474         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1475                 /**
1476                  * If in mode 4 then save the link properties of the first
1477                  * slave; all subsequent slaves must match these properties
1478                  */
1479                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1480
1481                 bond_link->link_autoneg = slave_link->link_autoneg;
1482                 bond_link->link_duplex = slave_link->link_duplex;
1483                 bond_link->link_speed = slave_link->link_speed;
1484         } else {
1485                 /**
1486                  * In any other mode the link properties are set to default
1487                  * values of AUTONEG/DUPLEX
1488                  */
1489                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1490                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1491         }
1492 }
1493
1494 static int
1495 link_properties_valid(struct rte_eth_dev *ethdev,
1496                 struct rte_eth_link *slave_link)
1497 {
1498         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1499
1500         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1501                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1502
1503                 if (bond_link->link_duplex != slave_link->link_duplex ||
1504                         bond_link->link_autoneg != slave_link->link_autoneg ||
1505                         bond_link->link_speed != slave_link->link_speed)
1506                         return -1;
1507         }
1508
1509         return 0;
1510 }
1511
1512 int
1513 mac_address_get(struct rte_eth_dev *eth_dev,
1514                 struct rte_ether_addr *dst_mac_addr)
1515 {
1516         struct rte_ether_addr *mac_addr;
1517
1518         if (eth_dev == NULL) {
1519                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1520                 return -1;
1521         }
1522
1523         if (dst_mac_addr == NULL) {
1524                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1525                 return -1;
1526         }
1527
1528         mac_addr = eth_dev->data->mac_addrs;
1529
1530         rte_ether_addr_copy(mac_addr, dst_mac_addr);
1531         return 0;
1532 }
1533
1534 int
1535 mac_address_set(struct rte_eth_dev *eth_dev,
1536                 struct rte_ether_addr *new_mac_addr)
1537 {
1538         struct rte_ether_addr *mac_addr;
1539
1540         if (eth_dev == NULL) {
1541                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1542                 return -1;
1543         }
1544
1545         if (new_mac_addr == NULL) {
1546                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1547                 return -1;
1548         }
1549
1550         mac_addr = eth_dev->data->mac_addrs;
1551
1552         /* If the new MAC differs from the current MAC then update it */
1553         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1554                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1555
1556         return 0;
1557 }
1558
1559 static const struct rte_ether_addr null_mac_addr;
1560
1561 /*
1562  * Add additional MAC addresses to the slave
1563  */
1564 int
1565 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1566                 uint16_t slave_port_id)
1567 {
1568         int i, ret;
1569         struct rte_ether_addr *mac_addr;
1570
1571         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1572                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1573                 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1574                         break;
1575
1576                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1577                 if (ret < 0) {
1578                         /* rollback */
1579                         for (i--; i > 0; i--)
1580                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1581                                         &bonded_eth_dev->data->mac_addrs[i]);
1582                         return ret;
1583                 }
1584         }
1585
1586         return 0;
1587 }
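
/*
 * Sketch: secondary MACs originate from the bonded port's own address
 * table and are mirrored to the slaves by the helper above. Adding one
 * from application code (hypothetical values):
 *
 *      struct rte_ether_addr extra = {
 *              .addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 } };
 *      rte_eth_dev_mac_addr_add(bond_port, &extra, 0);
 */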
1588
1589 /*
1590  * Remove additional MAC addresses from the slave
1591  */
1592 int
1593 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1594                 uint16_t slave_port_id)
1595 {
1596         int i, rc, ret;
1597         struct rte_ether_addr *mac_addr;
1598
1599         rc = 0;
1600         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1601                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1602                 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1603                         break;
1604
1605                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1606                 /* save only the first error */
1607                 if (ret < 0 && rc == 0)
1608                         rc = ret;
1609         }
1610
1611         return rc;
1612 }
1613
1614 int
1615 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1616 {
1617         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1618         int i;
1619
1620         /* Update slave devices MAC addresses */
1621         if (internals->slave_count < 1)
1622                 return -1;
1623
1624         switch (internals->mode) {
1625         case BONDING_MODE_ROUND_ROBIN:
1626         case BONDING_MODE_BALANCE:
1627         case BONDING_MODE_BROADCAST:
1628                 for (i = 0; i < internals->slave_count; i++) {
1629                         if (rte_eth_dev_default_mac_addr_set(
1630                                         internals->slaves[i].port_id,
1631                                         bonded_eth_dev->data->mac_addrs)) {
1632                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1633                                                 internals->slaves[i].port_id);
1634                                 return -1;
1635                         }
1636                 }
1637                 break;
1638         case BONDING_MODE_8023AD:
1639                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1640                 break;
1641         case BONDING_MODE_ACTIVE_BACKUP:
1642         case BONDING_MODE_TLB:
1643         case BONDING_MODE_ALB:
1644         default:
1645                 for (i = 0; i < internals->slave_count; i++) {
1646                         if (internals->slaves[i].port_id ==
1647                                         internals->current_primary_port) {
1648                                 if (rte_eth_dev_default_mac_addr_set(
1649                                                 internals->current_primary_port,
1650                                                 bonded_eth_dev->data->mac_addrs)) {
1651                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1652                                                         internals->current_primary_port);
1653                                         return -1;
1654                                 }
1655                         } else {
1656                                 if (rte_eth_dev_default_mac_addr_set(
1657                                                 internals->slaves[i].port_id,
1658                                                 &internals->slaves[i].persisted_mac_addr)) {
1659                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1660                                                         internals->slaves[i].port_id);
1661                                         return -1;
1662                                 }
1663                         }
1664                 }
1665         }
1666
1667         return 0;
1668 }
1669
1670 int
1671 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1672 {
1673         struct bond_dev_private *internals;
1674
1675         internals = eth_dev->data->dev_private;
1676
1677         switch (mode) {
1678         case BONDING_MODE_ROUND_ROBIN:
1679                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1680                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1681                 break;
1682         case BONDING_MODE_ACTIVE_BACKUP:
1683                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1684                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1685                 break;
1686         case BONDING_MODE_BALANCE:
1687                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1688                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1689                 break;
1690         case BONDING_MODE_BROADCAST:
1691                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1692                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1693                 break;
1694         case BONDING_MODE_8023AD:
1695                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1696                         return -1;
1697
1698                 if (internals->mode4.dedicated_queues.enabled == 0) {
1699                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1700                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1701                         RTE_BOND_LOG(WARNING,
1702                                 "Using mode 4, it is necessary to do TX burst "
1703                                 "and RX burst at least every 100ms.");
1704                 } else {
1705                         /* Use flow director's optimization */
1706                         eth_dev->rx_pkt_burst =
1707                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1708                         eth_dev->tx_pkt_burst =
1709                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1710                 }
1711                 break;
1712         case BONDING_MODE_TLB:
1713                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1714                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1715                 break;
1716         case BONDING_MODE_ALB:
1717                 if (bond_mode_alb_enable(eth_dev) != 0)
1718                         return -1;
1719
1720                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1721                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1722                 break;
1723         default:
1724                 return -1;
1725         }
1726
1727         internals->mode = mode;
1728
1729         return 0;
1730 }
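
/*
 * Sketch: the burst handlers above are installed when a mode is chosen
 * through the public API, either at creation time or later. Port ids
 * "slave0"/"slave1" are hypothetical:
 *
 *      int bond_port = rte_eth_bond_create("net_bonding0",
 *                      BONDING_MODE_ROUND_ROBIN, rte_socket_id());
 *      rte_eth_bond_slave_add(bond_port, slave0);
 *      rte_eth_bond_slave_add(bond_port, slave1);
 *      rte_eth_bond_mode_set(bond_port, BONDING_MODE_8023AD);
 */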
1731
1732
1733 static int
1734 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1735                 struct rte_eth_dev *slave_eth_dev)
1736 {
1737         int errval = 0;
1738         struct bond_dev_private *internals = (struct bond_dev_private *)
1739                 bonded_eth_dev->data->dev_private;
1740         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1741
1742         if (port->slow_pool == NULL) {
1743                 char mem_name[256];
1744                 int slave_id = slave_eth_dev->data->port_id;
1745
1746                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1747                                 slave_id);
1748                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1749                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1750                         slave_eth_dev->data->numa_node);
1751
1752                 /* Any memory allocation failure in initialization is critical:
1753                  * resources can't be freed, so reinitialization is impossible. */
1754                 if (port->slow_pool == NULL) {
1755                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1756                                 slave_id, mem_name, rte_strerror(rte_errno));
1757                 }
1758         }
1759
1760         if (internals->mode4.dedicated_queues.enabled == 1) {
1761                 /* Configure slow Rx queue */
1762
1763                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1764                                 internals->mode4.dedicated_queues.rx_qid, 128,
1765                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1766                                 NULL, port->slow_pool);
1767                 if (errval != 0) {
1768                         RTE_BOND_LOG(ERR,
1769                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1770                                         slave_eth_dev->data->port_id,
1771                                         internals->mode4.dedicated_queues.rx_qid,
1772                                         errval);
1773                         return errval;
1774                 }
1775
1776                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1777                                 internals->mode4.dedicated_queues.tx_qid, 512,
1778                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1779                                 NULL);
1780                 if (errval != 0) {
1781                         RTE_BOND_LOG(ERR,
1782                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1783                                 slave_eth_dev->data->port_id,
1784                                 internals->mode4.dedicated_queues.tx_qid,
1785                                 errval);
1786                         return errval;
1787                 }
1788         }
1789         return 0;
1790 }
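
/*
 * The slow rx/tx queue pair configured above only exists if the
 * application opted in before device start, e.g. (sketch, hypothetical
 * id):
 *
 *      rte_eth_bond_8023ad_dedicated_queues_enable(bond_port);
 *
 * then configure and start the bonded port as usual; the extra queue
 * is appended after the data queues by slave_configure().
 */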
1791
1792 int
1793 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1794                 struct rte_eth_dev *slave_eth_dev)
1795 {
1796         struct bond_rx_queue *bd_rx_q;
1797         struct bond_tx_queue *bd_tx_q;
1798         uint16_t nb_rx_queues;
1799         uint16_t nb_tx_queues;
1800
1801         int errval;
1802         uint16_t q_id;
1803         struct rte_flow_error flow_error;
1804
1805         struct bond_dev_private *internals = (struct bond_dev_private *)
1806                 bonded_eth_dev->data->dev_private;
1807
1808         /* Stop slave */
1809         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1810
1811         /* Enable interrupts on slave device if supported */
1812         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1813                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1814
1815         /* If RSS is enabled for bonding, try to enable it for slaves */
1816         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1817                 if (internals->rss_key_len != 0) {
1818                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1819                                         internals->rss_key_len;
1820                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1821                                         internals->rss_key;
1822                 } else {
1823                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1824                 }
1825
1826                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1827                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1828                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1829                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1830         }
1831
1832         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1833                         DEV_RX_OFFLOAD_VLAN_FILTER)
1834                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1835                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1836         else
1837                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1838                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1839
1840         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1841         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1842
1843         if (internals->mode == BONDING_MODE_8023AD) {
1844                 if (internals->mode4.dedicated_queues.enabled == 1) {
1845                         nb_rx_queues++;
1846                         nb_tx_queues++;
1847                 }
1848         }
1849
1850         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1851                                      bonded_eth_dev->data->mtu);
1852         if (errval != 0 && errval != -ENOTSUP) {
1853                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1854                                 slave_eth_dev->data->port_id, errval);
1855                 return errval;
1856         }
1857
1858         /* Configure device */
1859         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1860                         nb_rx_queues, nb_tx_queues,
1861                         &(slave_eth_dev->data->dev_conf));
1862         if (errval != 0) {
1863                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1864                                 slave_eth_dev->data->port_id, errval);
1865                 return errval;
1866         }
1867
1868         /* Setup Rx Queues */
1869         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1870                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1871
1872                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1873                                 bd_rx_q->nb_rx_desc,
1874                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1875                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1876                 if (errval != 0) {
1877                         RTE_BOND_LOG(ERR,
1878                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1879                                         slave_eth_dev->data->port_id, q_id, errval);
1880                         return errval;
1881                 }
1882         }
1883
1884         /* Setup Tx Queues */
1885         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1886                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1887
1888                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1889                                 bd_tx_q->nb_tx_desc,
1890                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1891                                 &bd_tx_q->tx_conf);
1892                 if (errval != 0) {
1893                         RTE_BOND_LOG(ERR,
1894                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1895                                 slave_eth_dev->data->port_id, q_id, errval);
1896                         return errval;
1897                 }
1898         }
1899
1900         if (internals->mode == BONDING_MODE_8023AD &&
1901                         internals->mode4.dedicated_queues.enabled == 1) {
1902                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1903                 if (errval != 0)
1904                         return errval;
1905
1906                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1907                                 slave_eth_dev->data->port_id) != 0) {
1908                         RTE_BOND_LOG(ERR,
1909                                 "bond_ethdev_8023ad_flow_verify: port=%d, err",
1910                                 slave_eth_dev->data->port_id);
1911                         return -1;
1912                 }
1913
1914                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1915                         rte_flow_destroy(slave_eth_dev->data->port_id,
1916                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1917                                         &flow_error);
1918
1919                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1920                                 slave_eth_dev->data->port_id);
1921         }
1922
1923         /* Start device */
1924         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1925         if (errval != 0) {
1926                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1927                                 slave_eth_dev->data->port_id, errval);
1928                 return -1;
1929         }
1930
1931         /* If RSS is enabled for bonding, synchronize RETA */
1932         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1933                 int i;
1934                 struct bond_dev_private *internals;
1935
1936                 internals = bonded_eth_dev->data->dev_private;
1937
1938                 for (i = 0; i < internals->slave_count; i++) {
1939                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1940                                 errval = rte_eth_dev_rss_reta_update(
1941                                                 slave_eth_dev->data->port_id,
1942                                                 &internals->reta_conf[0],
1943                                                 internals->slaves[i].reta_size);
1944                                 if (errval != 0) {
1945                                         RTE_BOND_LOG(WARNING,
1946                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1947                                                      " RSS Configuration for bonding may be inconsistent.",
1948                                                      slave_eth_dev->data->port_id, errval);
1949                                 }
1950                                 break;
1951                         }
1952                 }
1953         }
1954
1955         /* If lsc interrupt is set, check the slave's initial link status */
1956         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1957                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1958                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1959                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1960                         NULL);
1961         }
1962
1963         return 0;
1964 }
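
/*
 * Sketch: an application configures only the bonded port; the settings
 * are replayed on every slave by slave_configure() above when the bond
 * starts. Hypothetical names:
 *
 *      rte_eth_dev_configure(bond_port, 1, 1, &port_conf);
 *      rte_eth_rx_queue_setup(bond_port, 0, 128, rte_socket_id(),
 *                      NULL, mbuf_pool);
 *      rte_eth_tx_queue_setup(bond_port, 0, 512, rte_socket_id(), NULL);
 *      rte_eth_dev_start(bond_port);
 */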
1965
1966 void
1967 slave_remove(struct bond_dev_private *internals,
1968                 struct rte_eth_dev *slave_eth_dev)
1969 {
1970         uint16_t i;
1971
1972         for (i = 0; i < internals->slave_count; i++)
1973                 if (internals->slaves[i].port_id ==
1974                                 slave_eth_dev->data->port_id)
1975                         break;
1976
1977         if (i < (internals->slave_count - 1)) {
1978                 struct rte_flow *flow;
1979
1980                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1981                                 sizeof(internals->slaves[0]) *
1982                                 (internals->slave_count - i - 1));
1983                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1984                         memmove(&flow->flows[i], &flow->flows[i + 1],
1985                                 sizeof(flow->flows[0]) *
1986                                 (internals->slave_count - i - 1));
1987                         flow->flows[internals->slave_count - 1] = NULL;
1988                 }
1989         }
1990
1991         internals->slave_count--;
1992
1993         /* force reconfiguration of slave interfaces */
1994         _rte_eth_dev_reset(slave_eth_dev);
1995 }
1996
1997 static void
1998 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1999
2000 void
2001 slave_add(struct bond_dev_private *internals,
2002                 struct rte_eth_dev *slave_eth_dev)
2003 {
2004         struct bond_slave_details *slave_details =
2005                         &internals->slaves[internals->slave_count];
2006
2007         slave_details->port_id = slave_eth_dev->data->port_id;
2008         slave_details->last_link_status = 0;
2009
2010         /* Mark slave devices that don't support interrupts so we can
2011          * compensate when we start the bond
2012          */
2013         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2014                 slave_details->link_status_poll_enabled = 1;
2015         }
2016
2017         slave_details->link_status_wait_to_complete = 0;
2018         /* save the slave's MAC so it can be restored when it leaves the bond */
2019         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2020                         sizeof(struct rte_ether_addr));
2021 }
2022
2023 void
2024 bond_ethdev_primary_set(struct bond_dev_private *internals,
2025                 uint16_t slave_port_id)
2026 {
2027         int i;
2028
2029         if (internals->active_slave_count < 1)
2030                 internals->current_primary_port = slave_port_id;
2031         else
2032                 /* Search bonded device slave ports for new proposed primary port */
2033                 for (i = 0; i < internals->active_slave_count; i++) {
2034                         if (internals->active_slaves[i] == slave_port_id)
2035                                 internals->current_primary_port = slave_port_id;
2036                 }
2037 }
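
/*
 * Sketch: the primary used by active-backup/TLB/ALB can be chosen from
 * the application through the public API (hypothetical ids):
 *
 *      if (rte_eth_bond_primary_set(bond_port, preferred_slave) != 0)
 *              handle the error; the port must already be a slave
 */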
2038
2039 static void
2040 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2041
2042 static int
2043 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2044 {
2045         struct bond_dev_private *internals;
2046         int i;
2047
2048         /* slave eth dev will be started by bonded device */
2049         if (check_for_bonded_ethdev(eth_dev)) {
2050                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2051                                 eth_dev->data->port_id);
2052                 return -1;
2053         }
2054
2055         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2056         eth_dev->data->dev_started = 1;
2057
2058         internals = eth_dev->data->dev_private;
2059
2060         if (internals->slave_count == 0) {
2061                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2062                 goto out_err;
2063         }
2064
2065         if (internals->user_defined_mac == 0) {
2066                 struct rte_ether_addr *new_mac_addr = NULL;
2067
2068                 for (i = 0; i < internals->slave_count; i++)
2069                         if (internals->slaves[i].port_id == internals->primary_port)
2070                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2071
2072                 if (new_mac_addr == NULL)
2073                         goto out_err;
2074
2075                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2076                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2077                                         eth_dev->data->port_id);
2078                         goto out_err;
2079                 }
2080         }
2081
2082         /* If bonded device is configured in promiscuous mode then re-apply config */
2083         if (internals->promiscuous_en)
2084                 bond_ethdev_promiscuous_enable(eth_dev);
2085
2086         if (internals->mode == BONDING_MODE_8023AD) {
2087                 if (internals->mode4.dedicated_queues.enabled == 1) {
2088                         internals->mode4.dedicated_queues.rx_qid =
2089                                         eth_dev->data->nb_rx_queues;
2090                         internals->mode4.dedicated_queues.tx_qid =
2091                                         eth_dev->data->nb_tx_queues;
2092                 }
2093         }
2094
2096         /* Reconfigure each slave device if starting bonded device */
2097         for (i = 0; i < internals->slave_count; i++) {
2098                 struct rte_eth_dev *slave_ethdev =
2099                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2100                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2101                         RTE_BOND_LOG(ERR,
2102                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2103                                 eth_dev->data->port_id,
2104                                 internals->slaves[i].port_id);
2105                         goto out_err;
2106                 }
2107                 /* We will need to poll for link status if any slave doesn't
2108                  * support interrupts
2109                  */
2110                 if (internals->slaves[i].link_status_poll_enabled)
2111                         internals->link_status_polling_enabled = 1;
2112         }
2113
2114         /* start polling if needed */
2115         if (internals->link_status_polling_enabled) {
2116                 rte_eal_alarm_set(
2117                         internals->link_status_polling_interval_ms * 1000,
2118                         bond_ethdev_slave_link_status_change_monitor,
2119                         (void *)&rte_eth_devices[internals->port_id]);
2120         }
2121
2122         /* Update all slave devices' MACs */
2123         if (mac_address_slaves_update(eth_dev) != 0)
2124                 goto out_err;
2125
2126         if (internals->user_defined_primary_port)
2127                 bond_ethdev_primary_set(internals, internals->primary_port);
2128
2129         if (internals->mode == BONDING_MODE_8023AD)
2130                 bond_mode_8023ad_start(eth_dev);
2131
2132         if (internals->mode == BONDING_MODE_TLB ||
2133                         internals->mode == BONDING_MODE_ALB)
2134                 bond_tlb_enable(internals);
2135
2136         return 0;
2137
2138 out_err:
2139         eth_dev->data->dev_started = 0;
2140         return -1;
2141 }
2142
2143 static void
2144 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2145 {
2146         uint16_t i;
2147
2148         if (dev->data->rx_queues != NULL) {
2149                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2150                         rte_free(dev->data->rx_queues[i]);
2151                         dev->data->rx_queues[i] = NULL;
2152                 }
2153                 dev->data->nb_rx_queues = 0;
2154         }
2155
2156         if (dev->data->tx_queues != NULL) {
2157                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2158                         rte_free(dev->data->tx_queues[i]);
2159                         dev->data->tx_queues[i] = NULL;
2160                 }
2161                 dev->data->nb_tx_queues = 0;
2162         }
2163 }
2164
2165 void
2166 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2167 {
2168         struct bond_dev_private *internals = eth_dev->data->dev_private;
2169         uint16_t i;
2170
2171         if (internals->mode == BONDING_MODE_8023AD) {
2172                 struct port *port;
2173                 void *pkt = NULL;
2174
2175                 bond_mode_8023ad_stop(eth_dev);
2176
2177                 /* Discard all messages to/from mode 4 state machines */
2178                 for (i = 0; i < internals->active_slave_count; i++) {
2179                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2180
2181                         RTE_ASSERT(port->rx_ring != NULL);
2182                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2183                                 rte_pktmbuf_free(pkt);
2184
2185                         RTE_ASSERT(port->tx_ring != NULL);
2186                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2187                                 rte_pktmbuf_free(pkt);
2188                 }
2189         }
2190
2191         if (internals->mode == BONDING_MODE_TLB ||
2192                         internals->mode == BONDING_MODE_ALB) {
2193                 bond_tlb_disable(internals);
2194                 for (i = 0; i < internals->active_slave_count; i++)
2195                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2196         }
2197
2198         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2199         eth_dev->data->dev_started = 0;
2200
2201         internals->link_status_polling_enabled = 0;
2202         for (i = 0; i < internals->slave_count; i++) {
2203                 uint16_t slave_id = internals->slaves[i].port_id;
2204                 if (find_slave_by_id(internals->active_slaves,
2205                                 internals->active_slave_count, slave_id) !=
2206                                                 internals->active_slave_count) {
2207                         internals->slaves[i].last_link_status = 0;
2208                         rte_eth_dev_stop(slave_id);
2209                         deactivate_slave(eth_dev, slave_id);
2210                 }
2211         }
2212 }
2213
2214 void
2215 bond_ethdev_close(struct rte_eth_dev *dev)
2216 {
2217         struct bond_dev_private *internals = dev->data->dev_private;
2218         uint16_t bond_port_id = internals->port_id;
2219         int skipped = 0;
2220         struct rte_flow_error ferror;
2221
2222         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2223         while (internals->slave_count != skipped) {
2224                 uint16_t port_id = internals->slaves[skipped].port_id;
2225
2226                 rte_eth_dev_stop(port_id);
2227
2228                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2229                         RTE_BOND_LOG(ERR,
2230                                      "Failed to remove port %d from bonded device %s",
2231                                      port_id, dev->device->name);
2232                         skipped++;
2233                 }
2234         }
2235         bond_flow_ops.flush(dev, &ferror);
2236         bond_ethdev_free_queues(dev);
2237         rte_bitmap_reset(internals->vlan_filter_bmp);
2238 }
2239
2240 /* forward declaration */
2241 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2242
2243 static void
2244 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2245 {
2246         struct bond_dev_private *internals = dev->data->dev_private;
2247
2248         uint16_t max_nb_rx_queues = UINT16_MAX;
2249         uint16_t max_nb_tx_queues = UINT16_MAX;
2250         uint16_t max_rx_desc_lim = UINT16_MAX;
2251         uint16_t max_tx_desc_lim = UINT16_MAX;
2252
2253         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2254
2255         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2256                         internals->candidate_max_rx_pktlen :
2257                         RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2258
2259         /* Max number of tx/rx queues that the bonded device can support is the
2260          * minimum value across the bonded slaves, as all slaves must be capable
2261          * of supporting the same number of tx/rx queues.
2262          */
2263         if (internals->slave_count > 0) {
2264                 struct rte_eth_dev_info slave_info;
2265                 uint16_t idx;
2266
2267                 for (idx = 0; idx < internals->slave_count; idx++) {
2268                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2269                                         &slave_info);
2270
2271                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2272                                 max_nb_rx_queues = slave_info.max_rx_queues;
2273
2274                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2275                                 max_nb_tx_queues = slave_info.max_tx_queues;
2276
2277                         if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2278                                 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2279
2280                         if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2281                                 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2282                 }
2283         }
2284
2285         dev_info->max_rx_queues = max_nb_rx_queues;
2286         dev_info->max_tx_queues = max_nb_tx_queues;
2287
2288         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2289                sizeof(dev_info->default_rxconf));
2290         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2291                sizeof(dev_info->default_txconf));
2292
2293         dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2294         dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2295
2296         /**
2297          * If dedicated hw queues are enabled for the bonding device in LACP mode
2298          * then we need to reduce the maximum number of data path queues by 1.
2299          */
2300         if (internals->mode == BONDING_MODE_8023AD &&
2301                 internals->mode4.dedicated_queues.enabled == 1) {
2302                 dev_info->max_rx_queues--;
2303                 dev_info->max_tx_queues--;
2304         }
2305
2306         dev_info->min_rx_bufsize = 0;
2307
2308         dev_info->rx_offload_capa = internals->rx_offload_capa;
2309         dev_info->tx_offload_capa = internals->tx_offload_capa;
2310         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2311         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2312         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2313
2314         dev_info->reta_size = internals->reta_size;
2315 }
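
/*
 * Sketch: because the limits above are minima over the current slaves,
 * they should be queried after all slaves are added (hypothetical id):
 *
 *      struct rte_eth_dev_info info;
 *      rte_eth_dev_info_get(bond_port, &info);
 *      info.max_rx_queues and info.max_tx_queues now bound the usable
 *      queue counts for rte_eth_dev_configure()
 */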
2316
2317 static int
2318 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2319 {
2320         int res;
2321         uint16_t i;
2322         struct bond_dev_private *internals = dev->data->dev_private;
2323
2324         /* don't do this while a slave is being added */
2325         rte_spinlock_lock(&internals->lock);
2326
2327         if (on)
2328                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2329         else
2330                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2331
2332         for (i = 0; i < internals->slave_count; i++) {
2333                 uint16_t port_id = internals->slaves[i].port_id;
2334
2335                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2336                 if (res == -ENOTSUP)
2337                         RTE_BOND_LOG(WARNING,
2338                                      "Setting VLAN filter on slave port %u not supported.",
2339                                      port_id);
2340         }
2341
2342         rte_spinlock_unlock(&internals->lock);
2343         return 0;
2344 }
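
/*
 * Sketch: VLAN filters set on the bonded port fan out to every current
 * slave, and the bitmap kept above lets them be replayed on slaves
 * added later (hypothetical id):
 *
 *      rte_eth_dev_vlan_filter(bond_port, 100, 1);     enable VID 100
 *      rte_eth_dev_vlan_filter(bond_port, 100, 0);     disable it again
 */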
2345
2346 static int
2347 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2348                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2349                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2350 {
2351         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2352                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2353                                         0, dev->data->numa_node);
2354         if (bd_rx_q == NULL)
2355                 return -1;
2356
2357         bd_rx_q->queue_id = rx_queue_id;
2358         bd_rx_q->dev_private = dev->data->dev_private;
2359
2360         bd_rx_q->nb_rx_desc = nb_rx_desc;
2361
2362         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2363         bd_rx_q->mb_pool = mb_pool;
2364
2365         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2366
2367         return 0;
2368 }
2369
2370 static int
2371 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2372                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2373                 const struct rte_eth_txconf *tx_conf)
2374 {
2375         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2376                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2377                                         0, dev->data->numa_node);
2378
2379         if (bd_tx_q == NULL)
2380                 return -1;
2381
2382         bd_tx_q->queue_id = tx_queue_id;
2383         bd_tx_q->dev_private = dev->data->dev_private;
2384
2385         bd_tx_q->nb_tx_desc = nb_tx_desc;
2386         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2387
2388         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2389
2390         return 0;
2391 }
2392
2393 static void
2394 bond_ethdev_rx_queue_release(void *queue)
2395 {
2396         if (queue == NULL)
2397                 return;
2398
2399         rte_free(queue);
2400 }
2401
2402 static void
2403 bond_ethdev_tx_queue_release(void *queue)
2404 {
2405         if (queue == NULL)
2406                 return;
2407
2408         rte_free(queue);
2409 }
2410
2411 static void
2412 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2413 {
2414         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2415         struct bond_dev_private *internals;
2416
2417         /* polling_slave_found defaults to true because we don't want to
2418          * disable the polling thread if we cannot get the lock */
2419         int i, polling_slave_found = 1;
2420
2421         if (cb_arg == NULL)
2422                 return;
2423
2424         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2425         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2426
2427         if (!bonded_ethdev->data->dev_started ||
2428                 !internals->link_status_polling_enabled)
2429                 return;
2430
2431         /* If the device is currently being configured then don't check the
2432          * slaves' link status; wait until the next period */
2433         if (rte_spinlock_trylock(&internals->lock)) {
2434                 if (internals->slave_count > 0)
2435                         polling_slave_found = 0;
2436
2437                 for (i = 0; i < internals->slave_count; i++) {
2438                         if (!internals->slaves[i].link_status_poll_enabled)
2439                                 continue;
2440
2441                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2442                         polling_slave_found = 1;
2443
2444                         /* Update slave link status */
2445                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2446                                         internals->slaves[i].link_status_wait_to_complete);
2447
2448                         /* if link status has changed since last checked then call lsc
2449                          * event callback */
2450                         if (slave_ethdev->data->dev_link.link_status !=
2451                                         internals->slaves[i].last_link_status) {
2452                                 internals->slaves[i].last_link_status =
2453                                                 slave_ethdev->data->dev_link.link_status;
2454
2455                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2456                                                 RTE_ETH_EVENT_INTR_LSC,
2457                                                 &bonded_ethdev->data->port_id,
2458                                                 NULL);
2459                         }
2460                 }
2461                 rte_spinlock_unlock(&internals->lock);
2462         }
2463
2464         if (polling_slave_found)
2465                 /* Set alarm to continue monitoring link status of slave ethdev's */
2466                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2467                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2468 }
2469
2470 static int
2471 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2472 {
2473         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2474
2475         struct bond_dev_private *bond_ctx;
2476         struct rte_eth_link slave_link;
2477
2478         uint32_t idx;
2479
2480         bond_ctx = ethdev->data->dev_private;
2481
2482         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2483
2484         if (ethdev->data->dev_started == 0 ||
2485                         bond_ctx->active_slave_count == 0) {
2486                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2487                 return 0;
2488         }
2489
2490         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2491
2492         if (wait_to_complete)
2493                 link_update = rte_eth_link_get;
2494         else
2495                 link_update = rte_eth_link_get_nowait;
2496
2497         switch (bond_ctx->mode) {
2498         case BONDING_MODE_BROADCAST:
2499                 /**
2500                  * Setting link speed to UINT32_MAX to ensure we pick up the
2501                  * value of the first active slave
2502                  */
2503                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2504
2505                 /**
2506                  * The link speed is the minimum of all the slaves' link speeds,
2507                  * as packets would be lost on a slave if transmission at a rate
2508                  * greater than its own were attempted
2509                  */
2510                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2511                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2512
2513                         if (slave_link.link_speed <
2514                                         ethdev->data->dev_link.link_speed)
2515                                 ethdev->data->dev_link.link_speed =
2516                                                 slave_link.link_speed;
2517                 }
2518                 break;
2519         case BONDING_MODE_ACTIVE_BACKUP:
2520                 /* Current primary slave */
2521                 link_update(bond_ctx->current_primary_port, &slave_link);
2522
2523                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2524                 break;
2525         case BONDING_MODE_8023AD:
2526                 ethdev->data->dev_link.link_autoneg =
2527                                 bond_ctx->mode4.slave_link.link_autoneg;
2528                 ethdev->data->dev_link.link_duplex =
2529                                 bond_ctx->mode4.slave_link.link_duplex;
2530                 /* fall through to update link speed */
2531         case BONDING_MODE_ROUND_ROBIN:
2532         case BONDING_MODE_BALANCE:
2533         case BONDING_MODE_TLB:
2534         case BONDING_MODE_ALB:
2535         default:
2536                 /**
2537                  * In these modes the maximum theoretical link speed is the sum
2538                  * of all the slaves' link speeds
2539                  */
2540                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2541
2542                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2543                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2544
2545                         ethdev->data->dev_link.link_speed +=
2546                                         slave_link.link_speed;
2547                 }
2548         }
2549
2551         return 0;
2552 }
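
/*
 * Sketch: the aggregate link computed above is read like any ethdev
 * link (hypothetical id); the speed is the sum or the minimum of the
 * slaves' speeds depending on the mode:
 *
 *      struct rte_eth_link link;
 *      rte_eth_link_get_nowait(bond_port, &link);
 *      printf("bond up=%d speed=%u Mbps\n",
 *                      link.link_status, link.link_speed);
 */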
2553
2555 static int
2556 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2557 {
2558         struct bond_dev_private *internals = dev->data->dev_private;
2559         struct rte_eth_stats slave_stats;
2560         int i, j;
2561
2562         for (i = 0; i < internals->slave_count; i++) {
2563                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2564
2565                 stats->ipackets += slave_stats.ipackets;
2566                 stats->opackets += slave_stats.opackets;
2567                 stats->ibytes += slave_stats.ibytes;
2568                 stats->obytes += slave_stats.obytes;
2569                 stats->imissed += slave_stats.imissed;
2570                 stats->ierrors += slave_stats.ierrors;
2571                 stats->oerrors += slave_stats.oerrors;
2572                 stats->rx_nombuf += slave_stats.rx_nombuf;
2573
2574                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2575                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2576                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2577                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2578                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2579                         stats->q_errors[j] += slave_stats.q_errors[j];
2580                 }
2581
2582         }
2583
2584         return 0;
2585 }
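
/*
 * Sketch: the counters above are summed from the current slaves on
 * each call, so removing a slave also removes its contribution.
 * Reading them (hypothetical id):
 *
 *      struct rte_eth_stats st;
 *      if (rte_eth_stats_get(bond_port, &st) == 0)
 *              printf("rx=%"PRIu64" tx=%"PRIu64"\n",
 *                              st.ipackets, st.opackets);
 */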
2586
2587 static void
2588 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2589 {
2590         struct bond_dev_private *internals = dev->data->dev_private;
2591         int i;
2592
2593         for (i = 0; i < internals->slave_count; i++)
2594                 rte_eth_stats_reset(internals->slaves[i].port_id);
2595 }
2596
2597 static void
2598 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2599 {
2600         struct bond_dev_private *internals = eth_dev->data->dev_private;
2601         int i;
2602
2603         internals->promiscuous_en = 1;
2604
2605         switch (internals->mode) {
2606         /* Promiscuous mode is propagated to all slaves */
2607         case BONDING_MODE_ROUND_ROBIN:
2608         case BONDING_MODE_BALANCE:
2609         case BONDING_MODE_BROADCAST:
2610                 for (i = 0; i < internals->slave_count; i++)
2611                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2612                 break;
2613         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2614         case BONDING_MODE_8023AD:
2615                 break;
2616         /* Promiscuous mode is propagated only to primary slave */
2617         case BONDING_MODE_ACTIVE_BACKUP:
2618         case BONDING_MODE_TLB:
2619         case BONDING_MODE_ALB:
2620         default:
2621                 /* Do not touch promisc when there cannot be primary ports */
2622                 if (internals->slave_count == 0)
2623                         break;
2624                 rte_eth_promiscuous_enable(internals->current_primary_port);
2625         }
2626 }
2627
2628 static void
2629 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2630 {
2631         struct bond_dev_private *internals = dev->data->dev_private;
2632         int i;
2633
2634         internals->promiscuous_en = 0;
2635
2636         switch (internals->mode) {
2637         /* Promiscuous mode is propagated to all slaves */
2638         case BONDING_MODE_ROUND_ROBIN:
2639         case BONDING_MODE_BALANCE:
2640         case BONDING_MODE_BROADCAST:
2641                 for (i = 0; i < internals->slave_count; i++)
2642                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2643                 break;
2644         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2645         case BONDING_MODE_8023AD:
2646                 break;
2647         /* Promiscuous mode is propagated only to primary slave */
2648         case BONDING_MODE_ACTIVE_BACKUP:
2649         case BONDING_MODE_TLB:
2650         case BONDING_MODE_ALB:
2651         default:
2652                 /* Do not touch promisc when there cannot be primary ports */
2653                 if (internals->slave_count == 0)
2654                         break;
2655                 rte_eth_promiscuous_disable(internals->current_primary_port);
2656         }
2657 }
2658
2659 static void
2660 bond_ethdev_delayed_lsc_propagation(void *arg)
2661 {
2662         if (arg == NULL)
2663                 return;
2664
2665         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2666                         RTE_ETH_EVENT_INTR_LSC, NULL);
2667 }
2668
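     /* Link status change callback registered on every slave port. It checks
      * that the event comes from a known slave, then activates or deactivates
      * that slave, updates the primary port and the bonded link status, and
      * propagates the (possibly delayed) LSC event to the application.
      */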
2669 int
2670 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2671                 void *param, void *ret_param __rte_unused)
2672 {
2673         struct rte_eth_dev *bonded_eth_dev;
2674         struct bond_dev_private *internals;
2675         struct rte_eth_link link;
2676         int rc = -1;
2677
2678         uint8_t lsc_flag = 0;
2679         int valid_slave = 0;
2680         uint16_t active_pos;
2681         uint16_t i;
2682
2683         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2684                 return rc;
2685
2686         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2687
2688         if (check_for_bonded_ethdev(bonded_eth_dev))
2689                 return rc;
2690
2691         internals = bonded_eth_dev->data->dev_private;
2692
2693         /* If the device isn't started don't handle interrupts */
2694         if (!bonded_eth_dev->data->dev_started)
2695                 return rc;
2696
2697         /* verify that port_id is a valid slave of bonded port */
2698         for (i = 0; i < internals->slave_count; i++) {
2699                 if (internals->slaves[i].port_id == port_id) {
2700                         valid_slave = 1;
2701                         break;
2702                 }
2703         }
2704
2705         if (!valid_slave)
2706                 return rc;
2707
2708         /* Synchronize parallel lsc callback calls, whether triggered by a real
2709          * link event from a slave PMD or by the bonding PMD itself.
2710          */
2711         rte_spinlock_lock(&internals->lsc_lock);
2712
2713         /* Search for port in active port list */
2714         active_pos = find_slave_by_id(internals->active_slaves,
2715                         internals->active_slave_count, port_id);
2716
2717         rte_eth_link_get_nowait(port_id, &link);
2718         if (link.link_status) {
2719                 if (active_pos < internals->active_slave_count)
2720                         goto link_update;
2721
2722                 /* Check link state properties if the bonded link is up */
2723                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2724                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2725                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2726                                              "for slave %d in bonding mode %d",
2727                                              port_id, internals->mode);
2728                 } else {
2729                         /* inherit slave link properties */
2730                         link_properties_set(bonded_eth_dev, &link);
2731                 }
2732
2733                 /* If no active slave ports then set this port to be
2734                  * the primary port.
2735                  */
2736                 if (internals->active_slave_count < 1) {
2737                         /* If first active slave, then change link status */
2738                         bonded_eth_dev->data->dev_link.link_status =
2739                                                                 ETH_LINK_UP;
2740                         internals->current_primary_port = port_id;
2741                         lsc_flag = 1;
2742
2743                         mac_address_slaves_update(bonded_eth_dev);
2744                 }
2745
2746                 activate_slave(bonded_eth_dev, port_id);
2747
2748                 /* If the user has defined the primary port then default to
2749                  * using it.
2750                  */
2751                 if (internals->user_defined_primary_port &&
2752                                 internals->primary_port == port_id)
2753                         bond_ethdev_primary_set(internals, port_id);
2754         } else {
2755                 if (active_pos == internals->active_slave_count)
2756                         goto link_update;
2757
2758                 /* Remove from active slave list */
2759                 deactivate_slave(bonded_eth_dev, port_id);
2760
2761                 if (internals->active_slave_count < 1)
2762                         lsc_flag = 1;
2763
2764                 /* Update primary id: take the first active slave from the list,
2765                  * or fall back to the configured primary port if none available */
2766                 if (port_id == internals->current_primary_port) {
2767                         if (internals->active_slave_count > 0)
2768                                 bond_ethdev_primary_set(internals,
2769                                                 internals->active_slaves[0]);
2770                         else
2771                                 internals->current_primary_port = internals->primary_port;
2772                 }
2773         }
2774
2775 link_update:
2776         /**
2777          * Update bonded device link properties after any change to active
2778          * slaves
2779          */
2780         bond_ethdev_link_update(bonded_eth_dev, 0);
2781
2782         if (lsc_flag) {
2783                 /* Cancel any possible outstanding interrupts if delays are enabled */
2784                 if (internals->link_up_delay_ms > 0 ||
2785                         internals->link_down_delay_ms > 0)
2786                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2787                                         bonded_eth_dev);
2788
2789                 if (bonded_eth_dev->data->dev_link.link_status) {
2790                         if (internals->link_up_delay_ms > 0)
2791                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2792                                                 bond_ethdev_delayed_lsc_propagation,
2793                                                 (void *)bonded_eth_dev);
2794                         else
2795                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2796                                                 RTE_ETH_EVENT_INTR_LSC,
2797                                                 NULL);
2798
2799                 } else {
2800                         if (internals->link_down_delay_ms > 0)
2801                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2802                                                 bond_ethdev_delayed_lsc_propagation,
2803                                                 (void *)bonded_eth_dev);
2804                         else
2805                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2806                                                 RTE_ETH_EVENT_INTR_LSC,
2807                                                 NULL);
2808                 }
2809         }
2810
2811         rte_spinlock_unlock(&internals->lsc_lock);
2812
2813         return 0; /* all failure paths returned early above */
2814 }
2815
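     /* Update the RSS redirection table of the bonded device: the new entries
      * are cached in internals->reta_conf, replicated across the whole array,
      * and then propagated to every slave using that slave's own RETA size.
      */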
2816 static int
2817 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2818                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2819 {
2820         unsigned i, j;
2821         int result = 0;
2822         int slave_reta_size;
2823         unsigned reta_count;
2824         struct bond_dev_private *internals = dev->data->dev_private;
2825
2826         if (reta_size != internals->reta_size)
2827                 return -EINVAL;
2828
2829         /* Copy RETA table */
2830         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2831
2832         for (i = 0; i < reta_count; i++) {
2833                 internals->reta_conf[i].mask = reta_conf[i].mask;
2834                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2835                         if ((reta_conf[i].mask >> j) & 0x01)
2836                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2837         }
2838
2839         /* Fill rest of array */
2840         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2841                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2842                                 sizeof(internals->reta_conf[0]) * reta_count);
2843
2844         /* Propagate RETA over slaves */
2845         for (i = 0; i < internals->slave_count; i++) {
2846                 slave_reta_size = internals->slaves[i].reta_size;
2847                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2848                                 &internals->reta_conf[0], slave_reta_size);
2849                 if (result < 0)
2850                         return result;
2851         }
2852
2853         return 0;
2854 }
2855
2856 static int
2857 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2858                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2859 {
2860         int i, j;
2861         struct bond_dev_private *internals = dev->data->dev_private;
2862
2863         if (reta_size != internals->reta_size)
2864                 return -EINVAL;
2865
2866         /* Copy RETA table */
2867         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2868                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2869                         if ((reta_conf[i].mask >> j) & 0x01)
2870                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2871
2872         return 0;
2873 }
2874
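     /* Update the RSS hash configuration of the bonded device: the requested
      * hash functions are masked against the supported RSS offloads, the key
      * is cached, and the resulting configuration is pushed to every slave.
      */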
2875 static int
2876 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2877                 struct rte_eth_rss_conf *rss_conf)
2878 {
2879         int i, result = 0;
2880         struct bond_dev_private *internals = dev->data->dev_private;
2881         struct rte_eth_rss_conf bond_rss_conf;
2882
2883         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2884
2885         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2886
2887         if (bond_rss_conf.rss_hf != 0)
2888                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2889
2890         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2891                         sizeof(internals->rss_key)) {
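                      /* A zero key length means no explicit size was given;
                       * fall back to 40 bytes, the length of the default RSS
                       * key used by this driver (see default_rss_key in
                       * bond_ethdev_configure()).
                       */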
2892                 if (bond_rss_conf.rss_key_len == 0)
2893                         bond_rss_conf.rss_key_len = 40;
2894                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2895                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2896                                 internals->rss_key_len);
2897         }
2898
2899         for (i = 0; i < internals->slave_count; i++) {
2900                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2901                                 &bond_rss_conf);
2902                 if (result < 0)
2903                         return result;
2904         }
2905
2906         return 0;
2907 }
2908
2909 static int
2910 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2911                 struct rte_eth_rss_conf *rss_conf)
2912 {
2913         struct bond_dev_private *internals = dev->data->dev_private;
2914
2915         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2916         rss_conf->rss_key_len = internals->rss_key_len;
2917         if (rss_conf->rss_key)
2918                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2919
2920         return 0;
2921 }
2922
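     /* Set the MTU on the bonded device in two passes: first verify that
      * every slave implements mtu_set, so the operation cannot fail half way
      * through for lack of support, then apply the MTU to each slave in turn.
      */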
2923 static int
2924 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2925 {
2926         struct rte_eth_dev *slave_eth_dev;
2927         struct bond_dev_private *internals = dev->data->dev_private;
2928         int ret, i;
2929
2930         rte_spinlock_lock(&internals->lock);
2931
2932         for (i = 0; i < internals->slave_count; i++) {
2933                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2934                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2935                         rte_spinlock_unlock(&internals->lock);
2936                         return -ENOTSUP;
2937                 }
2938         }
2939         for (i = 0; i < internals->slave_count; i++) {
2940                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2941                 if (ret < 0) {
2942                         rte_spinlock_unlock(&internals->lock);
2943                         return ret;
2944                 }
2945         }
2946
2947         rte_spinlock_unlock(&internals->lock);
2948         return 0;
2949 }
2950
2951 static int
2952 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
2953                         struct rte_ether_addr *addr)
2954 {
2955         if (mac_address_set(dev, addr)) {
2956                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2957                 return -EINVAL;
2958         }
2959
2960         return 0;
2961 }
2962
2963 static int
2964 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2965                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2966 {
2967         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2968                 *(const void **)arg = &bond_flow_ops;
2969                 return 0;
2970         }
2971         return -ENOTSUP;
2972 }
2973
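     /* Add a MAC address to the bonded device by adding it to every slave.
      * Slave support is checked up front, and on a partial failure the
      * address is rolled back from the slaves that already accepted it.
      */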
2974 static int
2975 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
2976                         struct rte_ether_addr *mac_addr,
2977                         __rte_unused uint32_t index, uint32_t vmdq)
2978 {
2979         struct rte_eth_dev *slave_eth_dev;
2980         struct bond_dev_private *internals = dev->data->dev_private;
2981         int ret, i;
2982
2983         rte_spinlock_lock(&internals->lock);
2984
2985         for (i = 0; i < internals->slave_count; i++) {
2986                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2987                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2988                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2989                         ret = -ENOTSUP;
2990                         goto end;
2991                 }
2992         }
2993
2994         for (i = 0; i < internals->slave_count; i++) {
2995                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2996                                 mac_addr, vmdq);
2997                 if (ret < 0) {
2998                         /* rollback */
2999                         for (i--; i >= 0; i--)
3000                                 rte_eth_dev_mac_addr_remove(
3001                                         internals->slaves[i].port_id, mac_addr);
3002                         goto end;
3003                 }
3004         }
3005
3006         ret = 0;
3007 end:
3008         rte_spinlock_unlock(&internals->lock);
3009         return ret;
3010 }
3011
3012 static void
3013 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3014 {
3015         struct rte_eth_dev *slave_eth_dev;
3016         struct bond_dev_private *internals = dev->data->dev_private;
3017         int i;
3018
3019         rte_spinlock_lock(&internals->lock);
3020
3021         for (i = 0; i < internals->slave_count; i++) {
3022                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3023                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3024                         goto end;
3025         }
3026
3027         struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3028
3029         for (i = 0; i < internals->slave_count; i++)
3030                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3031                                 mac_addr);
3032
3033 end:
3034         rte_spinlock_unlock(&internals->lock);
3035 }
3036
3037 const struct eth_dev_ops default_dev_ops = {
3038         .dev_start            = bond_ethdev_start,
3039         .dev_stop             = bond_ethdev_stop,
3040         .dev_close            = bond_ethdev_close,
3041         .dev_configure        = bond_ethdev_configure,
3042         .dev_infos_get        = bond_ethdev_info,
3043         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3044         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3045         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3046         .rx_queue_release     = bond_ethdev_rx_queue_release,
3047         .tx_queue_release     = bond_ethdev_tx_queue_release,
3048         .link_update          = bond_ethdev_link_update,
3049         .stats_get            = bond_ethdev_stats_get,
3050         .stats_reset          = bond_ethdev_stats_reset,
3051         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3052         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3053         .reta_update          = bond_ethdev_rss_reta_update,
3054         .reta_query           = bond_ethdev_rss_reta_query,
3055         .rss_hash_update      = bond_ethdev_rss_hash_update,
3056         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3057         .mtu_set              = bond_ethdev_mtu_set,
3058         .mac_addr_set         = bond_ethdev_mac_address_set,
3059         .mac_addr_add         = bond_ethdev_mac_addr_add,
3060         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3061         .filter_ctrl          = bond_filter_ctrl
3062 };
3063
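     /* Allocate and initialise a bonded ethdev: reserve the ethdev entry, set
      * up MAC address storage, default internals, the requested bonding mode
      * and the VLAN filter bitmap. Returns the new port id, or -1 on failure.
      */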
3064 static int
3065 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3066 {
3067         const char *name = rte_vdev_device_name(dev);
3068         uint8_t socket_id = dev->device.numa_node;
3069         struct bond_dev_private *internals = NULL;
3070         struct rte_eth_dev *eth_dev = NULL;
3071         uint32_t vlan_filter_bmp_size;
3072
3073         /* now do all the data allocation - for the eth_dev structure
3074          * and the internal (private) data
3075          */
3076
3077         /* reserve an ethdev entry */
3078         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3079         if (eth_dev == NULL) {
3080                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3081                 goto err;
3082         }
3083
3084         internals = eth_dev->data->dev_private;
3085         eth_dev->data->nb_rx_queues = (uint16_t)1;
3086         eth_dev->data->nb_tx_queues = (uint16_t)1;
3087
3088         /* Allocate memory for storing MAC addresses */
3089         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
3090                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3091         if (eth_dev->data->mac_addrs == NULL) {
3092                 RTE_BOND_LOG(ERR,
3093                              "Failed to allocate %u bytes needed to store MAC addresses",
3094                              RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3095                 goto err;
3096         }
3097
3098         eth_dev->dev_ops = &default_dev_ops;
3099         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3100
3101         rte_spinlock_init(&internals->lock);
3102         rte_spinlock_init(&internals->lsc_lock);
3103
3104         internals->port_id = eth_dev->data->port_id;
3105         internals->mode = BONDING_MODE_INVALID;
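             /* An out-of-range port id is used to mean "no primary selected yet" */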
3106         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3107         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3108         internals->burst_xmit_hash = burst_xmit_l2_hash;
3109         internals->user_defined_mac = 0;
3110
3111         internals->link_status_polling_enabled = 0;
3112
3113         internals->link_status_polling_interval_ms =
3114                 DEFAULT_POLLING_INTERVAL_10_MS;
3115         internals->link_down_delay_ms = 0;
3116         internals->link_up_delay_ms = 0;
3117
3118         internals->slave_count = 0;
3119         internals->active_slave_count = 0;
3120         internals->rx_offload_capa = 0;
3121         internals->tx_offload_capa = 0;
3122         internals->rx_queue_offload_capa = 0;
3123         internals->tx_queue_offload_capa = 0;
3124         internals->candidate_max_rx_pktlen = 0;
3125         internals->max_rx_pktlen = 0;
3126
3127         /* Initially allow any RSS offload type to be chosen */
3128         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3129
3130         memset(&internals->default_rxconf, 0,
3131                sizeof(internals->default_rxconf));
3132         memset(&internals->default_txconf, 0,
3133                sizeof(internals->default_txconf));
3134
3135         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3136         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3137
3138         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3139         memset(internals->slaves, 0, sizeof(internals->slaves));
3140
3141         TAILQ_INIT(&internals->flow_list);
3142         internals->flow_isolated_valid = 0;
3143
3144         /* Set mode 4 default configuration */
3145         bond_mode_8023ad_setup(eth_dev, NULL);
3146         if (bond_ethdev_mode_set(eth_dev, mode)) {
3147                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3148                                  eth_dev->data->port_id, mode);
3149                 goto err;
3150         }
3151
3152         vlan_filter_bmp_size =
3153                 rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3154         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3155                                                    RTE_CACHE_LINE_SIZE);
3156         if (internals->vlan_filter_bmpmem == NULL) {
3157                 RTE_BOND_LOG(ERR,
3158                              "Failed to allocate vlan bitmap for bonded device %u",
3159                              eth_dev->data->port_id);
3160                 goto err;
3161         }
3162
3163         internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3164                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3165         if (internals->vlan_filter_bmp == NULL) {
3166                 RTE_BOND_LOG(ERR,
3167                              "Failed to init vlan bitmap for bonded device %u",
3168                              eth_dev->data->port_id);
3169                 rte_free(internals->vlan_filter_bmpmem);
3170                 goto err;
3171         }
3172
3173         return eth_dev->data->port_id;
3174
3175 err:
3176         rte_free(internals);
3177         if (eth_dev != NULL)
3178                 eth_dev->data->dev_private = NULL;
3179         rte_eth_dev_release_port(eth_dev);
3180         return -1;
3181 }
3182
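     /* vdev probe entry point. In a secondary process only the ethdev entry
      * is attached; in the primary process the devargs are parsed (see the
      * parameter string registered at the end of this file) and the bonded
      * device is allocated, e.g. with an illustrative EAL argument such as:
      *   --vdev 'net_bonding0,mode=1,slave=0000:05:00.0,slave=0000:05:00.1'
      */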
3183 static int
3184 bond_probe(struct rte_vdev_device *dev)
3185 {
3186         const char *name;
3187         struct bond_dev_private *internals;
3188         struct rte_kvargs *kvlist;
3189         uint8_t bonding_mode, socket_id;
3190         int  arg_count, port_id;
3191         uint8_t agg_mode;
3192         struct rte_eth_dev *eth_dev;
3193
3194         if (!dev)
3195                 return -EINVAL;
3196
3197         name = rte_vdev_device_name(dev);
3198         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3199
3200         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3201                 eth_dev = rte_eth_dev_attach_secondary(name);
3202                 if (!eth_dev) {
3203                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3204                         return -1;
3205                 }
3206                 /* TODO: request info from primary to set up Rx and Tx */
3207                 eth_dev->dev_ops = &default_dev_ops;
3208                 eth_dev->device = &dev->device;
3209                 rte_eth_dev_probing_finish(eth_dev);
3210                 return 0;
3211         }
3212
3213         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3214                 pmd_bond_init_valid_arguments);
3215         if (kvlist == NULL)
3216                 return -1;
3217
3218         /* Parse link bonding mode */
3219         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3220                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3221                                 &bond_ethdev_parse_slave_mode_kvarg,
3222                                 &bonding_mode) != 0) {
3223                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3224                                         name);
3225                         goto parse_error;
3226                 }
3227         } else {
3228                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3229                                 "device %s", name);
3230                 goto parse_error;
3231         }
3232
3233         /* Parse socket id to create bonding device on */
3234         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3235         if (arg_count == 1) {
3236                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3237                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3238                                 != 0) {
3239                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3240                                         "bonded device %s", name);
3241                         goto parse_error;
3242                 }
3243         } else if (arg_count > 1) {
3244                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3245                                 "bonded device %s", name);
3246                 goto parse_error;
3247         } else {
3248                 socket_id = rte_socket_id();
3249         }
3250
3251         dev->device.numa_node = socket_id;
3252
3253         /* Create link bonding eth device */
3254         port_id = bond_alloc(dev, bonding_mode);
3255         if (port_id < 0) {
3256                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3257                                 "socket %u.", name, bonding_mode, socket_id);
3258                 goto parse_error;
3259         }
3260         internals = rte_eth_devices[port_id].data->dev_private;
3261         internals->kvlist = kvlist;
3262
3263         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3264                 if (rte_kvargs_process(kvlist,
3265                                 PMD_BOND_AGG_MODE_KVARG,
3266                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3267                                 &agg_mode) != 0) {
3268                         RTE_BOND_LOG(ERR,
3269                                         "Failed to parse agg selection mode for bonded device %s",
3270                                         name);
3271                         goto parse_error;
3272                 }
3273
3274                 if (internals->mode == BONDING_MODE_8023AD)
3275                         internals->mode4.agg_selection = agg_mode;
3276         } else {
3277                 internals->mode4.agg_selection = AGG_STABLE;
3278         }
3279
3280         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3281         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3282                         "socket %u.", name, port_id, bonding_mode, socket_id);
3283         return 0;
3284
3285 parse_error:
3286         rte_kvargs_free(kvlist);
3287
3288         return -1;
3289 }
3290
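     /* vdev remove entry point: refuses to remove a device that still has
      * slaves attached (-EBUSY); otherwise stops and closes the device and
      * frees the bonding private resources.
      */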
3291 static int
3292 bond_remove(struct rte_vdev_device *dev)
3293 {
3294         struct rte_eth_dev *eth_dev;
3295         struct bond_dev_private *internals;
3296         const char *name;
3297
3298         if (!dev)
3299                 return -EINVAL;
3300
3301         name = rte_vdev_device_name(dev);
3302         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3303
3304         /* now free all the data allocations - the eth_dev structure
3305          * and the internal (private) data
3306          */
3307
3308         /* find an ethdev entry */
3309         eth_dev = rte_eth_dev_allocated(name);
3310         if (eth_dev == NULL)
3311                 return -ENODEV;
3312
3313         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3314                 return rte_eth_dev_release_port(eth_dev);
3315
3316         RTE_ASSERT(eth_dev->device == &dev->device);
3317
3318         internals = eth_dev->data->dev_private;
3319         if (internals->slave_count != 0)
3320                 return -EBUSY;
3321
3322         if (eth_dev->data->dev_started == 1) {
3323                 bond_ethdev_stop(eth_dev);
3324                 bond_ethdev_close(eth_dev);
3325         }
3326
3327         eth_dev->dev_ops = NULL;
3328         eth_dev->rx_pkt_burst = NULL;
3329         eth_dev->tx_pkt_burst = NULL;
3330
3331         internals = eth_dev->data->dev_private;
3332         /* Try to release the mempool used in mode 6. If the bonded
3333          * device is not in mode 6, the pointer is NULL and freeing it is harmless.
3334          */
3335         rte_mempool_free(internals->mode6.mempool);
3336         rte_bitmap_free(internals->vlan_filter_bmp);
3337         rte_free(internals->vlan_filter_bmpmem);
3338
3339         rte_eth_dev_release_port(eth_dev);
3340
3341         return 0;
3342 }
3343
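     /* Bonded devices can also be created programmatically; a minimal sketch
      * using the public API from rte_eth_bond.h:
      *
      *   int port = rte_eth_bond_create("net_bonding0", BONDING_MODE_BALANCE,
      *                                  rte_socket_id());
      *   if (port >= 0)
      *           rte_eth_bond_slave_add(port, slave_port_id);
      *
      * In that case internals->kvlist below is NULL and only the RSS setup at
      * the top of this function applies.
      */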
3344 /* This function resolves the slave port ids after all the other pdevs and
3345  * vdevs have been allocated */
3346 static int
3347 bond_ethdev_configure(struct rte_eth_dev *dev)
3348 {
3349         const char *name = dev->device->name;
3350         struct bond_dev_private *internals = dev->data->dev_private;
3351         struct rte_kvargs *kvlist = internals->kvlist;
3352         int arg_count;
3353         uint16_t port_id = dev - rte_eth_devices;
3354         uint8_t agg_mode;
3355
3356         static const uint8_t default_rss_key[40] = {
3357                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3358                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3359                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3360                 0xBE, 0xAC, 0x01, 0xFA
3361         };
3362
3363         unsigned i, j;
3364
3365         /*
3366          * If RSS is enabled, fill table with default values and
3367          * set the key to the value specified in the port RSS configuration.
3368          * Fall back to default RSS key if the key is not specified
3369          */
3370         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3371                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3372                         internals->rss_key_len =
3373                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3374                         memcpy(internals->rss_key,
3375                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3376                                internals->rss_key_len);
3377                 } else {
3378                         internals->rss_key_len = sizeof(default_rss_key);
3379                         memcpy(internals->rss_key, default_rss_key,
3380                                internals->rss_key_len);
3381                 }
3382
3383                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3384                         internals->reta_conf[i].mask = ~0LL;
3385                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3386                                 internals->reta_conf[i].reta[j] =
3387                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3388                                                 dev->data->nb_rx_queues;
3389                 }
3390         }
3391
3392         /* set the max_rx_pktlen */
3393         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3394
3395         /*
3396          * If there is no kvlist, this bonded device was created through the
3397          * bonding API rather than from devargs.
3398          */
3399         if (!kvlist)
3400                 return 0;
3401
3402         /* Parse MAC address for bonded device */
3403         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3404         if (arg_count == 1) {
3405                 struct rte_ether_addr bond_mac;
3406
3407                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3408                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3409                         RTE_BOND_LOG(INFO, "Invalid MAC address for bonded device %s",
3410                                      name);
3411                         return -1;
3412                 }
3413
3414                 /* Set MAC address */
3415                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3416                         RTE_BOND_LOG(ERR,
3417                                      "Failed to set MAC address on bonded device %s",
3418                                      name);
3419                         return -1;
3420                 }
3421         } else if (arg_count > 1) {
3422                 RTE_BOND_LOG(ERR,
3423                              "MAC address can be specified only once for bonded device %s",
3424                              name);
3425                 return -1;
3426         }
3427
3428         /* Parse/set balance mode transmit policy */
3429         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3430         if (arg_count == 1) {
3431                 uint8_t xmit_policy;
3432
3433                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3434                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3435                     0) {
3436                         RTE_BOND_LOG(INFO,
3437                                      "Invalid xmit policy specified for bonded device %s",
3438                                      name);
3439                         return -1;
3440                 }
3441
3442                 /* Set the balance mode transmit policy */
3443                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3444                         RTE_BOND_LOG(ERR,
3445                                      "Failed to set balance xmit policy on bonded device %s",
3446                                      name);
3447                         return -1;
3448                 }
3449         } else if (arg_count > 1) {
3450                 RTE_BOND_LOG(ERR,
3451                              "Transmit policy can be specified only once for bonded device %s",
3452                              name);
3453                 return -1;
3454         }
3455
3456         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3457                 if (rte_kvargs_process(kvlist,
3458                                        PMD_BOND_AGG_MODE_KVARG,
3459                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3460                                        &agg_mode) != 0) {
3461                         RTE_BOND_LOG(ERR,
3462                                      "Failed to parse agg selection mode for bonded device %s",
3463                                      name);
3464                 }
3465                 if (internals->mode == BONDING_MODE_8023AD) {
3466                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3467                                         agg_mode);
3468                         if (ret < 0) {
3469                                 RTE_BOND_LOG(ERR,
3470                                         "Invalid args for agg selection set for bonded device %s",
3471                                         name);
3472                                 return -1;
3473                         }
3474                 }
3475         }
3476
3477         /* Parse/add slave ports to bonded device */
3478         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3479                 struct bond_ethdev_slave_ports slave_ports;
3480                 unsigned i;
3481
3482                 memset(&slave_ports, 0, sizeof(slave_ports));
3483
3484                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3485                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3486                         RTE_BOND_LOG(ERR,
3487                                      "Failed to parse slave ports for bonded device %s",
3488                                      name);
3489                         return -1;
3490                 }
3491
3492                 for (i = 0; i < slave_ports.slave_count; i++) {
3493                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3494                                 RTE_BOND_LOG(ERR,
3495                                              "Failed to add port %d as slave to bonded device %s",
3496                                              slave_ports.slaves[i], name);
3497                         }
3498                 }
3499
3500         } else {
3501                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3502                 return -1;
3503         }
3504
3505         /* Parse/set primary slave port id*/
3506         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3507         if (arg_count == 1) {
3508                 uint16_t primary_slave_port_id;
3509
3510                 if (rte_kvargs_process(kvlist,
3511                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3512                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3513                                        &primary_slave_port_id) < 0) {
3514                         RTE_BOND_LOG(INFO,
3515                                      "Invalid primary slave port id specified for bonded device %s",
3516                                      name);
3517                         return -1;
3518                 }
3519
3520                 /* Set the primary slave port id */
3521                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3522                     != 0) {
3523                         RTE_BOND_LOG(ERR,
3524                                      "Failed to set primary slave port %d on bonded device %s",
3525                                      primary_slave_port_id, name);
3526                         return -1;
3527                 }
3528         } else if (arg_count > 1) {
3529                 RTE_BOND_LOG(INFO,
3530                              "Primary slave can be specified only once for bonded device %s",
3531                              name);
3532                 return -1;
3533         }
3534
3535         /* Parse link status monitor polling interval */
3536         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3537         if (arg_count == 1) {
3538                 uint32_t lsc_poll_interval_ms;
3539
3540                 if (rte_kvargs_process(kvlist,
3541                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3542                                        &bond_ethdev_parse_time_ms_kvarg,
3543                                        &lsc_poll_interval_ms) < 0) {
3544                         RTE_BOND_LOG(INFO,
3545                                      "Invalid lsc polling interval value specified for bonded"
3546                                      " device %s", name);
3547                         return -1;
3548                 }
3549
3550                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3551                     != 0) {
3552                         RTE_BOND_LOG(ERR,
3553                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3554                                      lsc_poll_interval_ms, name);
3555                         return -1;
3556                 }
3557         } else if (arg_count > 1) {
3558                 RTE_BOND_LOG(INFO,
3559                              "LSC polling interval can be specified only once for bonded"
3560                              " device %s", name);
3561                 return -1;
3562         }
3563
3564         /* Parse link up interrupt propagation delay */
3565         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3566         if (arg_count == 1) {
3567                 uint32_t link_up_delay_ms;
3568
3569                 if (rte_kvargs_process(kvlist,
3570                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3571                                        &bond_ethdev_parse_time_ms_kvarg,
3572                                        &link_up_delay_ms) < 0) {
3573                         RTE_BOND_LOG(INFO,
3574                                      "Invalid link up propagation delay value specified for"
3575                                      " bonded device %s", name);
3576                         return -1;
3577                 }
3578
3579                 /* Set the link up propagation delay */
3580                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3581                     != 0) {
3582                         RTE_BOND_LOG(ERR,
3583                                      "Failed to set link up propagation delay (%u ms) on bonded"
3584                                      " device %s", link_up_delay_ms, name);
3585                         return -1;
3586                 }
3587         } else if (arg_count > 1) {
3588                 RTE_BOND_LOG(INFO,
3589                              "Link up propagation delay can be specified only once for"
3590                              " bonded device %s", name);
3591                 return -1;
3592         }
3593
3594         /* Parse link down interrupt propagation delay */
3595         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3596         if (arg_count == 1) {
3597                 uint32_t link_down_delay_ms;
3598
3599                 if (rte_kvargs_process(kvlist,
3600                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3601                                        &bond_ethdev_parse_time_ms_kvarg,
3602                                        &link_down_delay_ms) < 0) {
3603                         RTE_BOND_LOG(INFO,
3604                                      "Invalid link down propagation delay value specified for"
3605                                      " bonded device %s", name);
3606                         return -1;
3607                 }
3608
3609                 /* Set the link down propagation delay */
3610                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3611                     != 0) {
3612                         RTE_BOND_LOG(ERR,
3613                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3614                                      link_down_delay_ms, name);
3615                         return -1;
3616                 }
3617         } else if (arg_count > 1) {
3618                 RTE_BOND_LOG(INFO,
3619                              "Link down propagation delay can be specified only once for bonded device %s",
3620                              name);
3621                 return -1;
3622         }
3623
3624         return 0;
3625 }
3626
3627 struct rte_vdev_driver pmd_bond_drv = {
3628         .probe = bond_probe,
3629         .remove = bond_remove,
3630 };
3631
3632 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3633 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3634
3635 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3636         "slave=<ifc> "
3637         "primary=<ifc> "
3638         "mode=[0-6] "
3639         "xmit_policy=[l2 | l23 | l34] "
3640         "agg_mode=[count | stable | bandwidth] "
3641         "socket_id=<int> "
3642         "mac=<mac addr> "
3643         "lsc_poll_period_ms=<int> "
3644         "up_delay=<int> "
3645         "down_delay=<int>");
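     /*
      * Illustrative devargs combining the parameters above (example values):
      *   --vdev 'net_bonding0,mode=4,agg_mode=stable,slave=0000:05:00.0,
      *           slave=0000:05:00.1,socket_id=0,lsc_poll_period_ms=100'
      */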
3646
3647 int bond_logtype;
3648
3649 RTE_INIT(bond_init_log)
3650 {
3651         bond_logtype = rte_log_register("pmd.net.bond");
3652         if (bond_logtype >= 0)
3653                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3654 }