net/bonding: set MTU on slave configure
[dpdk.git] / drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

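/* Fold the L4 source and destination ports into a single hash input. */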
#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

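/*
 * Return the byte offset of any VLAN headers (a single tag or a QinQ pair,
 * at most two) following the Ethernet header, advancing *proto to the
 * ethertype of the encapsulated payload.
 */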
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}

static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;

        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
                if (num_rx_slave) {
                        num_rx_total += num_rx_slave;
                        nb_pkts -= num_rx_slave;
                }
        }

        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

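/*
 * Identify untagged slow protocol frames (LACPDUs and marker PDUs) that must
 * be consumed by the mode 4 state machine rather than delivered to the
 * application.
 */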
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

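/*
 * Flow pattern for the dedicated queues: match any source and destination
 * MAC address but exactly the slow protocols ethertype (the mask covers all
 * 16 bits of the type field), so only LACP control traffic is steered.
 */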
static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that every slave in the bonded device supports flow director */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

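/*
 * Receive burst for mode 4 with dedicated queues: slow protocol frames are
 * filtered out in hardware, so the active slaves can simply be polled
 * round-robin, resuming from the slave that follows the one polled first on
 * the previous call.
 */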
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t i, idx;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0, idx = internals->active_slave;
                        i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
                idx = idx % slave_count;

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);
        }

        internals->active_slave = idx;

        return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        /* 2-D array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i, j;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        /*
         * Populate the slave mbuf arrays with the packets to be sent on each
         * slave, selecting the output slave with a hash based on the xmit
         * policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint8_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < dist_slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        slave_tx_fail_count[i] = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count[i];

                        /*
                         * Shift the unsent mbufs to the beginning of the
                         * array to allow reordering later
                         */
                        for (j = 0; j < slave_tx_fail_count[i]; j++) {
                                slave_bufs[i][j] =
                                        slave_bufs[i][slave_tx_count + j];
                        }
                }
        }

        /*
         * If there are tx burst failures we move packets to end of bufs to
         * preserve expected PMD behaviour of all failed transmitted packets
         * being at the end of the input mbuf array
         */
        if (unlikely(total_tx_fail_count > 0)) {
                int bufs_idx = nb_bufs - total_tx_fail_count;

                for (i = 0; i < slave_count; i++) {
                        if (slave_tx_fail_count[i] > 0) {
                                for (j = 0; j < slave_tx_fail_count[i]; j++)
                                        bufs[bufs_idx++] = slave_bufs[i][j];
                        }
                }
        }

        return total_tx_count;
}

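/*
 * Receive burst for mode 4 without dedicated queues: slow protocol frames
 * are detected in software, removed from the burst and handed to the mode 4
 * state machine, and unicast frames not addressed to the bonded MAC are
 * dropped unless promiscuous mode is enabled.
 */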
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;
        uint8_t subtype;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* If the packet type is known and not pure L2, it
                         * cannot be a slow protocol frame; skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow
                         * packet, if the slave is not in collecting state, or
                         * if the bonding interface is not in promiscuous mode
                         * and the destination address matches neither a
                         * multicast address nor the bonded MAC. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting || (!promisc &&
                                        !is_multicast_ether_addr(&hdr->d_addr) &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        internals->active_slave = idx;
        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++)     {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
                        /* Just update RX packets number for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update the
         * stats, unless the table is already full. */
        if (active_clients == MAX_CLIENTS_NUMBER)
                return;
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
                RTE_LOG(DEBUG, PMD, \
                "%s " \
                "port:%d " \
                "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "DstIP:%s " \
                "%s " \
                "%d\n", \
                info, \
                port, \
                eth_h->s_addr.addr_bytes[0], \
                eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], \
                eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], \
                eth_h->s_addr.addr_bytes[5], \
                src_ip, \
                eth_h->d_addr.addr_bytes[0], \
                eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], \
                eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], \
                eth_h->d_addr.addr_bytes[5], \
                dst_ip, \
                arp_op, \
                ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        snprintf(buf, 16, "%s", info);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        /* This is an rx path, so cast to the rx queue structure */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

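/*
 * Round-robin transmit: spread the burst packet by packet across the active
 * slaves. Note that the starting slave index is a function-local static, so
 * it is shared by all queues of all bonded devices that use this mode.
 */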
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* increment current slave index so the next call to tx burst starts on the
         * next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

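/*
 * XOR-folding hash helpers over the L2 addresses, L3 addresses and L4 ports;
 * the burst_xmit_*_hash policies below combine them to map each packet of a
 * burst to an output slave.
 */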
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct udp_hdr *udp_hdr;
        struct tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

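/*
 * Estimate how much of a slave's link capacity is left given the bytes it
 * has transmitted (load) since the last statistics update. The integer and
 * remainder parts are only meaningful relative to each other: they are used
 * by bandwidth_cmp() to order slaves by spare bandwidth.
 */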
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint8_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint8_t i, slave_id;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

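/*
 * Transmit burst for TLB (mode 5): slaves are tried in the order computed by
 * the periodic callback above (most spare bandwidth first), and the source
 * MAC of packets still carrying the primary slave's address is rewritten to
 * the transmitting slave's address, which avoids presenting the same source
 * MAC on several switch ports.
 */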
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

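/*
 * Transmit burst for ALB (mode 6): ARP packets are assigned to slaves
 * through the client table so that each client's traffic tends to stay on
 * one slave, ARP update packets are generated to rebalance clients when
 * needed, and all other traffic falls back to the TLB transmit policy.
 */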
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        /* Move only the unsent packets to the tail of bufs */
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                /* Move only the unsent packets to the tail of bufs */
                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS]
                                                [slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

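/*
 * Transmit burst for balance (mode 3): packets are distributed across all
 * active slaves using the transmit hash policy configured for the device
 * (L2, L2+L3 or L3+L4).
 */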
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i, j;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        /*
         * Populate the slave mbuf arrays with the packets to be sent on each
         * slave, selecting the output slave with a hash based on the xmit
         * policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint8_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        slave_tx_fail_count[i] = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count[i];

                        /*
                         * Shift the unsent mbufs to the beginning of the
                         * array to allow reordering later
                         */
                        for (j = 0; j < slave_tx_fail_count[i]; j++) {
                                slave_bufs[i][j] =
                                        slave_bufs[i][slave_tx_count + j];
                        }
                }
        }

        /*
         * If there are tx burst failures we move packets to end of bufs to
         * preserve expected PMD behaviour of all failed transmitted packets
         * being at the end of the input mbuf array
         */
        if (unlikely(total_tx_fail_count > 0)) {
                int bufs_idx = nb_bufs - total_tx_fail_count;

                for (i = 0; i < slave_count; i++) {
                        if (slave_tx_fail_count[i] > 0) {
                                for (j = 0; j < slave_tx_fail_count[i]; j++)
                                        bufs[bufs_idx++] = slave_bufs[i][j];
                        }
                }
        }

        return total_tx_count;
}

1312 static uint16_t
1313 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1314                 uint16_t nb_bufs)
1315 {
1316         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1317         struct bond_dev_private *internals = bd_tx_q->dev_private;
1318
1319         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1320         uint16_t slave_count;
1321
1322         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1323         uint16_t dist_slave_count;
1324
1325         /* 2-D array to sort mbufs for transmission on each slave into */
1326         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1327         /* Number of mbufs for transmission on each slave */
1328         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1329         /* Mapping array generated by hash function to map mbufs to slaves */
1330         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1331
1332         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1333         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1334
1335         uint16_t i, j;
1336
1337         if (unlikely(nb_bufs == 0))
1338                 return 0;
1339
1340         /* Copy slave list to protect against slave up/down changes during tx
1341          * bursting */
1342         slave_count = internals->active_slave_count;
1343         if (unlikely(slave_count < 1))
1344                 return 0;
1345
1346         memcpy(slave_port_ids, internals->active_slaves,
1347                         sizeof(slave_port_ids[0]) * slave_count);
1348
1349         dist_slave_count = 0;
1350         for (i = 0; i < slave_count; i++) {
1351                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1352
1353                 if (ACTOR_STATE(port, DISTRIBUTING))
1354                         dist_slave_port_ids[dist_slave_count++] =
1355                                         slave_port_ids[i];
1356         }
1357
1358         if (likely(dist_slave_count > 1)) {
1359
1360                 /*
1361                  * Populate slaves mbuf with the packets which are to be sent
1362                  * on it, selecting output slave using hash based on xmit policy
1363                  */
1364                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1365                                 bufs_slave_port_idxs);
1366
1367                 for (i = 0; i < nb_bufs; i++) {
1368                         /*
1369                          * Populate slave mbuf arrays with mbufs for that
1370                          * slave
1371                          */
1372                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1373
1374                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1375                                         bufs[i];
1376                 }
1377
1378
1379                 /* Send packet burst on each slave device */
1380                 for (i = 0; i < dist_slave_count; i++) {
1381                         if (slave_nb_bufs[i] == 0)
1382                                 continue;
1383
1384                         slave_tx_count = rte_eth_tx_burst(
1385                                         dist_slave_port_ids[i],
1386                                         bd_tx_q->queue_id, slave_bufs[i],
1387                                         slave_nb_bufs[i]);
1388
1389                         total_tx_count += slave_tx_count;
1390
1391                         /* If tx burst fails move packets to end of bufs */
1392                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1393                                 slave_tx_fail_count[i] = slave_nb_bufs[i] -
1394                                                 slave_tx_count;
1395                                 total_tx_fail_count += slave_tx_fail_count[i];
1396
1397                                 /*
1398                                  * Shift the unsent mbufs to the beginning
1399                                  * of the array to allow reordering later
1400                                  */
1401                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
1402                                         slave_bufs[i][j] =
1403                                                 slave_bufs[i]
1404                                                         [slave_tx_count
1405                                                         + j];
1406                         }
1407                 }
1408
1409                 /*
1410                  * If there are tx burst failures we move the failed packets
1411                  * to the end of bufs to preserve the expected PMD behaviour
1412                  * that all unsent packets sit at the end of the input array
1413                  */
1414                 if (unlikely(total_tx_fail_count > 0)) {
1415                         int bufs_idx = nb_bufs - total_tx_fail_count;
1416
1417                         for (i = 0; i < slave_count; i++) {
1418                                 if (slave_tx_fail_count[i] > 0) {
1419                                         for (j = 0;
1420                                                 j < slave_tx_fail_count[i];
1421                                                 j++) {
1422                                                 bufs[bufs_idx++] =
1423                                                         slave_bufs[i][j];
1424                                         }
1425                                 }
1426                         }
1427                 }
1428         }
1429
1430         /* Check for LACP control packets and send if available */
1431         for (i = 0; i < slave_count; i++) {
1432                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1433                 struct rte_mbuf *ctrl_pkt = NULL;
1434
1435                 if (likely(rte_ring_empty(port->tx_ring)))
1436                         continue;
1437
1438                 if (rte_ring_dequeue(port->tx_ring,
1439                                      (void **)&ctrl_pkt) != -ENOENT) {
1440                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1441                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1442                         /*
1443                          * re-enqueue LAG control plane packets to buffering
1444                          * ring if transmission fails so the packet isn't lost.
1445                          */
1446                         if (slave_tx_count != 1)
1447                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1448                 }
1449         }
1450
1451         return total_tx_count;
1452 }
1453
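/*
 * Broadcast mode transmit handler: every packet is sent on every active
 * slave. Each mbuf's reference count is first bumped by (slave count - 1)
 * so that all slaves share the same packets; on partial failure only the
 * surplus references are freed, leaving the caller owning exactly the
 * packets reported as unsent by the most successful slave.
 */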
1454 static uint16_t
1455 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1456                 uint16_t nb_pkts)
1457 {
1458         struct bond_dev_private *internals;
1459         struct bond_tx_queue *bd_tx_q;
1460
1461         uint8_t tx_failed_flag = 0, num_of_slaves;
1462         uint16_t slaves[RTE_MAX_ETHPORTS];
1463
1464         uint16_t max_nb_of_tx_pkts = 0;
1465
1466         int slave_tx_total[RTE_MAX_ETHPORTS];
1467         int i, most_successful_tx_slave = -1;
1468
1469         bd_tx_q = (struct bond_tx_queue *)queue;
1470         internals = bd_tx_q->dev_private;
1471
1472         /* Copy slave list to protect against slave up/down changes during tx
1473          * bursting */
1474         num_of_slaves = internals->active_slave_count;
1475         memcpy(slaves, internals->active_slaves,
1476                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1477
1478         if (num_of_slaves < 1)
1479                 return 0;
1480
1481         /* Increment reference count on mbufs */
1482         for (i = 0; i < nb_pkts; i++)
1483                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1484
1485         /* Transmit burst on each active slave */
1486         for (i = 0; i < num_of_slaves; i++) {
1487                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1488                                         bufs, nb_pkts);
1489
1490                 if (unlikely(slave_tx_total[i] < nb_pkts))
1491                         tx_failed_flag = 1;
1492
1493                 /* record the value and slave index for the slave which transmits the
1494                  * maximum number of packets */
1495                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1496                         max_nb_of_tx_pkts = slave_tx_total[i];
1497                         most_successful_tx_slave = i;
1498                 }
1499         }
1500
1501         /* If slaves fail to transmit packets from the burst, the calling
1502          * application is unaware of the extra mbuf references, so we must
1503          * handle the failures of every slave except the most successful one.
1504          */
1505         if (unlikely(tx_failed_flag))
1506                 for (i = 0; i < num_of_slaves; i++)
1507                         if (i != most_successful_tx_slave)
1508                                 while (slave_tx_total[i] < nb_pkts)
1509                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1510
1511         return max_nb_of_tx_pkts;
1512 }
1513
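/*
 * In 802.3ad mode all slaves must share identical link properties, so the
 * first slave's autoneg/duplex/speed are recorded here and
 * link_properties_valid() below rejects any slave that does not match.
 */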
1514 void
1515 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1516 {
1517         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1518
1519         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1520                 /**
1521                  * If in mode 4 then save the link properties of the first
1522                  * slave; all subsequent slaves must match these properties
1523                  */
1524                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1525
1526                 bond_link->link_autoneg = slave_link->link_autoneg;
1527                 bond_link->link_duplex = slave_link->link_duplex;
1528                 bond_link->link_speed = slave_link->link_speed;
1529         } else {
1530                 /**
1531                  * In any other mode the link properties are set to default
1532                  * values of AUTONEG/DUPLEX
1533                  */
1534                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1535                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1536         }
1537 }
1538
1539 int
1540 link_properties_valid(struct rte_eth_dev *ethdev,
1541                 struct rte_eth_link *slave_link)
1542 {
1543         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1544
1545         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1546                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1547
1548                 if (bond_link->link_duplex != slave_link->link_duplex ||
1549                         bond_link->link_autoneg != slave_link->link_autoneg ||
1550                         bond_link->link_speed != slave_link->link_speed)
1551                         return -1;
1552         }
1553
1554         return 0;
1555 }
1556
1557 int
1558 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1559 {
1560         struct ether_addr *mac_addr;
1561
1562         if (eth_dev == NULL) {
1563                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1564                 return -1;
1565         }
1566
1567         if (dst_mac_addr == NULL) {
1568                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1569                 return -1;
1570         }
1571
1572         mac_addr = eth_dev->data->mac_addrs;
1573
1574         ether_addr_copy(mac_addr, dst_mac_addr);
1575         return 0;
1576 }
1577
1578 int
1579 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1580 {
1581         struct ether_addr *mac_addr;
1582
1583         if (eth_dev == NULL) {
1584                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1585                 return -1;
1586         }
1587
1588         if (new_mac_addr == NULL) {
1589                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1590                 return -1;
1591         }
1592
1593         mac_addr = eth_dev->data->mac_addrs;
1594
1595         /* If new MAC is different from current MAC then update */
1596         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1597                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1598
1599         return 0;
1600 }
1601
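/*
 * Per-mode MAC policy: in round robin, balance and broadcast modes every
 * slave is programmed with the bonded device's MAC; in 802.3ad the mode 4
 * state machine manages the addresses; in the active backup style modes
 * only the current primary carries the bonded MAC while the other slaves
 * keep their original (persisted) addresses.
 */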
1602 int
1603 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1604 {
1605         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1606         int i;
1607
1608         /* Update slave devices MAC addresses */
1609         if (internals->slave_count < 1)
1610                 return -1;
1611
1612         switch (internals->mode) {
1613         case BONDING_MODE_ROUND_ROBIN:
1614         case BONDING_MODE_BALANCE:
1615         case BONDING_MODE_BROADCAST:
1616                 for (i = 0; i < internals->slave_count; i++) {
1617                         if (rte_eth_dev_default_mac_addr_set(
1618                                         internals->slaves[i].port_id,
1619                                         bonded_eth_dev->data->mac_addrs)) {
1620                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1621                                                 internals->slaves[i].port_id);
1622                                 return -1;
1623                         }
1624                 }
1625                 break;
1626         case BONDING_MODE_8023AD:
1627                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1628                 break;
1629         case BONDING_MODE_ACTIVE_BACKUP:
1630         case BONDING_MODE_TLB:
1631         case BONDING_MODE_ALB:
1632         default:
1633                 for (i = 0; i < internals->slave_count; i++) {
1634                         if (internals->slaves[i].port_id ==
1635                                         internals->current_primary_port) {
1636                                 if (rte_eth_dev_default_mac_addr_set(
1637                                                 internals->current_primary_port,
1638                                                 bonded_eth_dev->data->mac_addrs)) {
1639                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1640                                                         internals->current_primary_port);
1641                                         return -1;
1642                                 }
1643                         } else {
1644                                 if (rte_eth_dev_default_mac_addr_set(
1645                                                 internals->slaves[i].port_id,
1646                                                 &internals->slaves[i].persisted_mac_addr)) {
1647                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1648                                                         internals->slaves[i].port_id);
1649                                         return -1;
1650                                 }
1651                         }
1652                 }
1653         }
1654
1655         return 0;
1656 }
1657
1658 int
1659 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1660 {
1661         struct bond_dev_private *internals;
1662
1663         internals = eth_dev->data->dev_private;
1664
1665         switch (mode) {
1666         case BONDING_MODE_ROUND_ROBIN:
1667                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1668                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1669                 break;
1670         case BONDING_MODE_ACTIVE_BACKUP:
1671                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1672                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1673                 break;
1674         case BONDING_MODE_BALANCE:
1675                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1676                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1677                 break;
1678         case BONDING_MODE_BROADCAST:
1679                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1680                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1681                 break;
1682         case BONDING_MODE_8023AD:
1683                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1684                         return -1;
1685
1686                 if (internals->mode4.dedicated_queues.enabled == 0) {
1687                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1688                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1689                         RTE_LOG(WARNING, PMD,
1690                                 "Using mode 4, it is necessary to do TX burst "
1691                                 "and RX burst at least every 100ms.\n");
1692                 } else {
1693                         /* Use flow director's optimization */
1694                         eth_dev->rx_pkt_burst =
1695                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1696                         eth_dev->tx_pkt_burst =
1697                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1698                 }
1699                 break;
1700         case BONDING_MODE_TLB:
1701                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1702                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1703                 break;
1704         case BONDING_MODE_ALB:
1705                 if (bond_mode_alb_enable(eth_dev) != 0)
1706                         return -1;
1707
1708                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1709                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1710                 break;
1711         default:
1712                 return -1;
1713         }
1714
1715         internals->mode = mode;
1716
1717         return 0;
1718 }
1719
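/*
 * Illustrative usage sketch (not part of this driver): applications do
 * not call bond_ethdev_mode_set() directly; the mode is chosen through
 * the public API, which invokes it internally. Port ids below are
 * placeholders:
 *
 *     int bond_port = rte_eth_bond_create("net_bonding0",
 *                     BONDING_MODE_8023AD, rte_socket_id());
 *     if (bond_port >= 0)
 *             rte_eth_bond_slave_add(bond_port, slave_port_id);
 */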
1720
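/*
 * Create the per-slave mempool used for LACP control traffic and, when
 * dedicated queues are enabled, set up the extra Rx/Tx queue pair
 * reserved for that traffic on the slave port.
 */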
1721 static int
1722 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1723                 struct rte_eth_dev *slave_eth_dev)
1724 {
1725         int errval = 0;
1726         struct bond_dev_private *internals = (struct bond_dev_private *)
1727                 bonded_eth_dev->data->dev_private;
1728         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1729
1730         if (port->slow_pool == NULL) {
1731                 char mem_name[256];
1732                 int slave_id = slave_eth_dev->data->port_id;
1733
1734                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1735                                 slave_id);
1736                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1737                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1738                         slave_eth_dev->data->numa_node);
1739
1740                 /* Any memory allocation failure in initialization is critical because
1741                  * resources can't be freed, so reinitialization is impossible. */
1742                 if (port->slow_pool == NULL) {
1743                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1744                                 slave_id, mem_name, rte_strerror(rte_errno));
1745                 }
1746         }
1747
1748         if (internals->mode4.dedicated_queues.enabled == 1) {
1749                 /* Configure slow Rx queue */
1750
1751                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1752                                 internals->mode4.dedicated_queues.rx_qid, 128,
1753                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1754                                 NULL, port->slow_pool);
1755                 if (errval != 0) {
1756                         RTE_BOND_LOG(ERR,
1757                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1758                                         slave_eth_dev->data->port_id,
1759                                         internals->mode4.dedicated_queues.rx_qid,
1760                                         errval);
1761                         return errval;
1762                 }
1763
1764                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1765                                 internals->mode4.dedicated_queues.tx_qid, 512,
1766                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1767                                 NULL);
1768                 if (errval != 0) {
1769                         RTE_BOND_LOG(ERR,
1770                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1771                                 slave_eth_dev->data->port_id,
1772                                 internals->mode4.dedicated_queues.tx_qid,
1773                                 errval);
1774                         return errval;
1775                 }
1776         }
1777         return 0;
1778 }
1779
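/*
 * (Re)configure a slave port to mirror the bonded device: stop it,
 * propagate the RSS/VLAN/MTU settings, configure it with the bonded
 * queue counts (plus the dedicated 802.3ad queues if enabled), set up
 * all Rx/Tx queues, install the slow-queue flow rule where applicable,
 * start the port, synchronize the RSS RETA and finally seed the initial
 * link status.
 */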
1780 int
1781 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1782                 struct rte_eth_dev *slave_eth_dev)
1783 {
1784         struct bond_rx_queue *bd_rx_q;
1785         struct bond_tx_queue *bd_tx_q;
1786         uint16_t nb_rx_queues;
1787         uint16_t nb_tx_queues;
1788
1789         int errval;
1790         uint16_t q_id;
1791         struct rte_flow_error flow_error;
1792
1793         struct bond_dev_private *internals = (struct bond_dev_private *)
1794                 bonded_eth_dev->data->dev_private;
1795
1796         /* Stop slave */
1797         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1798
1799         /* Enable interrupts on slave device if supported */
1800         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1801                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1802
1803         /* If RSS is enabled for bonding, try to enable it for slaves  */
1804         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1805                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1806                                 != 0) {
1807                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1808                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1809                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1810                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1811                 } else {
1812                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1813                 }
1814
1815                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1816                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1817                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1818                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1819         }
1820
1821         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1822                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1823
1824         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1825         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1826
1827         if (internals->mode == BONDING_MODE_8023AD) {
1828                 if (internals->mode4.dedicated_queues.enabled == 1) {
1829                         nb_rx_queues++;
1830                         nb_tx_queues++;
1831                 }
1832         }
1833
1834         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1835                                      bonded_eth_dev->data->mtu);
1836         if (errval != 0 && errval != -ENOTSUP) {
1837                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1838                                 slave_eth_dev->data->port_id, errval);
1839                 return errval;
1840         }
1841
1842         /* Configure device */
1843         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1844                         nb_rx_queues, nb_tx_queues,
1845                         &(slave_eth_dev->data->dev_conf));
1846         if (errval != 0) {
1847                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1848                                 slave_eth_dev->data->port_id, errval);
1849                 return errval;
1850         }
1851
1852         /* Setup Rx Queues */
1853         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1854                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1855
1856                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1857                                 bd_rx_q->nb_rx_desc,
1858                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1859                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1860                 if (errval != 0) {
1861                         RTE_BOND_LOG(ERR,
1862                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1863                                         slave_eth_dev->data->port_id, q_id, errval);
1864                         return errval;
1865                 }
1866         }
1867
1868         /* Setup Tx Queues */
1869         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1870                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1871
1872                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1873                                 bd_tx_q->nb_tx_desc,
1874                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1875                                 &bd_tx_q->tx_conf);
1876                 if (errval != 0) {
1877                         RTE_BOND_LOG(ERR,
1878                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1879                                 slave_eth_dev->data->port_id, q_id, errval);
1880                         return errval;
1881                 }
1882         }
1883
1884         if (internals->mode == BONDING_MODE_8023AD &&
1885                         internals->mode4.dedicated_queues.enabled == 1) {
1886                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1887                 if (errval != 0)
1888                         return errval;
1889
1890                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1891                                 slave_eth_dev->data->port_id) != 0) {
1892                         RTE_BOND_LOG(ERR,
1893                                 "8023ad flow verification failed: port=%d",
1894                                 slave_eth_dev->data->port_id);
1895                         return -1;
1896                 }
1897
1898                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1899                         rte_flow_destroy(slave_eth_dev->data->port_id,
1900                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1901                                         &flow_error);
1902
1903                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1904                                 slave_eth_dev->data->port_id);
1905         }
1906
1907         /* Start device */
1908         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1909         if (errval != 0) {
1910                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1911                                 slave_eth_dev->data->port_id, errval);
1912                 return -1;
1913         }
1914
1915         /* If RSS is enabled for bonding, synchronize RETA */
1916         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1917                 int i;
1918                 struct bond_dev_private *internals;
1919
1920                 internals = bonded_eth_dev->data->dev_private;
1921
1922                 for (i = 0; i < internals->slave_count; i++) {
1923                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1924                                 errval = rte_eth_dev_rss_reta_update(
1925                                                 slave_eth_dev->data->port_id,
1926                                                 &internals->reta_conf[0],
1927                                                 internals->slaves[i].reta_size);
1928                                 if (errval != 0) {
1929                                         RTE_LOG(WARNING, PMD,
1930                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1931                                                         " RSS Configuration for bonding may be inconsistent.\n",
1932                                                         slave_eth_dev->data->port_id, errval);
1933                                 }
1934                                 break;
1935                         }
1936                 }
1937         }
1938
1939         /* If lsc interrupt is set, check initial slave's link status */
1940         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1941                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1942                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1943                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1944                         NULL);
1945         }
1946
1947         return 0;
1948 }
1949
1950 void
1951 slave_remove(struct bond_dev_private *internals,
1952                 struct rte_eth_dev *slave_eth_dev)
1953 {
1954         uint8_t i;
1955
1956         for (i = 0; i < internals->slave_count; i++)
1957                 if (internals->slaves[i].port_id ==
1958                                 slave_eth_dev->data->port_id)
1959                         break;
1960
1961         if (i < (internals->slave_count - 1))
1962                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1963                                 sizeof(internals->slaves[0]) *
1964                                 (internals->slave_count - i - 1));
1965
1966         internals->slave_count--;
1967
1968         /* force reconfiguration of slave interfaces */
1969         _rte_eth_dev_reset(slave_eth_dev);
1970 }
1971
1972 static void
1973 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1974
1975 void
1976 slave_add(struct bond_dev_private *internals,
1977                 struct rte_eth_dev *slave_eth_dev)
1978 {
1979         struct bond_slave_details *slave_details =
1980                         &internals->slaves[internals->slave_count];
1981
1982         slave_details->port_id = slave_eth_dev->data->port_id;
1983         slave_details->last_link_status = 0;
1984
1985         /* Mark slave devices that don't support interrupts so we can
1986          * compensate when we start the bond
1987          */
1988         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1989                 slave_details->link_status_poll_enabled = 1;
1990         }
1991
1992         slave_details->link_status_wait_to_complete = 0;
1993         /* Save the slave's original MAC so it can be restored later */
1994         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1995                         sizeof(struct ether_addr));
1996 }
1997
1998 void
1999 bond_ethdev_primary_set(struct bond_dev_private *internals,
2000                 uint16_t slave_port_id)
2001 {
2002         int i;
2003
2004         if (internals->active_slave_count < 1)
2005                 internals->current_primary_port = slave_port_id;
2006         else
2007                 /* Search bonded device slave ports for new proposed primary port */
2008                 for (i = 0; i < internals->active_slave_count; i++) {
2009                         if (internals->active_slaves[i] == slave_port_id)
2010                                 internals->current_primary_port = slave_port_id;
2011                 }
2012 }
2013
2014 static void
2015 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2016
2017 static int
2018 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2019 {
2020         struct bond_dev_private *internals;
2021         int i;
2022
2023         /* slave eth dev will be started by bonded device */
2024         if (check_for_bonded_ethdev(eth_dev)) {
2025                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2026                                 eth_dev->data->port_id);
2027                 return -1;
2028         }
2029
2030         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2031         eth_dev->data->dev_started = 1;
2032
2033         internals = eth_dev->data->dev_private;
2034
2035         if (internals->slave_count == 0) {
2036                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2037                 return -1;
2038         }
2039
2040         if (internals->user_defined_mac == 0) {
2041                 struct ether_addr *new_mac_addr = NULL;
2042
2043                 for (i = 0; i < internals->slave_count; i++)
2044                         if (internals->slaves[i].port_id == internals->primary_port)
2045                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2046
2047                 if (new_mac_addr == NULL)
2048                         return -1;
2049
2050                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2051                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2052                                         eth_dev->data->port_id);
2053                         return -1;
2054                 }
2055         }
2056
2057         /* Update all slave devices' MACs */
2058         if (mac_address_slaves_update(eth_dev) != 0)
2059                 return -1;
2060
2061         /* If bonded device is configured in promiscuous mode then re-apply config */
2062         if (internals->promiscuous_en)
2063                 bond_ethdev_promiscuous_enable(eth_dev);
2064
2065         if (internals->mode == BONDING_MODE_8023AD) {
2066                 if (internals->mode4.dedicated_queues.enabled == 1) {
2067                         internals->mode4.dedicated_queues.rx_qid =
2068                                         eth_dev->data->nb_rx_queues;
2069                         internals->mode4.dedicated_queues.tx_qid =
2070                                         eth_dev->data->nb_tx_queues;
2071                 }
2072         }
2073
2074
2075         /* Reconfigure each slave device if starting bonded device */
2076         for (i = 0; i < internals->slave_count; i++) {
2077                 struct rte_eth_dev *slave_ethdev =
2078                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2079                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2080                         RTE_BOND_LOG(ERR,
2081                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2082                                 eth_dev->data->port_id,
2083                                 internals->slaves[i].port_id);
2084                         return -1;
2085                 }
2086                 /* We will need to poll for link status if any slave doesn't
2087                  * support interrupts
2088                  */
2089                 if (internals->slaves[i].link_status_poll_enabled)
2090                         internals->link_status_polling_enabled = 1;
2091         }
2092         /* start polling if needed */
2093         if (internals->link_status_polling_enabled) {
2094                 rte_eal_alarm_set(
2095                         internals->link_status_polling_interval_ms * 1000,
2096                         bond_ethdev_slave_link_status_change_monitor,
2097                         (void *)&rte_eth_devices[internals->port_id]);
2098         }
2099
2100         if (internals->user_defined_primary_port)
2101                 bond_ethdev_primary_set(internals, internals->primary_port);
2102
2103         if (internals->mode == BONDING_MODE_8023AD)
2104                 bond_mode_8023ad_start(eth_dev);
2105
2106         if (internals->mode == BONDING_MODE_TLB ||
2107                         internals->mode == BONDING_MODE_ALB)
2108                 bond_tlb_enable(internals);
2109
2110         return 0;
2111 }
2112
2113 static void
2114 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2115 {
2116         uint8_t i;
2117
2118         if (dev->data->rx_queues != NULL) {
2119                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2120                         rte_free(dev->data->rx_queues[i]);
2121                         dev->data->rx_queues[i] = NULL;
2122                 }
2123                 dev->data->nb_rx_queues = 0;
2124         }
2125
2126         if (dev->data->tx_queues != NULL) {
2127                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2128                         rte_free(dev->data->tx_queues[i]);
2129                         dev->data->tx_queues[i] = NULL;
2130                 }
2131                 dev->data->nb_tx_queues = 0;
2132         }
2133 }
2134
2135 void
2136 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2137 {
2138         struct bond_dev_private *internals = eth_dev->data->dev_private;
2139         uint8_t i;
2140
2141         if (internals->mode == BONDING_MODE_8023AD) {
2142                 struct port *port;
2143                 void *pkt = NULL;
2144
2145                 bond_mode_8023ad_stop(eth_dev);
2146
2147                 /* Discard all messages to/from mode 4 state machines */
2148                 for (i = 0; i < internals->active_slave_count; i++) {
2149                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2150
2151                         RTE_ASSERT(port->rx_ring != NULL);
2152                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2153                                 rte_pktmbuf_free(pkt);
2154
2155                         RTE_ASSERT(port->tx_ring != NULL);
2156                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2157                                 rte_pktmbuf_free(pkt);
2158                 }
2159         }
2160
2161         if (internals->mode == BONDING_MODE_TLB ||
2162                         internals->mode == BONDING_MODE_ALB) {
2163                 bond_tlb_disable(internals);
2164                 for (i = 0; i < internals->active_slave_count; i++)
2165                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2166         }
2167
2168         internals->active_slave_count = 0;
2169         internals->link_status_polling_enabled = 0;
2170         for (i = 0; i < internals->slave_count; i++)
2171                 internals->slaves[i].last_link_status = 0;
2172
2173         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2174         eth_dev->data->dev_started = 0;
2175 }
2176
2177 void
2178 bond_ethdev_close(struct rte_eth_dev *dev)
2179 {
2180         struct bond_dev_private *internals = dev->data->dev_private;
2181         uint8_t bond_port_id = internals->port_id;
2182         int skipped = 0;
2183
2184         RTE_LOG(INFO, PMD, "Closing bonded device %s\n", dev->device->name);
2185         while (internals->slave_count != skipped) {
2186                 uint16_t port_id = internals->slaves[skipped].port_id;
2187
2188                 rte_eth_dev_stop(port_id);
2189
2190                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2191                 RTE_LOG(ERR, PMD,
2192                                 "Failed to remove port %d from bonded device "
2193                                 "%s\n", port_id, dev->device->name);
2194                         skipped++;
2195                 }
2196         }
2197         bond_ethdev_free_queues(dev);
2198         rte_bitmap_reset(internals->vlan_filter_bmp);
2199 }
2200
2201 /* forward declaration */
2202 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2203
2204 static void
2205 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2206 {
2207         struct bond_dev_private *internals = dev->data->dev_private;
2208
2209         uint16_t max_nb_rx_queues = UINT16_MAX;
2210         uint16_t max_nb_tx_queues = UINT16_MAX;
2211
2212         dev_info->max_mac_addrs = 1;
2213
2214         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2215                         internals->candidate_max_rx_pktlen :
2216                         ETHER_MAX_JUMBO_FRAME_LEN;
2217
2218         /* Max number of tx/rx queues that the bonded device can support is the
2219          * minimum value across the bonded slaves, as all slaves must be capable
2220          * of supporting the same number of tx/rx queues.
2221          */
2222         if (internals->slave_count > 0) {
2223                 struct rte_eth_dev_info slave_info;
2224                 uint8_t idx;
2225
2226                 for (idx = 0; idx < internals->slave_count; idx++) {
2227                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2228                                         &slave_info);
2229
2230                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2231                                 max_nb_rx_queues = slave_info.max_rx_queues;
2232
2233                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2234                                 max_nb_tx_queues = slave_info.max_tx_queues;
2235                 }
2236         }
2237
2238         dev_info->max_rx_queues = max_nb_rx_queues;
2239         dev_info->max_tx_queues = max_nb_tx_queues;
2240
2241         /**
2242          * If dedicated hw queues enabled for link bonding device in LACP mode
2243          * then we need to reduce the maximum number of data path queues by 1.
2244          */
2245         if (internals->mode == BONDING_MODE_8023AD &&
2246                 internals->mode4.dedicated_queues.enabled == 1) {
2247                 dev_info->max_rx_queues--;
2248                 dev_info->max_tx_queues--;
2249         }
2250
2251         dev_info->min_rx_bufsize = 0;
2252
2253         dev_info->rx_offload_capa = internals->rx_offload_capa;
2254         dev_info->tx_offload_capa = internals->tx_offload_capa;
2255         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2256
2257         dev_info->reta_size = internals->reta_size;
2258 }
2259
2260 static int
2261 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2262 {
2263         int res;
2264         uint16_t i;
2265         struct bond_dev_private *internals = dev->data->dev_private;
2266
2267         /* don't do this while a slave is being added */
2268         rte_spinlock_lock(&internals->lock);
2269
2270         if (on)
2271                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2272         else
2273                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2274
2275         for (i = 0; i < internals->slave_count; i++) {
2276                 uint16_t port_id = internals->slaves[i].port_id;
2277
2278                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2279                 if (res == -ENOTSUP)
2280                         RTE_LOG(WARNING, PMD,
2281                                 "Setting VLAN filter on slave port %u not supported.\n",
2282                                 port_id);
2283         }
2284
2285         rte_spinlock_unlock(&internals->lock);
2286         return 0;
2287 }
2288
2289 static int
2290 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2291                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2292                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2293 {
2294         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2295                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2296                                         0, dev->data->numa_node);
2297         if (bd_rx_q == NULL)
2298                 return -1;
2299
2300         bd_rx_q->queue_id = rx_queue_id;
2301         bd_rx_q->dev_private = dev->data->dev_private;
2302
2303         bd_rx_q->nb_rx_desc = nb_rx_desc;
2304
2305         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2306         bd_rx_q->mb_pool = mb_pool;
2307
2308         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2309
2310         return 0;
2311 }
2312
2313 static int
2314 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2315                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2316                 const struct rte_eth_txconf *tx_conf)
2317 {
2318         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2319                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2320                                         0, dev->data->numa_node);
2321
2322         if (bd_tx_q == NULL)
2323                 return -1;
2324
2325         bd_tx_q->queue_id = tx_queue_id;
2326         bd_tx_q->dev_private = dev->data->dev_private;
2327
2328         bd_tx_q->nb_tx_desc = nb_tx_desc;
2329         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2330
2331         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2332
2333         return 0;
2334 }
2335
2336 static void
2337 bond_ethdev_rx_queue_release(void *queue)
2338 {
2339         if (queue == NULL)
2340                 return;
2341
2342         rte_free(queue);
2343 }
2344
2345 static void
2346 bond_ethdev_tx_queue_release(void *queue)
2347 {
2348         if (queue == NULL)
2349                 return;
2350
2351         rte_free(queue);
2352 }
2353
2354 static void
2355 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2356 {
2357         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2358         struct bond_dev_private *internals;
2359
2360         /* Default value for polling slave found is true as we don't want to
2361          * disable the polling thread if we cannot get the lock */
2362         int i, polling_slave_found = 1;
2363
2364         if (cb_arg == NULL)
2365                 return;
2366
2367         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2368         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2369
2370         if (!bonded_ethdev->data->dev_started ||
2371                 !internals->link_status_polling_enabled)
2372                 return;
2373
2374         /* If the device is currently being configured then don't check the
2375          * slaves' link status; wait until the next period */
2376         if (rte_spinlock_trylock(&internals->lock)) {
2377                 if (internals->slave_count > 0)
2378                         polling_slave_found = 0;
2379
2380                 for (i = 0; i < internals->slave_count; i++) {
2381                         if (!internals->slaves[i].link_status_poll_enabled)
2382                                 continue;
2383
2384                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2385                         polling_slave_found = 1;
2386
2387                         /* Update slave link status */
2388                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2389                                         internals->slaves[i].link_status_wait_to_complete);
2390
2391                         /* if link status has changed since last checked then call lsc
2392                          * event callback */
2393                         if (slave_ethdev->data->dev_link.link_status !=
2394                                         internals->slaves[i].last_link_status) {
2395                                 internals->slaves[i].last_link_status =
2396                                                 slave_ethdev->data->dev_link.link_status;
2397
2398                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2399                                                 RTE_ETH_EVENT_INTR_LSC,
2400                                                 &bonded_ethdev->data->port_id,
2401                                                 NULL);
2402                         }
2403                 }
2404                 rte_spinlock_unlock(&internals->lock);
2405         }
2406
2407         if (polling_slave_found)
2408                 /* Set alarm to continue monitoring link status of slave ethdevs */
2409                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2410                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2411 }
2412
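/*
 * Report the bonded device's link state. Aggregate speed is mode
 * dependent: broadcast reports the minimum active slave speed (e.g. two
 * slaves at 10G and 1G yield 1G), active backup reports the current
 * primary's speed, and the remaining modes report the sum of all active
 * slave speeds (10G + 1G yield 11G).
 */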
2413 static int
2414 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2415 {
2416         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2417
2418         struct bond_dev_private *bond_ctx;
2419         struct rte_eth_link slave_link;
2420
2421         uint32_t idx;
2422
2423         bond_ctx = ethdev->data->dev_private;
2424
2425         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2426
2427         if (ethdev->data->dev_started == 0 ||
2428                         bond_ctx->active_slave_count == 0) {
2429                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2430                 return 0;
2431         }
2432
2433         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2434
2435         if (wait_to_complete)
2436                 link_update = rte_eth_link_get;
2437         else
2438                 link_update = rte_eth_link_get_nowait;
2439
2440         switch (bond_ctx->mode) {
2441         case BONDING_MODE_BROADCAST:
2442                 /**
2443                  * Setting link speed to UINT32_MAX to ensure we pick up the
2444                  * value of the first active slave
2445                  */
2446                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2447
2448                 /**
2449                  * Link speed is the minimum of all the slaves' link speeds,
2450                  * as packet loss will occur on the slowest slave if
2451                  * transmission at a higher rate is attempted
2452                  */
2453                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2454                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2455
2456                         if (slave_link.link_speed <
2457                                         ethdev->data->dev_link.link_speed)
2458                                 ethdev->data->dev_link.link_speed =
2459                                                 slave_link.link_speed;
2460                 }
2461                 break;
2462         case BONDING_MODE_ACTIVE_BACKUP:
2463                 /* Current primary slave */
2464                 link_update(bond_ctx->current_primary_port, &slave_link);
2465
2466                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2467                 break;
2468         case BONDING_MODE_8023AD:
2469                 ethdev->data->dev_link.link_autoneg =
2470                                 bond_ctx->mode4.slave_link.link_autoneg;
2471                 ethdev->data->dev_link.link_duplex =
2472                                 bond_ctx->mode4.slave_link.link_duplex;
2473                 /* fall through to update link speed */
2474         case BONDING_MODE_ROUND_ROBIN:
2475         case BONDING_MODE_BALANCE:
2476         case BONDING_MODE_TLB:
2477         case BONDING_MODE_ALB:
2478         default:
2479                 /**
2480                  * In these modes the maximum theoretical link speed is the
2481                  * sum of all the slaves' link speeds
2482                  */
2483                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2484
2485                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2486                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2487
2488                         ethdev->data->dev_link.link_speed +=
2489                                         slave_link.link_speed;
2490                 }
2491         }
2492
2493
2494         return 0;
2495 }
2496
2497
2498 static int
2499 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2500 {
2501         struct bond_dev_private *internals = dev->data->dev_private;
2502         struct rte_eth_stats slave_stats;
2503         int i, j;
2504
2505         for (i = 0; i < internals->slave_count; i++) {
2506                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2507
2508                 stats->ipackets += slave_stats.ipackets;
2509                 stats->opackets += slave_stats.opackets;
2510                 stats->ibytes += slave_stats.ibytes;
2511                 stats->obytes += slave_stats.obytes;
2512                 stats->imissed += slave_stats.imissed;
2513                 stats->ierrors += slave_stats.ierrors;
2514                 stats->oerrors += slave_stats.oerrors;
2515                 stats->rx_nombuf += slave_stats.rx_nombuf;
2516
2517                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2518                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2519                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2520                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2521                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2522                         stats->q_errors[j] += slave_stats.q_errors[j];
2523                 }
2524
2525         }
2526
2527         return 0;
2528 }
2529
2530 static void
2531 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2532 {
2533         struct bond_dev_private *internals = dev->data->dev_private;
2534         int i;
2535
2536         for (i = 0; i < internals->slave_count; i++)
2537                 rte_eth_stats_reset(internals->slaves[i].port_id);
2538 }
2539
2540 static void
2541 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2542 {
2543         struct bond_dev_private *internals = eth_dev->data->dev_private;
2544         int i;
2545
2546         internals->promiscuous_en = 1;
2547
2548         switch (internals->mode) {
2549         /* Promiscuous mode is propagated to all slaves */
2550         case BONDING_MODE_ROUND_ROBIN:
2551         case BONDING_MODE_BALANCE:
2552         case BONDING_MODE_BROADCAST:
2553                 for (i = 0; i < internals->slave_count; i++)
2554                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2555                 break;
2556         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2557         case BONDING_MODE_8023AD:
2558                 break;
2559         /* Promiscuous mode is propagated only to primary slave */
2560         case BONDING_MODE_ACTIVE_BACKUP:
2561         case BONDING_MODE_TLB:
2562         case BONDING_MODE_ALB:
2563         default:
2564                 rte_eth_promiscuous_enable(internals->current_primary_port);
2565         }
2566 }
2567
2568 static void
2569 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2570 {
2571         struct bond_dev_private *internals = dev->data->dev_private;
2572         int i;
2573
2574         internals->promiscuous_en = 0;
2575
2576         switch (internals->mode) {
2577         /* Promiscuous mode is propagated to all slaves */
2578         case BONDING_MODE_ROUND_ROBIN:
2579         case BONDING_MODE_BALANCE:
2580         case BONDING_MODE_BROADCAST:
2581                 for (i = 0; i < internals->slave_count; i++)
2582                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2583                 break;
2584         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2585         case BONDING_MODE_8023AD:
2586                 break;
2587         /* Promiscuous mode is propagated only to primary slave */
2588         case BONDING_MODE_ACTIVE_BACKUP:
2589         case BONDING_MODE_TLB:
2590         case BONDING_MODE_ALB:
2591         default:
2592                 rte_eth_promiscuous_disable(internals->current_primary_port);
2593         }
2594 }
2595
2596 static void
2597 bond_ethdev_delayed_lsc_propagation(void *arg)
2598 {
2599         if (arg == NULL)
2600                 return;
2601
2602         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2603                         RTE_ETH_EVENT_INTR_LSC, NULL);
2604 }
2605
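/*
 * Link status change handler for slave ports. On link up the slave is
 * activated (becoming primary, and bringing the bond link up, if it is
 * the first active slave); on link down it is deactivated and the
 * primary role moves to the next active slave. Propagation of the
 * resulting bonded-device LSC event honours the configured link up/down
 * delays.
 */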
2606 int
2607 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2608                 void *param, void *ret_param __rte_unused)
2609 {
2610         struct rte_eth_dev *bonded_eth_dev;
2611         struct bond_dev_private *internals;
2612         struct rte_eth_link link;
2613         int rc = -1;
2614
2615         int i, valid_slave = 0;
2616         uint8_t active_pos;
2617         uint8_t lsc_flag = 0;
2618
2619         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2620                 return rc;
2621
2622         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2623
2624         if (check_for_bonded_ethdev(bonded_eth_dev))
2625                 return rc;
2626
2627         internals = bonded_eth_dev->data->dev_private;
2628
2629         /* If the device isn't started don't handle interrupts */
2630         if (!bonded_eth_dev->data->dev_started)
2631                 return rc;
2632
2633         /* verify that port_id is a valid slave of bonded port */
2634         for (i = 0; i < internals->slave_count; i++) {
2635                 if (internals->slaves[i].port_id == port_id) {
2636                         valid_slave = 1;
2637                         break;
2638                 }
2639         }
2640
2641         if (!valid_slave)
2642                 return rc;
2643
2644         /* Search for port in active port list */
2645         active_pos = find_slave_by_id(internals->active_slaves,
2646                         internals->active_slave_count, port_id);
2647
2648         rte_eth_link_get_nowait(port_id, &link);
2649         if (link.link_status) {
2650                 if (active_pos < internals->active_slave_count)
2651                         return rc;
2652
2653                 /* if no active slave ports then set this port to be primary port */
2654                 if (internals->active_slave_count < 1) {
2655                         /* If first active slave, then change link status */
2656                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2657                         internals->current_primary_port = port_id;
2658                         lsc_flag = 1;
2659
2660                         mac_address_slaves_update(bonded_eth_dev);
2661                 }
2662
2663                 activate_slave(bonded_eth_dev, port_id);
2664
2665                 /* If user has defined the primary port then default to using it */
2666                 if (internals->user_defined_primary_port &&
2667                                 internals->primary_port == port_id)
2668                         bond_ethdev_primary_set(internals, port_id);
2669         } else {
2670                 if (active_pos == internals->active_slave_count)
2671                         return rc;
2672
2673                 /* Remove from active slave list */
2674                 deactivate_slave(bonded_eth_dev, port_id);
2675
2676                 if (internals->active_slave_count < 1)
2677                         lsc_flag = 1;
2678
2679                 /* Update primary id: take the first active slave from the list,
2680                  * or fall back to the configured primary port if none is active */
2681                 if (port_id == internals->current_primary_port) {
2682                         if (internals->active_slave_count > 0)
2683                                 bond_ethdev_primary_set(internals,
2684                                                 internals->active_slaves[0]);
2685                         else
2686                                 internals->current_primary_port = internals->primary_port;
2687                 }
2688         }
2689
2690         /**
2691          * Update bonded device link properties after any change to active
2692          * slaves
2693          */
2694         bond_ethdev_link_update(bonded_eth_dev, 0);
2695
2696         if (lsc_flag) {
2697                 /* Cancel any possible outstanding interrupts if delays are enabled */
2698                 if (internals->link_up_delay_ms > 0 ||
2699                         internals->link_down_delay_ms > 0)
2700                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2701                                         bonded_eth_dev);
2702
2703                 if (bonded_eth_dev->data->dev_link.link_status) {
2704                         if (internals->link_up_delay_ms > 0)
2705                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2706                                                 bond_ethdev_delayed_lsc_propagation,
2707                                                 (void *)bonded_eth_dev);
2708                         else
2709                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2710                                                 RTE_ETH_EVENT_INTR_LSC,
2711                                                 NULL);
2712
2713                 } else {
2714                         if (internals->link_down_delay_ms > 0)
2715                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2716                                                 bond_ethdev_delayed_lsc_propagation,
2717                                                 (void *)bonded_eth_dev);
2718                         else
2719                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2720                                                 RTE_ETH_EVENT_INTR_LSC,
2721                                                 NULL);
2722                 }
2723         }
2724         return 0;
2725 }
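
/*
 * Illustrative application-side counterpart, assuming hypothetical handler
 * names: the LSC events (re)emitted by the callback above are received by
 * registering on the bonded port with the standard ethdev callback API.
 */
static int
example_lsc_handler(uint16_t port_id, enum rte_eth_event_type type,
                void *cb_arg __rte_unused, void *ret_param __rte_unused)
{
        struct rte_eth_link link;

        if (type != RTE_ETH_EVENT_INTR_LSC)
                return 0;

        rte_eth_link_get_nowait(port_id, &link);
        RTE_LOG(INFO, EAL, "port %u link is %s\n", port_id,
                        link.link_status ? "up" : "down");
        return 0;
}

static int
example_register_lsc(uint16_t bonded_port_id)
{
        return rte_eth_dev_callback_register(bonded_port_id,
                        RTE_ETH_EVENT_INTR_LSC, example_lsc_handler, NULL);
}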
2726
2727 static int
2728 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2729                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2730 {
2731         unsigned i, j;
2732         int result = 0;
2733         int slave_reta_size;
2734         unsigned reta_count;
2735         struct bond_dev_private *internals = dev->data->dev_private;
2736
2737         if (reta_size != internals->reta_size)
2738                 return -EINVAL;
2739
2740         /* Copy RETA table */
2741         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2742
2743         for (i = 0; i < reta_count; i++) {
2744                 internals->reta_conf[i].mask = reta_conf[i].mask;
2745                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2746                         if ((reta_conf[i].mask >> j) & 0x01)
2747                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2748         }
2749
2750         /* Fill rest of array */
2751         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2752                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2753                                 sizeof(internals->reta_conf[0]) * reta_count);
2754
2755         /* Propagate RETA over slaves */
2756         for (i = 0; i < internals->slave_count; i++) {
2757                 slave_reta_size = internals->slaves[i].reta_size;
2758                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2759                                 &internals->reta_conf[0], slave_reta_size);
2760                 if (result < 0)
2761                         return result;
2762         }
2763
2764         return 0;
2765 }
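
/*
 * Caller-side sketch for the RETA update above, with illustrative sizes:
 * build a table that spreads reta_size entries round-robin over nb_queues
 * and push it through the bonded port, which fans it out to every slave.
 */
static int
example_reta_update(uint16_t bonded_port_id, uint16_t reta_size,
                uint16_t nb_queues)
{
        struct rte_eth_rss_reta_entry64
                reta_conf[ETH_RSS_RETA_SIZE_512 / RTE_RETA_GROUP_SIZE];
        unsigned int i, j;

        if (nb_queues == 0 || reta_size > ETH_RSS_RETA_SIZE_512)
                return -EINVAL;

        for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++) {
                reta_conf[i].mask = ~0ULL;      /* touch all 64 group entries */
                for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
                        reta_conf[i].reta[j] =
                                (i * RTE_RETA_GROUP_SIZE + j) % nb_queues;
        }

        return rte_eth_dev_rss_reta_update(bonded_port_id, reta_conf,
                        reta_size);
}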
2766
2767 static int
2768 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2769                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2770 {
2771         int i, j;
2772         struct bond_dev_private *internals = dev->data->dev_private;
2773
2774         if (reta_size != internals->reta_size)
2775                 return -EINVAL;
2776
2777         /* Copy RETA table */
2778         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2779                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2780                         if ((reta_conf[i].mask >> j) & 0x01)
2781                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2782
2783         return 0;
2784 }
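
/*
 * Companion sketch for the query path above: setting every mask bit asks
 * for all entries of the indirection table to be filled in.
 */
static int
example_reta_query(uint16_t bonded_port_id, uint16_t reta_size)
{
        struct rte_eth_rss_reta_entry64
                reta_conf[ETH_RSS_RETA_SIZE_512 / RTE_RETA_GROUP_SIZE];
        unsigned int i;

        if (reta_size > ETH_RSS_RETA_SIZE_512)
                return -EINVAL;

        for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
                reta_conf[i].mask = ~0ULL;

        return rte_eth_dev_rss_reta_query(bonded_port_id, reta_conf,
                        reta_size);
}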
2785
2786 static int
2787 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2788                 struct rte_eth_rss_conf *rss_conf)
2789 {
2790         int i, result = 0;
2791         struct bond_dev_private *internals = dev->data->dev_private;
2792         struct rte_eth_rss_conf bond_rss_conf;
2793
2794         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2795
2796         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2797
2798         if (bond_rss_conf.rss_hf != 0)
2799                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2800
2801         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2802                         sizeof(internals->rss_key)) {
2803                 if (bond_rss_conf.rss_key_len == 0)
2804                         bond_rss_conf.rss_key_len = 40;
2805                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2806                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2807                                 internals->rss_key_len);
2808         }
2809
2810         for (i = 0; i < internals->slave_count; i++) {
2811                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2812                                 &bond_rss_conf);
2813                 if (result < 0)
2814                         return result;
2815         }
2816
2817         return 0;
2818 }
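
/*
 * Caller-side sketch for the hash update above: request IP hashing only
 * and keep the programmed key (rss_key == NULL). The bonded PMD clips
 * rss_hf against the offloads common to all slaves before propagating.
 */
static int
example_rss_hash_update(uint16_t bonded_port_id)
{
        struct rte_eth_rss_conf rss_conf = {
                .rss_key = NULL,        /* NULL keeps the current key */
                .rss_key_len = 0,
                .rss_hf = ETH_RSS_IP,
        };

        return rte_eth_dev_rss_hash_update(bonded_port_id, &rss_conf);
}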
2819
2820 static int
2821 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2822                 struct rte_eth_rss_conf *rss_conf)
2823 {
2824         struct bond_dev_private *internals = dev->data->dev_private;
2825
2826         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2827         rss_conf->rss_key_len = internals->rss_key_len;
2828         if (rss_conf->rss_key)
2829                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2830
2831         return 0;
2832 }
2833
2834 static int
2835 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2836 {
2837         struct rte_eth_dev *slave_eth_dev;
2838         struct bond_dev_private *internals = dev->data->dev_private;
2839         int ret, i;
2840
2841         rte_spinlock_lock(&internals->lock);
2842
2843         for (i = 0; i < internals->slave_count; i++) {
2844                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2845                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2846                         rte_spinlock_unlock(&internals->lock);
2847                         return -ENOTSUP;
2848                 }
2849         }
2850         for (i = 0; i < internals->slave_count; i++) {
2851                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2852                 if (ret < 0) {
2853                         rte_spinlock_unlock(&internals->lock);
2854                         return ret;
2855                 }
2856         }
2857
2858         rte_spinlock_unlock(&internals->lock);
2859         return 0;
2860 }
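
/*
 * Caller-side sketch of the MTU update above: the first loop verifies
 * every slave implements mtu_set (failing with -ENOTSUP before any
 * change), then the MTU is applied slave by slave. The value 9000 is
 * illustrative.
 */
static int
example_bond_set_mtu(uint16_t bonded_port_id)
{
        uint16_t mtu;
        int ret;

        ret = rte_eth_dev_set_mtu(bonded_port_id, 9000);
        if (ret < 0)
                return ret;

        /* Read back the MTU now in effect on the bonded port */
        return rte_eth_dev_get_mtu(bonded_port_id, &mtu);
}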
2861
2862 static void
2863 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2864 {
2865         if (mac_address_set(dev, addr))
2866                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2867 }
2868
2869 const struct eth_dev_ops default_dev_ops = {
2870         .dev_start            = bond_ethdev_start,
2871         .dev_stop             = bond_ethdev_stop,
2872         .dev_close            = bond_ethdev_close,
2873         .dev_configure        = bond_ethdev_configure,
2874         .dev_infos_get        = bond_ethdev_info,
2875         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2876         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2877         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2878         .rx_queue_release     = bond_ethdev_rx_queue_release,
2879         .tx_queue_release     = bond_ethdev_tx_queue_release,
2880         .link_update          = bond_ethdev_link_update,
2881         .stats_get            = bond_ethdev_stats_get,
2882         .stats_reset          = bond_ethdev_stats_reset,
2883         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2884         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2885         .reta_update          = bond_ethdev_rss_reta_update,
2886         .reta_query           = bond_ethdev_rss_reta_query,
2887         .rss_hash_update      = bond_ethdev_rss_hash_update,
2888         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
2889         .mtu_set              = bond_ethdev_mtu_set,
2890         .mac_addr_set         = bond_ethdev_mac_address_set
2891 };
2892
2893 static int
2894 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2895 {
2896         const char *name = rte_vdev_device_name(dev);
2897         uint8_t socket_id = dev->device.numa_node;
2898         struct bond_dev_private *internals = NULL;
2899         struct rte_eth_dev *eth_dev = NULL;
2900         uint32_t vlan_filter_bmp_size;
2901
2902         /* now do all data allocation - for eth_dev structure, dummy pci driver
2903          * and internal (private) data
2904          */
2905
2906         /* reserve an ethdev entry */
2907         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2908         if (eth_dev == NULL) {
2909                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2910                 goto err;
2911         }
2912
2913         internals = eth_dev->data->dev_private;
2914         eth_dev->data->nb_rx_queues = (uint16_t)1;
2915         eth_dev->data->nb_tx_queues = (uint16_t)1;
2916
2917         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2918                         socket_id);
2919         if (eth_dev->data->mac_addrs == NULL) {
2920                 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2921                 goto err;
2922         }
2923
2924         eth_dev->dev_ops = &default_dev_ops;
2925         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2926
2927         rte_spinlock_init(&internals->lock);
2928
2929         internals->port_id = eth_dev->data->port_id;
2930         internals->mode = BONDING_MODE_INVALID;
2931         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2932         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2933         internals->burst_xmit_hash = burst_xmit_l2_hash;
2934         internals->user_defined_mac = 0;
2935
2936         internals->link_status_polling_enabled = 0;
2937
2938         internals->link_status_polling_interval_ms =
2939                 DEFAULT_POLLING_INTERVAL_10_MS;
2940         internals->link_down_delay_ms = 0;
2941         internals->link_up_delay_ms = 0;
2942
2943         internals->slave_count = 0;
2944         internals->active_slave_count = 0;
2945         internals->rx_offload_capa = 0;
2946         internals->tx_offload_capa = 0;
2947         internals->candidate_max_rx_pktlen = 0;
2948         internals->max_rx_pktlen = 0;
2949
2950         /* Initially allow any RSS offload type to be chosen */
2951         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2952
2953         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2954         memset(internals->slaves, 0, sizeof(internals->slaves));
2955
2956         /* Set mode 4 default configuration */
2957         bond_mode_8023ad_setup(eth_dev, NULL);
2958         if (bond_ethdev_mode_set(eth_dev, mode)) {
2959                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
2960                                  eth_dev->data->port_id, mode);
2961                 goto err;
2962         }
2963
2964         vlan_filter_bmp_size =
2965                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2966         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2967                                                    RTE_CACHE_LINE_SIZE);
2968         if (internals->vlan_filter_bmpmem == NULL) {
2969                 RTE_BOND_LOG(ERR,
2970                              "Failed to allocate vlan bitmap for bonded device %u\n",
2971                              eth_dev->data->port_id);
2972                 goto err;
2973         }
2974
2975         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2976                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2977         if (internals->vlan_filter_bmp == NULL) {
2978                 RTE_BOND_LOG(ERR,
2979                              "Failed to init vlan bitmap for bonded device %u\n",
2980                              eth_dev->data->port_id);
2981                 rte_free(internals->vlan_filter_bmpmem);
2982                 goto err;
2983         }
2984
2985         return eth_dev->data->port_id;
2986
2987 err:
2988         rte_free(internals);
2989         if (eth_dev != NULL) {
2990                 rte_free(eth_dev->data->mac_addrs);
2991                 rte_eth_dev_release_port(eth_dev);
2992         }
2993         return -1;
2994 }
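
/*
 * Minimal sketch of the rte_bitmap pattern used for the VLAN filter above,
 * with illustrative names: query the footprint, initialise the bitmap over
 * caller-owned memory, then mark individual VLAN ids.
 */
static struct rte_bitmap *
example_vlan_bitmap_create(void **mem_out)
{
        uint32_t size = rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
        void *mem = rte_malloc(NULL, size, RTE_CACHE_LINE_SIZE);
        struct rte_bitmap *bmp;

        if (mem == NULL)
                return NULL;

        bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1, mem, size);
        if (bmp == NULL) {
                rte_free(mem);
                return NULL;
        }

        rte_bitmap_set(bmp, 100);       /* e.g. track VLAN id 100 */
        *mem_out = mem;                 /* caller releases with rte_free() */
        return bmp;
}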
2995
2996 static int
2997 bond_probe(struct rte_vdev_device *dev)
2998 {
2999         const char *name;
3000         struct bond_dev_private *internals;
3001         struct rte_kvargs *kvlist;
3002         uint8_t bonding_mode, socket_id;
3003         int arg_count, port_id;
3004         uint8_t agg_mode;
3005
3006         if (!dev)
3007                 return -EINVAL;
3008
3009         name = rte_vdev_device_name(dev);
3010         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
3011
3012         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3013                 pmd_bond_init_valid_arguments);
3014         if (kvlist == NULL)
3015                 return -1;
3016
3017         /* Parse link bonding mode */
3018         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3019                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3020                                 &bond_ethdev_parse_slave_mode_kvarg,
3021                                 &bonding_mode) != 0) {
3022                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
3023                                         name);
3024                         goto parse_error;
3025                 }
3026         } else {
3027                 RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
3028                                 "device %s\n", name);
3029                 goto parse_error;
3030         }
3031
3032         /* Parse socket id to create bonding device on */
3033         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3034         if (arg_count == 1) {
3035                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3036                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3037                                 != 0) {
3038                         RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
3039                                         "bonded device %s\n", name);
3040                         goto parse_error;
3041                 }
3042         } else if (arg_count > 1) {
3043                 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
3044                                 "bonded device %s\n", name);
3045                 goto parse_error;
3046         } else {
3047                 socket_id = rte_socket_id();
3048         }
3049
3050         dev->device.numa_node = socket_id;
3051
3052         /* Create link bonding eth device */
3053         port_id = bond_alloc(dev, bonding_mode);
3054         if (port_id < 0) {
3055                 RTE_LOG(ERR, EAL, "Failed to create socket %s in mode %u on "
3056                                 "socket %u.\n", name, bonding_mode, socket_id);
3057                 goto parse_error;
3058         }
3059         internals = rte_eth_devices[port_id].data->dev_private;
3060         internals->kvlist = kvlist;
3061
3062
3063         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3064                 if (rte_kvargs_process(kvlist,
3065                                 PMD_BOND_AGG_MODE_KVARG,
3066                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3067                                 &agg_mode) != 0) {
3068                         RTE_LOG(ERR, EAL,
3069                                         "Failed to parse agg selection mode for bonded device %s\n",
3070                                         name);
3071                         goto parse_error;
3072                 }
3073
3074                 if (internals->mode == BONDING_MODE_8023AD)
3075                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3076                                         agg_mode);
3077         } else {
3078                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3079         }
3080
3081         RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
3082                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
3083         return 0;
3084
3085 parse_error:
3086         rte_kvargs_free(kvlist);
3087
3088         return -1;
3089 }
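
/*
 * Usage sketch for the probe path above: a bonded device is normally
 * created from EAL devargs, for example (PCI addresses are illustrative)
 *
 *   --vdev 'net_bonding0,mode=1,slave=0000:00:08.0,slave=0000:00:09.0'
 *
 * and can then be looked up by name once rte_eal_init() has run.
 */
static int
example_find_bonded_port(uint16_t *port_id)
{
        return rte_eth_dev_get_port_by_name("net_bonding0", port_id);
}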
3090
3091 static int
3092 bond_remove(struct rte_vdev_device *dev)
3093 {
3094         struct rte_eth_dev *eth_dev;
3095         struct bond_dev_private *internals;
3096         const char *name;
3097
3098         if (!dev)
3099                 return -EINVAL;
3100
3101         name = rte_vdev_device_name(dev);
3102         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
3103
3104         /* Now free all allocated data: the eth_dev structure, the dummy
3105          * PCI driver and the internal (private) data
3106          */
3107
3108         /* find an ethdev entry */
3109         eth_dev = rte_eth_dev_allocated(name);
3110         if (eth_dev == NULL)
3111                 return -ENODEV;
3112
3113         RTE_ASSERT(eth_dev->device == &dev->device);
3114
3115         internals = eth_dev->data->dev_private;
3116         if (internals->slave_count != 0)
3117                 return -EBUSY;
3118
3119         if (eth_dev->data->dev_started == 1) {
3120                 bond_ethdev_stop(eth_dev);
3121                 bond_ethdev_close(eth_dev);
3122         }
3123
3124         eth_dev->dev_ops = NULL;
3125         eth_dev->rx_pkt_burst = NULL;
3126         eth_dev->tx_pkt_burst = NULL;
3127
3129         rte_bitmap_free(internals->vlan_filter_bmp);
3130         rte_free(internals->vlan_filter_bmpmem);
3131         rte_free(eth_dev->data->dev_private);
3132         rte_free(eth_dev->data->mac_addrs);
3133
3134         rte_eth_dev_release_port(eth_dev);
3135
3136         return 0;
3137 }
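
/*
 * Caller-side teardown sketch: bond_remove() above refuses with -EBUSY
 * while slaves are still attached, so detach them first and then free the
 * device by name through the bonding API. Ids and names are illustrative.
 */
static int
example_bond_teardown(const char *name, uint16_t bonded_port_id,
                uint16_t slave_port_id)
{
        int ret;

        ret = rte_eth_bond_slave_remove(bonded_port_id, slave_port_id);
        if (ret < 0)
                return ret;

        return rte_eth_bond_free(name);
}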
3138
3139 /* This resolves the slave port ids after all the other physical and virtual
3140  * devices have been allocated */
3141 static int
3142 bond_ethdev_configure(struct rte_eth_dev *dev)
3143 {
3144         const char *name = dev->device->name;
3145         struct bond_dev_private *internals = dev->data->dev_private;
3146         struct rte_kvargs *kvlist = internals->kvlist;
3147         int arg_count;
3148         uint16_t port_id = dev - rte_eth_devices;
3149         uint8_t agg_mode;
3150
3151         static const uint8_t default_rss_key[40] = {
3152                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3153                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3154                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3155                 0xBE, 0xAC, 0x01, 0xFA
3156         };
3157
3158         unsigned i, j;
3159
3160         /* If RSS is enabled, fill table and key with default values */
3161         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3162                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
3163                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
3164                 memcpy(internals->rss_key, default_rss_key,
3165                                 sizeof(default_rss_key));
3165
3166                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3167                         internals->reta_conf[i].mask = ~0LL;
3168                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3169                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
3170                 }
3171         }
3172
3173         /* set the max_rx_pktlen */
3174         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3175
3176         /*
3177          * if no kvlist, it means that this bonded device has been created
3178          * through the bonding api.
3179          */
3180         if (!kvlist)
3181                 return 0;
3182
3183         /* Parse MAC address for bonded device */
3184         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3185         if (arg_count == 1) {
3186                 struct ether_addr bond_mac;
3187
3188                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3189                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3190                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
3191                                         name);
3192                         return -1;
3193                 }
3194
3195                 /* Set MAC address */
3196                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3197                         RTE_LOG(ERR, EAL,
3198                                         "Failed to set mac address on bonded device %s\n",
3199                                         name);
3200                         return -1;
3201                 }
3202         } else if (arg_count > 1) {
3203                 RTE_LOG(ERR, EAL,
3204                                 "MAC address can be specified only once for bonded device %s\n",
3205                                 name);
3206                 return -1;
3207         }
3208
3209         /* Parse/set balance mode transmit policy */
3210         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3211         if (arg_count == 1) {
3212                 uint8_t xmit_policy;
3213
3214                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3215                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3216                                                 0) {
3217                         RTE_LOG(INFO, EAL,
3218                                         "Invalid xmit policy specified for bonded device %s\n",
3219                                         name);
3220                         return -1;
3221                 }
3222
3223                 /* Set balance mode transmit policy */
3224                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3225                         RTE_LOG(ERR, EAL,
3226                                         "Failed to set balance xmit policy on bonded device %s\n",
3227                                         name);
3228                         return -1;
3229                 }
3230         } else if (arg_count > 1) {
3231                 RTE_LOG(ERR, EAL,
3232                                 "Transmit policy can be specified only once for bonded device"
3233                                 " %s\n", name);
3234                 return -1;
3235         }
3236
3237         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3238                 if (rte_kvargs_process(kvlist,
3239                                 PMD_BOND_AGG_MODE_KVARG,
3240                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3241                                 &agg_mode) != 0) {
3242                         RTE_LOG(ERR, EAL,
3243                                         "Failed to parse agg selection mode for bonded device %s\n",
3244                                         name);
                             return -1;
3245                 }
3246                 if (internals->mode == BONDING_MODE_8023AD)
3247                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3248                                         agg_mode);
3249         }
3250
3251         /* Parse/add slave ports to bonded device */
3252         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3253                 struct bond_ethdev_slave_ports slave_ports;
3254                 unsigned i;
3255
3256                 memset(&slave_ports, 0, sizeof(slave_ports));
3257
3258                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3259                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3260                         RTE_LOG(ERR, EAL,
3261                                         "Failed to parse slave ports for bonded device %s\n",
3262                                         name);
3263                         return -1;
3264                 }
3265
3266                 for (i = 0; i < slave_ports.slave_count; i++) {
3267                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3268                                 RTE_LOG(ERR, EAL,
3269                                                 "Failed to add port %d as slave to bonded device %s\n",
3270                                                 slave_ports.slaves[i], name);
3271                         }
3272                 }
3273
3274         } else {
3275                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3276                 return -1;
3277         }
3278
3279         /* Parse/set primary slave port id */
3280         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3281         if (arg_count == 1) {
3282                 uint16_t primary_slave_port_id;
3283
3284                 if (rte_kvargs_process(kvlist,
3285                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
3286                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3287                                 &primary_slave_port_id) < 0) {
3288                         RTE_LOG(INFO, EAL,
3289                                         "Invalid primary slave port id specified for bonded device"
3290                                         " %s\n", name);
3291                         return -1;
3292                 }
3293
3294                 /* Set the primary slave port */
3295                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3296                                 != 0) {
3297                         RTE_LOG(ERR, EAL,
3298                                         "Failed to set primary slave port %d on bonded device %s\n",
3299                                         primary_slave_port_id, name);
3300                         return -1;
3301                 }
3302         } else if (arg_count > 1) {
3303                 RTE_LOG(INFO, EAL,
3304                                 "Primary slave can be specified only once for bonded device"
3305                                 " %s\n", name);
3306                 return -1;
3307         }
3308
3309         /* Parse link status monitor polling interval */
3310         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3311         if (arg_count == 1) {
3312                 uint32_t lsc_poll_interval_ms;
3313
3314                 if (rte_kvargs_process(kvlist,
3315                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3316                                 &bond_ethdev_parse_time_ms_kvarg,
3317                                 &lsc_poll_interval_ms) < 0) {
3318                         RTE_LOG(INFO, EAL,
3319                                         "Invalid lsc polling interval value specified for bonded"
3320                                         " device %s\n", name);
3321                         return -1;
3322                 }
3323
3324                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3325                                 != 0) {
3326                         RTE_LOG(ERR, EAL,
3327                                         "Failed to set lsc monitor polling interval (%u ms) on"
3328                                         " bonded device %s\n", lsc_poll_interval_ms, name);
3329                         return -1;
3330                 }
3331         } else if (arg_count > 1) {
3332                 RTE_LOG(INFO, EAL,
3333                                 "LSC polling interval can be specified only once for bonded"
3334                                 " device %s\n", name);
3335                 return -1;
3336         }
3337
3338         /* Parse link up interrupt propagation delay */
3339         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3340         if (arg_count == 1) {
3341                 uint32_t link_up_delay_ms;
3342
3343                 if (rte_kvargs_process(kvlist,
3344                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3345                                 &bond_ethdev_parse_time_ms_kvarg,
3346                                 &link_up_delay_ms) < 0) {
3347                         RTE_LOG(INFO, EAL,
3348                                         "Invalid link up propagation delay value specified for"
3349                                         " bonded device %s\n", name);
3350                         return -1;
3351                 }
3352
3353                 /* Set the link up propagation delay */
3354                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3355                                 != 0) {
3356                         RTE_LOG(ERR, EAL,
3357                                         "Failed to set link up propagation delay (%u ms) on bonded"
3358                                         " device %s\n", link_up_delay_ms, name);
3359                         return -1;
3360                 }
3361         } else if (arg_count > 1) {
3362                 RTE_LOG(INFO, EAL,
3363                                 "Link up propagation delay can be specified only once for"
3364                                 " bonded device %s\n", name);
3365                 return -1;
3366         }
3367
3368         /* Parse link down interrupt propagation delay */
3369         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3370         if (arg_count == 1) {
3371                 uint32_t link_down_delay_ms;
3372
3373                 if (rte_kvargs_process(kvlist,
3374                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3375                                 &bond_ethdev_parse_time_ms_kvarg,
3376                                 &link_down_delay_ms) < 0) {
3377                         RTE_LOG(INFO, EAL,
3378                                         "Invalid link down propagation delay value specified for"
3379                                         " bonded device %s\n", name);
3380                         return -1;
3381                 }
3382
3383                 /* Set the link down propagation delay */
3384                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3385                                 != 0) {
3386                         RTE_LOG(ERR, EAL,
3387                                         "Failed to set link down propagation delay (%u ms) on"
3388                                         " bonded device %s\n", link_down_delay_ms, name);
3389                         return -1;
3390                 }
3391         } else if (arg_count > 1) {
3392                 RTE_LOG(INFO, EAL,
3393                                 "Link down propagation delay can be specified only once for"
3394                                 " bonded device %s\n", name);
3395                 return -1;
3396         }
3397
3398         return 0;
3399 }
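
/*
 * Illustrative devargs exercising the kvargs parsed above (interface
 * addresses and values are examples only):
 *
 *   --vdev 'net_bonding0,mode=1,slave=0000:00:08.0,slave=0000:00:09.0,primary=0000:00:08.0,mac=00:1e:67:00:00:01,lsc_poll_period_ms=100,up_delay=10,down_delay=50'
 */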
3400
3401 struct rte_vdev_driver pmd_bond_drv = {
3402         .probe = bond_probe,
3403         .remove = bond_remove,
3404 };
3405
3406 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3407 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3408
3409 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3410         "slave=<ifc> "
3411         "primary=<ifc> "
3412         "mode=[0-6] "
3413         "xmit_policy=[l2 | l23 | l34] "
3414         "agg_mode=[count | stable | bandwidth] "
3415         "socket_id=<int> "
3416         "mac=<mac addr> "
3417         "lsc_poll_period_ms=<int> "
3418         "up_delay=<int> "
3419         "down_delay=<int>");