/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}
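
/*
 * Usage sketch (illustrative, not part of the driver): this is how the
 * hashing and ALB paths further down typically step past up to two VLAN
 * tags to reach the L3 header. Variable names here are hypothetical.
 *
 *     struct ether_hdr *eth_h = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
 *     uint16_t proto = eth_h->ether_type;
 *     size_t offset = get_vlan_offset(eth_h, &proto);
 *
 *     if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
 *             struct ipv4_hdr *ipv4_h = (struct ipv4_hdr *)
 *                             ((char *)(eth_h + 1) + offset);
 *             ... hash or rewrite the IPv4 header ...
 *     }
 */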

static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;

        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
                if (num_rx_slave) {
                        num_rx_total += num_rx_slave;
                        nb_pkts -= num_rx_slave;
                }
        }

        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}
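
/*
 * Illustrative sketch (not driver code): how the 802.3ad RX path below feeds
 * this predicate. The slow protocol subtype sits directly after the Ethernet
 * header, so the caller overlays struct slow_protocol_frame on the frame.
 * "slave_id" here is a hypothetical name.
 *
 *     struct ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
 *     uint8_t subtype =
 *             ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
 *
 *     if (is_lacp_packets(hdr->ether_type, subtype, mbuf))
 *             bond_mode_8023ad_handle_slow_pkt(internals, slave_id, mbuf);
 */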

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that all slaves in the bonding device support flow director */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}
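
/*
 * Sketch of the inverse operation (the actual teardown lives elsewhere in
 * this driver, so treat this as an assumption about the bookkeeping): the
 * handle stored above can be released with rte_flow_destroy() when the
 * dedicated queues are disabled.
 *
 *     struct rte_flow *flow =
 *             internals->mode4.dedicated_queues.flow[slave_port];
 *
 *     if (flow != NULL) {
 *             rte_flow_destroy(slave_port, flow, &error);
 *             internals->mode4.dedicated_queues.flow[slave_port] = NULL;
 *     }
 */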

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t i, idx;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0, idx = internals->active_slave;
                        i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
                idx = idx % slave_count;

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);
        }

        internals->active_slave = idx;

        return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        /* 2-D array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i, j;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        /*
         * Populate the per-slave mbuf arrays with the packets to be sent on
         * each slave, selecting the output slave using a hash based on the
         * xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < dist_slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        slave_tx_fail_count[i] = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count[i];

                        /*
                         * Shift failed mbufs to the beginning of the array to
                         * allow reordering later; the first unsent mbuf is at
                         * index slave_tx_count
                         */
                        for (j = 0; j < slave_tx_fail_count[i]; j++) {
                                slave_bufs[i][j] =
                                        slave_bufs[i][slave_tx_count + j];
                        }
                }
        }

        /*
         * If there are tx burst failures we move packets to the end of bufs
         * to preserve the expected PMD behaviour of all failed transmissions
         * being at the end of the input mbuf array
         */
        if (unlikely(total_tx_fail_count > 0)) {
                int bufs_idx = nb_bufs - total_tx_fail_count;

                for (i = 0; i < dist_slave_count; i++) {
                        if (slave_tx_fail_count[i] > 0) {
                                for (j = 0; j < slave_tx_fail_count[i]; j++)
                                        bufs[bufs_idx++] = slave_bufs[i][j];
                        }
                }
        }

        return total_tx_count;
}
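
/*
 * Illustrative sketch (not driver code): the reordering above preserves the
 * usual DPDK tx_burst contract, so a caller can retry just the unsent tail.
 * Names here are hypothetical.
 *
 *     uint16_t sent = rte_eth_tx_burst(bond_port_id, queue_id, pkts, n);
 *
 *     while (sent < n) {
 *             // pkts[sent..n-1] hold exactly the frames that were not sent
 *             sent += rte_eth_tx_burst(bond_port_id, queue_id,
 *                             pkts + sent, n - sent);
 *     }
 */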

static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;
        uint8_t subtype;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {
                        /* If the packet type is known to be more than plain
                         * L2, it cannot be a slow protocol frame; skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow
                         * packet, if the slave is not in collecting state, or
                         * if the bonded interface is not in promiscuous mode
                         * and the destination address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting || (!promisc &&
                                        !is_multicast_ether_addr(&hdr->d_addr) &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        internals->active_slave = idx;
        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++)     {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
                        /* Just update the RX or TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update the
         * stats; guard against overflowing the fixed-size table. */
        if (active_clients == MAX_CLIENTS_NUMBER)
                return;
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
                RTE_LOG(DEBUG, PMD, \
                "%s " \
                "port:%d " \
                "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "DstIP:%s " \
                "%s " \
                "%d\n", \
                info, \
                port, \
                eth_h->s_addr.addr_bytes[0], \
                eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], \
                eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], \
                eth_h->s_addr.addr_bytes[5], \
                src_ip, \
                eth_h->d_addr.addr_bytes[0], \
                eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], \
                eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], \
                eth_h->d_addr.addr_bytes[5], \
                dst_ip, \
                arp_op, \
                ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        /* This is an RX path, so cast to the RX queue structure */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Distribute the packets round-robin across the slave tx buffers */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* Increment the current slave index so the next call to tx burst
         * starts on the next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* If tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct udp_hdr *udp_hdr;
        struct tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* There is no L4 header in a fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}
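
/*
 * Usage sketch (application side, not driver code): which of the three hash
 * callbacks above gets used follows the configured transmit policy, e.g.
 *
 *     #include <rte_eth_bond.h>
 *
 *     // hash on MAC + IP + L4 port tuples (burst_xmit_l34_hash)
 *     rte_eth_bond_xmit_policy_set(bond_port_id, BALANCE_XMIT_POLICY_LAYER34);
 */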

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
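        /* link_speed is reported in Mbps, so this converts it to bytes/s */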
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint8_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint8_t i, slave_id;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr)) {
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                                mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                        }
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}
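
/*
 * Lifecycle note (illustrative): bond_tlb_enable() runs the callback once,
 * which reorders the slaves by remaining bandwidth and re-arms itself via
 * rte_eal_alarm_set(), so the reordering repeats every REORDER_PERIOD_MS
 * milliseconds until bond_tlb_disable() cancels the pending alarm.
 */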

static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional
         * buffer for packets sent with the TLB policy. In the worst case
         * every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they
         * won't be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search the tx buffer for ARP packets and forward them to ALB */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        /* Move any unsent packets to the end of bufs; the
                         * failed packets are the tail of slave_bufs[i] */
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                /* Move the unsent tail of the TLB buffer to the end of bufs */
                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS]
                                                [slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i, j;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        /*
         * Populate the per-slave mbuf arrays with the packets to be sent on
         * each slave, selecting the output slave using a hash based on the
         * xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint16_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        slave_tx_fail_count[i] = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count[i];

                        /*
                         * Shift failed mbufs to the beginning of the array to
                         * allow reordering later; the first unsent mbuf is at
                         * index slave_tx_count
                         */
                        for (j = 0; j < slave_tx_fail_count[i]; j++) {
                                slave_bufs[i][j] =
                                        slave_bufs[i][slave_tx_count + j];
                        }
                }
        }

        /*
         * If there are tx burst failures we move packets to the end of bufs
         * to preserve the expected PMD behaviour of all failed transmissions
         * being at the end of the input mbuf array
         */
        if (unlikely(total_tx_fail_count > 0)) {
                int bufs_idx = nb_bufs - total_tx_fail_count;

                for (i = 0; i < slave_count; i++) {
                        if (slave_tx_fail_count[i] > 0) {
                                for (j = 0; j < slave_tx_fail_count[i]; j++)
                                        bufs[bufs_idx++] = slave_bufs[i][j];
                        }
                }
        }

        return total_tx_count;
}
1312
1313 static uint16_t
1314 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1315                 uint16_t nb_bufs)
1316 {
1317         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1318         struct bond_dev_private *internals = bd_tx_q->dev_private;
1319
1320         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1321         uint16_t slave_count;
1322
1323         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1324         uint16_t dist_slave_count;
1325
1326         /* 2-D array to sort mbufs for transmission on each slave into */
1327         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1328         /* Number of mbufs for transmission on each slave */
1329         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1330         /* Mapping array generated by hash function to map mbufs to slaves */
1331         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1332
1333         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1334         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1335
1336         uint16_t i, j;
1337
1338         if (unlikely(nb_bufs == 0))
1339                 return 0;
1340
1341         /* Copy slave list to protect against slave up/down changes during tx
1342          * bursting */
1343         slave_count = internals->active_slave_count;
1344         if (unlikely(slave_count < 1))
1345                 return 0;
1346
1347         memcpy(slave_port_ids, internals->active_slaves,
1348                         sizeof(slave_port_ids[0]) * slave_count);
1349
1350         dist_slave_count = 0;
1351         for (i = 0; i < slave_count; i++) {
1352                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1353
1354                 if (ACTOR_STATE(port, DISTRIBUTING))
1355                         dist_slave_port_ids[dist_slave_count++] =
1356                                         slave_port_ids[i];
1357         }
1358
1359         if (likely(dist_slave_count > 1)) {
1360
1361                 /*
1362                  * Populate slaves mbuf with the packets which are to be sent
1363                  * on it, selecting output slave using hash based on xmit policy
1364                  */
1365                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1366                                 bufs_slave_port_idxs);
1367
1368                 for (i = 0; i < nb_bufs; i++) {
1369                         /*
1370                          * Populate slave mbuf arrays with mbufs for that
1371                          * slave
1372                          */
1373                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1374
1375                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1376                                         bufs[i];
1377                 }
1378
1379
1380                 /* Send packet burst on each slave device */
1381                 for (i = 0; i < dist_slave_count; i++) {
1382                         if (slave_nb_bufs[i] == 0)
1383                                 continue;
1384
1385                         slave_tx_count = rte_eth_tx_burst(
1386                                         dist_slave_port_ids[i],
1387                                         bd_tx_q->queue_id, slave_bufs[i],
1388                                         slave_nb_bufs[i]);
1389
1390                         total_tx_count += slave_tx_count;
1391
1392                         /* If tx burst fails move packets to end of bufs */
1393                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1394                                 slave_tx_fail_count[i] = slave_nb_bufs[i] -
1395                                                 slave_tx_count;
1396                                 total_tx_fail_count += slave_tx_fail_count[i];
1397
1398                                 /*
1399                                  * Shift bufs to beginning of array to allow
1400                                  * reordering later
1401                                  */
1402                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
1403                                         slave_bufs[i][j] =
1404                                                 slave_bufs[i]
1405                                                         [slave_tx_count
1406                                                         + j];
1407                         }
1408                 }
1409
1410                 /*
1411                  * If there were tx burst failures, move the failed packets to
1412                  * the end of bufs to preserve the expected PMD behaviour that
1413                  * failed packets sit at the end of the input mbuf array
1414                  */
1415                 if (unlikely(total_tx_fail_count > 0)) {
1416                         int bufs_idx = nb_bufs - total_tx_fail_count;
1417
1418                         for (i = 0; i < slave_count; i++) {
1419                                 if (slave_tx_fail_count[i] > 0) {
1420                                         for (j = 0;
1421                                                 j < slave_tx_fail_count[i];
1422                                                 j++) {
1423                                                 bufs[bufs_idx++] =
1424                                                         slave_bufs[i][j];
1425                                         }
1426                                 }
1427                         }
1428                 }
1429         }
1430
1431         /* Check for LACP control packets and send if available */
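             /*
              * Each slave's tx_ring is filled by the mode 4 state machines;
              * at most one LACPDU per slave is drained on every burst call,
              * which is sufficient as long as the application polls tx at
              * the rate required by mode 4.
              */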
1432         for (i = 0; i < slave_count; i++) {
1433                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1434                 struct rte_mbuf *ctrl_pkt = NULL;
1435
1436                 if (likely(rte_ring_empty(port->tx_ring)))
1437                         continue;
1438
1439                 if (rte_ring_dequeue(port->tx_ring,
1440                                      (void **)&ctrl_pkt) != -ENOENT) {
1441                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1442                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1443                         /*
1444                          * re-enqueue LAG control plane packets to buffering
1445                          * ring if transmission fails so the packet isn't lost.
1446                          */
1447                         if (slave_tx_count != 1)
1448                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1449                 }
1450         }
1451
1452         return total_tx_count;
1453 }
1454
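     /*
      * Broadcast mode: replicate each packet on every active slave using
      * mbuf reference counting rather than deep copies, and return the
      * count achieved by the most successful slave.
      */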
1455 static uint16_t
1456 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1457                 uint16_t nb_pkts)
1458 {
1459         struct bond_dev_private *internals;
1460         struct bond_tx_queue *bd_tx_q;
1461
1462         uint8_t tx_failed_flag = 0, num_of_slaves;
1463         uint16_t slaves[RTE_MAX_ETHPORTS];
1464
1465         uint16_t max_nb_of_tx_pkts = 0;
1466
1467         int slave_tx_total[RTE_MAX_ETHPORTS];
1468         int i, most_successful_tx_slave = -1;
1469
1470         bd_tx_q = (struct bond_tx_queue *)queue;
1471         internals = bd_tx_q->dev_private;
1472
1473         /* Copy slave list to protect against slave up/down changes during tx
1474          * bursting */
1475         num_of_slaves = internals->active_slave_count;
1476         memcpy(slaves, internals->active_slaves,
1477                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1478
1479         if (num_of_slaves < 1)
1480                 return 0;
1481
1482         /* Increment reference count on mbufs */
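             /*
              * Together with the caller's original reference this gives one
              * reference per slave: each slave either transmits the packet
              * (the PMD releases a reference on completion) or the failure
              * handling below frees one reference explicitly.
              */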
1483         for (i = 0; i < nb_pkts; i++)
1484                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1485
1486         /* Transmit burst on each active slave */
1487         for (i = 0; i < num_of_slaves; i++) {
1488                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1489                                         bufs, nb_pkts);
1490
1491                 if (unlikely(slave_tx_total[i] < nb_pkts))
1492                         tx_failed_flag = 1;
1493
1494                 /* record the value and slave index for the slave which transmits the
1495                  * maximum number of packets */
1496                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1497                         max_nb_of_tx_pkts = slave_tx_total[i];
1498                         most_successful_tx_slave = i;
1499                 }
1500         }
1501
1502         /* if slaves fail to transmit packets from burst, the calling application
1503          * is not expected to know about multiple references to packets so we must
1504          * handle failures of all packets except those of the most successful slave
1505          */
1506         if (unlikely(tx_failed_flag))
1507                 for (i = 0; i < num_of_slaves; i++)
1508                         if (i != most_successful_tx_slave)
1509                                 while (slave_tx_total[i] < nb_pkts)
1510                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1511
1512         return max_nb_of_tx_pkts;
1513 }
1514
1515 void
1516 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1517 {
1518         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1519
1520         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1521                 /**
1522                  * If in mode 4 then save the link properties of the first
1523                  * slave; all subsequent slaves must match these properties
1524                  */
1525                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1526
1527                 bond_link->link_autoneg = slave_link->link_autoneg;
1528                 bond_link->link_duplex = slave_link->link_duplex;
1529                 bond_link->link_speed = slave_link->link_speed;
1530         } else {
1531                 /**
1532                  * In any other mode the link properties are set to default
1533                  * values of AUTONEG/DUPLEX
1534                  */
1535                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1536                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1537         }
1538 }
1539
1540 int
1541 link_properties_valid(struct rte_eth_dev *ethdev,
1542                 struct rte_eth_link *slave_link)
1543 {
1544         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1545
1546         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1547                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1548
1549                 if (bond_link->link_duplex != slave_link->link_duplex ||
1550                         bond_link->link_autoneg != slave_link->link_autoneg ||
1551                         bond_link->link_speed != slave_link->link_speed)
1552                         return -1;
1553         }
1554
1555         return 0;
1556 }
1557
1558 int
1559 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1560 {
1561         struct ether_addr *mac_addr;
1562
1563         if (eth_dev == NULL) {
1564                 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1565                 return -1;
1566         }
1567
1568         if (dst_mac_addr == NULL) {
1569                 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1570                 return -1;
1571         }
1572
1573         mac_addr = eth_dev->data->mac_addrs;
1574
1575         ether_addr_copy(mac_addr, dst_mac_addr);
1576         return 0;
1577 }
1578
1579 int
1580 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1581 {
1582         struct ether_addr *mac_addr;
1583
1584         if (eth_dev == NULL) {
1585                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1586                 return -1;
1587         }
1588
1589         if (new_mac_addr == NULL) {
1590                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1591                 return -1;
1592         }
1593
1594         mac_addr = eth_dev->data->mac_addrs;
1595
1596         /* If new MAC is different to current MAC then update */
1597         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1598                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1599
1600         return 0;
1601 }
1602
1603 int
1604 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1605 {
1606         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1607         int i;
1608
1609         /* Update slave devices MAC addresses */
1610         if (internals->slave_count < 1)
1611                 return -1;
1612
1613         switch (internals->mode) {
1614         case BONDING_MODE_ROUND_ROBIN:
1615         case BONDING_MODE_BALANCE:
1616         case BONDING_MODE_BROADCAST:
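                     /*
                      * In these modes any slave may transmit any flow, so
                      * every slave is programmed with the bonded device's
                      * MAC address.
                      */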
1617                 for (i = 0; i < internals->slave_count; i++) {
1618                         if (rte_eth_dev_default_mac_addr_set(
1619                                         internals->slaves[i].port_id,
1620                                         bonded_eth_dev->data->mac_addrs)) {
1621                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1622                                                 internals->slaves[i].port_id);
1623                                 return -1;
1624                         }
1625                 }
1626                 break;
1627         case BONDING_MODE_8023AD:
1628                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1629                 break;
1630         case BONDING_MODE_ACTIVE_BACKUP:
1631         case BONDING_MODE_TLB:
1632         case BONDING_MODE_ALB:
1633         default:
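                     /*
                      * In the primary-based modes only the current primary
                      * carries the bonded MAC; the other slaves are restored
                      * to the MAC they had before being added
                      * (persisted_mac_addr).
                      */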
1634                 for (i = 0; i < internals->slave_count; i++) {
1635                         if (internals->slaves[i].port_id ==
1636                                         internals->current_primary_port) {
1637                                 if (rte_eth_dev_default_mac_addr_set(
1638                                                 internals->current_primary_port,
1639                                                 bonded_eth_dev->data->mac_addrs)) {
1640                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1641                                                         internals->current_primary_port);
1642                                         return -1;
1643                                 }
1644                         } else {
1645                                 if (rte_eth_dev_default_mac_addr_set(
1646                                                 internals->slaves[i].port_id,
1647                                                 &internals->slaves[i].persisted_mac_addr)) {
1648                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1649                                                         internals->slaves[i].port_id);
1650                                         return -1;
1651                                 }
1652                         }
1653                 }
1654         }
1655
1656         return 0;
1657 }
1658
1659 int
1660 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1661 {
1662         struct bond_dev_private *internals;
1663
1664         internals = eth_dev->data->dev_private;
1665
1666         switch (mode) {
1667         case BONDING_MODE_ROUND_ROBIN:
1668                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1669                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1670                 break;
1671         case BONDING_MODE_ACTIVE_BACKUP:
1672                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1673                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1674                 break;
1675         case BONDING_MODE_BALANCE:
1676                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1677                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1678                 break;
1679         case BONDING_MODE_BROADCAST:
1680                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1681                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1682                 break;
1683         case BONDING_MODE_8023AD:
1684                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1685                         return -1;
1686
1687                 if (internals->mode4.dedicated_queues.enabled == 0) {
1688                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1689                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
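                             /*
                              * Without dedicated queues the LACP state
                              * machines piggyback on the data path: LACPDUs
                              * are only sent/received inside the rx/tx burst
                              * functions, so the application must call them
                              * often enough to service the LACP timers.
                              */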
1690                         RTE_LOG(WARNING, PMD,
1691                                 "Using mode 4, it is necessary to do TX burst "
1692                                 "and RX burst at least every 100ms.\n");
1693                 } else {
1694                         /* Use flow director's optimization */
1695                         eth_dev->rx_pkt_burst =
1696                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1697                         eth_dev->tx_pkt_burst =
1698                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1699                 }
1700                 break;
1701         case BONDING_MODE_TLB:
1702                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1703                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1704                 break;
1705         case BONDING_MODE_ALB:
1706                 if (bond_mode_alb_enable(eth_dev) != 0)
1707                         return -1;
1708
1709                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1710                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1711                 break;
1712         default:
1713                 return -1;
1714         }
1715
1716         internals->mode = mode;
1717
1718         return 0;
1719 }
1720
1721
1722 static int
1723 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1724                 struct rte_eth_dev *slave_eth_dev)
1725 {
1726         int errval = 0;
1727         struct bond_dev_private *internals = (struct bond_dev_private *)
1728                 bonded_eth_dev->data->dev_private;
1729         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1730
1731         if (port->slow_pool == NULL) {
1732                 char mem_name[256];
1733                 int slave_id = slave_eth_dev->data->port_id;
1734
1735                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1736                                 slave_id);
1737                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1738                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1739                         slave_eth_dev->data->numa_node);
1740
1741                 /* Any memory allocation failure in initialization is critical because
1742                  * resources can't be freed, so reinitialization is impossible. */
1743                 if (port->slow_pool == NULL) {
1744                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1745                                 slave_id, mem_name, rte_strerror(rte_errno));
1746                 }
1747         }
1748
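             /*
              * With dedicated queues enabled, one extra rx and tx queue
              * beyond the data queues is reserved on the slave for LACP
              * control traffic; a flow rule installed later in
              * slave_configure() steers slow protocol frames to it.
              */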
1749         if (internals->mode4.dedicated_queues.enabled == 1) {
1750                 /* Configure slow Rx queue */
1751
1752                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1753                                 internals->mode4.dedicated_queues.rx_qid, 128,
1754                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1755                                 NULL, port->slow_pool);
1756                 if (errval != 0) {
1757                         RTE_BOND_LOG(ERR,
1758                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1759                                         slave_eth_dev->data->port_id,
1760                                         internals->mode4.dedicated_queues.rx_qid,
1761                                         errval);
1762                         return errval;
1763                 }
1764
1765                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1766                                 internals->mode4.dedicated_queues.tx_qid, 512,
1767                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1768                                 NULL);
1769                 if (errval != 0) {
1770                         RTE_BOND_LOG(ERR,
1771                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1772                                 slave_eth_dev->data->port_id,
1773                                 internals->mode4.dedicated_queues.tx_qid,
1774                                 errval);
1775                         return errval;
1776                 }
1777         }
1778         return 0;
1779 }
1780
1781 int
1782 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1783                 struct rte_eth_dev *slave_eth_dev)
1784 {
1785         struct bond_rx_queue *bd_rx_q;
1786         struct bond_tx_queue *bd_tx_q;
1787         uint16_t nb_rx_queues;
1788         uint16_t nb_tx_queues;
1789
1790         int errval;
1791         uint16_t q_id;
1792         struct rte_flow_error flow_error;
1793
1794         struct bond_dev_private *internals = (struct bond_dev_private *)
1795                 bonded_eth_dev->data->dev_private;
1796
1797         /* Stop slave */
1798         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1799
1800         /* Enable interrupts on slave device if supported */
1801         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1802                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1803
1804         /* If RSS is enabled for bonding, try to enable it for slaves */
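             /*
              * The same RSS key and hash functions must be applied on every
              * slave so that a given flow hashes to the same queue index no
              * matter which slave receives it.
              */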
1805         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1806                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1807                                 != 0) {
1808                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1809                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1810                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1811                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1812                 } else {
1813                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1814                 }
1815
1816                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1817                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1818                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1819                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1820         }
1821
1822         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1823                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1824
1825         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1826         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1827
1828         if (internals->mode == BONDING_MODE_8023AD) {
1829                 if (internals->mode4.dedicated_queues.enabled == 1) {
1830                         nb_rx_queues++;
1831                         nb_tx_queues++;
1832                 }
1833         }
1834
1835         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1836                                      bonded_eth_dev->data->mtu);
1837         if (errval != 0 && errval != -ENOTSUP) {
1838                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1839                                 slave_eth_dev->data->port_id, errval);
1840                 return errval;
1841         }
1842
1843         /* Configure device */
1844         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1845                         nb_rx_queues, nb_tx_queues,
1846                         &(slave_eth_dev->data->dev_conf));
1847         if (errval != 0) {
1848                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1849                                 slave_eth_dev->data->port_id, errval);
1850                 return errval;
1851         }
1852
1853         /* Setup Rx Queues */
1854         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1855                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1856
1857                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1858                                 bd_rx_q->nb_rx_desc,
1859                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1860                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1861                 if (errval != 0) {
1862                         RTE_BOND_LOG(ERR,
1863                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1864                                         slave_eth_dev->data->port_id, q_id, errval);
1865                         return errval;
1866                 }
1867         }
1868
1869         /* Setup Tx Queues */
1870         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1871                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1872
1873                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1874                                 bd_tx_q->nb_tx_desc,
1875                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1876                                 &bd_tx_q->tx_conf);
1877                 if (errval != 0) {
1878                         RTE_BOND_LOG(ERR,
1879                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1880                                 slave_eth_dev->data->port_id, q_id, errval);
1881                         return errval;
1882                 }
1883         }
1884
1885         if (internals->mode == BONDING_MODE_8023AD &&
1886                         internals->mode4.dedicated_queues.enabled == 1) {
1887                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1888                 if (errval != 0)
1889                         return errval;
1890
1891                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1892                                 slave_eth_dev->data->port_id) != 0) {
1893                         RTE_BOND_LOG(ERR,
1894                                 "8023ad flow verify failed: port=%d",
1895                                 slave_eth_dev->data->port_id);
1896                         return -1;
1897                 }
1898
1899                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1900                         rte_flow_destroy(slave_eth_dev->data->port_id,
1901                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1902                                         &flow_error);
1903
1904                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1905                                 slave_eth_dev->data->port_id);
1906         }
1907
1908         /* Start device */
1909         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1910         if (errval != 0) {
1911                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1912                                 slave_eth_dev->data->port_id, errval);
1913                 return -1;
1914         }
1915
1916         /* If RSS is enabled for bonding, synchronize RETA */
1917         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1918                 int i;
1919                 struct bond_dev_private *internals;
1920
1921                 internals = bonded_eth_dev->data->dev_private;
1922
1923                 for (i = 0; i < internals->slave_count; i++) {
1924                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1925                                 errval = rte_eth_dev_rss_reta_update(
1926                                                 slave_eth_dev->data->port_id,
1927                                                 &internals->reta_conf[0],
1928                                                 internals->slaves[i].reta_size);
1929                                 if (errval != 0) {
1930                                         RTE_LOG(WARNING, PMD,
1931                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1932                                                         " RSS Configuration for bonding may be inconsistent.\n",
1933                                                         slave_eth_dev->data->port_id, errval);
1934                                 }
1935                                 break;
1936                         }
1937                 }
1938         }
1939
1940         /* If lsc interrupt is set, check initial slave's link status */
1941         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1942                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1943                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1944                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1945                         NULL);
1946         }
1947
1948         return 0;
1949 }
1950
1951 void
1952 slave_remove(struct bond_dev_private *internals,
1953                 struct rte_eth_dev *slave_eth_dev)
1954 {
1955         uint8_t i;
1956
1957         for (i = 0; i < internals->slave_count; i++)
1958                 if (internals->slaves[i].port_id ==
1959                                 slave_eth_dev->data->port_id)
1960                         break;
1961
1962         if (i < (internals->slave_count - 1))
1963                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1964                                 sizeof(internals->slaves[0]) *
1965                                 (internals->slave_count - i - 1));
1966
1967         internals->slave_count--;
1968
1969         /* force reconfiguration of slave interfaces */
1970         _rte_eth_dev_reset(slave_eth_dev);
1971 }
1972
1973 static void
1974 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1975
1976 void
1977 slave_add(struct bond_dev_private *internals,
1978                 struct rte_eth_dev *slave_eth_dev)
1979 {
1980         struct bond_slave_details *slave_details =
1981                         &internals->slaves[internals->slave_count];
1982
1983         slave_details->port_id = slave_eth_dev->data->port_id;
1984         slave_details->last_link_status = 0;
1985
1986         /* Mark slave devices that don't support interrupts so we can
1987          * compensate when we start the bond
1988          */
1989         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1990                 slave_details->link_status_poll_enabled = 1;
1991         }
1992
1993         slave_details->link_status_wait_to_complete = 0;
1994         /* Save slave's current MAC so it can be restored on removal */
1995         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1996                         sizeof(struct ether_addr));
1997 }
1998
1999 void
2000 bond_ethdev_primary_set(struct bond_dev_private *internals,
2001                 uint16_t slave_port_id)
2002 {
2003         int i;
2004
2005         if (internals->active_slave_count < 1)
2006                 internals->current_primary_port = slave_port_id;
2007         else
2008                 /* Search bonded device slave ports for new proposed primary port */
2009                 for (i = 0; i < internals->active_slave_count; i++) {
2010                         if (internals->active_slaves[i] == slave_port_id)
2011                                 internals->current_primary_port = slave_port_id;
2012                 }
2013 }
2014
2015 static void
2016 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2017
2018 static int
2019 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2020 {
2021         struct bond_dev_private *internals;
2022         int i;
2023
2024         /* slave eth dev will be started by bonded device */
2025         if (check_for_bonded_ethdev(eth_dev)) {
2026                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2027                                 eth_dev->data->port_id);
2028                 return -1;
2029         }
2030
2031         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2032         eth_dev->data->dev_started = 1;
2033
2034         internals = eth_dev->data->dev_private;
2035
2036         if (internals->slave_count == 0) {
2037                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2038                 return -1;
2039         }
2040
2041         if (internals->user_defined_mac == 0) {
2042                 struct ether_addr *new_mac_addr = NULL;
2043
2044                 for (i = 0; i < internals->slave_count; i++)
2045                         if (internals->slaves[i].port_id == internals->primary_port)
2046                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2047
2048                 if (new_mac_addr == NULL)
2049                         return -1;
2050
2051                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2052                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2053                                         eth_dev->data->port_id);
2054                         return -1;
2055                 }
2056         }
2057
2058         /* Update all slave devices MACs*/
2059         if (mac_address_slaves_update(eth_dev) != 0)
2060                 return -1;
2061
2062         /* If bonded device is configured in promiscuous mode then re-apply config */
2063         if (internals->promiscuous_en)
2064                 bond_ethdev_promiscuous_enable(eth_dev);
2065
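             /*
              * The dedicated LACP queues take the first indexes past the
              * application-visible queues; slave_configure() below sets up
              * one extra rx/tx queue per slave to back them.
              */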
2066         if (internals->mode == BONDING_MODE_8023AD) {
2067                 if (internals->mode4.dedicated_queues.enabled == 1) {
2068                         internals->mode4.dedicated_queues.rx_qid =
2069                                         eth_dev->data->nb_rx_queues;
2070                         internals->mode4.dedicated_queues.tx_qid =
2071                                         eth_dev->data->nb_tx_queues;
2072                 }
2073         }
2074
2075
2076         /* Reconfigure each slave device if starting bonded device */
2077         for (i = 0; i < internals->slave_count; i++) {
2078                 struct rte_eth_dev *slave_ethdev =
2079                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2080                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2081                         RTE_BOND_LOG(ERR,
2082                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2083                                 eth_dev->data->port_id,
2084                                 internals->slaves[i].port_id);
2085                         return -1;
2086                 }
2087                 /* We will need to poll for link status if any slave doesn't
2088                  * support interrupts
2089                  */
2090                 if (internals->slaves[i].link_status_poll_enabled)
2091                         internals->link_status_polling_enabled = 1;
2092         }
2093         /* start polling if needed */
2094         if (internals->link_status_polling_enabled) {
2095                 rte_eal_alarm_set(
2096                         internals->link_status_polling_interval_ms * 1000,
2097                         bond_ethdev_slave_link_status_change_monitor,
2098                         (void *)&rte_eth_devices[internals->port_id]);
2099         }
2100
2101         if (internals->user_defined_primary_port)
2102                 bond_ethdev_primary_set(internals, internals->primary_port);
2103
2104         if (internals->mode == BONDING_MODE_8023AD)
2105                 bond_mode_8023ad_start(eth_dev);
2106
2107         if (internals->mode == BONDING_MODE_TLB ||
2108                         internals->mode == BONDING_MODE_ALB)
2109                 bond_tlb_enable(internals);
2110
2111         return 0;
2112 }
2113
2114 static void
2115 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2116 {
2117         uint16_t i;
2118
2119         if (dev->data->rx_queues != NULL) {
2120                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2121                         rte_free(dev->data->rx_queues[i]);
2122                         dev->data->rx_queues[i] = NULL;
2123                 }
2124                 dev->data->nb_rx_queues = 0;
2125         }
2126
2127         if (dev->data->tx_queues != NULL) {
2128                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2129                         rte_free(dev->data->tx_queues[i]);
2130                         dev->data->tx_queues[i] = NULL;
2131                 }
2132                 dev->data->nb_tx_queues = 0;
2133         }
2134 }
2135
2136 void
2137 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2138 {
2139         struct bond_dev_private *internals = eth_dev->data->dev_private;
2140         uint8_t i;
2141
2142         if (internals->mode == BONDING_MODE_8023AD) {
2143                 struct port *port;
2144                 void *pkt = NULL;
2145
2146                 bond_mode_8023ad_stop(eth_dev);
2147
2148                 /* Discard all messages to/from mode 4 state machines */
2149                 for (i = 0; i < internals->active_slave_count; i++) {
2150                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2151
2152                         RTE_ASSERT(port->rx_ring != NULL);
2153                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2154                                 rte_pktmbuf_free(pkt);
2155
2156                         RTE_ASSERT(port->tx_ring != NULL);
2157                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2158                                 rte_pktmbuf_free(pkt);
2159                 }
2160         }
2161
2162         if (internals->mode == BONDING_MODE_TLB ||
2163                         internals->mode == BONDING_MODE_ALB) {
2164                 bond_tlb_disable(internals);
2165                 for (i = 0; i < internals->active_slave_count; i++)
2166                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2167         }
2168
2169         internals->active_slave_count = 0;
2170         internals->link_status_polling_enabled = 0;
2171         for (i = 0; i < internals->slave_count; i++)
2172                 internals->slaves[i].last_link_status = 0;
2173
2174         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2175         eth_dev->data->dev_started = 0;
2176 }
2177
2178 void
2179 bond_ethdev_close(struct rte_eth_dev *dev)
2180 {
2181         struct bond_dev_private *internals = dev->data->dev_private;
2182         uint16_t bond_port_id = internals->port_id;
2183         int skipped = 0;
2184
2185         RTE_LOG(INFO, PMD, "Closing bonded device %s\n", dev->device->name);
2186         while (internals->slave_count != skipped) {
2187                 uint16_t port_id = internals->slaves[skipped].port_id;
2188
2189                 rte_eth_dev_stop(port_id);
2190
2191                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2192                 RTE_LOG(ERR, PMD,
2193                                 "Failed to remove port %d from bonded device "
2194                                 "%s\n", port_id, dev->device->name);
2195                         skipped++;
2196                 }
2197         }
2198         bond_ethdev_free_queues(dev);
2199         rte_bitmap_reset(internals->vlan_filter_bmp);
2200 }
2201
2202 /* forward declaration */
2203 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2204
2205 static void
2206 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2207 {
2208         struct bond_dev_private *internals = dev->data->dev_private;
2209
2210         uint16_t max_nb_rx_queues = UINT16_MAX;
2211         uint16_t max_nb_tx_queues = UINT16_MAX;
2212
2213         dev_info->max_mac_addrs = 1;
2214
2215         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2216                         internals->candidate_max_rx_pktlen :
2217                         ETHER_MAX_JUMBO_FRAME_LEN;
2218
2219         /* Max number of tx/rx queues that the bonded device can support is the
2220          * minimum value across the bonded slaves, as all slaves must be capable
2221          * of supporting the same number of tx/rx queues.
2222          */
2223         if (internals->slave_count > 0) {
2224                 struct rte_eth_dev_info slave_info;
2225                 uint8_t idx;
2226
2227                 for (idx = 0; idx < internals->slave_count; idx++) {
2228                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2229                                         &slave_info);
2230
2231                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2232                                 max_nb_rx_queues = slave_info.max_rx_queues;
2233
2234                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2235                                 max_nb_tx_queues = slave_info.max_tx_queues;
2236                 }
2237         }
2238
2239         dev_info->max_rx_queues = max_nb_rx_queues;
2240         dev_info->max_tx_queues = max_nb_tx_queues;
2241
2242         /**
2243          * If dedicated hw queues enabled for link bonding device in LACP mode
2244          * then we need to reduce the maximum number of data path queues by 1.
2245          */
2246         if (internals->mode == BONDING_MODE_8023AD &&
2247                 internals->mode4.dedicated_queues.enabled == 1) {
2248                 dev_info->max_rx_queues--;
2249                 dev_info->max_tx_queues--;
2250         }
2251
2252         dev_info->min_rx_bufsize = 0;
2253
2254         dev_info->rx_offload_capa = internals->rx_offload_capa;
2255         dev_info->tx_offload_capa = internals->tx_offload_capa;
2256         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2257
2258         dev_info->reta_size = internals->reta_size;
2259 }
2260
2261 static int
2262 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2263 {
2264         int res;
2265         uint16_t i;
2266         struct bond_dev_private *internals = dev->data->dev_private;
2267
2268         /* don't do this while a slave is being added */
2269         rte_spinlock_lock(&internals->lock);
2270
2271         if (on)
2272                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2273         else
2274                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2275
2276         for (i = 0; i < internals->slave_count; i++) {
2277                 uint16_t port_id = internals->slaves[i].port_id;
2278
2279                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2280                 if (res == -ENOTSUP)
2281                         RTE_LOG(WARNING, PMD,
2282                                 "Setting VLAN filter on slave port %u not supported.\n",
2283                                 port_id);
2284         }
2285
2286         rte_spinlock_unlock(&internals->lock);
2287         return 0;
2288 }
2289
2290 static int
2291 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2292                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2293                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2294 {
2295         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2296                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2297                                         0, dev->data->numa_node);
2298         if (bd_rx_q == NULL)
2299                 return -1;
2300
2301         bd_rx_q->queue_id = rx_queue_id;
2302         bd_rx_q->dev_private = dev->data->dev_private;
2303
2304         bd_rx_q->nb_rx_desc = nb_rx_desc;
2305
2306         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2307         bd_rx_q->mb_pool = mb_pool;
2308
2309         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2310
2311         return 0;
2312 }
2313
2314 static int
2315 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2316                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2317                 const struct rte_eth_txconf *tx_conf)
2318 {
2319         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2320                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2321                                         0, dev->data->numa_node);
2322
2323         if (bd_tx_q == NULL)
2324                 return -1;
2325
2326         bd_tx_q->queue_id = tx_queue_id;
2327         bd_tx_q->dev_private = dev->data->dev_private;
2328
2329         bd_tx_q->nb_tx_desc = nb_tx_desc;
2330         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2331
2332         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2333
2334         return 0;
2335 }
2336
2337 static void
2338 bond_ethdev_rx_queue_release(void *queue)
2339 {
2340         if (queue == NULL)
2341                 return;
2342
2343         rte_free(queue);
2344 }
2345
2346 static void
2347 bond_ethdev_tx_queue_release(void *queue)
2348 {
2349         if (queue == NULL)
2350                 return;
2351
2352         rte_free(queue);
2353 }
2354
2355 static void
2356 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2357 {
2358         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2359         struct bond_dev_private *internals;
2360
2361         /* Default value for polling slave found is true as we don't want to
2362          * disable the polling thread if we cannot get the lock */
2363         int i, polling_slave_found = 1;
2364
2365         if (cb_arg == NULL)
2366                 return;
2367
2368         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2369         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2370
2371         if (!bonded_ethdev->data->dev_started ||
2372                 !internals->link_status_polling_enabled)
2373                 return;
2374
2375         /* If device is currently being configured then don't check slaves link
2376          * status, wait until next period */
2377         if (rte_spinlock_trylock(&internals->lock)) {
2378                 if (internals->slave_count > 0)
2379                         polling_slave_found = 0;
2380
2381                 for (i = 0; i < internals->slave_count; i++) {
2382                         if (!internals->slaves[i].link_status_poll_enabled)
2383                                 continue;
2384
2385                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2386                         polling_slave_found = 1;
2387
2388                         /* Update slave link status */
2389                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2390                                         internals->slaves[i].link_status_wait_to_complete);
2391
2392                         /* if link status has changed since last checked then call lsc
2393                          * event callback */
2394                         if (slave_ethdev->data->dev_link.link_status !=
2395                                         internals->slaves[i].last_link_status) {
2396                                 internals->slaves[i].last_link_status =
2397                                                 slave_ethdev->data->dev_link.link_status;
2398
2399                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2400                                                 RTE_ETH_EVENT_INTR_LSC,
2401                                                 &bonded_ethdev->data->port_id,
2402                                                 NULL);
2403                         }
2404                 }
2405                 rte_spinlock_unlock(&internals->lock);
2406         }
2407
2408         if (polling_slave_found)
2409                 /* Set alarm to continue monitoring link status of slave ethdev's */
2410                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2411                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2412 }
2413
2414 static int
2415 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2416 {
2417         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2418
2419         struct bond_dev_private *bond_ctx;
2420         struct rte_eth_link slave_link;
2421
2422         uint32_t idx;
2423
2424         bond_ctx = ethdev->data->dev_private;
2425
2426         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2427
2428         if (ethdev->data->dev_started == 0 ||
2429                         bond_ctx->active_slave_count == 0) {
2430                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2431                 return 0;
2432         }
2433
2434         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2435
2436         if (wait_to_complete)
2437                 link_update = rte_eth_link_get;
2438         else
2439                 link_update = rte_eth_link_get_nowait;
2440
2441         switch (bond_ctx->mode) {
2442         case BONDING_MODE_BROADCAST:
2443                 /**
2444                  * Setting link speed to UINT32_MAX to ensure we pick up the
2445                  * value of the first active slave
2446                  */
2447                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2448
2449                 /**
2450                  * link speed is minimum value of all the slaves link speed as
2451                  * packet loss will occur on this slave if transmission at rates
2452                  * greater than this are attempted
2453                  */
2454                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2455                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2456
2457                         if (slave_link.link_speed <
2458                                         ethdev->data->dev_link.link_speed)
2459                                 ethdev->data->dev_link.link_speed =
2460                                                 slave_link.link_speed;
2461                 }
2462                 break;
2463         case BONDING_MODE_ACTIVE_BACKUP:
2464                 /* Current primary slave */
2465                 link_update(bond_ctx->current_primary_port, &slave_link);
2466
2467                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2468                 break;
2469         case BONDING_MODE_8023AD:
2470                 ethdev->data->dev_link.link_autoneg =
2471                                 bond_ctx->mode4.slave_link.link_autoneg;
2472                 ethdev->data->dev_link.link_duplex =
2473                                 bond_ctx->mode4.slave_link.link_duplex;
2474                 /* fall through to update link speed */
2475         case BONDING_MODE_ROUND_ROBIN:
2476         case BONDING_MODE_BALANCE:
2477         case BONDING_MODE_TLB:
2478         case BONDING_MODE_ALB:
2479         default:
2480                 /**
2481                  * In these modes the maximum theoretical link speed is the sum
2482                  * of all the slaves' link speeds
2483                  */
2484                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2485
2486                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2487                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2488
2489                         ethdev->data->dev_link.link_speed +=
2490                                         slave_link.link_speed;
2491                 }
2492         }
2493
2494
2495         return 0;
2496 }
2497
2498
2499 static int
2500 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2501 {
2502         struct bond_dev_private *internals = dev->data->dev_private;
2503         struct rte_eth_stats slave_stats;
2504         int i, j;
2505
2506         for (i = 0; i < internals->slave_count; i++) {
2507                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2508
2509                 stats->ipackets += slave_stats.ipackets;
2510                 stats->opackets += slave_stats.opackets;
2511                 stats->ibytes += slave_stats.ibytes;
2512                 stats->obytes += slave_stats.obytes;
2513                 stats->imissed += slave_stats.imissed;
2514                 stats->ierrors += slave_stats.ierrors;
2515                 stats->oerrors += slave_stats.oerrors;
2516                 stats->rx_nombuf += slave_stats.rx_nombuf;
2517
2518                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2519                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2520                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2521                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2522                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2523                         stats->q_errors[j] += slave_stats.q_errors[j];
2524                 }
2525
2526         }
2527
2528         return 0;
2529 }
2530
2531 static void
2532 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2533 {
2534         struct bond_dev_private *internals = dev->data->dev_private;
2535         int i;
2536
2537         for (i = 0; i < internals->slave_count; i++)
2538                 rte_eth_stats_reset(internals->slaves[i].port_id);
2539 }
2540
2541 static void
2542 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2543 {
2544         struct bond_dev_private *internals = eth_dev->data->dev_private;
2545         int i;
2546
2547         internals->promiscuous_en = 1;
2548
2549         switch (internals->mode) {
2550         /* Promiscuous mode is propagated to all slaves */
2551         case BONDING_MODE_ROUND_ROBIN:
2552         case BONDING_MODE_BALANCE:
2553         case BONDING_MODE_BROADCAST:
2554                 for (i = 0; i < internals->slave_count; i++)
2555                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2556                 break;
2557         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2558         case BONDING_MODE_8023AD:
2559                 break;
2560         /* Promiscuous mode is propagated only to primary slave */
2561         case BONDING_MODE_ACTIVE_BACKUP:
2562         case BONDING_MODE_TLB:
2563         case BONDING_MODE_ALB:
2564         default:
2565                 rte_eth_promiscuous_enable(internals->current_primary_port);
2566         }
2567 }
2568
2569 static void
2570 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2571 {
2572         struct bond_dev_private *internals = dev->data->dev_private;
2573         int i;
2574
2575         internals->promiscuous_en = 0;
2576
2577         switch (internals->mode) {
2578         /* Promiscuous mode is propagated to all slaves */
2579         case BONDING_MODE_ROUND_ROBIN:
2580         case BONDING_MODE_BALANCE:
2581         case BONDING_MODE_BROADCAST:
2582                 for (i = 0; i < internals->slave_count; i++)
2583                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2584                 break;
2585         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2586         case BONDING_MODE_8023AD:
2587                 break;
2588         /* Promiscuous mode is propagated only to primary slave */
2589         case BONDING_MODE_ACTIVE_BACKUP:
2590         case BONDING_MODE_TLB:
2591         case BONDING_MODE_ALB:
2592         default:
2593                 rte_eth_promiscuous_disable(internals->current_primary_port);
2594         }
2595 }
2596
2597 static void
2598 bond_ethdev_delayed_lsc_propagation(void *arg)
2599 {
2600         if (arg == NULL)
2601                 return;
2602
2603         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2604                         RTE_ETH_EVENT_INTR_LSC, NULL);
2605 }
2606
2607 int
2608 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2609                 void *param, void *ret_param __rte_unused)
2610 {
2611         struct rte_eth_dev *bonded_eth_dev;
2612         struct bond_dev_private *internals;
2613         struct rte_eth_link link;
2614         int rc = -1;
2615
2616         int i, valid_slave = 0;
2617         uint8_t active_pos;
2618         uint8_t lsc_flag = 0;
2619
2620         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2621                 return rc;
2622
2623         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2624
2625         if (check_for_bonded_ethdev(bonded_eth_dev))
2626                 return rc;
2627
2628         internals = bonded_eth_dev->data->dev_private;
2629
2630         /* If the device isn't started don't handle interrupts */
2631         if (!bonded_eth_dev->data->dev_started)
2632                 return rc;
2633
2634         /* verify that port_id is a valid slave of bonded port */
2635         for (i = 0; i < internals->slave_count; i++) {
2636                 if (internals->slaves[i].port_id == port_id) {
2637                         valid_slave = 1;
2638                         break;
2639                 }
2640         }
2641
2642         if (!valid_slave)
2643                 return rc;
2644
2645         /* Search for port in active port list */
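             /*
              * find_slave_by_id() returns active_slave_count when the port
              * is not in the active list; the checks below rely on this to
              * tell an already-active slave from a newly active one (and
              * the reverse on link down).
              */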
2646         active_pos = find_slave_by_id(internals->active_slaves,
2647                         internals->active_slave_count, port_id);
2648
2649         rte_eth_link_get_nowait(port_id, &link);
2650         if (link.link_status) {
2651                 if (active_pos < internals->active_slave_count)
2652                         return rc;
2653
2654                 /* if no active slave ports then set this port to be primary port */
2655                 if (internals->active_slave_count < 1) {
2656                         /* If first active slave, then change link status */
2657                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2658                         internals->current_primary_port = port_id;
2659                         lsc_flag = 1;
2660
2661                         mac_address_slaves_update(bonded_eth_dev);
2662                 }
2663
2664                 activate_slave(bonded_eth_dev, port_id);
2665
2666                 /* If user has defined the primary port then default to using it */
2667                 if (internals->user_defined_primary_port &&
2668                                 internals->primary_port == port_id)
2669                         bond_ethdev_primary_set(internals, port_id);
2670         } else {
2671                 if (active_pos == internals->active_slave_count)
2672                         return rc;
2673
2674                 /* Remove from active slave list */
2675                 deactivate_slave(bonded_eth_dev, port_id);
2676
2677                 if (internals->active_slave_count < 1)
2678                         lsc_flag = 1;
2679
2680                 /* Update primary id, take first active slave from list or if none
2681                  * available fall back to the user configured primary port */
2682                 if (port_id == internals->current_primary_port) {
2683                         if (internals->active_slave_count > 0)
2684                                 bond_ethdev_primary_set(internals,
2685                                                 internals->active_slaves[0]);
2686                         else
2687                                 internals->current_primary_port = internals->primary_port;
2688                 }
2689         }
2690
2691         /**
2692          * Update bonded device link properties after any change to active
2693          * slaves
2694          */
2695         bond_ethdev_link_update(bonded_eth_dev, 0);
2696
2697         if (lsc_flag) {
2698                 /* Cancel any possible outstanding interrupts if delays are enabled */
2699                 if (internals->link_up_delay_ms > 0 ||
2700                         internals->link_down_delay_ms > 0)
2701                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2702                                         bonded_eth_dev);
2703
2704                 if (bonded_eth_dev->data->dev_link.link_status) {
2705                         if (internals->link_up_delay_ms > 0)
2706                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2707                                                 bond_ethdev_delayed_lsc_propagation,
2708                                                 (void *)bonded_eth_dev);
2709                         else
2710                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2711                                                 RTE_ETH_EVENT_INTR_LSC,
2712                                                 NULL);
2713
2714                 } else {
2715                         if (internals->link_down_delay_ms > 0)
2716                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2717                                                 bond_ethdev_delayed_lsc_propagation,
2718                                                 (void *)bonded_eth_dev);
2719                         else
2720                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2721                                                 RTE_ETH_EVENT_INTR_LSC,
2722                                                 NULL);
2723                 }
2724         }
2725         return 0;
2726 }
2727
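/*
 * RETA updates arrive in groups of RTE_RETA_GROUP_SIZE (64) entries; the
 * handler below stores the requested groups, replicates them across the
 * rest of the private table and pushes the result to each slave at that
 * slave's own RETA size. A minimal caller-side sketch - bond_port_id,
 * nb_rx_queues and the table size of 128 are illustrative assumptions:
 *
 *	struct rte_eth_rss_reta_entry64 reta_conf[128 / RTE_RETA_GROUP_SIZE];
 *	unsigned int i, j;
 *
 *	for (i = 0; i < RTE_DIM(reta_conf); i++) {
 *		reta_conf[i].mask = ~0ULL;
 *		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
 *			reta_conf[i].reta[j] = (i * RTE_RETA_GROUP_SIZE + j)
 *					% nb_rx_queues;
 *	}
 *	rte_eth_dev_rss_reta_update(bond_port_id, reta_conf, 128);
 *
 * The update is rejected with -EINVAL unless reta_size matches the
 * bonded device's own RETA size.
 */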
2728 static int
2729 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2730                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2731 {
2732         unsigned i, j;
2733         int result = 0;
2734         int slave_reta_size;
2735         unsigned reta_count;
2736         struct bond_dev_private *internals = dev->data->dev_private;
2737
2738         if (reta_size != internals->reta_size)
2739                 return -EINVAL;
2740
2741         /* Copy RETA table */
2742         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2743
2744         for (i = 0; i < reta_count; i++) {
2745                 internals->reta_conf[i].mask = reta_conf[i].mask;
2746                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2747                         if ((reta_conf[i].mask >> j) & 0x01)
2748                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2749         }
2750
2751         /* Fill rest of array */
2752         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2753                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2754                                 sizeof(internals->reta_conf[0]) * reta_count);
2755
2756         /* Propagate RETA over slaves */
2757         for (i = 0; i < internals->slave_count; i++) {
2758                 slave_reta_size = internals->slaves[i].reta_size;
2759                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2760                                 &internals->reta_conf[0], slave_reta_size);
2761                 if (result < 0)
2762                         return result;
2763         }
2764
2765         return 0;
2766 }
2767
2768 static int
2769 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2770                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2771 {
2772         int i, j;
2773         struct bond_dev_private *internals = dev->data->dev_private;
2774
2775         if (reta_size != internals->reta_size)
2776                 return -EINVAL;
2777
2778         /* Copy RETA table out to the caller */
2779         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2780                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2781                         if ((reta_conf[i].mask >> j) & 0x01)
2782                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2783
2784         return 0;
2785 }
2786
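/*
 * Propagates an RSS hash configuration change to every slave. The
 * requested hash functions are first masked with the RSS offloads the
 * bonded device advertises, and a caller-supplied key is cached in
 * internals->rss_key; a key length of 0 is interpreted as the
 * conventional 40-byte (Toeplitz) key size used throughout this file.
 */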
2787 static int
2788 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2789                 struct rte_eth_rss_conf *rss_conf)
2790 {
2791         int i, result = 0;
2792         struct bond_dev_private *internals = dev->data->dev_private;
2793         struct rte_eth_rss_conf bond_rss_conf;
2794
2795         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2796
2797         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2798
2799         if (bond_rss_conf.rss_hf != 0)
2800                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2801
2802         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2803                         sizeof(internals->rss_key)) {
2804                 if (bond_rss_conf.rss_key_len == 0)
2805                         bond_rss_conf.rss_key_len = 40;
2806                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2807                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2808                                 internals->rss_key_len);
2809         }
2810
2811         for (i = 0; i < internals->slave_count; i++) {
2812                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2813                                 &bond_rss_conf);
2814                 if (result < 0)
2815                         return result;
2816         }
2817
2818         return 0;
2819 }
2820
2821 static int
2822 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2823                 struct rte_eth_rss_conf *rss_conf)
2824 {
2825         struct bond_dev_private *internals = dev->data->dev_private;
2826
2827         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2828         rss_conf->rss_key_len = internals->rss_key_len;
2829         if (rss_conf->rss_key)
2830                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2831
2832         return 0;
2833 }
2834
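/*
 * Two-pass MTU update under the device lock: first check that every
 * slave implements the mtu_set op and bail out with -ENOTSUP before
 * anything is modified, then apply the new MTU slave by slave. An
 * unsupported slave is therefore caught up front, although a runtime
 * failure in the second pass can still leave the slaves with mixed MTUs.
 */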
2835 static int
2836 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2837 {
2838         struct rte_eth_dev *slave_eth_dev;
2839         struct bond_dev_private *internals = dev->data->dev_private;
2840         int ret, i;
2841
2842         rte_spinlock_lock(&internals->lock);
2843
2844         for (i = 0; i < internals->slave_count; i++) {
2845                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2846                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2847                         rte_spinlock_unlock(&internals->lock);
2848                         return -ENOTSUP;
2849                 }
2850         }
2851         for (i = 0; i < internals->slave_count; i++) {
2852                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2853                 if (ret < 0) {
2854                         rte_spinlock_unlock(&internals->lock);
2855                         return ret;
2856                 }
2857         }
2858
2859         rte_spinlock_unlock(&internals->lock);
2860         return 0;
2861 }
2862
2863 static void
2864 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2865 {
2866         if (mac_address_set(dev, addr))
2867                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2868 }
2869
2870 const struct eth_dev_ops default_dev_ops = {
2871         .dev_start            = bond_ethdev_start,
2872         .dev_stop             = bond_ethdev_stop,
2873         .dev_close            = bond_ethdev_close,
2874         .dev_configure        = bond_ethdev_configure,
2875         .dev_infos_get        = bond_ethdev_info,
2876         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2877         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2878         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2879         .rx_queue_release     = bond_ethdev_rx_queue_release,
2880         .tx_queue_release     = bond_ethdev_tx_queue_release,
2881         .link_update          = bond_ethdev_link_update,
2882         .stats_get            = bond_ethdev_stats_get,
2883         .stats_reset          = bond_ethdev_stats_reset,
2884         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2885         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2886         .reta_update          = bond_ethdev_rss_reta_update,
2887         .reta_query           = bond_ethdev_rss_reta_query,
2888         .rss_hash_update      = bond_ethdev_rss_hash_update,
2889         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
2890         .mtu_set              = bond_ethdev_mtu_set,
2891         .mac_addr_set         = bond_ethdev_mac_address_set
2892 };
2893
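/*
 * Allocates and initializes the bonded ethdev: a single Rx/Tx queue by
 * default, LSC interrupt support flagged, mode-independent defaults for
 * the private data, and a VLAN filter bitmap covering the full
 * 0..ETHER_MAX_VLAN_ID range. Returns the new port id, or -1 on failure.
 */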
2894 static int
2895 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2896 {
2897         const char *name = rte_vdev_device_name(dev);
2898         uint8_t socket_id = dev->device.numa_node;
2899         struct bond_dev_private *internals = NULL;
2900         struct rte_eth_dev *eth_dev = NULL;
2901         uint32_t vlan_filter_bmp_size;
2902
2903         /* now do all data allocation - for the eth_dev structure, the dummy
2904          * PCI driver and the internal (private) data
2905          */
2906
2907         /* reserve an ethdev entry */
2908         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2909         if (eth_dev == NULL) {
2910                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2911                 goto err;
2912         }
2913
2914         internals = eth_dev->data->dev_private;
2915         eth_dev->data->nb_rx_queues = (uint16_t)1;
2916         eth_dev->data->nb_tx_queues = (uint16_t)1;
2917
2918         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2919                         socket_id);
2920         if (eth_dev->data->mac_addrs == NULL) {
2921                 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2922                 goto err;
2923         }
2924
2925         eth_dev->dev_ops = &default_dev_ops;
2926         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2927
2928         rte_spinlock_init(&internals->lock);
2929
2930         internals->port_id = eth_dev->data->port_id;
2931         internals->mode = BONDING_MODE_INVALID;
2932         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2933         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2934         internals->burst_xmit_hash = burst_xmit_l2_hash;
2935         internals->user_defined_mac = 0;
2936
2937         internals->link_status_polling_enabled = 0;
2938
2939         internals->link_status_polling_interval_ms =
2940                 DEFAULT_POLLING_INTERVAL_10_MS;
2941         internals->link_down_delay_ms = 0;
2942         internals->link_up_delay_ms = 0;
2943
2944         internals->slave_count = 0;
2945         internals->active_slave_count = 0;
2946         internals->rx_offload_capa = 0;
2947         internals->tx_offload_capa = 0;
2948         internals->candidate_max_rx_pktlen = 0;
2949         internals->max_rx_pktlen = 0;
2950
2951         /* Initially allow to choose any offload type */
2952         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2953
2954         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2955         memset(internals->slaves, 0, sizeof(internals->slaves));
2956
2957         /* Set mode 4 default configuration */
2958         bond_mode_8023ad_setup(eth_dev, NULL);
2959         if (bond_ethdev_mode_set(eth_dev, mode)) {
2960                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d to mode %d",
2961                                  eth_dev->data->port_id, mode);
2962                 goto err;
2963         }
2964
2965         vlan_filter_bmp_size =
2966                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2967         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2968                                                    RTE_CACHE_LINE_SIZE);
2969         if (internals->vlan_filter_bmpmem == NULL) {
2970                 RTE_BOND_LOG(ERR,
2971                              "Failed to allocate vlan bitmap for bonded device %u",
2972                              eth_dev->data->port_id);
2973                 goto err;
2974         }
2975
2976         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2977                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2978         if (internals->vlan_filter_bmp == NULL) {
2979                 RTE_BOND_LOG(ERR,
2980                              "Failed to init vlan bitmap for bonded device %u",
2981                              eth_dev->data->port_id);
2982                 rte_free(internals->vlan_filter_bmpmem);
2983                 goto err;
2984         }
2985
2986         return eth_dev->data->port_id;
2987
2988 err:
2989         rte_free(internals);
2990         if (eth_dev != NULL) {
2991                 rte_free(eth_dev->data->mac_addrs);
2992                 rte_eth_dev_release_port(eth_dev);
2993         }
2994         return -1;
2995 }
2996
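/*
 * Probe entry point used when the bonded device is created from the EAL
 * command line. A typical invocation, matching the parameter string
 * registered at the bottom of this file (the slave device names below
 * are illustrative only):
 *
 *	--vdev 'net_bonding0,mode=2,slave=0000:02:00.0,slave=0000:03:00.0,
 *		xmit_policy=l34'
 *
 * "mode" must be given exactly once here; the slaves and remaining
 * options are parsed again from the same kvlist when
 * bond_ethdev_configure() runs, and at least one slave is required by
 * then.
 */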
2997 static int
2998 bond_probe(struct rte_vdev_device *dev)
2999 {
3000         const char *name;
3001         struct bond_dev_private *internals;
3002         struct rte_kvargs *kvlist;
3003         uint8_t bonding_mode, socket_id;
3004         int  arg_count, port_id;
3005         uint8_t agg_mode;
3006
3007         if (!dev)
3008                 return -EINVAL;
3009
3010         name = rte_vdev_device_name(dev);
3011         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
3012
3013         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3014                 pmd_bond_init_valid_arguments);
3015         if (kvlist == NULL)
3016                 return -1;
3017
3018         /* Parse link bonding mode */
3019         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3020                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3021                                 &bond_ethdev_parse_slave_mode_kvarg,
3022                                 &bonding_mode) != 0) {
3023                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
3024                                         name);
3025                         goto parse_error;
3026                 }
3027         } else {
3028                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
3029                                 "device %s\n", name);
3030                 goto parse_error;
3031         }
3032
3033         /* Parse socket id to create bonding device on */
3034         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3035         if (arg_count == 1) {
3036                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3037                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3038                                 != 0) {
3039                         RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
3040                                         "bonded device %s\n", name);
3041                         goto parse_error;
3042                 }
3043         } else if (arg_count > 1) {
3044                 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
3045                                 "bonded device %s\n", name);
3046                 goto parse_error;
3047         } else {
3048                 socket_id = rte_socket_id();
3049         }
3050
3051         dev->device.numa_node = socket_id;
3052
3053         /* Create link bonding eth device */
3054         port_id = bond_alloc(dev, bonding_mode);
3055         if (port_id < 0) {
3056                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
3057                                 "socket %u.\n", name, bonding_mode, socket_id);
3058                 goto parse_error;
3059         }
3060         internals = rte_eth_devices[port_id].data->dev_private;
3061         internals->kvlist = kvlist;
3062
3064         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3065                 if (rte_kvargs_process(kvlist,
3066                                 PMD_BOND_AGG_MODE_KVARG,
3067                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3068                                 &agg_mode) != 0) {
3069                         RTE_LOG(ERR, EAL,
3070                                         "Failed to parse agg selection mode for bonded device %s\n",
3071                                         name);
3072                         goto parse_error;
3073                 }
3074
3075                 if (internals->mode == BONDING_MODE_8023AD)
3076                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3077                                         agg_mode);
3078         } else {
3079                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3080         }
3081
3082         RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
3083                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
3084         return 0;
3085
3086 parse_error:
3087         rte_kvargs_free(kvlist);
3088
3089         return -1;
3090 }
3091
3092 static int
3093 bond_remove(struct rte_vdev_device *dev)
3094 {
3095         struct rte_eth_dev *eth_dev;
3096         struct bond_dev_private *internals;
3097         const char *name;
3098
3099         if (!dev)
3100                 return -EINVAL;
3101
3102         name = rte_vdev_device_name(dev);
3103         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
3104
3105         /* now free all allocated data - the eth_dev structure, the dummy
3106          * PCI driver and the internal (private) data
3107          */
3108
3109         /* find an ethdev entry */
3110         eth_dev = rte_eth_dev_allocated(name);
3111         if (eth_dev == NULL)
3112                 return -ENODEV;
3113
3114         RTE_ASSERT(eth_dev->device == &dev->device);
3115
3116         internals = eth_dev->data->dev_private;
3117         if (internals->slave_count != 0)
3118                 return -EBUSY;
3119
3120         if (eth_dev->data->dev_started == 1) {
3121                 bond_ethdev_stop(eth_dev);
3122                 bond_ethdev_close(eth_dev);
3123         }
3124
3125         eth_dev->dev_ops = NULL;
3126         eth_dev->rx_pkt_burst = NULL;
3127         eth_dev->tx_pkt_burst = NULL;
3128
3130         rte_bitmap_free(internals->vlan_filter_bmp);
3131         rte_free(internals->vlan_filter_bmpmem);
3132         rte_free(eth_dev->data->dev_private);
3133         rte_free(eth_dev->data->mac_addrs);
3134
3135         rte_eth_dev_release_port(eth_dev);
3136
3137         return 0;
3138 }
3139
3140 /* this part resolves the slave port ids after all the other pdevs and vdevs
3141  * have been probed, since those slaves may not exist yet at bond probe time */
3142 static int
3143 bond_ethdev_configure(struct rte_eth_dev *dev)
3144 {
3145         const char *name = dev->device->name;
3146         struct bond_dev_private *internals = dev->data->dev_private;
3147         struct rte_kvargs *kvlist = internals->kvlist;
3148         int arg_count;
3149         uint16_t port_id = dev - rte_eth_devices;
3150         uint8_t agg_mode;
3151
3152         static const uint8_t default_rss_key[40] = {
3153                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3154                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3155                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3156                 0xBE, 0xAC, 0x01, 0xFA
3157         };
3158
3159         unsigned i, j;
3160
3161         /* If RSS is enabled, fill table and key with default values */
3162         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3163                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
3164                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
3165                 memcpy(internals->rss_key, default_rss_key, 40);
3166
3167                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3168                         internals->reta_conf[i].mask = ~0LL;
3169                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3170                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
3171                 }
3172         }
3173
3174         /* set the max_rx_pktlen */
3175         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3176
3177         /*
3178          * if no kvlist, it means that this bonded device has been created
3179          * through the bonding api.
3180          */
3181         if (!kvlist)
3182                 return 0;
3183
3184         /* Parse MAC address for bonded device */
3185         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3186         if (arg_count == 1) {
3187                 struct ether_addr bond_mac;
3188
3189                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3190                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3191                         RTE_LOG(INFO, EAL, "Invalid MAC address for bonded device %s\n",
3192                                         name);
3193                         return -1;
3194                 }
3195
3196                 /* Set MAC address */
3197                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3198                         RTE_LOG(ERR, EAL,
3199                                         "Failed to set mac address on bonded device %s\n",
3200                                         name);
3201                         return -1;
3202                 }
3203         } else if (arg_count > 1) {
3204                 RTE_LOG(ERR, EAL,
3205                                 "MAC address can be specified only once for bonded device %s\n",
3206                                 name);
3207                 return -1;
3208         }
3209
3210         /* Parse/set balance mode transmit policy */
3211         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3212         if (arg_count == 1) {
3213                 uint8_t xmit_policy;
3214
3215                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3216                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3217                                                 0) {
3218                         RTE_LOG(INFO, EAL,
3219                                         "Invalid xmit policy specified for bonded device %s\n",
3220                                         name);
3221                         return -1;
3222                 }
3223
3224                 /* Set balance mode transmit policy */
3225                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3226                         RTE_LOG(ERR, EAL,
3227                                         "Failed to set balance xmit policy on bonded device %s\n",
3228                                         name);
3229                         return -1;
3230                 }
3231         } else if (arg_count > 1) {
3232                 RTE_LOG(ERR, EAL,
3233                                 "Transmit policy can be specified only once for bonded device"
3234                                 " %s\n", name);
3235                 return -1;
3236         }
3237
3238         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3239                 if (rte_kvargs_process(kvlist,
3240                                 PMD_BOND_AGG_MODE_KVARG,
3241                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3242                                 &agg_mode) != 0) {
3243                         RTE_LOG(ERR, EAL,
3244                                         "Failed to parse agg selection mode for bonded device %s\n",
3245                                         name);
                             return -1;
3246                 }
3247                 if (internals->mode == BONDING_MODE_8023AD)
3248                                 rte_eth_bond_8023ad_agg_selection_set(port_id,
3249                                                 agg_mode);
3250         }
3251
3252         /* Parse/add slave ports to bonded device */
3253         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3254                 struct bond_ethdev_slave_ports slave_ports;
3255                 unsigned i;
3256
3257                 memset(&slave_ports, 0, sizeof(slave_ports));
3258
3259                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3260                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3261                         RTE_LOG(ERR, EAL,
3262                                         "Failed to parse slave ports for bonded device %s\n",
3263                                         name);
3264                         return -1;
3265                 }
3266
3267                 for (i = 0; i < slave_ports.slave_count; i++) {
3268                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3269                                 RTE_LOG(ERR, EAL,
3270                                                 "Failed to add port %d as slave to bonded device %s\n",
3271                                                 slave_ports.slaves[i], name);
3272                         }
3273                 }
3274
3275         } else {
3276                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3277                 return -1;
3278         }
3279
3280         /* Parse/set primary slave port id*/
3281         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3282         if (arg_count == 1) {
3283                 uint16_t primary_slave_port_id;
3284
3285                 if (rte_kvargs_process(kvlist,
3286                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
3287                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3288                                 &primary_slave_port_id) < 0) {
3289                         RTE_LOG(INFO, EAL,
3290                                         "Invalid primary slave port id specified for bonded device"
3291                                         " %s\n", name);
3292                         return -1;
3293                 }
3294
3295                 /* Set primary slave port id */
3296                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3297                                 != 0) {
3298                         RTE_LOG(ERR, EAL,
3299                                         "Failed to set primary slave port %d on bonded device %s\n",
3300                                         primary_slave_port_id, name);
3301                         return -1;
3302                 }
3303         } else if (arg_count > 1) {
3304                 RTE_LOG(INFO, EAL,
3305                                 "Primary slave can be specified only once for bonded device"
3306                                 " %s\n", name);
3307                 return -1;
3308         }
3309
3310         /* Parse link status monitor polling interval */
3311         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3312         if (arg_count == 1) {
3313                 uint32_t lsc_poll_interval_ms;
3314
3315                 if (rte_kvargs_process(kvlist,
3316                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3317                                 &bond_ethdev_parse_time_ms_kvarg,
3318                                 &lsc_poll_interval_ms) < 0) {
3319                         RTE_LOG(INFO, EAL,
3320                                         "Invalid lsc polling interval value specified for bonded"
3321                                         " device %s\n", name);
3322                         return -1;
3323                 }
3324
3325                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3326                                 != 0) {
3327                         RTE_LOG(ERR, EAL,
3328                                         "Failed to set lsc monitor polling interval (%u ms) on"
3329                                         " bonded device %s\n", lsc_poll_interval_ms, name);
3330                         return -1;
3331                 }
3332         } else if (arg_count > 1) {
3333                 RTE_LOG(INFO, EAL,
3334                                 "LSC polling interval can be specified only once for bonded"
3335                                 " device %s\n", name);
3336                 return -1;
3337         }
3338
3339         /* Parse link up interrupt propagation delay */
3340         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3341         if (arg_count == 1) {
3342                 uint32_t link_up_delay_ms;
3343
3344                 if (rte_kvargs_process(kvlist,
3345                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3346                                 &bond_ethdev_parse_time_ms_kvarg,
3347                                 &link_up_delay_ms) < 0) {
3348                         RTE_LOG(INFO, EAL,
3349                                         "Invalid link up propagation delay value specified for"
3350                                         " bonded device %s\n", name);
3351                         return -1;
3352                 }
3353
3354                 /* Set link up propagation delay */
3355                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3356                                 != 0) {
3357                         RTE_LOG(ERR, EAL,
3358                                         "Failed to set link up propagation delay (%u ms) on bonded"
3359                                         " device %s\n", link_up_delay_ms, name);
3360                         return -1;
3361                 }
3362         } else if (arg_count > 1) {
3363                 RTE_LOG(INFO, EAL,
3364                                 "Link up propagation delay can be specified only once for"
3365                                 " bonded device %s\n", name);
3366                 return -1;
3367         }
3368
3369         /* Parse link down interrupt propagation delay */
3370         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3371         if (arg_count == 1) {
3372                 uint32_t link_down_delay_ms;
3373
3374                 if (rte_kvargs_process(kvlist,
3375                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3376                                 &bond_ethdev_parse_time_ms_kvarg,
3377                                 &link_down_delay_ms) < 0) {
3378                         RTE_LOG(INFO, EAL,
3379                                         "Invalid link down propagation delay value specified for"
3380                                         " bonded device %s\n", name);
3381                         return -1;
3382                 }
3383
3384                 /* Set link down propagation delay */
3385                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3386                                 != 0) {
3387                         RTE_LOG(ERR, EAL,
3388                                         "Failed to set link down propagation delay (%u ms) on"
3389                                         " bonded device %s\n", link_down_delay_ms, name);
3390                         return -1;
3391                 }
3392         } else if (arg_count > 1) {
3393                 RTE_LOG(INFO, EAL,
3394                                 "Link down propagation delay can be specified only once for"
3395                                 " bonded device %s\n", name);
3396                 return -1;
3397         }
3398
3399         return 0;
3400 }
3401
3402 struct rte_vdev_driver pmd_bond_drv = {
3403         .probe = bond_probe,
3404         .remove = bond_remove,
3405 };
3406
3407 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3408 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3409
3410 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3411         "slave=<ifc> "
3412         "primary=<ifc> "
3413         "mode=[0-6] "
3414         "xmit_policy=[l2 | l23 | l34] "
3415         "agg_mode=[count | stable | bandwidth] "
3416         "socket_id=<int> "
3417         "mac=<mac addr> "
3418         "lsc_poll_period_ms=<int> "
3419         "up_delay=<int> "
3420         "down_delay=<int>");