drivers/net/bonding/rte_eth_bond_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

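/*
 * Return the number of bytes occupied by VLAN headers following the
 * Ethernet header, advancing *proto past them. A single 802.1Q tag and a
 * double-tagged (QinQ) frame are handled, so the result is 0, 4 or 8
 * bytes. For example, for an IPv4 packet inside one VLAN tag, *proto ends
 * up as ETHER_TYPE_IPv4 and the returned offset is sizeof(struct vlan_hdr).
 */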
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}

static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_slave = 0;
	uint16_t num_rx_total = 0;

	int i;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;


	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
		/* The offset into *bufs increases as packets are received
		 * from the previous slaves */
		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
		if (num_rx_slave) {
			num_rx_total += num_rx_slave;
			nb_pkts -= num_rx_slave;
		}
	}

	return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}

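/*
 * A frame counts as a slow protocol (LACP or marker) packet only when it
 * is untagged and carries ethertype ETHER_TYPE_SLOW with a LACP or marker
 * subtype. Untagged-ness is checked through the PKT_RX_VLAN offload flag,
 * so a frame whose VLAN tag was stripped by hardware is still treated as
 * tagged and therefore not matched here.
 */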
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};

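/*
 * With dedicated queues enabled, each slave gets an rte_flow rule built
 * from the pattern and attributes above: it matches on ethertype
 * ETHER_TYPE_SLOW and steers LACPDUs to a reserved slave RX queue, so the
 * data-path RX burst no longer has to inspect every packet for slow
 * protocol frames.
 */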
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities don't allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify that all slaves in the bonded device support flow director */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t i, idx;

	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0, idx = internals->active_slave;
			i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
		idx = idx % slave_count;

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);
	}

	internals->active_slave = idx;

	return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	/* 2-D array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves;
	 * must hold one entry per packet, not per port */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i, j;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);


	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (unlikely(dist_slave_count < 1))
		return 0;

	/*
	 * Populate each slave's mbuf array with the packets which are to be
	 * sent on it, selecting the output slave using a hash based on the
	 * xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}


	/* Send packet burst on each slave device */
	for (i = 0; i < dist_slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			slave_tx_fail_count[i] = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count[i];

			/*
			 * Shift bufs to beginning of array to allow reordering
			 * later; the first unsent mbuf is at index
			 * slave_tx_count
			 */
			for (j = 0; j < slave_tx_fail_count[i]; j++) {
				slave_bufs[i][j] =
					slave_bufs[i][slave_tx_count + j];
			}
		}
	}

	/*
	 * If there are tx burst failures we move packets to end of bufs to
	 * preserve expected PMD behaviour of all failed transmissions being
	 * at the end of the input mbuf array
	 */
	if (unlikely(total_tx_fail_count > 0)) {
		int bufs_idx = nb_bufs - total_tx_fail_count;

		for (i = 0; i < slave_count; i++) {
			if (slave_tx_fail_count[i] > 0) {
				for (j = 0; j < slave_tx_fail_count[i]; j++)
					bufs[bufs_idx++] = slave_bufs[i][j];
			}
		}
	}

	return total_tx_count;
}


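/*
 * Mode 4 RX without dedicated queues: slow protocol frames arrive mixed
 * with data traffic, so after polling each slave the burst is scanned.
 * LACPDUs and markers are handed to the mode 4 state machine, while data
 * packets from non-collecting slaves, or unicast packets not addressed to
 * the bonded MAC (outside promiscuous mode), are dropped.
 */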
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_addr bond_mac;

	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;  /* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t i, j, k;
	uint8_t subtype;

	rte_eth_macaddr_get(internals->port_id, &bond_mac);
	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
					 COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {

			/* A known packet type above pure L2 cannot be a slow
			 * protocol frame; keep it and move on */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove the packet from the array if it is a slow
			 * packet, if the slave is not in collecting state, or
			 * if the bonded interface is not in promiscuous mode
			 * and the destination address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
				!collecting || (!promisc &&
					!is_multicast_ether_addr(&hdr->d_addr) &&
					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
					    internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	internals->active_slave = idx;
	return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}
	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
	return;
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}

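/*
 * Per-client traffic counters used by the mode 6 (ALB) debug output. The
 * table is keyed by (IPv4 address, slave port) and searched linearly,
 * which is adequate for the bounded MAX_CLIENTS_NUMBER entries below.
 */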
#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++)	{
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))	{
			/* Just update the RX or TX packet count for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update the stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;

}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)	\
		RTE_LOG(DEBUG, PMD, \
		"%s " \
		"port:%d " \
		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"DstIP:%s " \
		"%s " \
		"%d\n", \
		info, \
		port, \
		eth_h->s_addr.addr_bytes[0], \
		eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], \
		eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], \
		eth_h->s_addr.addr_bytes[5], \
		src_ip, \
		eth_h->d_addr.addr_bytes[0], \
		eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], \
		eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], \
		eth_h->d_addr.addr_bytes[5], \
		dst_ip, \
		arp_op, \
		++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	strlcpy(buf, info, 16);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif

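/*
 * Mode 6 (adaptive load balancing) RX path: receive as in round-robin
 * mode, then hand any ARP frames to the ALB logic so the client table
 * stays in sync with peers' MAC/IP bindings.
 */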
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate each slave's mbuf array with the packets to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* Increment the current slave index so the next call to tx burst
	 * starts on the next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}

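/*
 * The xmit policy hashes below XOR the relevant header words together;
 * callers then fold the result down before taking it modulo the slave
 * count. For example, under L2 hashing a frame between source MAC
 * 00:11:22:33:44:55 and destination MAC 66:77:88:99:aa:bb reduces to
 * three 16-bit XOR terms, so all traffic between a given MAC pair always
 * maps to the same slave.
 */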
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}


void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint8_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint32_t hash;
	int i;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

		hash = ether_hash(eth_hdr);

		slaves[i] = (hash ^= hash >> 8) % slave_count;
	}
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint8_t slave_count, uint16_t *slaves)
{
	uint16_t i;
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	uint32_t hash, l3hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		l3hash = 0;

		proto = eth_hdr->ether_type;
		hash = ether_hash(eth_hdr);

		vlan_offset = get_vlan_offset(eth_hdr, &proto);

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv4_hash(ipv4_hdr);

		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);
		}

		hash = hash ^ l3hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint8_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	int i;

	struct udp_hdr *udp_hdr;
	struct tcp_hdr *tcp_hdr;
	uint32_t hash, l3hash, l4hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		proto = eth_hdr->ether_type;
		vlan_offset = get_vlan_offset(eth_hdr, &proto);
		l3hash = 0;
		l4hash = 0;

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			size_t ip_hdr_offset;

			l3hash = ipv4_hash(ipv4_hdr);

			/* there is no L4 header in a fragmented packet */
			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
								== 0)) {
				ip_hdr_offset = (ipv4_hdr->version_ihl
					& IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
					tcp_hdr = (struct tcp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					l4hash = HASH_L4_PORTS(tcp_hdr);
				} else if (ipv4_hdr->next_proto_id ==
								IPPROTO_UDP) {
					udp_hdr = (struct udp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					l4hash = HASH_L4_PORTS(udp_hdr);
				}
			}
		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);

			if (ipv6_hdr->proto == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}

		hash = l3hash ^ l4hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}

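/*
 * Mode 5 (TLB) bookkeeping: each slave's remaining transmit bandwidth
 * ("bwg" here apparently stands for the bandwidth gap between link speed
 * and observed load) is tracked as an integer part plus a remainder so
 * slaves can be ordered precisely even when the integer parts tie.
 */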
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}

static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}

static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint8_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint8_t i, slave_id;

	internals->slave_update_idx++;


	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}

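/*
 * Mode 5 (TLB) TX: packets are sent preferentially on the slave with the
 * most bandwidth left (the order computed by the alarm callback above),
 * and the source MAC of packets originating from the primary slave is
 * rewritten to that of the slave actually transmitting them.
 */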
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);


	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}

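/*
 * Mode 6 (ALB) TX: ARP packets are steered by the ALB client table so
 * each peer consistently talks to one slave, generated ARP updates are
 * transmitted whenever the table changes (the mode6.ntt flag, "need to
 * transmit"), and all other traffic falls back to the TLB policy above.
 */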
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional
	 * buffer for packets sent with the TLB policy. In the worst case
	 * every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search the tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on the proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			/* Move any unsent packets to the end of bufs */
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on the proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using the tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		/* Move any unsent packets to the end of bufs */
		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS]
						[slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	/* Array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i, j;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	/*
	 * Populate each slave's mbuf array with the packets which are to be
	 * sent on it, selecting the output slave using a hash based on the
	 * xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			slave_tx_fail_count[i] = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count[i];

			/*
			 * Shift bufs to beginning of array to allow reordering
			 * later; the first unsent mbuf is at index
			 * slave_tx_count
			 */
			for (j = 0; j < slave_tx_fail_count[i]; j++) {
				slave_bufs[i][j] =
					slave_bufs[i][slave_tx_count + j];
			}
		}
	}

	/*
	 * If there are tx burst failures we move packets to end of bufs to
	 * preserve expected PMD behaviour of all failed transmissions being
	 * at the end of the input mbuf array
	 */
	if (unlikely(total_tx_fail_count > 0)) {
		int bufs_idx = nb_bufs - total_tx_fail_count;

		for (i = 0; i < slave_count; i++) {
			if (slave_tx_fail_count[i] > 0) {
				for (j = 0; j < slave_tx_fail_count[i]; j++)
					bufs[bufs_idx++] = slave_bufs[i][j];
			}
		}
	}

	return total_tx_count;
}

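/*
 * Mode 4 (802.3ad) TX without dedicated queues: data packets are hashed
 * across the slaves currently in DISTRIBUTING state, and after the data
 * burst any pending LACPDUs queued by the state machine on each slave's
 * tx_ring are drained onto the same queue.
 */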
1313 static uint16_t
1314 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1315                 uint16_t nb_bufs)
1316 {
1317         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1318         struct bond_dev_private *internals = bd_tx_q->dev_private;
1319
1320         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1321         uint16_t slave_count;
1322
1323         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1324         uint16_t dist_slave_count;
1325
1326         /* 2-D array to sort mbufs for transmission on each slave into */
1327         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1328         /* Number of mbufs for transmission on each slave */
1329         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1330         /* Mapping array generated by hash function to map mbufs to slaves */
1331         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1332
1333         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1334         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1335
1336         uint16_t i, j;
1337
1338         if (unlikely(nb_bufs == 0))
1339                 return 0;
1340
1341         /* Copy slave list to protect against slave up/down changes during tx
1342          * bursting */
1343         slave_count = internals->active_slave_count;
1344         if (unlikely(slave_count < 1))
1345                 return 0;
1346
1347         memcpy(slave_port_ids, internals->active_slaves,
1348                         sizeof(slave_port_ids[0]) * slave_count);
1349
1350         dist_slave_count = 0;
1351         for (i = 0; i < slave_count; i++) {
1352                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1353
1354                 if (ACTOR_STATE(port, DISTRIBUTING))
1355                         dist_slave_port_ids[dist_slave_count++] =
1356                                         slave_port_ids[i];
1357         }
1358
1359         if (likely(dist_slave_count > 1)) {
1360
1361                 /*
1362                  * Populate slaves mbuf with the packets which are to be sent
1363                  * on it, selecting output slave using hash based on xmit policy
1364                  */
1365                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1366                                 bufs_slave_port_idxs);
1367
1368                 for (i = 0; i < nb_bufs; i++) {
1369                         /*
1370                          * Populate slave mbuf arrays with mbufs for that
1371                          * slave
1372                          */
1373                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1374
1375                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1376                                         bufs[i];
1377                 }
1378
1379
1380                 /* Send packet burst on each slave device */
1381                 for (i = 0; i < dist_slave_count; i++) {
1382                         if (slave_nb_bufs[i] == 0)
1383                                 continue;
1384
1385                         slave_tx_count = rte_eth_tx_burst(
1386                                         dist_slave_port_ids[i],
1387                                         bd_tx_q->queue_id, slave_bufs[i],
1388                                         slave_nb_bufs[i]);
1389
1390                         total_tx_count += slave_tx_count;
1391
1392                         /* If tx burst fails move packets to end of bufs */
1393                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1394                                 slave_tx_fail_count[i] = slave_nb_bufs[i] -
1395                                                 slave_tx_count;
1396                                 total_tx_fail_count += slave_tx_fail_count[i];
1397
1398                                 /*
1399                                  * Shift bufs to beginning of array to allow
1400                                  * reordering later
1401                                  */
1402                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
1403                                         slave_bufs[i][j] =
1404                                                 slave_bufs[i]
1405                                                         [slave_tx_count
1406                                                         + j];
1407                         }
1408                 }
1409
1410                 /*
1411                  * If there were any tx burst failures, move the failed
1412                  * mbufs to the end of bufs to preserve the expected PMD
1413                  * behaviour: failed packets sit at the tail of the array
1414                  */
1415                 if (unlikely(total_tx_fail_count > 0)) {
1416                         int bufs_idx = nb_bufs - total_tx_fail_count;
1417
1418                         for (i = 0; i < slave_count; i++) {
1419                                 if (slave_tx_fail_count[i] > 0) {
1420                                         for (j = 0;
1421                                                 j < slave_tx_fail_count[i];
1422                                                 j++) {
1423                                                 bufs[bufs_idx++] =
1424                                                         slave_bufs[i][j];
1425                                         }
1426                                 }
1427                         }
1428                 }
1429         }
1430
1431         /* Check for LACP control packets and send if available */
1432         for (i = 0; i < slave_count; i++) {
1433                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1434                 struct rte_mbuf *ctrl_pkt = NULL;
1435
1436                 if (likely(rte_ring_empty(port->tx_ring)))
1437                         continue;
1438
1439                 if (rte_ring_dequeue(port->tx_ring,
1440                                      (void **)&ctrl_pkt) != -ENOENT) {
1441                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1442                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1443                         /*
1444                          * re-enqueue LAG control plane packets to buffering
1445                          * ring if transmission fails so the packet isn't lost.
1446                          */
1447                         if (slave_tx_count != 1)
1448                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1449                 }
1450         }
1451
1452         return total_tx_count;
1453 }
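/*
 * Note: the control-packet pass above is why, without dedicated queues,
 * mode 4 applications must call the tx/rx burst functions regularly (see
 * the warning in bond_ethdev_mode_set below); LACPDUs are only drained
 * from each port's tx_ring on the data path.
 */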
1454
1455 static uint16_t
1456 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1457                 uint16_t nb_pkts)
1458 {
1459         struct bond_dev_private *internals;
1460         struct bond_tx_queue *bd_tx_q;
1461
1462         uint8_t tx_failed_flag = 0, num_of_slaves;
1463         uint16_t slaves[RTE_MAX_ETHPORTS];
1464
1465         uint16_t max_nb_of_tx_pkts = 0;
1466
1467         int slave_tx_total[RTE_MAX_ETHPORTS];
1468         int i, most_successful_tx_slave = -1;
1469
1470         bd_tx_q = (struct bond_tx_queue *)queue;
1471         internals = bd_tx_q->dev_private;
1472
1473         /* Copy slave list to protect against slave up/down changes during tx
1474          * bursting */
1475         num_of_slaves = internals->active_slave_count;
1476         memcpy(slaves, internals->active_slaves,
1477                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1478
1479         if (num_of_slaves < 1)
1480                 return 0;
1481
1482         /* Increment reference count on mbufs */
1483         for (i = 0; i < nb_pkts; i++)
1484                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
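        /*
         * Each slave transmit (or the explicit free in the failure path
         * below) consumes one mbuf reference, so bumping the refcnt by
         * (num_of_slaves - 1) lets the same burst be sent once per slave.
         */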
1485
1486         /* Transmit burst on each active slave */
1487         for (i = 0; i < num_of_slaves; i++) {
1488                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1489                                         bufs, nb_pkts);
1490
1491                 if (unlikely(slave_tx_total[i] < nb_pkts))
1492                         tx_failed_flag = 1;
1493
1494                 /* record the count and index of the slave that transmitted
1495                  * the most packets */
1496                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1497                         max_nb_of_tx_pkts = slave_tx_total[i];
1498                         most_successful_tx_slave = i;
1499                 }
1500         }
1501
1502         /* if any slave fails to transmit part of the burst, free the surplus
1503          * references here: the caller knows nothing about the multiple
1504          * references, so failures on all but the most successful slave
1505          * must be handled internally */
1506         if (unlikely(tx_failed_flag))
1507                 for (i = 0; i < num_of_slaves; i++)
1508                         if (i != most_successful_tx_slave)
1509                                 while (slave_tx_total[i] < nb_pkts)
1510                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1511
1512         return max_nb_of_tx_pkts;
1513 }
1514
1515 void
1516 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1517 {
1518         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1519
1520         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1521                 /**
1522                  * If in mode 4 then save the link properties of the first
1523                  * slave; all subsequent slaves must match these properties
1524                  */
1525                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1526
1527                 bond_link->link_autoneg = slave_link->link_autoneg;
1528                 bond_link->link_duplex = slave_link->link_duplex;
1529                 bond_link->link_speed = slave_link->link_speed;
1530         } else {
1531                 /**
1532                  * In any other mode the link properties default to
1533                  * autonegotiation enabled and full duplex
1534                  */
1535                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1536                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1537         }
1538 }
1539
1540 int
1541 link_properties_valid(struct rte_eth_dev *ethdev,
1542                 struct rte_eth_link *slave_link)
1543 {
1544         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1545
1546         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1547                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1548
1549                 if (bond_link->link_duplex != slave_link->link_duplex ||
1550                         bond_link->link_autoneg != slave_link->link_autoneg ||
1551                         bond_link->link_speed != slave_link->link_speed)
1552                         return -1;
1553         }
1554
1555         return 0;
1556 }
1557
1558 int
1559 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1560 {
1561         struct ether_addr *mac_addr;
1562
1563         if (eth_dev == NULL) {
1564                 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1565                 return -1;
1566         }
1567
1568         if (dst_mac_addr == NULL) {
1569                 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1570                 return -1;
1571         }
1572
1573         mac_addr = eth_dev->data->mac_addrs;
1574
1575         ether_addr_copy(mac_addr, dst_mac_addr);
1576         return 0;
1577 }
1578
1579 int
1580 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1581 {
1582         struct ether_addr *mac_addr;
1583
1584         if (eth_dev == NULL) {
1585                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1586                 return -1;
1587         }
1588
1589         if (new_mac_addr == NULL) {
1590                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1591                 return -1;
1592         }
1593
1594         mac_addr = eth_dev->data->mac_addrs;
1595
1596         /* If new MAC is different from current MAC then update */
1597         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1598                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1599
1600         return 0;
1601 }
1602
1603 int
1604 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1605 {
1606         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1607         int i;
1608
1609         /* Update slave devices MAC addresses */
1610         if (internals->slave_count < 1)
1611                 return -1;
1612
1613         switch (internals->mode) {
1614         case BONDING_MODE_ROUND_ROBIN:
1615         case BONDING_MODE_BALANCE:
1616         case BONDING_MODE_BROADCAST:
1617                 for (i = 0; i < internals->slave_count; i++) {
1618                         if (rte_eth_dev_default_mac_addr_set(
1619                                         internals->slaves[i].port_id,
1620                                         bonded_eth_dev->data->mac_addrs)) {
1621                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1622                                                 internals->slaves[i].port_id);
1623                                 return -1;
1624                         }
1625                 }
1626                 break;
1627         case BONDING_MODE_8023AD:
1628                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1629                 break;
1630         case BONDING_MODE_ACTIVE_BACKUP:
1631         case BONDING_MODE_TLB:
1632         case BONDING_MODE_ALB:
1633         default:
1634                 for (i = 0; i < internals->slave_count; i++) {
1635                         if (internals->slaves[i].port_id ==
1636                                         internals->current_primary_port) {
1637                                 if (rte_eth_dev_default_mac_addr_set(
1638                                                 internals->current_primary_port,
1639                                                 bonded_eth_dev->data->mac_addrs)) {
1640                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1641                                                         internals->current_primary_port);
1642                                         return -1;
1643                                 }
1644                         } else {
1645                                 if (rte_eth_dev_default_mac_addr_set(
1646                                                 internals->slaves[i].port_id,
1647                                                 &internals->slaves[i].persisted_mac_addr)) {
1648                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1649                                                         internals->slaves[i].port_id);
1650                                         return -1;
1651                                 }
1652                         }
1653                 }
1654         }
1655
1656         return 0;
1657 }
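/*
 * The resulting per-mode policy: in round-robin, balance and broadcast
 * every slave carries the bond MAC (any slave may receive on the bond's
 * behalf); in active-backup, TLB and ALB only the current primary does,
 * while the remaining slaves keep their persisted hardware addresses.
 */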
1658
1659 int
1660 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1661 {
1662         struct bond_dev_private *internals;
1663
1664         internals = eth_dev->data->dev_private;
1665
1666         switch (mode) {
1667         case BONDING_MODE_ROUND_ROBIN:
1668                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1669                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1670                 break;
1671         case BONDING_MODE_ACTIVE_BACKUP:
1672                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1673                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1674                 break;
1675         case BONDING_MODE_BALANCE:
1676                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1677                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1678                 break;
1679         case BONDING_MODE_BROADCAST:
1680                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1681                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1682                 break;
1683         case BONDING_MODE_8023AD:
1684                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1685                         return -1;
1686
1687                 if (internals->mode4.dedicated_queues.enabled == 0) {
1688                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1689                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1690                         RTE_LOG(WARNING, PMD,
1691                                 "Using mode 4, it is necessary to do TX burst "
1692                                 "and RX burst at least every 100ms.\n");
1693                 } else {
1694                         /* Use flow director's optimization */
1695                         eth_dev->rx_pkt_burst =
1696                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1697                         eth_dev->tx_pkt_burst =
1698                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1699                 }
1700                 break;
1701         case BONDING_MODE_TLB:
1702                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1703                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1704                 break;
1705         case BONDING_MODE_ALB:
1706                 if (bond_mode_alb_enable(eth_dev) != 0)
1707                         return -1;
1708
1709                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1710                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1711                 break;
1712         default:
1713                 return -1;
1714         }
1715
1716         internals->mode = mode;
1717
1718         return 0;
1719 }
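/*
 * A minimal usage sketch (hypothetical names): the burst handlers above
 * are installed when an application selects a mode, either via the API,
 *
 *     int bond_port = rte_eth_bond_create("net_bonding0",
 *                     BONDING_MODE_8023AD, rte_socket_id());
 *
 * or via EAL devargs such as:
 *
 *     --vdev 'net_bonding0,mode=4,slave=0000:01:00.0'
 */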
1720
1721
1722 static int
1723 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1724                 struct rte_eth_dev *slave_eth_dev)
1725 {
1726         int errval = 0;
1727         struct bond_dev_private *internals = (struct bond_dev_private *)
1728                 bonded_eth_dev->data->dev_private;
1729         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1730
1731         if (port->slow_pool == NULL) {
1732                 char mem_name[256];
1733                 int slave_id = slave_eth_dev->data->port_id;
1734
1735                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1736                                 slave_id);
1737                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1738                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1739                         slave_eth_dev->data->numa_node);
1740
1741                 /* Any memory allocation failure in initialization is critical
1742                  * because resources can't be freed; reinitialization is impossible. */
1743                 if (port->slow_pool == NULL) {
1744                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1745                                 slave_id, mem_name, rte_strerror(rte_errno));
1746                 }
1747         }
1748
1749         if (internals->mode4.dedicated_queues.enabled == 1) {
1750                 /* Configure slow Rx queue */
1751
1752                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1753                                 internals->mode4.dedicated_queues.rx_qid, 128,
1754                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1755                                 NULL, port->slow_pool);
1756                 if (errval != 0) {
1757                         RTE_BOND_LOG(ERR,
1758                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1759                                         slave_eth_dev->data->port_id,
1760                                         internals->mode4.dedicated_queues.rx_qid,
1761                                         errval);
1762                         return errval;
1763                 }
1764
1765                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1766                                 internals->mode4.dedicated_queues.tx_qid, 512,
1767                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1768                                 NULL);
1769                 if (errval != 0) {
1770                         RTE_BOND_LOG(ERR,
1771                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1772                                 slave_eth_dev->data->port_id,
1773                                 internals->mode4.dedicated_queues.tx_qid,
1774                                 errval);
1775                         return errval;
1776                 }
1777         }
1778         return 0;
1779 }
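/*
 * With dedicated queues enabled, LACP control traffic bypasses the data
 * path: the slow rx/tx queues configured above sit one past the data
 * queues (their ids are assigned in bond_ethdev_start below), and the
 * flow rule installed by bond_ethdev_8023ad_flow_set steers slow-protocol
 * (LACP) frames into the slow rx queue.
 */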
1780
1781 int
1782 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1783                 struct rte_eth_dev *slave_eth_dev)
1784 {
1785         struct bond_rx_queue *bd_rx_q;
1786         struct bond_tx_queue *bd_tx_q;
1787         uint16_t nb_rx_queues;
1788         uint16_t nb_tx_queues;
1789
1790         int errval;
1791         uint16_t q_id;
1792         struct rte_flow_error flow_error;
1793
1794         struct bond_dev_private *internals = (struct bond_dev_private *)
1795                 bonded_eth_dev->data->dev_private;
1796
1797         /* Stop slave */
1798         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1799
1800         /* Enable interrupts on slave device if supported */
1801         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1802                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1803
1804         /* If RSS is enabled for bonding, try to enable it for slaves  */
1805         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1806                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1807                                 != 0) {
1808                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1809                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1810                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1811                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1812                 } else {
1813                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1814                 }
1815
1816                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1817                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1818                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1819                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1820         }
1821
1822         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1823                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1824
1825         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1826         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1827
1828         if (internals->mode == BONDING_MODE_8023AD) {
1829                 if (internals->mode4.dedicated_queues.enabled == 1) {
1830                         nb_rx_queues++;
1831                         nb_tx_queues++;
1832                 }
1833         }
1834
1835         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1836                                      bonded_eth_dev->data->mtu);
1837         if (errval != 0 && errval != -ENOTSUP) {
1838                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1839                                 slave_eth_dev->data->port_id, errval);
1840                 return errval;
1841         }
1842
1843         /* Configure device */
1844         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1845                         nb_rx_queues, nb_tx_queues,
1846                         &(slave_eth_dev->data->dev_conf));
1847         if (errval != 0) {
1848                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1849                                 slave_eth_dev->data->port_id, errval);
1850                 return errval;
1851         }
1852
1853         /* Setup Rx Queues */
1854         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1855                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1856
1857                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1858                                 bd_rx_q->nb_rx_desc,
1859                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1860                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1861                 if (errval != 0) {
1862                         RTE_BOND_LOG(ERR,
1863                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1864                                         slave_eth_dev->data->port_id, q_id, errval);
1865                         return errval;
1866                 }
1867         }
1868
1869         /* Setup Tx Queues */
1870         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1871                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1872
1873                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1874                                 bd_tx_q->nb_tx_desc,
1875                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1876                                 &bd_tx_q->tx_conf);
1877                 if (errval != 0) {
1878                         RTE_BOND_LOG(ERR,
1879                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1880                                 slave_eth_dev->data->port_id, q_id, errval);
1881                         return errval;
1882                 }
1883         }
1884
1885         if (internals->mode == BONDING_MODE_8023AD &&
1886                         internals->mode4.dedicated_queues.enabled == 1) {
1887                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1888                 if (errval != 0)
1889                         return errval;
1890
1891                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1892                                 slave_eth_dev->data->port_id) != 0) {
1893                         RTE_BOND_LOG(ERR,
1894                                 "bond_ethdev_8023ad_flow_verify: port=%d",
1895                                 slave_eth_dev->data->port_id);
1896                         return -1;
1897                 }
1898
1899                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1900                         rte_flow_destroy(slave_eth_dev->data->port_id,
1901                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1902                                         &flow_error);
1903
1904                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1905                                 slave_eth_dev->data->port_id);
1906         }
1907
1908         /* Start device */
1909         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1910         if (errval != 0) {
1911                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1912                                 slave_eth_dev->data->port_id, errval);
1913                 return -1;
1914         }
1915
1916         /* If RSS is enabled for bonding, synchronize RETA */
1917         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1918                 int i;
1919                 struct bond_dev_private *internals;
1920
1921                 internals = bonded_eth_dev->data->dev_private;
1922
1923                 for (i = 0; i < internals->slave_count; i++) {
1924                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1925                                 errval = rte_eth_dev_rss_reta_update(
1926                                                 slave_eth_dev->data->port_id,
1927                                                 &internals->reta_conf[0],
1928                                                 internals->slaves[i].reta_size);
1929                                 if (errval != 0) {
1930                                         RTE_LOG(WARNING, PMD,
1931                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1932                                                         " RSS Configuration for bonding may be inconsistent.\n",
1933                                                         slave_eth_dev->data->port_id, errval);
1934                                 }
1935                                 break;
1936                         }
1937                 }
1938         }
1939
1940         /* If lsc interrupt is set, check initial slave's link status */
1941         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1942                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1943                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1944                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1945                         NULL);
1946         }
1947
1948         return 0;
1949 }
1950
1951 void
1952 slave_remove(struct bond_dev_private *internals,
1953                 struct rte_eth_dev *slave_eth_dev)
1954 {
1955         uint8_t i;
1956
1957         for (i = 0; i < internals->slave_count; i++)
1958                 if (internals->slaves[i].port_id ==
1959                                 slave_eth_dev->data->port_id)
1960                         break;
1961
1962         if (i < (internals->slave_count - 1))
1963                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1964                                 sizeof(internals->slaves[0]) *
1965                                 (internals->slave_count - i - 1));
1966
1967         internals->slave_count--;
1968
1969         /* force reconfiguration of slave interfaces */
1970         _rte_eth_dev_reset(slave_eth_dev);
1971 }
1972
1973 static void
1974 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1975
1976 void
1977 slave_add(struct bond_dev_private *internals,
1978                 struct rte_eth_dev *slave_eth_dev)
1979 {
1980         struct bond_slave_details *slave_details =
1981                         &internals->slaves[internals->slave_count];
1982
1983         slave_details->port_id = slave_eth_dev->data->port_id;
1984         slave_details->last_link_status = 0;
1985
1986         /* Mark slave devices that don't support interrupts so we can
1987          * compensate when we start the bond
1988          */
1989         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1990                 slave_details->link_status_poll_enabled = 1;
1991         }
1992
1993         slave_details->link_status_wait_to_complete = 0;
1994         /* save slave's MAC so it can be restored when removed from the bond */
1995         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1996                         sizeof(struct ether_addr));
1997 }
1998
1999 void
2000 bond_ethdev_primary_set(struct bond_dev_private *internals,
2001                 uint16_t slave_port_id)
2002 {
2003         int i;
2004
2005         if (internals->active_slave_count < 1)
2006                 internals->current_primary_port = slave_port_id;
2007         else
2008                 /* Search bonded device slave ports for new proposed primary port */
2009                 for (i = 0; i < internals->active_slave_count; i++) {
2010                         if (internals->active_slaves[i] == slave_port_id)
2011                                 internals->current_primary_port = slave_port_id;
2012                 }
2013 }
2014
2015 static void
2016 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2017
2018 static int
2019 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2020 {
2021         struct bond_dev_private *internals;
2022         int i;
2023
2024         /* slave eth dev will be started by bonded device */
2025         if (check_for_bonded_ethdev(eth_dev)) {
2026                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2027                                 eth_dev->data->port_id);
2028                 return -1;
2029         }
2030
2031         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2032         eth_dev->data->dev_started = 1;
2033
2034         internals = eth_dev->data->dev_private;
2035
2036         if (internals->slave_count == 0) {
2037                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2038                 goto out_err;
2039         }
2040
2041         if (internals->user_defined_mac == 0) {
2042                 struct ether_addr *new_mac_addr = NULL;
2043
2044                 for (i = 0; i < internals->slave_count; i++)
2045                         if (internals->slaves[i].port_id == internals->primary_port)
2046                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2047
2048                 if (new_mac_addr == NULL)
2049                         goto out_err;
2050
2051                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2052                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2053                                         eth_dev->data->port_id);
2054                         goto out_err;
2055                 }
2056         }
2057
2058         /* Update all slave devices MACs*/
2059         if (mac_address_slaves_update(eth_dev) != 0)
2060                 goto out_err;
2061
2062         /* If bonded device is configured in promiscuous mode then re-apply config */
2063         if (internals->promiscuous_en)
2064                 bond_ethdev_promiscuous_enable(eth_dev);
2065
2066         if (internals->mode == BONDING_MODE_8023AD) {
2067                 if (internals->mode4.dedicated_queues.enabled == 1) {
2068                         internals->mode4.dedicated_queues.rx_qid =
2069                                         eth_dev->data->nb_rx_queues;
2070                         internals->mode4.dedicated_queues.tx_qid =
2071                                         eth_dev->data->nb_tx_queues;
2072                 }
2073         }
2074
2075
2076         /* Reconfigure each slave device if starting bonded device */
2077         for (i = 0; i < internals->slave_count; i++) {
2078                 struct rte_eth_dev *slave_ethdev =
2079                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2080                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2081                         RTE_BOND_LOG(ERR,
2082                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2083                                 eth_dev->data->port_id,
2084                                 internals->slaves[i].port_id);
2085                         goto out_err;
2086                 }
2087                 /* We will need to poll for link status if any slave doesn't
2088                  * support interrupts
2089                  */
2090                 if (internals->slaves[i].link_status_poll_enabled)
2091                         internals->link_status_polling_enabled = 1;
2092         }
2093
2094         /* start polling if needed */
2095         if (internals->link_status_polling_enabled) {
2096                 rte_eal_alarm_set(
2097                         internals->link_status_polling_interval_ms * 1000,
2098                         bond_ethdev_slave_link_status_change_monitor,
2099                         (void *)&rte_eth_devices[internals->port_id]);
2100         }
2101
2102         if (internals->user_defined_primary_port)
2103                 bond_ethdev_primary_set(internals, internals->primary_port);
2104
2105         if (internals->mode == BONDING_MODE_8023AD)
2106                 bond_mode_8023ad_start(eth_dev);
2107
2108         if (internals->mode == BONDING_MODE_TLB ||
2109                         internals->mode == BONDING_MODE_ALB)
2110                 bond_tlb_enable(internals);
2111
2112         return 0;
2113
2114 out_err:
2115         eth_dev->data->dev_started = 0;
2116         return -1;
2117 }
2118
2119 static void
2120 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2121 {
2122         uint8_t i;
2123
2124         if (dev->data->rx_queues != NULL) {
2125                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2126                         rte_free(dev->data->rx_queues[i]);
2127                         dev->data->rx_queues[i] = NULL;
2128                 }
2129                 dev->data->nb_rx_queues = 0;
2130         }
2131
2132         if (dev->data->tx_queues != NULL) {
2133                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2134                         rte_free(dev->data->tx_queues[i]);
2135                         dev->data->tx_queues[i] = NULL;
2136                 }
2137                 dev->data->nb_tx_queues = 0;
2138         }
2139 }
2140
2141 void
2142 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2143 {
2144         struct bond_dev_private *internals = eth_dev->data->dev_private;
2145         uint8_t i;
2146
2147         if (internals->mode == BONDING_MODE_8023AD) {
2148                 struct port *port;
2149                 void *pkt = NULL;
2150
2151                 bond_mode_8023ad_stop(eth_dev);
2152
2153                 /* Discard all messages to/from mode 4 state machines */
2154                 for (i = 0; i < internals->active_slave_count; i++) {
2155                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2156
2157                         RTE_ASSERT(port->rx_ring != NULL);
2158                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2159                                 rte_pktmbuf_free(pkt);
2160
2161                         RTE_ASSERT(port->tx_ring != NULL);
2162                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2163                                 rte_pktmbuf_free(pkt);
2164                 }
2165         }
2166
2167         if (internals->mode == BONDING_MODE_TLB ||
2168                         internals->mode == BONDING_MODE_ALB) {
2169                 bond_tlb_disable(internals);
2170                 for (i = 0; i < internals->active_slave_count; i++)
2171                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2172         }
2173
2174         internals->active_slave_count = 0;
2175         internals->link_status_polling_enabled = 0;
2176         for (i = 0; i < internals->slave_count; i++)
2177                 internals->slaves[i].last_link_status = 0;
2178
2179         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2180         eth_dev->data->dev_started = 0;
2181 }
2182
2183 void
2184 bond_ethdev_close(struct rte_eth_dev *dev)
2185 {
2186         struct bond_dev_private *internals = dev->data->dev_private;
2187         uint8_t bond_port_id = internals->port_id;
2188         int skipped = 0;
2189
2190         RTE_LOG(INFO, PMD, "Closing bonded device %s\n", dev->device->name);
2191         while (internals->slave_count != skipped) {
2192                 uint16_t port_id = internals->slaves[skipped].port_id;
2193
2194                 rte_eth_dev_stop(port_id);
2195
2196                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2197                         RTE_LOG(ERR, PMD,
2198                                 "Failed to remove port %d from bonded device "
2199                                 "%s\n", port_id, dev->device->name);
2200                         skipped++;
2201                 }
2202         }
2203         bond_ethdev_free_queues(dev);
2204         rte_bitmap_reset(internals->vlan_filter_bmp);
2205 }
2206
2207 /* forward declaration */
2208 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2209
2210 static void
2211 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2212 {
2213         struct bond_dev_private *internals = dev->data->dev_private;
2214
2215         uint16_t max_nb_rx_queues = UINT16_MAX;
2216         uint16_t max_nb_tx_queues = UINT16_MAX;
2217
2218         dev_info->max_mac_addrs = 1;
2219
2220         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2221                         internals->candidate_max_rx_pktlen :
2222                         ETHER_MAX_JUMBO_FRAME_LEN;
2223
2224         /* The maximum number of tx/rx queues that the bonded device can
2225          * support is the minimum reported by its slaves, as all slaves must
2226          * be capable of supporting the same number of tx/rx queues.
2227          */
2228         if (internals->slave_count > 0) {
2229                 struct rte_eth_dev_info slave_info;
2230                 uint8_t idx;
2231
2232                 for (idx = 0; idx < internals->slave_count; idx++) {
2233                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2234                                         &slave_info);
2235
2236                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2237                                 max_nb_rx_queues = slave_info.max_rx_queues;
2238
2239                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2240                                 max_nb_tx_queues = slave_info.max_tx_queues;
2241                 }
2242         }
2243
2244         dev_info->max_rx_queues = max_nb_rx_queues;
2245         dev_info->max_tx_queues = max_nb_tx_queues;
2246
2247         /**
2248          * If dedicated hw queues enabled for link bonding device in LACP mode
2249          * then we need to reduce the maximum number of data path queues by 1.
2250          */
2251         if (internals->mode == BONDING_MODE_8023AD &&
2252                 internals->mode4.dedicated_queues.enabled == 1) {
2253                 dev_info->max_rx_queues--;
2254                 dev_info->max_tx_queues--;
2255         }
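        /*
         * Worked example (hypothetical capabilities): slaves reporting 16
         * and 8 max rx queues yield a bond maximum of 8; with LACP
         * dedicated queues enabled above, 7 remain usable for data.
         */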
2256
2257         dev_info->min_rx_bufsize = 0;
2258
2259         dev_info->rx_offload_capa = internals->rx_offload_capa;
2260         dev_info->tx_offload_capa = internals->tx_offload_capa;
2261         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2262
2263         dev_info->reta_size = internals->reta_size;
2264 }
2265
2266 static int
2267 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2268 {
2269         int res;
2270         uint16_t i;
2271         struct bond_dev_private *internals = dev->data->dev_private;
2272
2273         /* don't do this while a slave is being added */
2274         rte_spinlock_lock(&internals->lock);
2275
2276         if (on)
2277                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2278         else
2279                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2280
2281         for (i = 0; i < internals->slave_count; i++) {
2282                 uint16_t port_id = internals->slaves[i].port_id;
2283
2284                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2285                 if (res == -ENOTSUP)
2286                         RTE_LOG(WARNING, PMD,
2287                                 "Setting VLAN filter on slave port %u not supported.\n",
2288                                 port_id);
2289         }
2290
2291         rte_spinlock_unlock(&internals->lock);
2292         return 0;
2293 }
2294
2295 static int
2296 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2297                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2298                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2299 {
2300         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2301                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2302                                         0, dev->data->numa_node);
2303         if (bd_rx_q == NULL)
2304                 return -1;
2305
2306         bd_rx_q->queue_id = rx_queue_id;
2307         bd_rx_q->dev_private = dev->data->dev_private;
2308
2309         bd_rx_q->nb_rx_desc = nb_rx_desc;
2310
2311         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2312         bd_rx_q->mb_pool = mb_pool;
2313
2314         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2315
2316         return 0;
2317 }
2318
2319 static int
2320 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2321                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2322                 const struct rte_eth_txconf *tx_conf)
2323 {
2324         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2325                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2326                                         0, dev->data->numa_node);
2327
2328         if (bd_tx_q == NULL)
2329                 return -1;
2330
2331         bd_tx_q->queue_id = tx_queue_id;
2332         bd_tx_q->dev_private = dev->data->dev_private;
2333
2334         bd_tx_q->nb_tx_desc = nb_tx_desc;
2335         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2336
2337         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2338
2339         return 0;
2340 }
2341
2342 static void
2343 bond_ethdev_rx_queue_release(void *queue)
2344 {
2345         if (queue == NULL)
2346                 return;
2347
2348         rte_free(queue);
2349 }
2350
2351 static void
2352 bond_ethdev_tx_queue_release(void *queue)
2353 {
2354         if (queue == NULL)
2355                 return;
2356
2357         rte_free(queue);
2358 }
2359
2360 static void
2361 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2362 {
2363         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2364         struct bond_dev_private *internals;
2365
2366         /* Default value for polling slave found is true as we don't want to
2367          * disable the polling thread if we cannot get the lock */
2368         int i, polling_slave_found = 1;
2369
2370         if (cb_arg == NULL)
2371                 return;
2372
2373         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2374         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2375
2376         if (!bonded_ethdev->data->dev_started ||
2377                 !internals->link_status_polling_enabled)
2378                 return;
2379
2380         /* If device is currently being configured then don't check slaves link
2381          * status, wait until next period */
2382         if (rte_spinlock_trylock(&internals->lock)) {
2383                 if (internals->slave_count > 0)
2384                         polling_slave_found = 0;
2385
2386                 for (i = 0; i < internals->slave_count; i++) {
2387                         if (!internals->slaves[i].link_status_poll_enabled)
2388                                 continue;
2389
2390                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2391                         polling_slave_found = 1;
2392
2393                         /* Update slave link status */
2394                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2395                                         internals->slaves[i].link_status_wait_to_complete);
2396
2397                         /* if link status has changed since last checked then call lsc
2398                          * event callback */
2399                         if (slave_ethdev->data->dev_link.link_status !=
2400                                         internals->slaves[i].last_link_status) {
2401                                 internals->slaves[i].last_link_status =
2402                                                 slave_ethdev->data->dev_link.link_status;
2403
2404                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2405                                                 RTE_ETH_EVENT_INTR_LSC,
2406                                                 &bonded_ethdev->data->port_id,
2407                                                 NULL);
2408                         }
2409                 }
2410                 rte_spinlock_unlock(&internals->lock);
2411         }
2412
2413         if (polling_slave_found)
2414                 /* Set alarm to continue monitoring link status of slave ethdev's */
2415                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2416                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2417 }
2418
2419 static int
2420 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2421 {
2422         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2423
2424         struct bond_dev_private *bond_ctx;
2425         struct rte_eth_link slave_link;
2426
2427         uint32_t idx;
2428
2429         bond_ctx = ethdev->data->dev_private;
2430
2431         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2432
2433         if (ethdev->data->dev_started == 0 ||
2434                         bond_ctx->active_slave_count == 0) {
2435                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2436                 return 0;
2437         }
2438
2439         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2440
2441         if (wait_to_complete)
2442                 link_update = rte_eth_link_get;
2443         else
2444                 link_update = rte_eth_link_get_nowait;
2445
2446         switch (bond_ctx->mode) {
2447         case BONDING_MODE_BROADCAST:
2448                 /**
2449                  * Setting link speed to UINT32_MAX to ensure we pick up the
2450                  * value of the first active slave
2451                  */
2452                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2453
2454                 /**
2455                  * The bond link speed is the minimum of all slave link
2456                  * speeds, since transmitting faster than the slowest slave
2457                  * would cause packet loss on that slave
2458                  */
2459                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2460                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2461
2462                         if (slave_link.link_speed <
2463                                         ethdev->data->dev_link.link_speed)
2464                                 ethdev->data->dev_link.link_speed =
2465                                                 slave_link.link_speed;
2466                 }
2467                 break;
2468         case BONDING_MODE_ACTIVE_BACKUP:
2469                 /* Current primary slave */
2470                 link_update(bond_ctx->current_primary_port, &slave_link);
2471
2472                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2473                 break;
2474         case BONDING_MODE_8023AD:
2475                 ethdev->data->dev_link.link_autoneg =
2476                                 bond_ctx->mode4.slave_link.link_autoneg;
2477                 ethdev->data->dev_link.link_duplex =
2478                                 bond_ctx->mode4.slave_link.link_duplex;
2479                 /* fall through to update link speed */
2480         case BONDING_MODE_ROUND_ROBIN:
2481         case BONDING_MODE_BALANCE:
2482         case BONDING_MODE_TLB:
2483         case BONDING_MODE_ALB:
2484         default:
2485                 /**
2486                  * In these modes the maximum theoretical link speed is the
2487                  * sum of all the slaves' link speeds
2488                  */
2489                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2490
2491                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2492                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2493
2494                         ethdev->data->dev_link.link_speed +=
2495                                         slave_link.link_speed;
2496                 }
2497         }
2498
2499
2500         return 0;
2501 }
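/*
 * Worked example (hypothetical slave speeds): a balance-mode bond over
 * two 10G slaves reports 20G (the sum), while a broadcast-mode bond over
 * a 10G and a 1G slave reports 1G (the minimum).
 */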
2502
2503
2504 static int
2505 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2506 {
2507         struct bond_dev_private *internals = dev->data->dev_private;
2508         struct rte_eth_stats slave_stats;
2509         int i, j;
2510
2511         for (i = 0; i < internals->slave_count; i++) {
2512                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2513
2514                 stats->ipackets += slave_stats.ipackets;
2515                 stats->opackets += slave_stats.opackets;
2516                 stats->ibytes += slave_stats.ibytes;
2517                 stats->obytes += slave_stats.obytes;
2518                 stats->imissed += slave_stats.imissed;
2519                 stats->ierrors += slave_stats.ierrors;
2520                 stats->oerrors += slave_stats.oerrors;
2521                 stats->rx_nombuf += slave_stats.rx_nombuf;
2522
2523                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2524                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2525                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2526                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2527                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2528                         stats->q_errors[j] += slave_stats.q_errors[j];
2529                 }
2530
2531         }
2532
2533         return 0;
2534 }
2535
2536 static void
2537 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2538 {
2539         struct bond_dev_private *internals = dev->data->dev_private;
2540         int i;
2541
2542         for (i = 0; i < internals->slave_count; i++)
2543                 rte_eth_stats_reset(internals->slaves[i].port_id);
2544 }
2545
2546 static void
2547 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2548 {
2549         struct bond_dev_private *internals = eth_dev->data->dev_private;
2550         int i;
2551
2552         internals->promiscuous_en = 1;
2553
2554         switch (internals->mode) {
2555         /* Promiscuous mode is propagated to all slaves */
2556         case BONDING_MODE_ROUND_ROBIN:
2557         case BONDING_MODE_BALANCE:
2558         case BONDING_MODE_BROADCAST:
2559                 for (i = 0; i < internals->slave_count; i++)
2560                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2561                 break;
2562         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2563         case BONDING_MODE_8023AD:
2564                 break;
2565         /* Promiscuous mode is propagated only to primary slave */
2566         case BONDING_MODE_ACTIVE_BACKUP:
2567         case BONDING_MODE_TLB:
2568         case BONDING_MODE_ALB:
2569         default:
2570                 rte_eth_promiscuous_enable(internals->current_primary_port);
2571         }
2572 }
2573
2574 static void
2575 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2576 {
2577         struct bond_dev_private *internals = dev->data->dev_private;
2578         int i;
2579
2580         internals->promiscuous_en = 0;
2581
2582         switch (internals->mode) {
2583         /* Promiscuous mode is propagated to all slaves */
2584         case BONDING_MODE_ROUND_ROBIN:
2585         case BONDING_MODE_BALANCE:
2586         case BONDING_MODE_BROADCAST:
2587                 for (i = 0; i < internals->slave_count; i++)
2588                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2589                 break;
2590         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2591         case BONDING_MODE_8023AD:
2592                 break;
2593         /* Promiscuous mode is propagated only to primary slave */
2594         case BONDING_MODE_ACTIVE_BACKUP:
2595         case BONDING_MODE_TLB:
2596         case BONDING_MODE_ALB:
2597         default:
2598                 rte_eth_promiscuous_disable(internals->current_primary_port);
2599         }
2600 }
2601
2602 static void
2603 bond_ethdev_delayed_lsc_propagation(void *arg)
2604 {
2605         if (arg == NULL)
2606                 return;
2607
2608         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2609                         RTE_ETH_EVENT_INTR_LSC, NULL);
2610 }
2611
2612 int
2613 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2614                 void *param, void *ret_param __rte_unused)
2615 {
2616         struct rte_eth_dev *bonded_eth_dev;
2617         struct bond_dev_private *internals;
2618         struct rte_eth_link link;
2619         int rc = -1;
2620
2621         int i, valid_slave = 0;
2622         uint8_t active_pos;
2623         uint8_t lsc_flag = 0;
2624
2625         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2626                 return rc;
2627
2628         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2629
2630         if (check_for_bonded_ethdev(bonded_eth_dev))
2631                 return rc;
2632
2633         internals = bonded_eth_dev->data->dev_private;
2634
2635         /* If the device isn't started don't handle interrupts */
2636         if (!bonded_eth_dev->data->dev_started)
2637                 return rc;
2638
2639         /* verify that port_id is a valid slave of bonded port */
2640         for (i = 0; i < internals->slave_count; i++) {
2641                 if (internals->slaves[i].port_id == port_id) {
2642                         valid_slave = 1;
2643                         break;
2644                 }
2645         }
2646
2647         if (!valid_slave)
2648                 return rc;
2649
2650         /* Search for port in active port list */
2651         active_pos = find_slave_by_id(internals->active_slaves,
2652                         internals->active_slave_count, port_id);
2653
2654         rte_eth_link_get_nowait(port_id, &link);
2655         if (link.link_status) {
2656                 if (active_pos < internals->active_slave_count)
2657                         return rc;
2658
2659                 /* if no active slave ports then set this port to be primary port */
2660                 if (internals->active_slave_count < 1) {
2661                         /* If first active slave, then change link status */
2662                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2663                         internals->current_primary_port = port_id;
2664                         lsc_flag = 1;
2665
2666                         mac_address_slaves_update(bonded_eth_dev);
2667                 }
2668
2669                 activate_slave(bonded_eth_dev, port_id);
2670
2671                 /* If user has defined the primary port then default to using it */
2672                 if (internals->user_defined_primary_port &&
2673                                 internals->primary_port == port_id)
2674                         bond_ethdev_primary_set(internals, port_id);
2675         } else {
2676                 if (active_pos == internals->active_slave_count)
2677                         return rc;
2678
2679                 /* Remove from active slave list */
2680                 deactivate_slave(bonded_eth_dev, port_id);
2681
2682                 if (internals->active_slave_count < 1)
2683                         lsc_flag = 1;
2684
2685                 /* Update primary id, take first active slave from list or if none
2686                  * available fall back to the configured primary port */
2687                 if (port_id == internals->current_primary_port) {
2688                         if (internals->active_slave_count > 0)
2689                                 bond_ethdev_primary_set(internals,
2690                                                 internals->active_slaves[0]);
2691                         else
2692                                 internals->current_primary_port = internals->primary_port;
2693                 }
2694         }
2695
2696         /**
2697          * Update bonded device link properties after any change to active
2698          * slaves
2699          */
2700         bond_ethdev_link_update(bonded_eth_dev, 0);
2701
2702         if (lsc_flag) {
2703                 /* Cancel any possible outstanding interrupts if delays are enabled */
2704                 if (internals->link_up_delay_ms > 0 ||
2705                         internals->link_down_delay_ms > 0)
2706                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2707                                         bonded_eth_dev);
2708
2709                 if (bonded_eth_dev->data->dev_link.link_status) {
2710                         if (internals->link_up_delay_ms > 0)
2711                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2712                                                 bond_ethdev_delayed_lsc_propagation,
2713                                                 (void *)bonded_eth_dev);
2714                         else
2715                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2716                                                 RTE_ETH_EVENT_INTR_LSC,
2717                                                 NULL);
2718
2719                 } else {
2720                         if (internals->link_down_delay_ms > 0)
2721                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2722                                                 bond_ethdev_delayed_lsc_propagation,
2723                                                 (void *)bonded_eth_dev);
2724                         else
2725                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2726                                                 RTE_ETH_EVENT_INTR_LSC,
2727                                                 NULL);
2728                 }
2729         }
2730         return 0;
2731 }
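
/*
 * Wiring sketch (illustrative): slave ports are registered for this
 * callback when they are configured, with the bonded port id passed
 * through the param pointer. The names below are assumptions for the
 * sake of the example, not part of this file's API.
 *
 *     static uint16_t bonded_port_id;
 *
 *     rte_eth_dev_callback_register(slave_port_id,
 *                     RTE_ETH_EVENT_INTR_LSC,
 *                     bond_ethdev_lsc_event_callback,
 *                     &bonded_port_id);
 */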
2732
2733 static int
2734 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2735                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2736 {
2737         unsigned i, j;
2738         int result = 0;
2739         int slave_reta_size;
2740         unsigned reta_count;
2741         struct bond_dev_private *internals = dev->data->dev_private;
2742
2743         if (reta_size != internals->reta_size)
2744                 return -EINVAL;
2745
2746         /* Copy RETA table */
2747         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2748
2749         for (i = 0; i < reta_count; i++) {
2750                 internals->reta_conf[i].mask = reta_conf[i].mask;
2751                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2752                         if ((reta_conf[i].mask >> j) & 0x01)
2753                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2754         }
2755
2756         /* Fill rest of array */
2757         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2758                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2759                                 sizeof(internals->reta_conf[0]) * reta_count);
2760
2761         /* Propagate RETA over slaves */
2762         for (i = 0; i < internals->slave_count; i++) {
2763                 slave_reta_size = internals->slaves[i].reta_size;
2764                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2765                                 &internals->reta_conf[0], slave_reta_size);
2766                 if (result < 0)
2767                         return result;
2768         }
2769
2770         return 0;
2771 }
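
/*
 * Caller-side sketch (illustrative, not part of this driver): spreading
 * a bonded port's RETA across two queues. Assumes a bonded port `port`
 * whose reta_size is 128; all values here are examples.
 *
 *     struct rte_eth_rss_reta_entry64 conf[128 / RTE_RETA_GROUP_SIZE];
 *     unsigned int k;
 *
 *     memset(conf, 0, sizeof(conf));
 *     for (k = 0; k < 128; k++) {
 *             conf[k / RTE_RETA_GROUP_SIZE].mask |=
 *                             1ULL << (k % RTE_RETA_GROUP_SIZE);
 *             conf[k / RTE_RETA_GROUP_SIZE].reta[k % RTE_RETA_GROUP_SIZE] =
 *                             k % 2;
 *     }
 *     rte_eth_dev_rss_reta_update(port, conf, 128);
 */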
2772
2773 static int
2774 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2775                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2776 {
2777         int i, j;
2778         struct bond_dev_private *internals = dev->data->dev_private;
2779
2780         if (reta_size != internals->reta_size)
2781                 return -EINVAL;
2782
2783         /* Copy RETA table */
2784         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2785                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2786                         if ((reta_conf[i].mask >> j) & 0x01)
2787                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2788
2789         return 0;
2790 }
2791
2792 static int
2793 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2794                 struct rte_eth_rss_conf *rss_conf)
2795 {
2796         int i, result = 0;
2797         struct bond_dev_private *internals = dev->data->dev_private;
2798         struct rte_eth_rss_conf bond_rss_conf;
2799
2800         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2801
2802         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2803
2804         if (bond_rss_conf.rss_hf != 0)
2805                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2806
2807         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2808                         sizeof(internals->rss_key)) {
2809                 if (bond_rss_conf.rss_key_len == 0)
2810                         bond_rss_conf.rss_key_len = 40; /* default key length */
2811                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2812                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2813                                 internals->rss_key_len);
2814         }
2815
2816         for (i = 0; i < internals->slave_count; i++) {
2817                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2818                                 &bond_rss_conf);
2819                 if (result < 0)
2820                         return result;
2821         }
2822
2823         return 0;
2824 }
2825
2826 static int
2827 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2828                 struct rte_eth_rss_conf *rss_conf)
2829 {
2830         struct bond_dev_private *internals = dev->data->dev_private;
2831
2832         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2833         rss_conf->rss_key_len = internals->rss_key_len;
2834         if (rss_conf->rss_key)
2835                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2836
2837         return 0;
2838 }
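
/*
 * Caller-side sketch (illustrative): reading the bonded port's RSS
 * configuration back. The 40-byte buffer is an assumption matching the
 * default key length used by this driver; `port` is an example id.
 *
 *     uint8_t key[40];
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = key,
 *             .rss_key_len = sizeof(key),
 *     };
 *
 *     rte_eth_dev_rss_hash_conf_get(port, &conf);
 */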
2839
2840 static int
2841 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2842 {
2843         struct rte_eth_dev *slave_eth_dev;
2844         struct bond_dev_private *internals = dev->data->dev_private;
2845         int ret, i;
2846
2847         rte_spinlock_lock(&internals->lock);
2848
2849         for (i = 0; i < internals->slave_count; i++) {
2850                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2851                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2852                         rte_spinlock_unlock(&internals->lock);
2853                         return -ENOTSUP;
2854                 }
2855         }
2856         for (i = 0; i < internals->slave_count; i++) {
2857                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2858                 if (ret < 0) {
2859                         rte_spinlock_unlock(&internals->lock);
2860                         return ret;
2861                 }
2862         }
2863
2864         rte_spinlock_unlock(&internals->lock);
2865         return 0;
2866 }
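
/*
 * Caller-side sketch (illustrative): an MTU set on the bonded port is
 * applied to every slave, and is rejected with -ENOTSUP if any slave
 * does not implement mtu_set. `port` is an example bonded port id.
 *
 *     if (rte_eth_dev_set_mtu(port, 1500) != 0)
 *             printf("MTU update refused by at least one slave\n");
 */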
2867
2868 static void
2869 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2870 {
2871         if (mac_address_set(dev, addr))
2872                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2873 }
2874
2875 const struct eth_dev_ops default_dev_ops = {
2876         .dev_start            = bond_ethdev_start,
2877         .dev_stop             = bond_ethdev_stop,
2878         .dev_close            = bond_ethdev_close,
2879         .dev_configure        = bond_ethdev_configure,
2880         .dev_infos_get        = bond_ethdev_info,
2881         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2882         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2883         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2884         .rx_queue_release     = bond_ethdev_rx_queue_release,
2885         .tx_queue_release     = bond_ethdev_tx_queue_release,
2886         .link_update          = bond_ethdev_link_update,
2887         .stats_get            = bond_ethdev_stats_get,
2888         .stats_reset          = bond_ethdev_stats_reset,
2889         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2890         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2891         .reta_update          = bond_ethdev_rss_reta_update,
2892         .reta_query           = bond_ethdev_rss_reta_query,
2893         .rss_hash_update      = bond_ethdev_rss_hash_update,
2894         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
2895         .mtu_set              = bond_ethdev_mtu_set,
2896         .mac_addr_set         = bond_ethdev_mac_address_set
2897 };
2898
2899 static int
2900 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2901 {
2902         const char *name = rte_vdev_device_name(dev);
2903         uint8_t socket_id = dev->device.numa_node;
2904         struct bond_dev_private *internals = NULL;
2905         struct rte_eth_dev *eth_dev = NULL;
2906         uint32_t vlan_filter_bmp_size;
2907
2908         /* now do all data allocation - for eth_dev structure
2909          * and internal (private) data
2910          */
2911
2912         /* reserve an ethdev entry */
2913         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2914         if (eth_dev == NULL) {
2915                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2916                 goto err;
2917         }
2918
2919         internals = eth_dev->data->dev_private;
2920         eth_dev->data->nb_rx_queues = (uint16_t)1;
2921         eth_dev->data->nb_tx_queues = (uint16_t)1;
2922
2923         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2924                         socket_id);
2925         if (eth_dev->data->mac_addrs == NULL) {
2926                 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2927                 goto err;
2928         }
2929
2930         eth_dev->dev_ops = &default_dev_ops;
2931         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2932
2933         rte_spinlock_init(&internals->lock);
2934
2935         internals->port_id = eth_dev->data->port_id;
2936         internals->mode = BONDING_MODE_INVALID;
2937         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2938         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2939         internals->burst_xmit_hash = burst_xmit_l2_hash;
2940         internals->user_defined_mac = 0;
2941
2942         internals->link_status_polling_enabled = 0;
2943
2944         internals->link_status_polling_interval_ms =
2945                 DEFAULT_POLLING_INTERVAL_10_MS;
2946         internals->link_down_delay_ms = 0;
2947         internals->link_up_delay_ms = 0;
2948
2949         internals->slave_count = 0;
2950         internals->active_slave_count = 0;
2951         internals->rx_offload_capa = 0;
2952         internals->tx_offload_capa = 0;
2953         internals->candidate_max_rx_pktlen = 0;
2954         internals->max_rx_pktlen = 0;
2955
2956         /* Initially allow to choose any offload type */
2957         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2958
2959         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2960         memset(internals->slaves, 0, sizeof(internals->slaves));
2961
2962         /* Set mode 4 default configuration */
2963         bond_mode_8023ad_setup(eth_dev, NULL);
2964         if (bond_ethdev_mode_set(eth_dev, mode)) {
2965                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d to mode %d",
2966                                  eth_dev->data->port_id, mode);
2967                 goto err;
2968         }
2969
2970         vlan_filter_bmp_size =
2971                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2972         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2973                                                    RTE_CACHE_LINE_SIZE);
2974         if (internals->vlan_filter_bmpmem == NULL) {
2975                 RTE_BOND_LOG(ERR,
2976                              "Failed to allocate vlan bitmap for bonded device %u\n",
2977                              eth_dev->data->port_id);
2978                 goto err;
2979         }
2980
2981         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2982                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2983         if (internals->vlan_filter_bmp == NULL) {
2984                 RTE_BOND_LOG(ERR,
2985                              "Failed to init vlan bitmap for bonded device %u\n",
2986                              eth_dev->data->port_id);
2987                 rte_free(internals->vlan_filter_bmpmem);
2988                 goto err;
2989         }
2990
2991         return eth_dev->data->port_id;
2992
2993 err:
2994         rte_free(internals);
2995         if (eth_dev != NULL) {
2996                 rte_free(eth_dev->data->mac_addrs);
2997                 rte_eth_dev_release_port(eth_dev);
2998         }
2999         return -1;
3000 }
3001
3002 static int
3003 bond_probe(struct rte_vdev_device *dev)
3004 {
3005         const char *name;
3006         struct bond_dev_private *internals;
3007         struct rte_kvargs *kvlist;
3008         uint8_t bonding_mode, socket_id;
3009         int  arg_count, port_id;
3010         uint8_t agg_mode;
3011
3012         if (!dev)
3013                 return -EINVAL;
3014
3015         name = rte_vdev_device_name(dev);
3016         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
3017
3018         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3019                 pmd_bond_init_valid_arguments);
3020         if (kvlist == NULL)
3021                 return -1;
3022
3023         /* Parse link bonding mode */
3024         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3025                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3026                                 &bond_ethdev_parse_slave_mode_kvarg,
3027                                 &bonding_mode) != 0) {
3028                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
3029                                         name);
3030                         goto parse_error;
3031                 }
3032         } else {
3033                 RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
3034                                 "device %s\n", name);
3035                 goto parse_error;
3036         }
3037
3038         /* Parse socket id to create bonding device on */
3039         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3040         if (arg_count == 1) {
3041                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3042                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3043                                 != 0) {
3044                         RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
3045                                         "bonded device %s\n", name);
3046                         goto parse_error;
3047                 }
3048         } else if (arg_count > 1) {
3049                 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
3050                                 "bonded device %s\n", name);
3051                 goto parse_error;
3052         } else {
3053                 socket_id = rte_socket_id();
3054         }
3055
3056         dev->device.numa_node = socket_id;
3057
3058         /* Create link bonding eth device */
3059         port_id = bond_alloc(dev, bonding_mode);
3060         if (port_id < 0) {
3061                 RTE_LOG(ERR, EAL, "Failed to create socket %s in mode %u on "
3062                                 "socket %u.\n", name, bonding_mode, socket_id);
3063                 goto parse_error;
3064         }
3065         internals = rte_eth_devices[port_id].data->dev_private;
3066         internals->kvlist = kvlist;
3067
3068
3069         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3070                 if (rte_kvargs_process(kvlist,
3071                                 PMD_BOND_AGG_MODE_KVARG,
3072                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3073                                 &agg_mode) != 0) {
3074                         RTE_LOG(ERR, EAL,
3075                                         "Failed to parse agg selection mode for bonded device %s\n",
3076                                         name);
3077                         goto parse_error;
3078                 }
3079
3080                 if (internals->mode == BONDING_MODE_8023AD)
3081                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3082                                         agg_mode);
3083         } else {
3084                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3085         }
3086
3087         RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
3088                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
3089         return 0;
3090
3091 parse_error:
3092         rte_kvargs_free(kvlist);
3093
3094         return -1;
3095 }
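
/*
 * Programmatic equivalent (illustrative sketch): the same probe path
 * runs when a bonded vdev is created at runtime; the device name and
 * arguments below are examples.
 *
 *     rte_vdev_init("net_bonding0",
 *                     "mode=1,slave=0000:00:08.0,slave=0000:00:09.0");
 */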
3096
3097 static int
3098 bond_remove(struct rte_vdev_device *dev)
3099 {
3100         struct rte_eth_dev *eth_dev;
3101         struct bond_dev_private *internals;
3102         const char *name;
3103
3104         if (!dev)
3105                 return -EINVAL;
3106
3107         name = rte_vdev_device_name(dev);
3108         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
3109
3110         /* now free all data allocation - for eth_dev structure
3111          * and internal (private) data
3112          */
3113
3114         /* find an ethdev entry */
3115         eth_dev = rte_eth_dev_allocated(name);
3116         if (eth_dev == NULL)
3117                 return -ENODEV;
3118
3119         RTE_ASSERT(eth_dev->device == &dev->device);
3120
3121         internals = eth_dev->data->dev_private;
3122         if (internals->slave_count != 0)
3123                 return -EBUSY;
3124
3125         if (eth_dev->data->dev_started == 1) {
3126                 bond_ethdev_stop(eth_dev);
3127                 bond_ethdev_close(eth_dev);
3128         }
3129
3130         eth_dev->dev_ops = NULL;
3131         eth_dev->rx_pkt_burst = NULL;
3132         eth_dev->tx_pkt_burst = NULL;
3133
3134         internals = eth_dev->data->dev_private;
3135         rte_bitmap_free(internals->vlan_filter_bmp);
3136         rte_free(internals->vlan_filter_bmpmem);
3137         rte_free(eth_dev->data->dev_private);
3138         rte_free(eth_dev->data->mac_addrs);
3139
3140         rte_eth_dev_release_port(eth_dev);
3141
3142         return 0;
3143 }
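
/*
 * Matching teardown sketch (illustrative): removal is refused with
 * -EBUSY while slaves remain attached, so detach them first. Port ids
 * and the device name below are examples.
 *
 *     rte_eth_bond_slave_remove(bonded_port_id, slave_port_id);
 *     rte_vdev_uninit("net_bonding0");
 */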
3144
3145 /* this part will resolve the slave port ids after all the other pdevs and
3146  * vdevs have been allocated */
3147 static int
3148 bond_ethdev_configure(struct rte_eth_dev *dev)
3149 {
3150         const char *name = dev->device->name;
3151         struct bond_dev_private *internals = dev->data->dev_private;
3152         struct rte_kvargs *kvlist = internals->kvlist;
3153         int arg_count;
3154         uint16_t port_id = dev - rte_eth_devices;
3155         uint8_t agg_mode;
3156
3157         static const uint8_t default_rss_key[40] = {
3158                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3159                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3160                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3161                 0xBE, 0xAC, 0x01, 0xFA
3162         };
3163
3164         unsigned i, j;
3165
3166         /* If RSS is enabled, fill table and key with default values */
3167         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3168                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
3169                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
3170                 memcpy(internals->rss_key, default_rss_key, sizeof(default_rss_key));
3171
3172                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3173                         internals->reta_conf[i].mask = ~0LL;
3174                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3175                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
3176                 }
3177         }
3178
3179         /* set the max_rx_pktlen */
3180         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3181
3182         /*
3183          * if no kvlist, it means that this bonded device has been created
3184          * through the bonding api.
3185          */
3186         if (!kvlist)
3187                 return 0;
3188
3189         /* Parse MAC address for bonded device */
3190         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3191         if (arg_count == 1) {
3192                 struct ether_addr bond_mac;
3193
3194                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3195                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3196                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
3197                                         name);
3198                         return -1;
3199                 }
3200
3201                 /* Set MAC address */
3202                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3203                         RTE_LOG(ERR, EAL,
3204                                         "Failed to set mac address on bonded device %s\n",
3205                                         name);
3206                         return -1;
3207                 }
3208         } else if (arg_count > 1) {
3209                 RTE_LOG(ERR, EAL,
3210                                 "MAC address can be specified only once for bonded device %s\n",
3211                                 name);
3212                 return -1;
3213         }
3214
3215         /* Parse/set balance mode transmit policy */
3216         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3217         if (arg_count == 1) {
3218                 uint8_t xmit_policy;
3219
3220                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3221                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3222                                                 0) {
3223                         RTE_LOG(INFO, EAL,
3224                                         "Invalid xmit policy specified for bonded device %s\n",
3225                                         name);
3226                         return -1;
3227                 }
3228
3229                 /* Set balance mode transmit policy */
3230                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3231                         RTE_LOG(ERR, EAL,
3232                                         "Failed to set balance xmit policy on bonded device %s\n",
3233                                         name);
3234                         return -1;
3235                 }
3236         } else if (arg_count > 1) {
3237                 RTE_LOG(ERR, EAL,
3238                                 "Transmit policy can be specified only once for bonded device"
3239                                 " %s\n", name);
3240                 return -1;
3241         }
3242
3243         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3244                 if (rte_kvargs_process(kvlist,
3245                                 PMD_BOND_AGG_MODE_KVARG,
3246                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3247                                 &agg_mode) != 0) {
3248                         RTE_LOG(ERR, EAL,
3249                                         "Failed to parse agg selection mode for bonded device %s\n",
3250                                         name);
3251                 }
3252                 if (internals->mode == BONDING_MODE_8023AD)
3253                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3254                                         agg_mode);
3255         }
3256
3257         /* Parse/add slave ports to bonded device */
3258         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3259                 struct bond_ethdev_slave_ports slave_ports;
3260                 unsigned i;
3261
3262                 memset(&slave_ports, 0, sizeof(slave_ports));
3263
3264                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3265                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3266                         RTE_LOG(ERR, EAL,
3267                                         "Failed to parse slave ports for bonded device %s\n",
3268                                         name);
3269                         return -1;
3270                 }
3271
3272                 for (i = 0; i < slave_ports.slave_count; i++) {
3273                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3274                                 RTE_LOG(ERR, EAL,
3275                                                 "Failed to add port %d as slave to bonded device %s\n",
3276                                                 slave_ports.slaves[i], name);
3277                         }
3278                 }
3279
3280         } else {
3281                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3282                 return -1;
3283         }
3284
3285         /* Parse/set primary slave port id*/
3286         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3287         if (arg_count == 1) {
3288                 uint16_t primary_slave_port_id;
3289
3290                 if (rte_kvargs_process(kvlist,
3291                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
3292                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3293                                 &primary_slave_port_id) < 0) {
3294                         RTE_LOG(INFO, EAL,
3295                                         "Invalid primary slave port id specified for bonded device"
3296                                         " %s\n", name);
3297                         return -1;
3298                 }
3299
3300                 /* Set primary slave port id */
3301                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3302                                 != 0) {
3303                         RTE_LOG(ERR, EAL,
3304                                         "Failed to set primary slave port %d on bonded device %s\n",
3305                                         primary_slave_port_id, name);
3306                         return -1;
3307                 }
3308         } else if (arg_count > 1) {
3309                 RTE_LOG(INFO, EAL,
3310                                 "Primary slave can be specified only once for bonded device"
3311                                 " %s\n", name);
3312                 return -1;
3313         }
3314
3315         /* Parse link status monitor polling interval */
3316         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3317         if (arg_count == 1) {
3318                 uint32_t lsc_poll_interval_ms;
3319
3320                 if (rte_kvargs_process(kvlist,
3321                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3322                                 &bond_ethdev_parse_time_ms_kvarg,
3323                                 &lsc_poll_interval_ms) < 0) {
3324                         RTE_LOG(INFO, EAL,
3325                                         "Invalid lsc polling interval value specified for bonded"
3326                                         " device %s\n", name);
3327                         return -1;
3328                 }
3329
3330                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3331                                 != 0) {
3332                         RTE_LOG(ERR, EAL,
3333                                         "Failed to set lsc monitor polling interval (%u ms) on"
3334                                         " bonded device %s\n", lsc_poll_interval_ms, name);
3335                         return -1;
3336                 }
3337         } else if (arg_count > 1) {
3338                 RTE_LOG(INFO, EAL,
3339                                 "LSC polling interval can be specified only once for bonded"
3340                                 " device %s\n", name);
3341                 return -1;
3342         }
3343
3344         /* Parse link up interrupt propagation delay */
3345         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3346         if (arg_count == 1) {
3347                 uint32_t link_up_delay_ms;
3348
3349                 if (rte_kvargs_process(kvlist,
3350                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3351                                 &bond_ethdev_parse_time_ms_kvarg,
3352                                 &link_up_delay_ms) < 0) {
3353                         RTE_LOG(INFO, EAL,
3354                                         "Invalid link up propagation delay value specified for"
3355                                         " bonded device %s\n", name);
3356                         return -1;
3357                 }
3358
3359                 /* Set link up propagation delay */
3360                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3361                                 != 0) {
3362                         RTE_LOG(ERR, EAL,
3363                                         "Failed to set link up propagation delay (%u ms) on bonded"
3364                                         " device %s\n", link_up_delay_ms, name);
3365                         return -1;
3366                 }
3367         } else if (arg_count > 1) {
3368                 RTE_LOG(INFO, EAL,
3369                                 "Link up propagation delay can be specified only once for"
3370                                 " bonded device %s\n", name);
3371                 return -1;
3372         }
3373
3374         /* Parse link down interrupt propagation delay */
3375         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3376         if (arg_count == 1) {
3377                 uint32_t link_down_delay_ms;
3378
3379                 if (rte_kvargs_process(kvlist,
3380                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3381                                 &bond_ethdev_parse_time_ms_kvarg,
3382                                 &link_down_delay_ms) < 0) {
3383                         RTE_LOG(INFO, EAL,
3384                                         "Invalid link down propagation delay value specified for"
3385                                         " bonded device %s\n", name);
3386                         return -1;
3387                 }
3388
3389                 /* Set link down propagation delay */
3390                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3391                                 != 0) {
3392                         RTE_LOG(ERR, EAL,
3393                                         "Failed to set link down propagation delay (%u ms) on"
3394                                         " bonded device %s\n", link_down_delay_ms, name);
3395                         return -1;
3396                 }
3397         } else if (arg_count > 1) {
3398                 RTE_LOG(INFO, EAL,
3399                                 "Link down propagation delay can be specified only once for"
3400                                 " bonded device %s\n", name);
3401                 return -1;
3402         }
3403
3404         return 0;
3405 }
3406
3407 struct rte_vdev_driver pmd_bond_drv = {
3408         .probe = bond_probe,
3409         .remove = bond_remove,
3410 };
3411
3412 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3413 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3414
3415 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3416         "slave=<ifc> "
3417         "primary=<ifc> "
3418         "mode=[0-6] "
3419         "xmit_policy=[l2 | l23 | l34] "
3420         "agg_mode=[count | stable | bandwidth] "
3421         "socket_id=<int> "
3422         "mac=<mac addr> "
3423         "lsc_poll_period_ms=<int> "
3424         "up_delay=<int> "
3425         "down_delay=<int>");