ethdev: return diagnostic when setting MAC address
[dpdk.git] / drivers / net / bonding / rte_eth_bond_pmd.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <netinet/in.h>
6
7 #include <rte_mbuf.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_ethdev_vdev.h>
11 #include <rte_tcp.h>
12 #include <rte_udp.h>
13 #include <rte_ip.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
20 #include <rte_string_fns.h>
21
22 #include "rte_eth_bond.h"
23 #include "rte_eth_bond_private.h"
24 #include "rte_eth_bond_8023ad_private.h"
25
26 #define REORDER_PERIOD_MS 10
27 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
28
29 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
30
31 /* Table for statistics in mode 5 TLB */
32 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
33
34 static inline size_t
35 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
36 {
37         size_t vlan_offset = 0;
38
39         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
40                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
41
42                 vlan_offset = sizeof(struct vlan_hdr);
43                 *proto = vlan_hdr->eth_proto;
44
45                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
46                         vlan_hdr = vlan_hdr + 1;
47                         *proto = vlan_hdr->eth_proto;
48                         vlan_offset += sizeof(struct vlan_hdr);
49                 }
50         }
51         return vlan_offset;
52 }
53
54 static uint16_t
55 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
56 {
57         struct bond_dev_private *internals;
58
59         uint16_t num_rx_slave = 0;
60         uint16_t num_rx_total = 0;
61
62         int i;
63
64         /* Cast to structure, containing bonded device's port id and queue id */
65         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
66
67         internals = bd_rx_q->dev_private;
68
69
70         for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
71                 /* Offset of pointer to *bufs increases as packets are received
72                  * from other slaves */
73                 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
74                                 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
75                 if (num_rx_slave) {
76                         num_rx_total += num_rx_slave;
77                         nb_pkts -= num_rx_slave;
78                 }
79         }
80
81         return num_rx_total;
82 }
83
84 static uint16_t
85 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
86                 uint16_t nb_pkts)
87 {
88         struct bond_dev_private *internals;
89
90         /* Cast to structure, containing bonded device's port id and queue id */
91         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
92
93         internals = bd_rx_q->dev_private;
94
95         return rte_eth_rx_burst(internals->current_primary_port,
96                         bd_rx_q->queue_id, bufs, nb_pkts);
97 }
98
99 static inline uint8_t
100 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
101 {
102         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
103
104         return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
105                 (ethertype == ether_type_slow_be &&
106                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
107 }
108
109 /*****************************************************************************
110  * Flow director's setup for mode 4 optimization
111  */
112
113 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
114         .dst.addr_bytes = { 0 },
115         .src.addr_bytes = { 0 },
116         .type = RTE_BE16(ETHER_TYPE_SLOW),
117 };
118
119 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
120         .dst.addr_bytes = { 0 },
121         .src.addr_bytes = { 0 },
122         .type = 0xFFFF,
123 };
124
125 static struct rte_flow_item flow_item_8023ad[] = {
126         {
127                 .type = RTE_FLOW_ITEM_TYPE_ETH,
128                 .spec = &flow_item_eth_type_8023ad,
129                 .last = NULL,
130                 .mask = &flow_item_eth_mask_type_8023ad,
131         },
132         {
133                 .type = RTE_FLOW_ITEM_TYPE_END,
134                 .spec = NULL,
135                 .last = NULL,
136                 .mask = NULL,
137         }
138 };
139
140 const struct rte_flow_attr flow_attr_8023ad = {
141         .group = 0,
142         .priority = 0,
143         .ingress = 1,
144         .egress = 0,
145         .reserved = 0,
146 };
147
148 int
149 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
150                 uint16_t slave_port) {
151         struct rte_eth_dev_info slave_info;
152         struct rte_flow_error error;
153         struct bond_dev_private *internals = (struct bond_dev_private *)
154                         (bond_dev->data->dev_private);
155
156         const struct rte_flow_action_queue lacp_queue_conf = {
157                 .index = 0,
158         };
159
160         const struct rte_flow_action actions[] = {
161                 {
162                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
163                         .conf = &lacp_queue_conf
164                 },
165                 {
166                         .type = RTE_FLOW_ACTION_TYPE_END,
167                 }
168         };
169
170         int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
171                         flow_item_8023ad, actions, &error);
172         if (ret < 0) {
173                 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
174                                 __func__, error.message, slave_port,
175                                 internals->mode4.dedicated_queues.rx_qid);
176                 return -1;
177         }
178
179         rte_eth_dev_info_get(slave_port, &slave_info);
180         if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
181                         slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
182                 RTE_BOND_LOG(ERR,
183                         "%s: Slave %d capabilities doesn't allow to allocate additional queues",
184                         __func__, slave_port);
185                 return -1;
186         }
187
188         return 0;
189 }
190
191 int
192 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
193         struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
194         struct bond_dev_private *internals = (struct bond_dev_private *)
195                         (bond_dev->data->dev_private);
196         struct rte_eth_dev_info bond_info;
197         uint16_t idx;
198
199         /* Verify if all slaves in bonding supports flow director and */
200         if (internals->slave_count > 0) {
201                 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
202
203                 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
204                 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
205
206                 for (idx = 0; idx < internals->slave_count; idx++) {
207                         if (bond_ethdev_8023ad_flow_verify(bond_dev,
208                                         internals->slaves[idx].port_id) != 0)
209                                 return -1;
210                 }
211         }
212
213         return 0;
214 }
215
216 int
217 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
218
219         struct rte_flow_error error;
220         struct bond_dev_private *internals = (struct bond_dev_private *)
221                         (bond_dev->data->dev_private);
222
223         struct rte_flow_action_queue lacp_queue_conf = {
224                 .index = internals->mode4.dedicated_queues.rx_qid,
225         };
226
227         const struct rte_flow_action actions[] = {
228                 {
229                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
230                         .conf = &lacp_queue_conf
231                 },
232                 {
233                         .type = RTE_FLOW_ACTION_TYPE_END,
234                 }
235         };
236
237         internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
238                         &flow_attr_8023ad, flow_item_8023ad, actions, &error);
239         if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
240                 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
241                                 "(slave_port=%d queue_id=%d)",
242                                 error.message, slave_port,
243                                 internals->mode4.dedicated_queues.rx_qid);
244                 return -1;
245         }
246
247         return 0;
248 }
249
250 static uint16_t
251 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
252                 uint16_t nb_pkts)
253 {
254         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
255         struct bond_dev_private *internals = bd_rx_q->dev_private;
256         uint16_t num_rx_total = 0;      /* Total number of received packets */
257         uint16_t slaves[RTE_MAX_ETHPORTS];
258         uint16_t slave_count;
259
260         uint16_t i, idx;
261
262         /* Copy slave list to protect against slave up/down changes during tx
263          * bursting */
264         slave_count = internals->active_slave_count;
265         memcpy(slaves, internals->active_slaves,
266                         sizeof(internals->active_slaves[0]) * slave_count);
267
268         for (i = 0, idx = internals->active_slave;
269                         i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
270                 idx = idx % slave_count;
271
272                 /* Read packets from this slave */
273                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
274                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
275         }
276
277         internals->active_slave = idx;
278
279         return num_rx_total;
280 }
281
282 static uint16_t
283 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
284                 uint16_t nb_bufs)
285 {
286         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
287         struct bond_dev_private *internals = bd_tx_q->dev_private;
288
289         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
290         uint16_t slave_count;
291
292         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
293         uint16_t dist_slave_count;
294
295         /* 2-D array to sort mbufs for transmission on each slave into */
296         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
297         /* Number of mbufs for transmission on each slave */
298         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
299         /* Mapping array generated by hash function to map mbufs to slaves */
300         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
301
302         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
303         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
304
305         uint16_t i, j;
306
307         if (unlikely(nb_bufs == 0))
308                 return 0;
309
310         /* Copy slave list to protect against slave up/down changes during tx
311          * bursting */
312         slave_count = internals->active_slave_count;
313         if (unlikely(slave_count < 1))
314                 return 0;
315
316         memcpy(slave_port_ids, internals->active_slaves,
317                         sizeof(slave_port_ids[0]) * slave_count);
318
319
320         dist_slave_count = 0;
321         for (i = 0; i < slave_count; i++) {
322                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
323
324                 if (ACTOR_STATE(port, DISTRIBUTING))
325                         dist_slave_port_ids[dist_slave_count++] =
326                                         slave_port_ids[i];
327         }
328
329         if (unlikely(dist_slave_count < 1))
330                 return 0;
331
332         /*
333          * Populate slaves mbuf with the packets which are to be sent on it
334          * selecting output slave using hash based on xmit policy
335          */
336         internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
337                         bufs_slave_port_idxs);
338
339         for (i = 0; i < nb_bufs; i++) {
340                 /* Populate slave mbuf arrays with mbufs for that slave. */
341                 uint8_t slave_idx = bufs_slave_port_idxs[i];
342
343                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
344         }
345
346
347         /* Send packet burst on each slave device */
348         for (i = 0; i < dist_slave_count; i++) {
349                 if (slave_nb_bufs[i] == 0)
350                         continue;
351
352                 slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
353                                 bd_tx_q->queue_id, slave_bufs[i],
354                                 slave_nb_bufs[i]);
355
356                 total_tx_count += slave_tx_count;
357
358                 /* If tx burst fails move packets to end of bufs */
359                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
360                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
361                                         slave_tx_count;
362                         total_tx_fail_count += slave_tx_fail_count[i];
363
364                         /*
365                          * Shift bufs to beginning of array to allow reordering
366                          * later
367                          */
368                         for (j = 0; j < slave_tx_fail_count[i]; j++) {
369                                 slave_bufs[i][j] =
370                                         slave_bufs[i][(slave_tx_count - 1) + j];
371                         }
372                 }
373         }
374
375         /*
376          * If there are tx burst failures we move packets to end of bufs to
377          * preserve expected PMD behaviour of all failed transmitted being
378          * at the end of the input mbuf array
379          */
380         if (unlikely(total_tx_fail_count > 0)) {
381                 int bufs_idx = nb_bufs - total_tx_fail_count - 1;
382
383                 for (i = 0; i < slave_count; i++) {
384                         if (slave_tx_fail_count[i] > 0) {
385                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
386                                         bufs[bufs_idx++] = slave_bufs[i][j];
387                         }
388                 }
389         }
390
391         return total_tx_count;
392 }
393
394
395 static uint16_t
396 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
397                 uint16_t nb_pkts)
398 {
399         /* Cast to structure, containing bonded device's port id and queue id */
400         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
401         struct bond_dev_private *internals = bd_rx_q->dev_private;
402         struct ether_addr bond_mac;
403
404         struct ether_hdr *hdr;
405
406         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
407         uint16_t num_rx_total = 0;      /* Total number of received packets */
408         uint16_t slaves[RTE_MAX_ETHPORTS];
409         uint16_t slave_count, idx;
410
411         uint8_t collecting;  /* current slave collecting status */
412         const uint8_t promisc = internals->promiscuous_en;
413         uint8_t i, j, k;
414         uint8_t subtype;
415
416         rte_eth_macaddr_get(internals->port_id, &bond_mac);
417         /* Copy slave list to protect against slave up/down changes during tx
418          * bursting */
419         slave_count = internals->active_slave_count;
420         memcpy(slaves, internals->active_slaves,
421                         sizeof(internals->active_slaves[0]) * slave_count);
422
423         idx = internals->active_slave;
424         if (idx >= slave_count) {
425                 internals->active_slave = 0;
426                 idx = 0;
427         }
428         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
429                 j = num_rx_total;
430                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
431                                          COLLECTING);
432
433                 /* Read packets from this slave */
434                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
435                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
436
437                 for (k = j; k < 2 && k < num_rx_total; k++)
438                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
439
440                 /* Handle slow protocol packets. */
441                 while (j < num_rx_total) {
442
443                         /* If packet is not pure L2 and is known, skip it */
444                         if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
445                                 j++;
446                                 continue;
447                         }
448
449                         if (j + 3 < num_rx_total)
450                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
451
452                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
453                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
454
455                         /* Remove packet from array if it is slow packet or slave is not
456                          * in collecting state or bonding interface is not in promiscuous
457                          * mode and packet address does not match. */
458                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
459                                 !collecting || (!promisc &&
460                                         !is_multicast_ether_addr(&hdr->d_addr) &&
461                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
462
463                                 if (hdr->ether_type == ether_type_slow_be) {
464                                         bond_mode_8023ad_handle_slow_pkt(
465                                             internals, slaves[idx], bufs[j]);
466                                 } else
467                                         rte_pktmbuf_free(bufs[j]);
468
469                                 /* Packet is managed by mode 4 or dropped, shift the array */
470                                 num_rx_total--;
471                                 if (j < num_rx_total) {
472                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
473                                                 (num_rx_total - j));
474                                 }
475                         } else
476                                 j++;
477                 }
478                 if (unlikely(++idx == slave_count))
479                         idx = 0;
480         }
481
482         internals->active_slave = idx;
483         return num_rx_total;
484 }
485
486 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
487 uint32_t burstnumberRX;
488 uint32_t burstnumberTX;
489
490 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
491
492 static void
493 arp_op_name(uint16_t arp_op, char *buf)
494 {
495         switch (arp_op) {
496         case ARP_OP_REQUEST:
497                 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
498                 return;
499         case ARP_OP_REPLY:
500                 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
501                 return;
502         case ARP_OP_REVREQUEST:
503                 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
504                                 "Reverse ARP Request");
505                 return;
506         case ARP_OP_REVREPLY:
507                 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
508                                 "Reverse ARP Reply");
509                 return;
510         case ARP_OP_INVREQUEST:
511                 snprintf(buf, sizeof("Peer Identify Request"), "%s",
512                                 "Peer Identify Request");
513                 return;
514         case ARP_OP_INVREPLY:
515                 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
516                                 "Peer Identify Reply");
517                 return;
518         default:
519                 break;
520         }
521         snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
522         return;
523 }
524 #endif
525 #define MaxIPv4String   16
526 static void
527 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
528 {
529         uint32_t ipv4_addr;
530
531         ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
532         snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
533                 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
534                 ipv4_addr & 0xFF);
535 }
536
537 #define MAX_CLIENTS_NUMBER      128
538 uint8_t active_clients;
539 struct client_stats_t {
540         uint16_t port;
541         uint32_t ipv4_addr;
542         uint32_t ipv4_rx_packets;
543         uint32_t ipv4_tx_packets;
544 };
545 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
546
547 static void
548 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
549 {
550         int i = 0;
551
552         for (; i < MAX_CLIENTS_NUMBER; i++)     {
553                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
554                         /* Just update RX packets number for this client */
555                         if (TXorRXindicator == &burstnumberRX)
556                                 client_stats[i].ipv4_rx_packets++;
557                         else
558                                 client_stats[i].ipv4_tx_packets++;
559                         return;
560                 }
561         }
562         /* We have a new client. Insert him to the table, and increment stats */
563         if (TXorRXindicator == &burstnumberRX)
564                 client_stats[active_clients].ipv4_rx_packets++;
565         else
566                 client_stats[active_clients].ipv4_tx_packets++;
567         client_stats[active_clients].ipv4_addr = addr;
568         client_stats[active_clients].port = port;
569         active_clients++;
570
571 }
572
573 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
574 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
575                 RTE_LOG(DEBUG, PMD, \
576                 "%s " \
577                 "port:%d " \
578                 "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
579                 "SrcIP:%s " \
580                 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
581                 "DstIP:%s " \
582                 "%s " \
583                 "%d\n", \
584                 info, \
585                 port, \
586                 eth_h->s_addr.addr_bytes[0], \
587                 eth_h->s_addr.addr_bytes[1], \
588                 eth_h->s_addr.addr_bytes[2], \
589                 eth_h->s_addr.addr_bytes[3], \
590                 eth_h->s_addr.addr_bytes[4], \
591                 eth_h->s_addr.addr_bytes[5], \
592                 src_ip, \
593                 eth_h->d_addr.addr_bytes[0], \
594                 eth_h->d_addr.addr_bytes[1], \
595                 eth_h->d_addr.addr_bytes[2], \
596                 eth_h->d_addr.addr_bytes[3], \
597                 eth_h->d_addr.addr_bytes[4], \
598                 eth_h->d_addr.addr_bytes[5], \
599                 dst_ip, \
600                 arp_op, \
601                 ++burstnumber)
602 #endif
603
604 static void
605 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
606                 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
607 {
608         struct ipv4_hdr *ipv4_h;
609 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
610         struct arp_hdr *arp_h;
611         char dst_ip[16];
612         char ArpOp[24];
613         char buf[16];
614 #endif
615         char src_ip[16];
616
617         uint16_t ether_type = eth_h->ether_type;
618         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
619
620 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
621         strlcpy(buf, info, 16);
622 #endif
623
624         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
625                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
626                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
627 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
628                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
629                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
630 #endif
631                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
632         }
633 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
634         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
635                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
636                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
637                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
638                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
639                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
640         }
641 #endif
642 }
643 #endif
644
645 static uint16_t
646 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
647 {
648         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
649         struct bond_dev_private *internals = bd_tx_q->dev_private;
650         struct ether_hdr *eth_h;
651         uint16_t ether_type, offset;
652         uint16_t nb_recv_pkts;
653         int i;
654
655         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
656
657         for (i = 0; i < nb_recv_pkts; i++) {
658                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
659                 ether_type = eth_h->ether_type;
660                 offset = get_vlan_offset(eth_h, &ether_type);
661
662                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
663 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
664                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
665 #endif
666                         bond_mode_alb_arp_recv(eth_h, offset, internals);
667                 }
668 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
669                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
670                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
671 #endif
672         }
673
674         return nb_recv_pkts;
675 }
676
677 static uint16_t
678 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
679                 uint16_t nb_pkts)
680 {
681         struct bond_dev_private *internals;
682         struct bond_tx_queue *bd_tx_q;
683
684         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
685         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
686
687         uint16_t num_of_slaves;
688         uint16_t slaves[RTE_MAX_ETHPORTS];
689
690         uint16_t num_tx_total = 0, num_tx_slave;
691
692         static int slave_idx = 0;
693         int i, cslave_idx = 0, tx_fail_total = 0;
694
695         bd_tx_q = (struct bond_tx_queue *)queue;
696         internals = bd_tx_q->dev_private;
697
698         /* Copy slave list to protect against slave up/down changes during tx
699          * bursting */
700         num_of_slaves = internals->active_slave_count;
701         memcpy(slaves, internals->active_slaves,
702                         sizeof(internals->active_slaves[0]) * num_of_slaves);
703
704         if (num_of_slaves < 1)
705                 return num_tx_total;
706
707         /* Populate slaves mbuf with which packets are to be sent on it  */
708         for (i = 0; i < nb_pkts; i++) {
709                 cslave_idx = (slave_idx + i) % num_of_slaves;
710                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
711         }
712
713         /* increment current slave index so the next call to tx burst starts on the
714          * next slave */
715         slave_idx = ++cslave_idx;
716
717         /* Send packet burst on each slave device */
718         for (i = 0; i < num_of_slaves; i++) {
719                 if (slave_nb_pkts[i] > 0) {
720                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
721                                         slave_bufs[i], slave_nb_pkts[i]);
722
723                         /* if tx burst fails move packets to end of bufs */
724                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
725                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
726
727                                 tx_fail_total += tx_fail_slave;
728
729                                 memcpy(&bufs[nb_pkts - tx_fail_total],
730                                                 &slave_bufs[i][num_tx_slave],
731                                                 tx_fail_slave * sizeof(bufs[0]));
732                         }
733                         num_tx_total += num_tx_slave;
734                 }
735         }
736
737         return num_tx_total;
738 }
739
740 static uint16_t
741 bond_ethdev_tx_burst_active_backup(void *queue,
742                 struct rte_mbuf **bufs, uint16_t nb_pkts)
743 {
744         struct bond_dev_private *internals;
745         struct bond_tx_queue *bd_tx_q;
746
747         bd_tx_q = (struct bond_tx_queue *)queue;
748         internals = bd_tx_q->dev_private;
749
750         if (internals->active_slave_count < 1)
751                 return 0;
752
753         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
754                         bufs, nb_pkts);
755 }
756
757 static inline uint16_t
758 ether_hash(struct ether_hdr *eth_hdr)
759 {
760         unaligned_uint16_t *word_src_addr =
761                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
762         unaligned_uint16_t *word_dst_addr =
763                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
764
765         return (word_src_addr[0] ^ word_dst_addr[0]) ^
766                         (word_src_addr[1] ^ word_dst_addr[1]) ^
767                         (word_src_addr[2] ^ word_dst_addr[2]);
768 }
769
770 static inline uint32_t
771 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
772 {
773         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
774 }
775
776 static inline uint32_t
777 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
778 {
779         unaligned_uint32_t *word_src_addr =
780                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
781         unaligned_uint32_t *word_dst_addr =
782                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
783
784         return (word_src_addr[0] ^ word_dst_addr[0]) ^
785                         (word_src_addr[1] ^ word_dst_addr[1]) ^
786                         (word_src_addr[2] ^ word_dst_addr[2]) ^
787                         (word_src_addr[3] ^ word_dst_addr[3]);
788 }
789
790
791 void
792 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
793                 uint8_t slave_count, uint16_t *slaves)
794 {
795         struct ether_hdr *eth_hdr;
796         uint32_t hash;
797         int i;
798
799         for (i = 0; i < nb_pkts; i++) {
800                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
801
802                 hash = ether_hash(eth_hdr);
803
804                 slaves[i] = (hash ^= hash >> 8) % slave_count;
805         }
806 }
807
808 void
809 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
810                 uint8_t slave_count, uint16_t *slaves)
811 {
812         uint16_t i;
813         struct ether_hdr *eth_hdr;
814         uint16_t proto;
815         size_t vlan_offset;
816         uint32_t hash, l3hash;
817
818         for (i = 0; i < nb_pkts; i++) {
819                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
820                 l3hash = 0;
821
822                 proto = eth_hdr->ether_type;
823                 hash = ether_hash(eth_hdr);
824
825                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
826
827                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
828                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
829                                         ((char *)(eth_hdr + 1) + vlan_offset);
830                         l3hash = ipv4_hash(ipv4_hdr);
831
832                 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
833                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
834                                         ((char *)(eth_hdr + 1) + vlan_offset);
835                         l3hash = ipv6_hash(ipv6_hdr);
836                 }
837
838                 hash = hash ^ l3hash;
839                 hash ^= hash >> 16;
840                 hash ^= hash >> 8;
841
842                 slaves[i] = hash % slave_count;
843         }
844 }
845
846 void
847 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
848                 uint8_t slave_count, uint16_t *slaves)
849 {
850         struct ether_hdr *eth_hdr;
851         uint16_t proto;
852         size_t vlan_offset;
853         int i;
854
855         struct udp_hdr *udp_hdr;
856         struct tcp_hdr *tcp_hdr;
857         uint32_t hash, l3hash, l4hash;
858
859         for (i = 0; i < nb_pkts; i++) {
860                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
861                 proto = eth_hdr->ether_type;
862                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
863                 l3hash = 0;
864                 l4hash = 0;
865
866                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
867                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
868                                         ((char *)(eth_hdr + 1) + vlan_offset);
869                         size_t ip_hdr_offset;
870
871                         l3hash = ipv4_hash(ipv4_hdr);
872
873                         /* there is no L4 header in fragmented packet */
874                         if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
875                                                                 == 0)) {
876                                 ip_hdr_offset = (ipv4_hdr->version_ihl
877                                         & IPV4_HDR_IHL_MASK) *
878                                         IPV4_IHL_MULTIPLIER;
879
880                                 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
881                                         tcp_hdr = (struct tcp_hdr *)
882                                                 ((char *)ipv4_hdr +
883                                                         ip_hdr_offset);
884                                         l4hash = HASH_L4_PORTS(tcp_hdr);
885                                 } else if (ipv4_hdr->next_proto_id ==
886                                                                 IPPROTO_UDP) {
887                                         udp_hdr = (struct udp_hdr *)
888                                                 ((char *)ipv4_hdr +
889                                                         ip_hdr_offset);
890                                         l4hash = HASH_L4_PORTS(udp_hdr);
891                                 }
892                         }
893                 } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
894                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
895                                         ((char *)(eth_hdr + 1) + vlan_offset);
896                         l3hash = ipv6_hash(ipv6_hdr);
897
898                         if (ipv6_hdr->proto == IPPROTO_TCP) {
899                                 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
900                                 l4hash = HASH_L4_PORTS(tcp_hdr);
901                         } else if (ipv6_hdr->proto == IPPROTO_UDP) {
902                                 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
903                                 l4hash = HASH_L4_PORTS(udp_hdr);
904                         }
905                 }
906
907                 hash = l3hash ^ l4hash;
908                 hash ^= hash >> 16;
909                 hash ^= hash >> 8;
910
911                 slaves[i] = hash % slave_count;
912         }
913 }
914
915 struct bwg_slave {
916         uint64_t bwg_left_int;
917         uint64_t bwg_left_remainder;
918         uint8_t slave;
919 };
920
921 void
922 bond_tlb_activate_slave(struct bond_dev_private *internals) {
923         int i;
924
925         for (i = 0; i < internals->active_slave_count; i++) {
926                 tlb_last_obytets[internals->active_slaves[i]] = 0;
927         }
928 }
929
930 static int
931 bandwidth_cmp(const void *a, const void *b)
932 {
933         const struct bwg_slave *bwg_a = a;
934         const struct bwg_slave *bwg_b = b;
935         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
936         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
937                         (int64_t)bwg_a->bwg_left_remainder;
938         if (diff > 0)
939                 return 1;
940         else if (diff < 0)
941                 return -1;
942         else if (diff2 > 0)
943                 return 1;
944         else if (diff2 < 0)
945                 return -1;
946         else
947                 return 0;
948 }
949
950 static void
951 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
952                 struct bwg_slave *bwg_slave)
953 {
954         struct rte_eth_link link_status;
955
956         rte_eth_link_get_nowait(port_id, &link_status);
957         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
958         if (link_bwg == 0)
959                 return;
960         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
961         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
962         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
963 }
964
965 static void
966 bond_ethdev_update_tlb_slave_cb(void *arg)
967 {
968         struct bond_dev_private *internals = arg;
969         struct rte_eth_stats slave_stats;
970         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
971         uint8_t slave_count;
972         uint64_t tx_bytes;
973
974         uint8_t update_stats = 0;
975         uint8_t i, slave_id;
976
977         internals->slave_update_idx++;
978
979
980         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
981                 update_stats = 1;
982
983         for (i = 0; i < internals->active_slave_count; i++) {
984                 slave_id = internals->active_slaves[i];
985                 rte_eth_stats_get(slave_id, &slave_stats);
986                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
987                 bandwidth_left(slave_id, tx_bytes,
988                                 internals->slave_update_idx, &bwg_array[i]);
989                 bwg_array[i].slave = slave_id;
990
991                 if (update_stats) {
992                         tlb_last_obytets[slave_id] = slave_stats.obytes;
993                 }
994         }
995
996         if (update_stats == 1)
997                 internals->slave_update_idx = 0;
998
999         slave_count = i;
1000         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
1001         for (i = 0; i < slave_count; i++)
1002                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
1003
1004         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
1005                         (struct bond_dev_private *)internals);
1006 }
1007
1008 static uint16_t
1009 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1010 {
1011         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1012         struct bond_dev_private *internals = bd_tx_q->dev_private;
1013
1014         struct rte_eth_dev *primary_port =
1015                         &rte_eth_devices[internals->primary_port];
1016         uint16_t num_tx_total = 0;
1017         uint16_t i, j;
1018
1019         uint16_t num_of_slaves = internals->active_slave_count;
1020         uint16_t slaves[RTE_MAX_ETHPORTS];
1021
1022         struct ether_hdr *ether_hdr;
1023         struct ether_addr primary_slave_addr;
1024         struct ether_addr active_slave_addr;
1025
1026         if (num_of_slaves < 1)
1027                 return num_tx_total;
1028
1029         memcpy(slaves, internals->tlb_slaves_order,
1030                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
1031
1032
1033         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
1034
1035         if (nb_pkts > 3) {
1036                 for (i = 0; i < 3; i++)
1037                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
1038         }
1039
1040         for (i = 0; i < num_of_slaves; i++) {
1041                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
1042                 for (j = num_tx_total; j < nb_pkts; j++) {
1043                         if (j + 3 < nb_pkts)
1044                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1045
1046                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1047                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
1048                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
1049 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1050                                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1051 #endif
1052                 }
1053
1054                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1055                                 bufs + num_tx_total, nb_pkts - num_tx_total);
1056
1057                 if (num_tx_total == nb_pkts)
1058                         break;
1059         }
1060
1061         return num_tx_total;
1062 }
1063
1064 void
1065 bond_tlb_disable(struct bond_dev_private *internals)
1066 {
1067         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1068 }
1069
1070 void
1071 bond_tlb_enable(struct bond_dev_private *internals)
1072 {
1073         bond_ethdev_update_tlb_slave_cb(internals);
1074 }
1075
1076 static uint16_t
1077 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1078 {
1079         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1080         struct bond_dev_private *internals = bd_tx_q->dev_private;
1081
1082         struct ether_hdr *eth_h;
1083         uint16_t ether_type, offset;
1084
1085         struct client_data *client_info;
1086
1087         /*
1088          * We create transmit buffers for every slave and one additional to send
1089          * through tlb. In worst case every packet will be send on one port.
1090          */
1091         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1092         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1093
1094         /*
1095          * We create separate transmit buffers for update packets as they won't
1096          * be counted in num_tx_total.
1097          */
1098         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1099         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1100
1101         struct rte_mbuf *upd_pkt;
1102         size_t pkt_size;
1103
1104         uint16_t num_send, num_not_send = 0;
1105         uint16_t num_tx_total = 0;
1106         uint16_t slave_idx;
1107
1108         int i, j;
1109
1110         /* Search tx buffer for ARP packets and forward them to alb */
1111         for (i = 0; i < nb_pkts; i++) {
1112                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1113                 ether_type = eth_h->ether_type;
1114                 offset = get_vlan_offset(eth_h, &ether_type);
1115
1116                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1117                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1118
1119                         /* Change src mac in eth header */
1120                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1121
1122                         /* Add packet to slave tx buffer */
1123                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1124                         slave_bufs_pkts[slave_idx]++;
1125                 } else {
1126                         /* If packet is not ARP, send it with TLB policy */
1127                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1128                                         bufs[i];
1129                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1130                 }
1131         }
1132
1133         /* Update connected client ARP tables */
1134         if (internals->mode6.ntt) {
1135                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1136                         client_info = &internals->mode6.client_table[i];
1137
1138                         if (client_info->in_use) {
1139                                 /* Allocate new packet to send ARP update on current slave */
1140                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1141                                 if (upd_pkt == NULL) {
1142                                         RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
1143                                         continue;
1144                                 }
1145                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1146                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
1147                                 upd_pkt->data_len = pkt_size;
1148                                 upd_pkt->pkt_len = pkt_size;
1149
1150                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1151                                                 internals);
1152
1153                                 /* Add packet to update tx buffer */
1154                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1155                                 update_bufs_pkts[slave_idx]++;
1156                         }
1157                 }
1158                 internals->mode6.ntt = 0;
1159         }
1160
1161         /* Send ARP packets on proper slaves */
1162         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1163                 if (slave_bufs_pkts[i] > 0) {
1164                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1165                                         slave_bufs[i], slave_bufs_pkts[i]);
1166                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1167                                 bufs[nb_pkts - 1 - num_not_send - j] =
1168                                                 slave_bufs[i][nb_pkts - 1 - j];
1169                         }
1170
1171                         num_tx_total += num_send;
1172                         num_not_send += slave_bufs_pkts[i] - num_send;
1173
1174 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1175         /* Print TX stats including update packets */
1176                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
1177                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1178                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1179                         }
1180 #endif
1181                 }
1182         }
1183
1184         /* Send update packets on proper slaves */
1185         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1186                 if (update_bufs_pkts[i] > 0) {
1187                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1188                                         update_bufs_pkts[i]);
1189                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
1190                                 rte_pktmbuf_free(update_bufs[i][j]);
1191                         }
1192 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1193                         for (j = 0; j < update_bufs_pkts[i]; j++) {
1194                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1195                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1196                         }
1197 #endif
1198                 }
1199         }
1200
1201         /* Send non-ARP packets using tlb policy */
1202         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1203                 num_send = bond_ethdev_tx_burst_tlb(queue,
1204                                 slave_bufs[RTE_MAX_ETHPORTS],
1205                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1206
1207                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1208                         bufs[nb_pkts - 1 - num_not_send - j] =
1209                                         slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1210                 }
1211
1212                 num_tx_total += num_send;
1213         }
1214
1215         return num_tx_total;
1216 }
1217
1218 static uint16_t
1219 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1220                 uint16_t nb_bufs)
1221 {
1222         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1223         struct bond_dev_private *internals = bd_tx_q->dev_private;
1224
1225         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1226         uint16_t slave_count;
1227
1228         /* Array to sort mbufs for transmission on each slave into */
1229         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1230         /* Number of mbufs for transmission on each slave */
1231         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1232         /* Mapping array generated by hash function to map mbufs to slaves */
1233         uint16_t bufs_slave_port_idxs[nb_bufs];
1234
1235         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1236         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1237
1238         uint16_t i, j;
1239
1240         if (unlikely(nb_bufs == 0))
1241                 return 0;
1242
1243         /* Copy slave list to protect against slave up/down changes during tx
1244          * bursting */
1245         slave_count = internals->active_slave_count;
1246         if (unlikely(slave_count < 1))
1247                 return 0;
1248
1249         memcpy(slave_port_ids, internals->active_slaves,
1250                         sizeof(slave_port_ids[0]) * slave_count);
1251
1252         /*
1253          * Populate slaves mbuf with the packets which are to be sent on it
1254          * selecting output slave using hash based on xmit policy
1255          */
1256         internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1257                         bufs_slave_port_idxs);
1258
1259         for (i = 0; i < nb_bufs; i++) {
1260                 /* Populate slave mbuf arrays with mbufs for that slave. */
1261                 uint8_t slave_idx = bufs_slave_port_idxs[i];
1262
1263                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1264         }
1265
1266         /* Send packet burst on each slave device */
1267         for (i = 0; i < slave_count; i++) {
1268                 if (slave_nb_bufs[i] == 0)
1269                         continue;
1270
1271                 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1272                                 bd_tx_q->queue_id, slave_bufs[i],
1273                                 slave_nb_bufs[i]);
1274
1275                 total_tx_count += slave_tx_count;
1276
1277                 /* If tx burst fails move packets to end of bufs */
1278                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1279                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
1280                                         slave_tx_count;
1281                         total_tx_fail_count += slave_tx_fail_count[i];
1282
1283                         /*
1284                          * Shift bufs to beginning of array to allow reordering
1285                          * later
1286                          */
1287                         for (j = 0; j < slave_tx_fail_count[i]; j++) {
1288                                 slave_bufs[i][j] =
1289                                         slave_bufs[i][(slave_tx_count - 1) + j];
1290                         }
1291                 }
1292         }
1293
1294         /*
1295          * If there are tx burst failures we move packets to end of bufs to
1296          * preserve expected PMD behaviour of all failed transmitted being
1297          * at the end of the input mbuf array
1298          */
1299         if (unlikely(total_tx_fail_count > 0)) {
1300                 int bufs_idx = nb_bufs - total_tx_fail_count - 1;
1301
1302                 for (i = 0; i < slave_count; i++) {
1303                         if (slave_tx_fail_count[i] > 0) {
1304                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
1305                                         bufs[bufs_idx++] = slave_bufs[i][j];
1306                         }
1307                 }
1308         }
1309
1310         return total_tx_count;
1311 }
1312
1313 static uint16_t
1314 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1315                 uint16_t nb_bufs)
1316 {
1317         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1318         struct bond_dev_private *internals = bd_tx_q->dev_private;
1319
1320         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1321         uint16_t slave_count;
1322
1323         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1324         uint16_t dist_slave_count;
1325
1326         /* 2-D array to sort mbufs for transmission on each slave into */
1327         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1328         /* Number of mbufs for transmission on each slave */
1329         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1330         /* Mapping array generated by hash function to map mbufs to slaves */
1331         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1332
1333         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1334         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1335
1336         uint16_t i, j;
1337
1338         if (unlikely(nb_bufs == 0))
1339                 return 0;
1340
1341         /* Copy slave list to protect against slave up/down changes during tx
1342          * bursting */
1343         slave_count = internals->active_slave_count;
1344         if (unlikely(slave_count < 1))
1345                 return 0;
1346
1347         memcpy(slave_port_ids, internals->active_slaves,
1348                         sizeof(slave_port_ids[0]) * slave_count);
1349
1350         dist_slave_count = 0;
1351         for (i = 0; i < slave_count; i++) {
1352                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1353
1354                 if (ACTOR_STATE(port, DISTRIBUTING))
1355                         dist_slave_port_ids[dist_slave_count++] =
1356                                         slave_port_ids[i];
1357         }
1358
1359         if (likely(dist_slave_count > 1)) {
1360
1361                 /*
1362                  * Populate slaves mbuf with the packets which are to be sent
1363                  * on it, selecting output slave using hash based on xmit policy
1364                  */
1365                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1366                                 bufs_slave_port_idxs);
1367
1368                 for (i = 0; i < nb_bufs; i++) {
1369                         /*
1370                          * Populate slave mbuf arrays with mbufs for that
1371                          * slave
1372                          */
1373                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1374
1375                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1376                                         bufs[i];
1377                 }
1378
1379
1380                 /* Send packet burst on each slave device */
1381                 for (i = 0; i < dist_slave_count; i++) {
1382                         if (slave_nb_bufs[i] == 0)
1383                                 continue;
1384
1385                         slave_tx_count = rte_eth_tx_burst(
1386                                         dist_slave_port_ids[i],
1387                                         bd_tx_q->queue_id, slave_bufs[i],
1388                                         slave_nb_bufs[i]);
1389
1390                         total_tx_count += slave_tx_count;
1391
1392                         /* If tx burst fails move packets to end of bufs */
1393                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1394                                 slave_tx_fail_count[i] = slave_nb_bufs[i] -
1395                                                 slave_tx_count;
1396                                 total_tx_fail_count += slave_tx_fail_count[i];
1397
1398                                 /*
1399                                  * Shift bufs to beginning of array to allow
1400                                  * reordering later
1401                                  */
1402                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
1403                                         slave_bufs[i][j] =
1404                                                 slave_bufs[i]
1405                                                         [slave_tx_count
1406                                                         + j];
1407                         }
1408                 }
1409
1410                 /*
1411                  * If there are tx burst failures we move packets to the end
1412                  * of bufs to preserve the expected PMD behaviour of all
1413                  * failed packets being at the end of the input mbuf array
1414                  */
1415                 if (unlikely(total_tx_fail_count > 0)) {
1416                         int bufs_idx = nb_bufs - total_tx_fail_count;
1417
1418                         for (i = 0; i < slave_count; i++) {
1419                                 if (slave_tx_fail_count[i] > 0) {
1420                                         for (j = 0;
1421                                                 j < slave_tx_fail_count[i];
1422                                                 j++) {
1423                                                 bufs[bufs_idx++] =
1424                                                         slave_bufs[i][j];
1425                                         }
1426                                 }
1427                         }
1428                 }
1429         }
1430
1431         /* Check for LACP control packets and send if available */
1432         for (i = 0; i < slave_count; i++) {
1433                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1434                 struct rte_mbuf *ctrl_pkt = NULL;
1435
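                     /* Fast path: skip the dequeue attempt when no LACPDU is
                      * queued for this slave.
                      */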
1436                 if (likely(rte_ring_empty(port->tx_ring)))
1437                         continue;
1438
1439                 if (rte_ring_dequeue(port->tx_ring,
1440                                      (void **)&ctrl_pkt) != -ENOENT) {
1441                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1442                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1443                         /*
1444                          * re-enqueue LAG control plane packets to buffering
1445                          * ring if transmission fails so the packet isn't lost.
1446                          */
1447                         if (slave_tx_count != 1)
1448                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1449                 }
1450         }
1451
1452         return total_tx_count;
1453 }
1454
1455 static uint16_t
1456 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1457                 uint16_t nb_pkts)
1458 {
1459         struct bond_dev_private *internals;
1460         struct bond_tx_queue *bd_tx_q;
1461
1462         uint8_t tx_failed_flag = 0, num_of_slaves;
1463         uint16_t slaves[RTE_MAX_ETHPORTS];
1464
1465         uint16_t max_nb_of_tx_pkts = 0;
1466
1467         int slave_tx_total[RTE_MAX_ETHPORTS];
1468         int i, most_successful_tx_slave = -1;
1469
1470         bd_tx_q = (struct bond_tx_queue *)queue;
1471         internals = bd_tx_q->dev_private;
1472
1473         /* Copy slave list to protect against slave up/down changes during tx
1474          * bursting */
1475         num_of_slaves = internals->active_slave_count;
1476         memcpy(slaves, internals->active_slaves,
1477                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1478
1479         if (num_of_slaves < 1)
1480                 return 0;
1481
1482         /* Increment reference count on mbufs */
1483         for (i = 0; i < nb_pkts; i++)
1484                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
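             /* Each mbuf will be transmitted on every active slave, so raise its
              * reference count; each slave's tx burst (or the failure handling
              * below) releases one of those references.
              */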
1485
1486         /* Transmit burst on each active slave */
1487         for (i = 0; i < num_of_slaves; i++) {
1488                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1489                                         bufs, nb_pkts);
1490
1491                 if (unlikely(slave_tx_total[i] < nb_pkts))
1492                         tx_failed_flag = 1;
1493
1494                 /* record the value and slave index for the slave which transmits the
1495                  * maximum number of packets */
1496                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1497                         max_nb_of_tx_pkts = slave_tx_total[i];
1498                         most_successful_tx_slave = i;
1499                 }
1500         }
1501
1502         /* If slaves fail to transmit part of the burst, the calling application
1503          * is not aware of the extra mbuf references, so we must free the failed
1504          * packets of every slave except the most successful one
1505          */
1506         if (unlikely(tx_failed_flag))
1507                 for (i = 0; i < num_of_slaves; i++)
1508                         if (i != most_successful_tx_slave)
1509                                 while (slave_tx_total[i] < nb_pkts)
1510                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1511
1512         return max_nb_of_tx_pkts;
1513 }
1514
1515 void
1516 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1517 {
1518         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1519
1520         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1521                 /**
1522                  * If in mode 4 then save the link properties of the first
1523                  * slave, all subsequent slaves must match these properties
1524                  */
1525                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1526
1527                 bond_link->link_autoneg = slave_link->link_autoneg;
1528                 bond_link->link_duplex = slave_link->link_duplex;
1529                 bond_link->link_speed = slave_link->link_speed;
1530         } else {
1531                 /**
1532                  * In any other mode the link properties are set to default
1533                  * values of AUTONEG/DUPLEX
1534                  */
1535                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1536                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1537         }
1538 }
1539
1540 int
1541 link_properties_valid(struct rte_eth_dev *ethdev,
1542                 struct rte_eth_link *slave_link)
1543 {
1544         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1545
1546         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1547                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1548
1549                 if (bond_link->link_duplex != slave_link->link_duplex ||
1550                         bond_link->link_autoneg != slave_link->link_autoneg ||
1551                         bond_link->link_speed != slave_link->link_speed)
1552                         return -1;
1553         }
1554
1555         return 0;
1556 }
1557
1558 int
1559 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1560 {
1561         struct ether_addr *mac_addr;
1562
1563         if (eth_dev == NULL) {
1564                 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1565                 return -1;
1566         }
1567
1568         if (dst_mac_addr == NULL) {
1569                 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1570                 return -1;
1571         }
1572
1573         mac_addr = eth_dev->data->mac_addrs;
1574
1575         ether_addr_copy(mac_addr, dst_mac_addr);
1576         return 0;
1577 }
1578
1579 int
1580 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1581 {
1582         struct ether_addr *mac_addr;
1583
1584         if (eth_dev == NULL) {
1585                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1586                 return -1;
1587         }
1588
1589         if (new_mac_addr == NULL) {
1590                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1591                 return -1;
1592         }
1593
1594         mac_addr = eth_dev->data->mac_addrs;
1595
1596         /* If new MAC is different to current MAC then update */
1597         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1598                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1599
1600         return 0;
1601 }
1602
1603 int
1604 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1605 {
1606         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1607         int i;
1608
1609         /* Update slave devices MAC addresses */
1610         if (internals->slave_count < 1)
1611                 return -1;
1612
1613         switch (internals->mode) {
1614         case BONDING_MODE_ROUND_ROBIN:
1615         case BONDING_MODE_BALANCE:
1616         case BONDING_MODE_BROADCAST:
1617                 for (i = 0; i < internals->slave_count; i++) {
1618                         if (rte_eth_dev_default_mac_addr_set(
1619                                         internals->slaves[i].port_id,
1620                                         bonded_eth_dev->data->mac_addrs)) {
1621                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1622                                                 internals->slaves[i].port_id);
1623                                 return -1;
1624                         }
1625                 }
1626                 break;
1627         case BONDING_MODE_8023AD:
1628                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1629                 break;
1630         case BONDING_MODE_ACTIVE_BACKUP:
1631         case BONDING_MODE_TLB:
1632         case BONDING_MODE_ALB:
1633         default:
1634                 for (i = 0; i < internals->slave_count; i++) {
1635                         if (internals->slaves[i].port_id ==
1636                                         internals->current_primary_port) {
1637                                 if (rte_eth_dev_default_mac_addr_set(
1638                                                 internals->current_primary_port,
1639                                                 bonded_eth_dev->data->mac_addrs)) {
1640                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1641                                                         internals->current_primary_port);
1642                                         return -1;
1643                                 }
1644                         } else {
1645                                 if (rte_eth_dev_default_mac_addr_set(
1646                                                 internals->slaves[i].port_id,
1647                                                 &internals->slaves[i].persisted_mac_addr)) {
1648                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1649                                                         internals->slaves[i].port_id);
1650                                         return -1;
1651                                 }
1652                         }
1653                 }
1654         }
1655
1656         return 0;
1657 }
1658
1659 int
1660 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1661 {
1662         struct bond_dev_private *internals;
1663
1664         internals = eth_dev->data->dev_private;
1665
1666         switch (mode) {
1667         case BONDING_MODE_ROUND_ROBIN:
1668                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1669                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1670                 break;
1671         case BONDING_MODE_ACTIVE_BACKUP:
1672                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1673                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1674                 break;
1675         case BONDING_MODE_BALANCE:
1676                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1677                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1678                 break;
1679         case BONDING_MODE_BROADCAST:
1680                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1681                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1682                 break;
1683         case BONDING_MODE_8023AD:
1684                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1685                         return -1;
1686
1687                 if (internals->mode4.dedicated_queues.enabled == 0) {
1688                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1689                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1690                         RTE_LOG(WARNING, PMD,
1691                                 "Using mode 4, it is necessary to do TX burst "
1692                                 "and RX burst at least every 100ms.\n");
1693                 } else {
1694                         /* Use flow director's optimization */
1695                         eth_dev->rx_pkt_burst =
1696                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1697                         eth_dev->tx_pkt_burst =
1698                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1699                 }
1700                 break;
1701         case BONDING_MODE_TLB:
1702                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1703                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1704                 break;
1705         case BONDING_MODE_ALB:
1706                 if (bond_mode_alb_enable(eth_dev) != 0)
1707                         return -1;
1708
1709                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1710                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1711                 break;
1712         default:
1713                 return -1;
1714         }
1715
1716         internals->mode = mode;
1717
1718         return 0;
1719 }
1720
1721
1722 static int
1723 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1724                 struct rte_eth_dev *slave_eth_dev)
1725 {
1726         int errval = 0;
1727         struct bond_dev_private *internals = (struct bond_dev_private *)
1728                 bonded_eth_dev->data->dev_private;
1729         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1730
1731         if (port->slow_pool == NULL) {
1732                 char mem_name[256];
1733                 int slave_id = slave_eth_dev->data->port_id;
1734
1735                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1736                                 slave_id);
1737                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1738                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1739                         slave_eth_dev->data->numa_node);
1740
1741                 /* Any memory allocation failure in initialization is critical because
1742                  * resources can't be freed, so reinitialization is impossible. */
1743                 if (port->slow_pool == NULL) {
1744                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1745                                 slave_id, mem_name, rte_strerror(rte_errno));
1746                 }
1747         }
1748
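             /* When dedicated queues are enabled, LACP slow traffic uses an extra
              * Rx/Tx queue pair on the slave, set up here with fixed ring sizes
              * (128 Rx and 512 Tx descriptors).
              */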
1749         if (internals->mode4.dedicated_queues.enabled == 1) {
1750                 /* Configure slow Rx queue */
1751
1752                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1753                                 internals->mode4.dedicated_queues.rx_qid, 128,
1754                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1755                                 NULL, port->slow_pool);
1756                 if (errval != 0) {
1757                         RTE_BOND_LOG(ERR,
1758                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1759                                         slave_eth_dev->data->port_id,
1760                                         internals->mode4.dedicated_queues.rx_qid,
1761                                         errval);
1762                         return errval;
1763                 }
1764
1765                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1766                                 internals->mode4.dedicated_queues.tx_qid, 512,
1767                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1768                                 NULL);
1769                 if (errval != 0) {
1770                         RTE_BOND_LOG(ERR,
1771                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1772                                 slave_eth_dev->data->port_id,
1773                                 internals->mode4.dedicated_queues.tx_qid,
1774                                 errval);
1775                         return errval;
1776                 }
1777         }
1778         return 0;
1779 }
1780
1781 int
1782 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1783                 struct rte_eth_dev *slave_eth_dev)
1784 {
1785         struct bond_rx_queue *bd_rx_q;
1786         struct bond_tx_queue *bd_tx_q;
1787         uint16_t nb_rx_queues;
1788         uint16_t nb_tx_queues;
1789
1790         int errval;
1791         uint16_t q_id;
1792         struct rte_flow_error flow_error;
1793
1794         struct bond_dev_private *internals = (struct bond_dev_private *)
1795                 bonded_eth_dev->data->dev_private;
1796
1797         /* Stop slave */
1798         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1799
1800         /* Enable interrupts on slave device if supported */
1801         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1802                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1803
1804         /* If RSS is enabled for bonding, try to enable it for slaves  */
1805         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1806                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1807                                 != 0) {
1808                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1809                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1810                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1811                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1812                 } else {
1813                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1814                 }
1815
1816                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1817                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1818                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1819                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1820         }
1821
1822         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1823                         DEV_RX_OFFLOAD_VLAN_FILTER)
1824                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1825                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1826         else
1827                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1828                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1829
1830         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1831         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1832
1833         if (internals->mode == BONDING_MODE_8023AD) {
1834                 if (internals->mode4.dedicated_queues.enabled == 1) {
1835                         nb_rx_queues++;
1836                         nb_tx_queues++;
1837                 }
1838         }
1839
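             /* Propagate the bonded device MTU to the slave; -ENOTSUP from a
              * slave PMD that does not implement MTU configuration is tolerated.
              */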
1840         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1841                                      bonded_eth_dev->data->mtu);
1842         if (errval != 0 && errval != -ENOTSUP) {
1843                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1844                                 slave_eth_dev->data->port_id, errval);
1845                 return errval;
1846         }
1847
1848         /* Configure device */
1849         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1850                         nb_rx_queues, nb_tx_queues,
1851                         &(slave_eth_dev->data->dev_conf));
1852         if (errval != 0) {
1853                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1854                                 slave_eth_dev->data->port_id, errval);
1855                 return errval;
1856         }
1857
1858         /* Setup Rx Queues */
1859         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1860                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1861
1862                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1863                                 bd_rx_q->nb_rx_desc,
1864                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1865                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1866                 if (errval != 0) {
1867                         RTE_BOND_LOG(ERR,
1868                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1869                                         slave_eth_dev->data->port_id, q_id, errval);
1870                         return errval;
1871                 }
1872         }
1873
1874         /* Setup Tx Queues */
1875         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1876                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1877
1878                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1879                                 bd_tx_q->nb_tx_desc,
1880                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1881                                 &bd_tx_q->tx_conf);
1882                 if (errval != 0) {
1883                         RTE_BOND_LOG(ERR,
1884                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1885                                 slave_eth_dev->data->port_id, q_id, errval);
1886                         return errval;
1887                 }
1888         }
1889
1890         if (internals->mode == BONDING_MODE_8023AD &&
1891                         internals->mode4.dedicated_queues.enabled == 1) {
1892                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1893                 if (errval != 0)
1894                         return errval;
1895
1896                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1897                                 slave_eth_dev->data->port_id) != 0) {
1898                         RTE_BOND_LOG(ERR,
1899                                 "bond_ethdev_8023ad_flow_verify: port=%d",
1900                                 slave_eth_dev->data->port_id);
1901                         return -1;
1902                 }
1903
1904                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1905                         rte_flow_destroy(slave_eth_dev->data->port_id,
1906                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1907                                         &flow_error);
1908
1909                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1910                                 slave_eth_dev->data->port_id);
1911         }
1912
1913         /* Start device */
1914         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1915         if (errval != 0) {
1916                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1917                                 slave_eth_dev->data->port_id, errval);
1918                 return -1;
1919         }
1920
1921         /* If RSS is enabled for bonding, synchronize RETA */
1922         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1923                 int i;
1924                 struct bond_dev_private *internals;
1925
1926                 internals = bonded_eth_dev->data->dev_private;
1927
1928                 for (i = 0; i < internals->slave_count; i++) {
1929                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1930                                 errval = rte_eth_dev_rss_reta_update(
1931                                                 slave_eth_dev->data->port_id,
1932                                                 &internals->reta_conf[0],
1933                                                 internals->slaves[i].reta_size);
1934                                 if (errval != 0) {
1935                                         RTE_LOG(WARNING, PMD,
1936                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1937                                                         " RSS Configuration for bonding may be inconsistent.\n",
1938                                                         slave_eth_dev->data->port_id, errval);
1939                                 }
1940                                 break;
1941                         }
1942                 }
1943         }
1944
1945         /* If lsc interrupt is set, check initial slave's link status */
1946         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1947                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1948                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1949                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1950                         NULL);
1951         }
1952
1953         return 0;
1954 }
1955
1956 void
1957 slave_remove(struct bond_dev_private *internals,
1958                 struct rte_eth_dev *slave_eth_dev)
1959 {
1960         uint8_t i;
1961
1962         for (i = 0; i < internals->slave_count; i++)
1963                 if (internals->slaves[i].port_id ==
1964                                 slave_eth_dev->data->port_id)
1965                         break;
1966
1967         if (i < (internals->slave_count - 1))
1968                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1969                                 sizeof(internals->slaves[0]) *
1970                                 (internals->slave_count - i - 1));
1971
1972         internals->slave_count--;
1973
1974         /* force reconfiguration of slave interfaces */
1975         _rte_eth_dev_reset(slave_eth_dev);
1976 }
1977
1978 static void
1979 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1980
1981 void
1982 slave_add(struct bond_dev_private *internals,
1983                 struct rte_eth_dev *slave_eth_dev)
1984 {
1985         struct bond_slave_details *slave_details =
1986                         &internals->slaves[internals->slave_count];
1987
1988         slave_details->port_id = slave_eth_dev->data->port_id;
1989         slave_details->last_link_status = 0;
1990
1991         /* Mark slave devices that don't support interrupts so we can
1992          * compensate when we start the bond
1993          */
1994         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1995                 slave_details->link_status_poll_enabled = 1;
1996         }
1997
1998         slave_details->link_status_wait_to_complete = 0;
1999         /* Save the slave's original MAC address so it can be restored later */
2000         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2001                         sizeof(struct ether_addr));
2002 }
2003
2004 void
2005 bond_ethdev_primary_set(struct bond_dev_private *internals,
2006                 uint16_t slave_port_id)
2007 {
2008         int i;
2009
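             /* Accept the proposed primary directly when there are no active
              * slaves yet, otherwise only accept it if it is currently active.
              */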
2010         if (internals->active_slave_count < 1)
2011                 internals->current_primary_port = slave_port_id;
2012         else
2013                 /* Search bonded device slave ports for new proposed primary port */
2014                 for (i = 0; i < internals->active_slave_count; i++) {
2015                         if (internals->active_slaves[i] == slave_port_id)
2016                                 internals->current_primary_port = slave_port_id;
2017                 }
2018 }
2019
2020 static void
2021 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2022
2023 static int
2024 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2025 {
2026         struct bond_dev_private *internals;
2027         int i;
2028
2029         /* slave eth dev will be started by bonded device */
2030         if (check_for_bonded_ethdev(eth_dev)) {
2031                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2032                                 eth_dev->data->port_id);
2033                 return -1;
2034         }
2035
2036         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2037         eth_dev->data->dev_started = 1;
2038
2039         internals = eth_dev->data->dev_private;
2040
2041         if (internals->slave_count == 0) {
2042                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2043                 goto out_err;
2044         }
2045
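             /* No user defined MAC address: inherit the persisted MAC address of
              * the primary slave as the bonded device's address.
              */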
2046         if (internals->user_defined_mac == 0) {
2047                 struct ether_addr *new_mac_addr = NULL;
2048
2049                 for (i = 0; i < internals->slave_count; i++)
2050                         if (internals->slaves[i].port_id == internals->primary_port)
2051                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2052
2053                 if (new_mac_addr == NULL)
2054                         goto out_err;
2055
2056                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2057                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2058                                         eth_dev->data->port_id);
2059                         goto out_err;
2060                 }
2061         }
2062
2063         /* Update all slave devices MACs*/
2064         if (mac_address_slaves_update(eth_dev) != 0)
2065                 goto out_err;
2066
2067         /* If bonded device is configured in promiscuous mode then re-apply config */
2068         if (internals->promiscuous_en)
2069                 bond_ethdev_promiscuous_enable(eth_dev);
2070
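             /* The dedicated LACP control queues are appended after the
              * application's data queues, so their ids equal the current
              * Rx/Tx queue counts.
              */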
2071         if (internals->mode == BONDING_MODE_8023AD) {
2072                 if (internals->mode4.dedicated_queues.enabled == 1) {
2073                         internals->mode4.dedicated_queues.rx_qid =
2074                                         eth_dev->data->nb_rx_queues;
2075                         internals->mode4.dedicated_queues.tx_qid =
2076                                         eth_dev->data->nb_tx_queues;
2077                 }
2078         }
2079
2080
2081         /* Reconfigure each slave device if starting bonded device */
2082         for (i = 0; i < internals->slave_count; i++) {
2083                 struct rte_eth_dev *slave_ethdev =
2084                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2085                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2086                         RTE_BOND_LOG(ERR,
2087                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2088                                 eth_dev->data->port_id,
2089                                 internals->slaves[i].port_id);
2090                         goto out_err;
2091                 }
2092                 /* We will need to poll for link status if any slave doesn't
2093                  * support interrupts
2094                  */
2095                 if (internals->slaves[i].link_status_poll_enabled)
2096                         internals->link_status_polling_enabled = 1;
2097         }
2098
2099         /* start polling if needed */
2100         if (internals->link_status_polling_enabled) {
2101                 rte_eal_alarm_set(
2102                         internals->link_status_polling_interval_ms * 1000,
2103                         bond_ethdev_slave_link_status_change_monitor,
2104                         (void *)&rte_eth_devices[internals->port_id]);
2105         }
2106
2107         if (internals->user_defined_primary_port)
2108                 bond_ethdev_primary_set(internals, internals->primary_port);
2109
2110         if (internals->mode == BONDING_MODE_8023AD)
2111                 bond_mode_8023ad_start(eth_dev);
2112
2113         if (internals->mode == BONDING_MODE_TLB ||
2114                         internals->mode == BONDING_MODE_ALB)
2115                 bond_tlb_enable(internals);
2116
2117         return 0;
2118
2119 out_err:
2120         eth_dev->data->dev_started = 0;
2121         return -1;
2122 }
2123
2124 static void
2125 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2126 {
2127         uint8_t i;
2128
2129         if (dev->data->rx_queues != NULL) {
2130                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2131                         rte_free(dev->data->rx_queues[i]);
2132                         dev->data->rx_queues[i] = NULL;
2133                 }
2134                 dev->data->nb_rx_queues = 0;
2135         }
2136
2137         if (dev->data->tx_queues != NULL) {
2138                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2139                         rte_free(dev->data->tx_queues[i]);
2140                         dev->data->tx_queues[i] = NULL;
2141                 }
2142                 dev->data->nb_tx_queues = 0;
2143         }
2144 }
2145
2146 void
2147 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2148 {
2149         struct bond_dev_private *internals = eth_dev->data->dev_private;
2150         uint8_t i;
2151
2152         if (internals->mode == BONDING_MODE_8023AD) {
2153                 struct port *port;
2154                 void *pkt = NULL;
2155
2156                 bond_mode_8023ad_stop(eth_dev);
2157
2158                 /* Discard all messages to/from mode 4 state machines */
2159                 for (i = 0; i < internals->active_slave_count; i++) {
2160                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2161
2162                         RTE_ASSERT(port->rx_ring != NULL);
2163                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2164                                 rte_pktmbuf_free(pkt);
2165
2166                         RTE_ASSERT(port->tx_ring != NULL);
2167                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2168                                 rte_pktmbuf_free(pkt);
2169                 }
2170         }
2171
2172         if (internals->mode == BONDING_MODE_TLB ||
2173                         internals->mode == BONDING_MODE_ALB) {
2174                 bond_tlb_disable(internals);
2175                 for (i = 0; i < internals->active_slave_count; i++)
2176                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2177         }
2178
2179         internals->active_slave_count = 0;
2180         internals->link_status_polling_enabled = 0;
2181         for (i = 0; i < internals->slave_count; i++)
2182                 internals->slaves[i].last_link_status = 0;
2183
2184         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2185         eth_dev->data->dev_started = 0;
2186 }
2187
2188 void
2189 bond_ethdev_close(struct rte_eth_dev *dev)
2190 {
2191         struct bond_dev_private *internals = dev->data->dev_private;
2192         uint8_t bond_port_id = internals->port_id;
2193         int skipped = 0;
2194
2195         RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
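             /* Stop and remove every slave; if a removal fails, skip that slave
              * so the loop still terminates.
              */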
2196         while (internals->slave_count != skipped) {
2197                 uint16_t port_id = internals->slaves[skipped].port_id;
2198
2199                 rte_eth_dev_stop(port_id);
2200
2201                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2202                         RTE_LOG(ERR, EAL,
2203                                 "Failed to remove port %d from bonded device "
2204                                 "%s\n", port_id, dev->device->name);
2205                         skipped++;
2206                 }
2207         }
2208         bond_ethdev_free_queues(dev);
2209         rte_bitmap_reset(internals->vlan_filter_bmp);
2210 }
2211
2212 /* forward declaration */
2213 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2214
2215 static void
2216 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2217 {
2218         struct bond_dev_private *internals = dev->data->dev_private;
2219
2220         uint16_t max_nb_rx_queues = UINT16_MAX;
2221         uint16_t max_nb_tx_queues = UINT16_MAX;
2222
2223         dev_info->max_mac_addrs = 1;
2224
2225         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2226                         internals->candidate_max_rx_pktlen :
2227                         ETHER_MAX_JUMBO_FRAME_LEN;
2228
2229         /* Max number of tx/rx queues that the bonded device can support is the
2230          * minimum of the bonded slaves' values, as all slaves must be capable
2231          * of supporting the same number of tx/rx queues.
2232          */
2233         if (internals->slave_count > 0) {
2234                 struct rte_eth_dev_info slave_info;
2235                 uint8_t idx;
2236
2237                 for (idx = 0; idx < internals->slave_count; idx++) {
2238                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2239                                         &slave_info);
2240
2241                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2242                                 max_nb_rx_queues = slave_info.max_rx_queues;
2243
2244                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2245                                 max_nb_tx_queues = slave_info.max_tx_queues;
2246                 }
2247         }
2248
2249         dev_info->max_rx_queues = max_nb_rx_queues;
2250         dev_info->max_tx_queues = max_nb_tx_queues;
2251
2252         /**
2253          * If dedicated hw queues enabled for link bonding device in LACP mode
2254          * then we need to reduce the maximum number of data path queues by 1.
2255          */
2256         if (internals->mode == BONDING_MODE_8023AD &&
2257                 internals->mode4.dedicated_queues.enabled == 1) {
2258                 dev_info->max_rx_queues--;
2259                 dev_info->max_tx_queues--;
2260         }
2261
2262         dev_info->min_rx_bufsize = 0;
2263
2264         dev_info->rx_offload_capa = internals->rx_offload_capa;
2265         dev_info->tx_offload_capa = internals->tx_offload_capa;
2266         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2267         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2268         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2269
2270         dev_info->reta_size = internals->reta_size;
2271 }
2272
2273 static int
2274 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2275 {
2276         int res;
2277         uint16_t i;
2278         struct bond_dev_private *internals = dev->data->dev_private;
2279
2280         /* don't do this while a slave is being added */
2281         rte_spinlock_lock(&internals->lock);
2282
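             /* Record the filter in the VLAN bitmap (remembered so it can also be
              * applied to slaves added later) and propagate it to current slaves.
              */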
2283         if (on)
2284                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2285         else
2286                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2287
2288         for (i = 0; i < internals->slave_count; i++) {
2289                 uint16_t port_id = internals->slaves[i].port_id;
2290
2291                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2292                 if (res == -ENOTSUP)
2293                         RTE_LOG(WARNING, PMD,
2294                                 "Setting VLAN filter on slave port %u not supported.\n",
2295                                 port_id);
2296         }
2297
2298         rte_spinlock_unlock(&internals->lock);
2299         return 0;
2300 }
2301
2302 static int
2303 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2304                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2305                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2306 {
2307         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2308                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2309                                         0, dev->data->numa_node);
2310         if (bd_rx_q == NULL)
2311                 return -1;
2312
2313         bd_rx_q->queue_id = rx_queue_id;
2314         bd_rx_q->dev_private = dev->data->dev_private;
2315
2316         bd_rx_q->nb_rx_desc = nb_rx_desc;
2317
2318         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2319         bd_rx_q->mb_pool = mb_pool;
2320
2321         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2322
2323         return 0;
2324 }
2325
2326 static int
2327 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2328                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2329                 const struct rte_eth_txconf *tx_conf)
2330 {
2331         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2332                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2333                                         0, dev->data->numa_node);
2334
2335         if (bd_tx_q == NULL)
2336                 return -1;
2337
2338         bd_tx_q->queue_id = tx_queue_id;
2339         bd_tx_q->dev_private = dev->data->dev_private;
2340
2341         bd_tx_q->nb_tx_desc = nb_tx_desc;
2342         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2343
2344         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2345
2346         return 0;
2347 }
2348
2349 static void
2350 bond_ethdev_rx_queue_release(void *queue)
2351 {
2352         if (queue == NULL)
2353                 return;
2354
2355         rte_free(queue);
2356 }
2357
2358 static void
2359 bond_ethdev_tx_queue_release(void *queue)
2360 {
2361         if (queue == NULL)
2362                 return;
2363
2364         rte_free(queue);
2365 }
2366
2367 static void
2368 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2369 {
2370         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2371         struct bond_dev_private *internals;
2372
2373         /* Default value for polling slave found is true as we don't want to
2374          * disable the polling thread if we cannot get the lock */
2375         int i, polling_slave_found = 1;
2376
2377         if (cb_arg == NULL)
2378                 return;
2379
2380         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2381         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2382
2383         if (!bonded_ethdev->data->dev_started ||
2384                 !internals->link_status_polling_enabled)
2385                 return;
2386
2387         /* If device is currently being configured then don't check slaves'
2388          * link status, wait until the next period */
2389         if (rte_spinlock_trylock(&internals->lock)) {
2390                 if (internals->slave_count > 0)
2391                         polling_slave_found = 0;
2392
2393                 for (i = 0; i < internals->slave_count; i++) {
2394                         if (!internals->slaves[i].link_status_poll_enabled)
2395                                 continue;
2396
2397                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2398                         polling_slave_found = 1;
2399
2400                         /* Update slave link status */
2401                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2402                                         internals->slaves[i].link_status_wait_to_complete);
2403
2404                         /* if link status has changed since last checked then call lsc
2405                          * event callback */
2406                         if (slave_ethdev->data->dev_link.link_status !=
2407                                         internals->slaves[i].last_link_status) {
2408                                 internals->slaves[i].last_link_status =
2409                                                 slave_ethdev->data->dev_link.link_status;
2410
2411                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2412                                                 RTE_ETH_EVENT_INTR_LSC,
2413                                                 &bonded_ethdev->data->port_id,
2414                                                 NULL);
2415                         }
2416                 }
2417                 rte_spinlock_unlock(&internals->lock);
2418         }
2419
2420         if (polling_slave_found)
2421                 /* Set alarm to continue monitoring link status of slave ethdevs */
2422                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2423                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2424 }
2425
2426 static int
2427 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2428 {
2429         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2430
2431         struct bond_dev_private *bond_ctx;
2432         struct rte_eth_link slave_link;
2433
2434         uint32_t idx;
2435
2436         bond_ctx = ethdev->data->dev_private;
2437
2438         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2439
2440         if (ethdev->data->dev_started == 0 ||
2441                         bond_ctx->active_slave_count == 0) {
2442                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2443                 return 0;
2444         }
2445
2446         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2447
2448         if (wait_to_complete)
2449                 link_update = rte_eth_link_get;
2450         else
2451                 link_update = rte_eth_link_get_nowait;
2452
2453         switch (bond_ctx->mode) {
2454         case BONDING_MODE_BROADCAST:
2455                 /**
2456                  * Setting link speed to UINT32_MAX to ensure we pick up the
2457                  * value of the first active slave
2458                  */
2459                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2460
2461                 /**
2462                  * link speed is the minimum of all the slaves' link speeds, as
2463                  * packet loss will occur on a slave if transmission at a rate
2464                  * greater than its own is attempted
2465                  */
2466                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2467                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2468
2469                         if (slave_link.link_speed <
2470                                         ethdev->data->dev_link.link_speed)
2471                                 ethdev->data->dev_link.link_speed =
2472                                                 slave_link.link_speed;
2473                 }
2474                 break;
2475         case BONDING_MODE_ACTIVE_BACKUP:
2476                 /* Current primary slave */
2477                 link_update(bond_ctx->current_primary_port, &slave_link);
2478
2479                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2480                 break;
2481         case BONDING_MODE_8023AD:
2482                 ethdev->data->dev_link.link_autoneg =
2483                                 bond_ctx->mode4.slave_link.link_autoneg;
2484                 ethdev->data->dev_link.link_duplex =
2485                                 bond_ctx->mode4.slave_link.link_duplex;
2486                 /* fall through to update link speed */
2487         case BONDING_MODE_ROUND_ROBIN:
2488         case BONDING_MODE_BALANCE:
2489         case BONDING_MODE_TLB:
2490         case BONDING_MODE_ALB:
2491         default:
2492                 /**
2493                  * In these modes the maximum theoretical link speed is the sum
2494                  * of all the slaves' link speeds
2495                  */
2496                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2497
2498                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2499                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2500
2501                         ethdev->data->dev_link.link_speed +=
2502                                         slave_link.link_speed;
2503                 }
2504         }
2505
2506
2507         return 0;
2508 }
2509
2510
2511 static int
2512 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2513 {
2514         struct bond_dev_private *internals = dev->data->dev_private;
2515         struct rte_eth_stats slave_stats;
2516         int i, j;
2517
2518         for (i = 0; i < internals->slave_count; i++) {
2519                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2520
2521                 stats->ipackets += slave_stats.ipackets;
2522                 stats->opackets += slave_stats.opackets;
2523                 stats->ibytes += slave_stats.ibytes;
2524                 stats->obytes += slave_stats.obytes;
2525                 stats->imissed += slave_stats.imissed;
2526                 stats->ierrors += slave_stats.ierrors;
2527                 stats->oerrors += slave_stats.oerrors;
2528                 stats->rx_nombuf += slave_stats.rx_nombuf;
2529
2530                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2531                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2532                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2533                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2534                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2535                         stats->q_errors[j] += slave_stats.q_errors[j];
2536                 }
2537
2538         }
2539
2540         return 0;
2541 }
2542
2543 static void
2544 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2545 {
2546         struct bond_dev_private *internals = dev->data->dev_private;
2547         int i;
2548
2549         for (i = 0; i < internals->slave_count; i++)
2550                 rte_eth_stats_reset(internals->slaves[i].port_id);
2551 }
2552
2553 static void
2554 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2555 {
2556         struct bond_dev_private *internals = eth_dev->data->dev_private;
2557         int i;
2558
2559         internals->promiscuous_en = 1;
2560
2561         switch (internals->mode) {
2562         /* Promiscuous mode is propagated to all slaves */
2563         case BONDING_MODE_ROUND_ROBIN:
2564         case BONDING_MODE_BALANCE:
2565         case BONDING_MODE_BROADCAST:
2566                 for (i = 0; i < internals->slave_count; i++)
2567                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2568                 break;
2569         /* In mode4 promiscuous mode is managed when a slave is added/removed */
2570         case BONDING_MODE_8023AD:
2571                 break;
2572         /* Promiscuous mode is propagated only to primary slave */
2573         case BONDING_MODE_ACTIVE_BACKUP:
2574         case BONDING_MODE_TLB:
2575         case BONDING_MODE_ALB:
2576         default:
2577                 rte_eth_promiscuous_enable(internals->current_primary_port);
2578         }
2579 }
2580
2581 static void
2582 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2583 {
2584         struct bond_dev_private *internals = dev->data->dev_private;
2585         int i;
2586
2587         internals->promiscuous_en = 0;
2588
2589         switch (internals->mode) {
2590         /* Promiscuous mode is propagated to all slaves */
2591         case BONDING_MODE_ROUND_ROBIN:
2592         case BONDING_MODE_BALANCE:
2593         case BONDING_MODE_BROADCAST:
2594                 for (i = 0; i < internals->slave_count; i++)
2595                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2596                 break;
2597         /* In mode4 promiscuous mode is managed when a slave is added/removed */
2598         case BONDING_MODE_8023AD:
2599                 break;
2600         /* Promiscuous mode is propagated only to primary slave */
2601         case BONDING_MODE_ACTIVE_BACKUP:
2602         case BONDING_MODE_TLB:
2603         case BONDING_MODE_ALB:
2604         default:
2605                 rte_eth_promiscuous_disable(internals->current_primary_port);
2606         }
2607 }
2608
2609 static void
2610 bond_ethdev_delayed_lsc_propagation(void *arg)
2611 {
2612         if (arg == NULL)
2613                 return;
2614
2615         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2616                         RTE_ETH_EVENT_INTR_LSC, NULL);
2617 }
2618
2619 int
2620 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2621                 void *param, void *ret_param __rte_unused)
2622 {
2623         struct rte_eth_dev *bonded_eth_dev;
2624         struct bond_dev_private *internals;
2625         struct rte_eth_link link;
2626         int rc = -1;
2627
2628         int i, valid_slave = 0;
2629         uint8_t active_pos;
2630         uint8_t lsc_flag = 0;
2631
2632         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2633                 return rc;
2634
2635         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2636
2637         if (check_for_bonded_ethdev(bonded_eth_dev))
2638                 return rc;
2639
2640         internals = bonded_eth_dev->data->dev_private;
2641
2642         /* If the device isn't started don't handle interrupts */
2643         if (!bonded_eth_dev->data->dev_started)
2644                 return rc;
2645
2646         /* verify that port_id is a valid slave of bonded port */
2647         for (i = 0; i < internals->slave_count; i++) {
2648                 if (internals->slaves[i].port_id == port_id) {
2649                         valid_slave = 1;
2650                         break;
2651                 }
2652         }
2653
2654         if (!valid_slave)
2655                 return rc;
2656
2657         /* Search for port in active port list */
2658         active_pos = find_slave_by_id(internals->active_slaves,
2659                         internals->active_slave_count, port_id);
2660
2661         rte_eth_link_get_nowait(port_id, &link);
2662         if (link.link_status) {
2663                 if (active_pos < internals->active_slave_count)
2664                         return rc;
2665
2666                 /* if no active slave ports then set this port to be primary port */
2667                 if (internals->active_slave_count < 1) {
2668                         /* If first active slave, then change link status */
2669                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2670                         internals->current_primary_port = port_id;
2671                         lsc_flag = 1;
2672
2673                         mac_address_slaves_update(bonded_eth_dev);
2674                 }
2675
2676                 activate_slave(bonded_eth_dev, port_id);
2677
2678                 /* If user has defined the primary port then default to using it */
2679                 if (internals->user_defined_primary_port &&
2680                                 internals->primary_port == port_id)
2681                         bond_ethdev_primary_set(internals, port_id);
2682         } else {
2683                 if (active_pos == internals->active_slave_count)
2684                         return rc;
2685
2686                 /* Remove from active slave list */
2687                 deactivate_slave(bonded_eth_dev, port_id);
2688
2689                 if (internals->active_slave_count < 1)
2690                         lsc_flag = 1;
2691
2692         /* Update primary id, take first active slave from list or if none
2693          * available fall back to the configured primary port */
2694                 if (port_id == internals->current_primary_port) {
2695                         if (internals->active_slave_count > 0)
2696                                 bond_ethdev_primary_set(internals,
2697                                                 internals->active_slaves[0]);
2698                         else
2699                                 internals->current_primary_port = internals->primary_port;
2700                 }
2701         }
2702
2703         /**
2704          * Update bonded device link properties after any change to active
2705          * slaves
2706          */
2707         bond_ethdev_link_update(bonded_eth_dev, 0);
2708
2709         if (lsc_flag) {
2710                 /* Cancel any possible outstanding interrupts if delays are enabled */
2711                 if (internals->link_up_delay_ms > 0 ||
2712                         internals->link_down_delay_ms > 0)
2713                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2714                                         bonded_eth_dev);
2715
2716                 if (bonded_eth_dev->data->dev_link.link_status) {
2717                         if (internals->link_up_delay_ms > 0)
2718                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2719                                                 bond_ethdev_delayed_lsc_propagation,
2720                                                 (void *)bonded_eth_dev);
2721                         else
2722                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2723                                                 RTE_ETH_EVENT_INTR_LSC,
2724                                                 NULL);
2725
2726                 } else {
2727                         if (internals->link_down_delay_ms > 0)
2728                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2729                                                 bond_ethdev_delayed_lsc_propagation,
2730                                                 (void *)bonded_eth_dev);
2731                         else
2732                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2733                                                 RTE_ETH_EVENT_INTR_LSC,
2734                                                 NULL);
2735                 }
2736         }
2737         return 0;
2738 }
2739
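/* Update the bonded device's RSS redirection table and propagate it to every
 * slave, using each slave's own RETA size. */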
2740 static int
2741 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2742                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2743 {
2744         unsigned i, j;
2745         int result = 0;
2746         int slave_reta_size;
2747         unsigned reta_count;
2748         struct bond_dev_private *internals = dev->data->dev_private;
2749
2750         if (reta_size != internals->reta_size)
2751                 return -EINVAL;
2752
2753          /* Copy RETA table */
2754         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2755
2756         for (i = 0; i < reta_count; i++) {
2757                 internals->reta_conf[i].mask = reta_conf[i].mask;
2758                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2759                         if ((reta_conf[i].mask >> j) & 0x01)
2760                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2761         }
2762
2763         /* Fill rest of array */
2764         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2765                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2766                                 sizeof(internals->reta_conf[0]) * reta_count);
2767
2768         /* Propagate RETA over slaves */
2769         for (i = 0; i < internals->slave_count; i++) {
2770                 slave_reta_size = internals->slaves[i].reta_size;
2771                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2772                                 &internals->reta_conf[0], slave_reta_size);
2773                 if (result < 0)
2774                         return result;
2775         }
2776
2777         return 0;
2778 }
2779
2780 static int
2781 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2782                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2783 {
2784         int i, j;
2785         struct bond_dev_private *internals = dev->data->dev_private;
2786
2787         if (reta_size != internals->reta_size)
2788                 return -EINVAL;
2789
2790          /* Copy RETA table */
2791         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2792                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2793                         if ((reta_conf[i].mask >> j) & 0x01)
2794                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2795
2796         return 0;
2797 }
2798
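/* Update the RSS hash functions and key of the bonded device, restricted to
 * the flow types supported by the slaves, and propagate the configuration to
 * every slave. */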
2799 static int
2800 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2801                 struct rte_eth_rss_conf *rss_conf)
2802 {
2803         int i, result = 0;
2804         struct bond_dev_private *internals = dev->data->dev_private;
2805         struct rte_eth_rss_conf bond_rss_conf;
2806
2807         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2808
2809         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2810
2811         if (bond_rss_conf.rss_hf != 0)
2812                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2813
2814         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2815                         sizeof(internals->rss_key)) {
2816                 if (bond_rss_conf.rss_key_len == 0)
2817                         bond_rss_conf.rss_key_len = 40;
2818                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2819                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2820                                 internals->rss_key_len);
2821         }
2822
2823         for (i = 0; i < internals->slave_count; i++) {
2824                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2825                                 &bond_rss_conf);
2826                 if (result < 0)
2827                         return result;
2828         }
2829
2830         return 0;
2831 }
2832
2833 static int
2834 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2835                 struct rte_eth_rss_conf *rss_conf)
2836 {
2837         struct bond_dev_private *internals = dev->data->dev_private;
2838
2839         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2840         rss_conf->rss_key_len = internals->rss_key_len;
2841         if (rss_conf->rss_key)
2842                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2843
2844         return 0;
2845 }
2846
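/* Set the MTU on every slave; fails with -ENOTSUP if any slave does not
 * implement the mtu_set operation. */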
2847 static int
2848 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2849 {
2850         struct rte_eth_dev *slave_eth_dev;
2851         struct bond_dev_private *internals = dev->data->dev_private;
2852         int ret, i;
2853
2854         rte_spinlock_lock(&internals->lock);
2855
2856         for (i = 0; i < internals->slave_count; i++) {
2857                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2858                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2859                         rte_spinlock_unlock(&internals->lock);
2860                         return -ENOTSUP;
2861                 }
2862         }
2863         for (i = 0; i < internals->slave_count; i++) {
2864                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2865                 if (ret < 0) {
2866                         rte_spinlock_unlock(&internals->lock);
2867                         return ret;
2868                 }
2869         }
2870
2871         rte_spinlock_unlock(&internals->lock);
2872         return 0;
2873 }
2874
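/* mac_addr_set ethdev op: apply a new default MAC address to the bonded
 * device through mac_address_set(); returns -EINVAL on failure. */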
2875 static int
2876 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2877 {
2878         if (mac_address_set(dev, addr)) {
2879                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2880                 return -EINVAL;
2881         }
2882
2883         return 0;
2884 }
2885
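/* ethdev operations implemented by the bonding PMD */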
2886 const struct eth_dev_ops default_dev_ops = {
2887         .dev_start            = bond_ethdev_start,
2888         .dev_stop             = bond_ethdev_stop,
2889         .dev_close            = bond_ethdev_close,
2890         .dev_configure        = bond_ethdev_configure,
2891         .dev_infos_get        = bond_ethdev_info,
2892         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2893         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2894         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2895         .rx_queue_release     = bond_ethdev_rx_queue_release,
2896         .tx_queue_release     = bond_ethdev_tx_queue_release,
2897         .link_update          = bond_ethdev_link_update,
2898         .stats_get            = bond_ethdev_stats_get,
2899         .stats_reset          = bond_ethdev_stats_reset,
2900         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2901         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2902         .reta_update          = bond_ethdev_rss_reta_update,
2903         .reta_query           = bond_ethdev_rss_reta_query,
2904         .rss_hash_update      = bond_ethdev_rss_hash_update,
2905         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
2906         .mtu_set              = bond_ethdev_mtu_set,
2907         .mac_addr_set         = bond_ethdev_mac_address_set
2908 };
2909
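/* Allocate and initialise a new bonded ethdev (queues, MAC address storage,
 * private data, default mode 4 configuration and VLAN filter bitmap);
 * returns the new port id on success or -1 on failure. */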
2910 static int
2911 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2912 {
2913         const char *name = rte_vdev_device_name(dev);
2914         uint8_t socket_id = dev->device.numa_node;
2915         struct bond_dev_private *internals = NULL;
2916         struct rte_eth_dev *eth_dev = NULL;
2917         uint32_t vlan_filter_bmp_size;
2918
2919         /* now do all data allocation - for eth_dev structure, dummy pci driver
2920          * and internal (private) data
2921          */
2922
2923         /* reserve an ethdev entry */
2924         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2925         if (eth_dev == NULL) {
2926                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2927                 goto err;
2928         }
2929
2930         internals = eth_dev->data->dev_private;
2931         eth_dev->data->nb_rx_queues = (uint16_t)1;
2932         eth_dev->data->nb_tx_queues = (uint16_t)1;
2933
2934         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2935                         socket_id);
2936         if (eth_dev->data->mac_addrs == NULL) {
2937                 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2938                 goto err;
2939         }
2940
2941         eth_dev->dev_ops = &default_dev_ops;
2942         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2943
2944         rte_spinlock_init(&internals->lock);
2945
2946         internals->port_id = eth_dev->data->port_id;
2947         internals->mode = BONDING_MODE_INVALID;
2948         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2949         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2950         internals->burst_xmit_hash = burst_xmit_l2_hash;
2951         internals->user_defined_mac = 0;
2952
2953         internals->link_status_polling_enabled = 0;
2954
2955         internals->link_status_polling_interval_ms =
2956                 DEFAULT_POLLING_INTERVAL_10_MS;
2957         internals->link_down_delay_ms = 0;
2958         internals->link_up_delay_ms = 0;
2959
2960         internals->slave_count = 0;
2961         internals->active_slave_count = 0;
2962         internals->rx_offload_capa = 0;
2963         internals->tx_offload_capa = 0;
2964         internals->rx_queue_offload_capa = 0;
2965         internals->tx_queue_offload_capa = 0;
2966         internals->candidate_max_rx_pktlen = 0;
2967         internals->max_rx_pktlen = 0;
2968
2969         /* Initially allow to choose any offload type */
2970         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2971
2972         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2973         memset(internals->slaves, 0, sizeof(internals->slaves));
2974
2975         /* Set mode 4 default configuration */
2976         bond_mode_8023ad_setup(eth_dev, NULL);
2977         if (bond_ethdev_mode_set(eth_dev, mode)) {
2978                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
2979                                  eth_dev->data->port_id, mode);
2980                 goto err;
2981         }
2982
2983         vlan_filter_bmp_size =
2984                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2985         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2986                                                    RTE_CACHE_LINE_SIZE);
2987         if (internals->vlan_filter_bmpmem == NULL) {
2988                 RTE_BOND_LOG(ERR,
2989                              "Failed to allocate vlan bitmap for bonded device %u\n",
2990                              eth_dev->data->port_id);
2991                 goto err;
2992         }
2993
2994         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2995                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2996         if (internals->vlan_filter_bmp == NULL) {
2997                 RTE_BOND_LOG(ERR,
2998                              "Failed to init vlan bitmap for bonded device %u\n",
2999                              eth_dev->data->port_id);
3000                 rte_free(internals->vlan_filter_bmpmem);
3001                 goto err;
3002         }
3003
3004         return eth_dev->data->port_id;
3005
3006 err:
3007         rte_free(internals);
3008         if (eth_dev != NULL) {
3009                 rte_free(eth_dev->data->mac_addrs);
3010                 rte_eth_dev_release_port(eth_dev);
3011         }
3012         return -1;
3013 }
3014
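/* vdev probe entry point: parse the device kvargs (mode, socket_id,
 * agg_mode), create the bonded ethdev via bond_alloc() and store the kvlist
 * for later processing in bond_ethdev_configure(). */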
3015 static int
3016 bond_probe(struct rte_vdev_device *dev)
3017 {
3018         const char *name;
3019         struct bond_dev_private *internals;
3020         struct rte_kvargs *kvlist;
3021         uint8_t bonding_mode, socket_id;
3022         int  arg_count, port_id;
3023         uint8_t agg_mode;
3024
3025         if (!dev)
3026                 return -EINVAL;
3027
3028         name = rte_vdev_device_name(dev);
3029         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
3030
3031         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3032                 pmd_bond_init_valid_arguments);
3033         if (kvlist == NULL)
3034                 return -1;
3035
3036         /* Parse link bonding mode */
3037         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3038                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3039                                 &bond_ethdev_parse_slave_mode_kvarg,
3040                                 &bonding_mode) != 0) {
3041                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
3042                                         name);
3043                         goto parse_error;
3044                 }
3045         } else {
3046                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
3047                                 "device %s\n", name);
3048                 goto parse_error;
3049         }
3050
3051         /* Parse socket id to create bonding device on */
3052         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3053         if (arg_count == 1) {
3054                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3055                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3056                                 != 0) {
3057                         RTE_LOG(ERR, EAL, "Invalid socket id specified for "
3058                                         "bonded device %s\n", name);
3059                         goto parse_error;
3060                 }
3061         } else if (arg_count > 1) {
3062                 RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
3063                                 "bonded device %s\n", name);
3064                 goto parse_error;
3065         } else {
3066                 socket_id = rte_socket_id();
3067         }
3068
3069         dev->device.numa_node = socket_id;
3070
3071         /* Create link bonding eth device */
3072         port_id = bond_alloc(dev, bonding_mode);
3073         if (port_id < 0) {
3074                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
3075                                 "socket %u.\n", name, bonding_mode, socket_id);
3076                 goto parse_error;
3077         }
3078         internals = rte_eth_devices[port_id].data->dev_private;
3079         internals->kvlist = kvlist;
3080
3081
3082         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3083                 if (rte_kvargs_process(kvlist,
3084                                 PMD_BOND_AGG_MODE_KVARG,
3085                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3086                                 &agg_mode) != 0) {
3087                         RTE_LOG(ERR, EAL,
3088                                         "Failed to parse agg selection mode for bonded device %s\n",
3089                                         name);
3090                         goto parse_error;
3091                 }
3092
3093                 if (internals->mode == BONDING_MODE_8023AD)
3094                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3095                                         agg_mode);
3096         } else {
3097                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3098         }
3099
3100         RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
3101                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
3102         return 0;
3103
3104 parse_error:
3105         rte_kvargs_free(kvlist);
3106
3107         return -1;
3108 }
3109
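/* vdev remove entry point: refuse removal while slaves are still attached,
 * stop/close the device if it is started, and release all associated
 * resources. */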
3110 static int
3111 bond_remove(struct rte_vdev_device *dev)
3112 {
3113         struct rte_eth_dev *eth_dev;
3114         struct bond_dev_private *internals;
3115         const char *name;
3116
3117         if (!dev)
3118                 return -EINVAL;
3119
3120         name = rte_vdev_device_name(dev);
3121         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
3122
3123         /* now free all data allocation - for eth_dev structure,
3124          * dummy pci driver and internal (private) data
3125          */
3126
3127         /* find an ethdev entry */
3128         eth_dev = rte_eth_dev_allocated(name);
3129         if (eth_dev == NULL)
3130                 return -ENODEV;
3131
3132         RTE_ASSERT(eth_dev->device == &dev->device);
3133
3134         internals = eth_dev->data->dev_private;
3135         if (internals->slave_count != 0)
3136                 return -EBUSY;
3137
3138         if (eth_dev->data->dev_started == 1) {
3139                 bond_ethdev_stop(eth_dev);
3140                 bond_ethdev_close(eth_dev);
3141         }
3142
3143         eth_dev->dev_ops = NULL;
3144         eth_dev->rx_pkt_burst = NULL;
3145         eth_dev->tx_pkt_burst = NULL;
3146
3147         internals = eth_dev->data->dev_private;
3148         rte_bitmap_free(internals->vlan_filter_bmp);
3149         rte_free(internals->vlan_filter_bmpmem);
3150         rte_free(eth_dev->data->dev_private);
3151         rte_free(eth_dev->data->mac_addrs);
3152
3153         rte_eth_dev_release_port(eth_dev);
3154
3155         return 0;
3156 }
3157
3158 /* this part will resolve the slave port ids after all the other pdevs and vdevs
3159  * have been allocated */
3160 static int
3161 bond_ethdev_configure(struct rte_eth_dev *dev)
3162 {
3163         const char *name = dev->device->name;
3164         struct bond_dev_private *internals = dev->data->dev_private;
3165         struct rte_kvargs *kvlist = internals->kvlist;
3166         int arg_count;
3167         uint16_t port_id = dev - rte_eth_devices;
3168         uint8_t agg_mode;
3169
3170         static const uint8_t default_rss_key[40] = {
3171                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3172                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3173                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3174                 0xBE, 0xAC, 0x01, 0xFA
3175         };
3176
3177         unsigned i, j;
3178
3179         /* If RSS is enabled, fill table and key with default values */
3180         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3181                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
3182                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
3183                 memcpy(internals->rss_key, default_rss_key, 40);
3184
3185                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3186                         internals->reta_conf[i].mask = ~0LL;
3187                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3188                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
3189                 }
3190         }
3191
3192         /* set the max_rx_pktlen */
3193         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3194
3195         /*
3196          * if no kvlist, it means that this bonded device has been created
3197          * through the bonding api.
3198          */
3199         if (!kvlist)
3200                 return 0;
3201
3202         /* Parse MAC address for bonded device */
3203         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3204         if (arg_count == 1) {
3205                 struct ether_addr bond_mac;
3206
3207                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3208                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3209                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
3210                                         name);
3211                         return -1;
3212                 }
3213
3214                 /* Set MAC address */
3215                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3216                         RTE_LOG(ERR, EAL,
3217                                         "Failed to set mac address on bonded device %s\n",
3218                                         name);
3219                         return -1;
3220                 }
3221         } else if (arg_count > 1) {
3222                 RTE_LOG(ERR, EAL,
3223                                 "MAC address can be specified only once for bonded device %s\n",
3224                                 name);
3225                 return -1;
3226         }
3227
3228         /* Parse/set balance mode transmit policy */
3229         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3230         if (arg_count == 1) {
3231                 uint8_t xmit_policy;
3232
3233                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3234                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3235                                                 0) {
3236                         RTE_LOG(INFO, EAL,
3237                                         "Invalid xmit policy specified for bonded device %s\n",
3238                                         name);
3239                         return -1;
3240                 }
3241
3242                 /* Set balance mode transmit policy*/
3243                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3244                         RTE_LOG(ERR, EAL,
3245                                         "Failed to set balance xmit policy on bonded device %s\n",
3246                                         name);
3247                         return -1;
3248                 }
3249         } else if (arg_count > 1) {
3250                 RTE_LOG(ERR, EAL,
3251                                 "Transmit policy can be specified only once for bonded device"
3252                                 " %s\n", name);
3253                 return -1;
3254         }
3255
3256         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3257                 if (rte_kvargs_process(kvlist,
3258                                 PMD_BOND_AGG_MODE_KVARG,
3259                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3260                                 &agg_mode) != 0) {
3261                         RTE_LOG(ERR, EAL,
3262                                         "Failed to parse agg selection mode for bonded device %s\n",
3263                                         name);
3264                 }
3265                 if (internals->mode == BONDING_MODE_8023AD)
3266                                 rte_eth_bond_8023ad_agg_selection_set(port_id,
3267                                                 agg_mode);
3268         }
3269
3270         /* Parse/add slave ports to bonded device */
3271         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3272                 struct bond_ethdev_slave_ports slave_ports;
3273                 unsigned i;
3274
3275                 memset(&slave_ports, 0, sizeof(slave_ports));
3276
3277                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3278                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3279                         RTE_LOG(ERR, EAL,
3280                                         "Failed to parse slave ports for bonded device %s\n",
3281                                         name);
3282                         return -1;
3283                 }
3284
3285                 for (i = 0; i < slave_ports.slave_count; i++) {
3286                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3287                                 RTE_LOG(ERR, EAL,
3288                                                 "Failed to add port %d as slave to bonded device %s\n",
3289                                                 slave_ports.slaves[i], name);
3290                         }
3291                 }
3292
3293         } else {
3294                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3295                 return -1;
3296         }
3297
3298         /* Parse/set primary slave port id*/
3299         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3300         if (arg_count == 1) {
3301                 uint16_t primary_slave_port_id;
3302
3303                 if (rte_kvargs_process(kvlist,
3304                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
3305                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3306                                 &primary_slave_port_id) < 0) {
3307                         RTE_LOG(INFO, EAL,
3308                                         "Invalid primary slave port id specified for bonded device"
3309                                         " %s\n", name);
3310                         return -1;
3311                 }
3312
3313                 /* Set the primary slave port */
3314                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3315                                 != 0) {
3316                         RTE_LOG(ERR, EAL,
3317                                         "Failed to set primary slave port %d on bonded device %s\n",
3318                                         primary_slave_port_id, name);
3319                         return -1;
3320                 }
3321         } else if (arg_count > 1) {
3322                 RTE_LOG(INFO, EAL,
3323                                 "Primary slave can be specified only once for bonded device"
3324                                 " %s\n", name);
3325                 return -1;
3326         }
3327
3328         /* Parse link status monitor polling interval */
3329         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3330         if (arg_count == 1) {
3331                 uint32_t lsc_poll_interval_ms;
3332
3333                 if (rte_kvargs_process(kvlist,
3334                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3335                                 &bond_ethdev_parse_time_ms_kvarg,
3336                                 &lsc_poll_interval_ms) < 0) {
3337                         RTE_LOG(INFO, EAL,
3338                                         "Invalid lsc polling interval value specified for bonded"
3339                                         " device %s\n", name);
3340                         return -1;
3341                 }
3342
3343                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3344                                 != 0) {
3345                         RTE_LOG(ERR, EAL,
3346                                         "Failed to set lsc monitor polling interval (%u ms) on"
3347                                         " bonded device %s\n", lsc_poll_interval_ms, name);
3348                         return -1;
3349                 }
3350         } else if (arg_count > 1) {
3351                 RTE_LOG(INFO, EAL,
3352                                 "LSC polling interval can be specified only once for bonded"
3353                                 " device %s\n", name);
3354                 return -1;
3355         }
3356
3357         /* Parse link up interrupt propagation delay */
3358         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3359         if (arg_count == 1) {
3360                 uint32_t link_up_delay_ms;
3361
3362                 if (rte_kvargs_process(kvlist,
3363                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3364                                 &bond_ethdev_parse_time_ms_kvarg,
3365                                 &link_up_delay_ms) < 0) {
3366                         RTE_LOG(INFO, EAL,
3367                                         "Invalid link up propagation delay value specified for"
3368                                         " bonded device %s\n", name);
3369                         return -1;
3370                 }
3371
3372                 /* Set link up propagation delay */
3373                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3374                                 != 0) {
3375                         RTE_LOG(ERR, EAL,
3376                                         "Failed to set link up propagation delay (%u ms) on bonded"
3377                                         " device %s\n", link_up_delay_ms, name);
3378                         return -1;
3379                 }
3380         } else if (arg_count > 1) {
3381                 RTE_LOG(INFO, EAL,
3382                                 "Link up propagation delay can be specified only once for"
3383                                 " bonded device %s\n", name);
3384                 return -1;
3385         }
3386
3387         /* Parse link down interrupt propagation delay */
3388         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3389         if (arg_count == 1) {
3390                 uint32_t link_down_delay_ms;
3391
3392                 if (rte_kvargs_process(kvlist,
3393                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3394                                 &bond_ethdev_parse_time_ms_kvarg,
3395                                 &link_down_delay_ms) < 0) {
3396                         RTE_LOG(INFO, EAL,
3397                                         "Invalid link down propagation delay value specified for"
3398                                         " bonded device %s\n", name);
3399                         return -1;
3400                 }
3401
3402                 /* Set link down propagation delay */
3403                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3404                                 != 0) {
3405                         RTE_LOG(ERR, EAL,
3406                                         "Failed to set link down propagation delay (%u ms) on"
3407                                         " bonded device %s\n", link_down_delay_ms, name);
3408                         return -1;
3409                 }
3410         } else if (arg_count > 1) {
3411                 RTE_LOG(INFO, EAL,
3412                                 "Link down propagation delay can be specified only once for"
3413                                 " bonded device %s\n", name);
3414                 return -1;
3415         }
3416
3417         return 0;
3418 }
3419
3420 struct rte_vdev_driver pmd_bond_drv = {
3421         .probe = bond_probe,
3422         .remove = bond_remove,
3423 };
3424
3425 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3426 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3427
3428 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3429         "slave=<ifc> "
3430         "primary=<ifc> "
3431         "mode=[0-6] "
3432         "xmit_policy=[l2 | l23 | l34] "
3433         "agg_mode=[count | stable | bandwidth] "
3434         "socket_id=<int> "
3435         "mac=<mac addr> "
3436         "lsc_poll_period_ms=<int> "
3437         "up_delay=<int> "
3438         "down_delay=<int>");
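/*
 * Illustrative usage only (not part of this file): the parameters registered
 * above can be supplied on the EAL command line when creating the bonded
 * vdev, for example:
 *
 *   --vdev 'net_bonding0,mode=2,slave=0000:01:00.0,slave=0000:01:00.1,xmit_policy=l34'
 *
 * The device index and PCI addresses shown here are hypothetical
 * placeholders; substitute the slave ports present on the target system.
 */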