[dpdk.git] drivers/net/bonding/rte_eth_bond_pmd.c (02d94b1b124050f840de8b3b097746873707c3bc)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <netinet/in.h>
6
7 #include <rte_mbuf.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_ethdev_vdev.h>
11 #include <rte_tcp.h>
12 #include <rte_udp.h>
13 #include <rte_ip.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
20 #include <rte_string_fns.h>
21
22 #include "rte_eth_bond.h"
23 #include "rte_eth_bond_private.h"
24 #include "rte_eth_bond_8023ad_private.h"
25
26 #define REORDER_PERIOD_MS 10
27 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
28
29 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
30
31 /* Table for statistics in mode 5 TLB */
32 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
33
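/*
 * Return the extra offset (zero, one or two struct vlan_hdr) introduced by
 * stacked VLAN tags after the Ethernet header, updating *proto to the
 * encapsulated EtherType. The hash and ARP helpers below use this to locate
 * the start of the L3 header.
 */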
34 static inline size_t
35 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
36 {
37         size_t vlan_offset = 0;
38
39         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
40                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
41
42                 vlan_offset = sizeof(struct vlan_hdr);
43                 *proto = vlan_hdr->eth_proto;
44
45                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
46                         vlan_hdr = vlan_hdr + 1;
47                         *proto = vlan_hdr->eth_proto;
48                         vlan_offset += sizeof(struct vlan_hdr);
49                 }
50         }
51         return vlan_offset;
52 }
53
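/*
 * Default Rx burst handler (used by round-robin and other simple modes):
 * poll each active slave in turn and append whatever it returns to *bufs
 * until nb_pkts packets have been collected or every slave has been polled.
 * Applications reach this through the normal ethdev fast path, e.g.
 * rte_eth_rx_burst(bond_port_id, queue_id, pkts, n) on the bonded port
 * (the names in that call are illustrative, not defined in this file).
 */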
54 static uint16_t
55 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
56 {
57         struct bond_dev_private *internals;
58
59         uint16_t num_rx_slave = 0;
60         uint16_t num_rx_total = 0;
61
62         int i;
63
64         /* Cast to structure containing the bonded device's port id and queue id */
65         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
66
67         internals = bd_rx_q->dev_private;
68
69
70         for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
71                 /* The offset into *bufs increases as packets are received
72                  * from successive slaves */
73                 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
74                                 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
75                 if (num_rx_slave) {
76                         num_rx_total += num_rx_slave;
77                         nb_pkts -= num_rx_slave;
78                 }
79         }
80
81         return num_rx_total;
82 }
83
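/*
 * Rx burst for active-backup mode: traffic is only ever read from the
 * current primary slave; backup slaves are left untouched.
 */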
84 static uint16_t
85 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
86                 uint16_t nb_pkts)
87 {
88         struct bond_dev_private *internals;
89
90         /* Cast to structure containing the bonded device's port id and queue id */
91         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
92
93         internals = bd_rx_q->dev_private;
94
95         return rte_eth_rx_burst(internals->current_primary_port,
96                         bd_rx_q->queue_id, bufs, nb_pkts);
97 }
98
99 static inline uint8_t
100 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
101 {
102         const uint16_t ether_type_slow_be = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
103
104         return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
105                 (ethertype == ether_type_slow_be &&
106                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
107 }
108
109 /*****************************************************************************
110  * Flow director's setup for mode 4 optimization
111  */
112
113 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
114         .dst.addr_bytes = { 0 },
115         .src.addr_bytes = { 0 },
116         .type = RTE_BE16(ETHER_TYPE_SLOW),
117 };
118
119 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
120         .dst.addr_bytes = { 0 },
121         .src.addr_bytes = { 0 },
122         .type = 0xFFFF,
123 };
124
125 static struct rte_flow_item flow_item_8023ad[] = {
126         {
127                 .type = RTE_FLOW_ITEM_TYPE_ETH,
128                 .spec = &flow_item_eth_type_8023ad,
129                 .last = NULL,
130                 .mask = &flow_item_eth_mask_type_8023ad,
131         },
132         {
133                 .type = RTE_FLOW_ITEM_TYPE_END,
134                 .spec = NULL,
135                 .last = NULL,
136                 .mask = NULL,
137         }
138 };
139
140 const struct rte_flow_attr flow_attr_8023ad = {
141         .group = 0,
142         .priority = 0,
143         .ingress = 1,
144         .egress = 0,
145         .reserved = 0,
146 };
147
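/*
 * Validate, without actually creating it, the flow rule that would steer
 * IEEE 802.3ad slow frames (ETHER_TYPE_SLOW) to a dedicated Rx queue on the
 * given slave, and check that the slave can expose enough Rx/Tx queues for
 * the extra dedicated pair.
 */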
148 int
149 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
150                 uint16_t slave_port) {
151         struct rte_eth_dev_info slave_info;
152         struct rte_flow_error error;
153         struct bond_dev_private *internals = (struct bond_dev_private *)
154                         (bond_dev->data->dev_private);
155
156         const struct rte_flow_action_queue lacp_queue_conf = {
157                 .index = 0,
158         };
159
160         const struct rte_flow_action actions[] = {
161                 {
162                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
163                         .conf = &lacp_queue_conf
164                 },
165                 {
166                         .type = RTE_FLOW_ACTION_TYPE_END,
167                 }
168         };
169
170         int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
171                         flow_item_8023ad, actions, &error);
172         if (ret < 0) {
173                 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
174                                 __func__, error.message, slave_port,
175                                 internals->mode4.dedicated_queues.rx_qid);
176                 return -1;
177         }
178
179         rte_eth_dev_info_get(slave_port, &slave_info);
180         if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
181                         slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
182                 RTE_BOND_LOG(ERR,
183                         "%s: Slave %d capabilities don't allow allocating additional queues",
184                         __func__, slave_port);
185                 return -1;
186         }
187
188         return 0;
189 }
190
191 int
192 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
193         struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
194         struct bond_dev_private *internals = (struct bond_dev_private *)
195                         (bond_dev->data->dev_private);
196         struct rte_eth_dev_info bond_info;
197         uint16_t idx;
198
199         /* Verify that all slaves in the bonding device support flow director */
200         if (internals->slave_count > 0) {
201                 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
202
203                 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
204                 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
205
206                 for (idx = 0; idx < internals->slave_count; idx++) {
207                         if (bond_ethdev_8023ad_flow_verify(bond_dev,
208                                         internals->slaves[idx].port_id) != 0)
209                                 return -1;
210                 }
211         }
212
213         return 0;
214 }
215
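/*
 * Create the 802.3ad flow rule on a slave so that LACP/marker frames land
 * on the dedicated Rx queue recorded in
 * internals->mode4.dedicated_queues.rx_qid; the returned flow handle is
 * stored per slave port.
 */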
216 int
217 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
218
219         struct rte_flow_error error;
220         struct bond_dev_private *internals = (struct bond_dev_private *)
221                         (bond_dev->data->dev_private);
222
223         struct rte_flow_action_queue lacp_queue_conf = {
224                 .index = internals->mode4.dedicated_queues.rx_qid,
225         };
226
227         const struct rte_flow_action actions[] = {
228                 {
229                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
230                         .conf = &lacp_queue_conf
231                 },
232                 {
233                         .type = RTE_FLOW_ACTION_TYPE_END,
234                 }
235         };
236
237         internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
238                         &flow_attr_8023ad, flow_item_8023ad, actions, &error);
239         if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
240                 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
241                                 "(slave_port=%d queue_id=%d)",
242                                 error.message, slave_port,
243                                 internals->mode4.dedicated_queues.rx_qid);
244                 return -1;
245         }
246
247         return 0;
248 }
249
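/*
 * Rx burst for 802.3ad when dedicated hardware queues are enabled: slow
 * frames are already diverted by the flow rules above, so this path simply
 * round-robins over the active slaves without inspecting the packets.
 */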
250 static uint16_t
251 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
252                 uint16_t nb_pkts)
253 {
254         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
255         struct bond_dev_private *internals = bd_rx_q->dev_private;
256         uint16_t num_rx_total = 0;      /* Total number of received packets */
257         uint16_t slaves[RTE_MAX_ETHPORTS];
258         uint16_t slave_count;
259
260         uint16_t i, idx;
261
262         /* Copy slave list to protect against slave up/down changes during rx
263          * bursting */
264         slave_count = internals->active_slave_count;
265         memcpy(slaves, internals->active_slaves,
266                         sizeof(internals->active_slaves[0]) * slave_count);
267
268         for (i = 0, idx = internals->active_slave;
269                         i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
270                 idx = idx % slave_count;
271
272                 /* Read packets from this slave */
273                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
274                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
275         }
276
277         internals->active_slave = idx;
278
279         return num_rx_total;
280 }
281
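/*
 * Tx burst for 802.3ad with dedicated queues: hash each mbuf onto one of
 * the slaves currently in DISTRIBUTING state, send one burst per slave and
 * move any untransmitted mbufs back to the tail of bufs so the caller sees
 * the usual partial-transmit semantics.
 */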
282 static uint16_t
283 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
284                 uint16_t nb_bufs)
285 {
286         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
287         struct bond_dev_private *internals = bd_tx_q->dev_private;
288
289         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
290         uint16_t slave_count;
291
292         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
293         uint16_t dist_slave_count;
294
295         /* 2-D array to sort mbufs for transmission on each slave into */
296         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
297         /* Number of mbufs for transmission on each slave */
298         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
299         /* Mapping array generated by hash function to map mbufs to slaves */
300         uint16_t bufs_slave_port_idxs[nb_bufs];
301
302         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
303         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
304
305         uint16_t i, j;
306
307         if (unlikely(nb_bufs == 0))
308                 return 0;
309
310         /* Copy slave list to protect against slave up/down changes during tx
311          * bursting */
312         slave_count = internals->active_slave_count;
313         if (unlikely(slave_count < 1))
314                 return 0;
315
316         memcpy(slave_port_ids, internals->active_slaves,
317                         sizeof(slave_port_ids[0]) * slave_count);
318
319
320         dist_slave_count = 0;
321         for (i = 0; i < slave_count; i++) {
322                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
323
324                 if (ACTOR_STATE(port, DISTRIBUTING))
325                         dist_slave_port_ids[dist_slave_count++] =
326                                         slave_port_ids[i];
327         }
328
329         if (unlikely(dist_slave_count < 1))
330                 return 0;
331
332         /*
333          * Populate the per-slave mbuf arrays with the packets to be sent on
334          * each slave, chosen by a hash based on the xmit policy
335          */
336         internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
337                         bufs_slave_port_idxs);
338
339         for (i = 0; i < nb_bufs; i++) {
340                 /* Populate slave mbuf arrays with mbufs for that slave. */
341                 uint16_t slave_idx = bufs_slave_port_idxs[i];
342
343                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
344         }
345
346
347         /* Send packet burst on each slave device */
348         for (i = 0; i < dist_slave_count; i++) {
349                 if (slave_nb_bufs[i] == 0)
350                         continue;
351
352                 slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
353                                 bd_tx_q->queue_id, slave_bufs[i],
354                                 slave_nb_bufs[i]);
355
356                 total_tx_count += slave_tx_count;
357
358                 /* If tx burst fails move packets to end of bufs */
359                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
360                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
361                                         slave_tx_count;
362                         total_tx_fail_count += slave_tx_fail_count[i];
363
364                         /*
365                          * Shift bufs to beginning of array to allow reordering
366                          * later
367                          */
368                         for (j = 0; j < slave_tx_fail_count[i]; j++) {
369                                 slave_bufs[i][j] =
370                                         slave_bufs[i][slave_tx_count + j];
371                         }
372                 }
373         }
374
375         /*
376          * If there are tx burst failures we move those packets to the end of
377          * bufs to preserve the expected PMD behaviour that all untransmitted
378          * packets are at the end of the input mbuf array
379          */
380         if (unlikely(total_tx_fail_count > 0)) {
381                 int bufs_idx = nb_bufs - total_tx_fail_count;
382
383                 for (i = 0; i < slave_count; i++) {
384                         if (slave_tx_fail_count[i] > 0) {
385                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
386                                         bufs[bufs_idx++] = slave_bufs[i][j];
387                         }
388                 }
389         }
390
391         return total_tx_count;
392 }
393
394
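/*
 * Rx burst for 802.3ad without dedicated queues: read from the active
 * slaves in round-robin order, hand LACP/marker frames to the mode 4 state
 * machine and drop frames whose slave is not COLLECTING, or whose
 * destination MAC does not match the bonded port while it is not in
 * promiscuous mode.
 */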
395 static uint16_t
396 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
397                 uint16_t nb_pkts)
398 {
399         /* Cast to structure containing the bonded device's port id and queue id */
400         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
401         struct bond_dev_private *internals = bd_rx_q->dev_private;
402         struct ether_addr bond_mac;
403
404         struct ether_hdr *hdr;
405
406         const uint16_t ether_type_slow_be = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
407         uint16_t num_rx_total = 0;      /* Total number of received packets */
408         uint16_t slaves[RTE_MAX_ETHPORTS];
409         uint16_t slave_count, idx;
410
411         uint8_t collecting;  /* current slave collecting status */
412         const uint8_t promisc = internals->promiscuous_en;
413         uint8_t i, j, k;
414         uint8_t subtype;
415
416         rte_eth_macaddr_get(internals->port_id, &bond_mac);
417         /* Copy slave list to protect against slave up/down changes during rx
418          * bursting */
419         slave_count = internals->active_slave_count;
420         memcpy(slaves, internals->active_slaves,
421                         sizeof(internals->active_slaves[0]) * slave_count);
422
423         idx = internals->active_slave;
424         if (idx >= slave_count) {
425                 internals->active_slave = 0;
426                 idx = 0;
427         }
428         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
429                 j = num_rx_total;
430                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
431                                          COLLECTING);
432
433                 /* Read packets from this slave */
434                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
435                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
436
437                 for (k = j; k < j + 2 && k < num_rx_total; k++)
438                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
439
440                 /* Handle slow protocol packets. */
441                 while (j < num_rx_total) {
442
443                         /* Packets with a known non-L2 type cannot be slow frames, skip them */
444                         if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
445                                 j++;
446                                 continue;
447                         }
448
449                         if (j + 3 < num_rx_total)
450                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
451
452                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
453                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
454
455                         /* Remove the packet from the array if it is a slow packet, the
456                          * slave is not in collecting state, or the bonding interface is not
457                          * in promiscuous mode and the destination address does not match. */
458                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
459                                 !collecting || (!promisc &&
460                                         !is_multicast_ether_addr(&hdr->d_addr) &&
461                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
462
463                                 if (hdr->ether_type == ether_type_slow_be) {
464                                         bond_mode_8023ad_handle_slow_pkt(
465                                             internals, slaves[idx], bufs[j]);
466                                 } else
467                                         rte_pktmbuf_free(bufs[j]);
468
469                                 /* Packet is managed by mode 4 or dropped, shift the array */
470                                 num_rx_total--;
471                                 if (j < num_rx_total) {
472                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
473                                                 (num_rx_total - j));
474                                 }
475                         } else
476                                 j++;
477                 }
478                 if (unlikely(++idx == slave_count))
479                         idx = 0;
480         }
481
482         internals->active_slave = idx;
483         return num_rx_total;
484 }
485
486 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
487 uint32_t burstnumberRX;
488 uint32_t burstnumberTX;
489
490 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
491
492 static void
493 arp_op_name(uint16_t arp_op, char *buf)
494 {
495         switch (arp_op) {
496         case ARP_OP_REQUEST:
497                 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
498                 return;
499         case ARP_OP_REPLY:
500                 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
501                 return;
502         case ARP_OP_REVREQUEST:
503                 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
504                                 "Reverse ARP Request");
505                 return;
506         case ARP_OP_REVREPLY:
507                 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
508                                 "Reverse ARP Reply");
509                 return;
510         case ARP_OP_INVREQUEST:
511                 snprintf(buf, sizeof("Peer Identify Request"), "%s",
512                                 "Peer Identify Request");
513                 return;
514         case ARP_OP_INVREPLY:
515                 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
516                                 "Peer Identify Reply");
517                 return;
518         default:
519                 break;
520         }
521         snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
522         return;
523 }
524 #endif
525 #define MaxIPv4String   16
526 static void
527 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
528 {
529         uint32_t ipv4_addr;
530
531         ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
532         snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
533                 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
534                 ipv4_addr & 0xFF);
535 }
536
537 #define MAX_CLIENTS_NUMBER      128
538 uint8_t active_clients;
539 struct client_stats_t {
540         uint16_t port;
541         uint32_t ipv4_addr;
542         uint32_t ipv4_rx_packets;
543         uint32_t ipv4_tx_packets;
544 };
545 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
546
547 static void
548 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
549 {
550         int i = 0;
551
552         for (; i < MAX_CLIENTS_NUMBER; i++)     {
553                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
554                         /* Just update RX packets number for this client */
555                         if (TXorRXindicator == &burstnumberRX)
556                                 client_stats[i].ipv4_rx_packets++;
557                         else
558                                 client_stats[i].ipv4_tx_packets++;
559                         return;
560                 }
561         }
            /* Drop the new client silently if the stats table is already full */
            if (active_clients == MAX_CLIENTS_NUMBER)
                    return;
562         /* We have a new client. Insert it into the table and update its stats */
563         if (TXorRXindicator == &burstnumberRX)
564                 client_stats[active_clients].ipv4_rx_packets++;
565         else
566                 client_stats[active_clients].ipv4_tx_packets++;
567         client_stats[active_clients].ipv4_addr = addr;
568         client_stats[active_clients].port = port;
569         active_clients++;
570
571 }
572
573 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
574 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
575         rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
576                 "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
577                 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
578                 info,                                                   \
579                 port,                                                   \
580                 eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
581                 eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
582                 eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
583                 src_ip,                                                 \
584                 eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
585                 eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
586                 eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
587                 dst_ip,                                                 \
588                 arp_op, ++burstnumber)
589 #endif
590
591 static void
592 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
593                 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
594 {
595         struct ipv4_hdr *ipv4_h;
596 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
597         struct arp_hdr *arp_h;
598         char dst_ip[16];
599         char ArpOp[24];
600         char buf[16];
601 #endif
602         char src_ip[16];
603
604         uint16_t ether_type = eth_h->ether_type;
605         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
606
607 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
608         strlcpy(buf, info, 16);
609 #endif
610
611         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
612                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
613                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
614 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
615                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
616                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
617 #endif
618                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
619         }
620 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
621         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
622                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
623                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
624                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
625                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
626                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
627         }
628 #endif
629 }
630 #endif
631
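/*
 * Rx burst for adaptive load balancing (mode 6): receive as in the default
 * handler, then pass any ARP frames to bond_mode_alb_arp_recv() so the
 * client table used for Tx balancing stays current.
 */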
632 static uint16_t
633 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
634 {
635         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
636         struct bond_dev_private *internals = bd_tx_q->dev_private;
637         struct ether_hdr *eth_h;
638         uint16_t ether_type, offset;
639         uint16_t nb_recv_pkts;
640         int i;
641
642         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
643
644         for (i = 0; i < nb_recv_pkts; i++) {
645                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
646                 ether_type = eth_h->ether_type;
647                 offset = get_vlan_offset(eth_h, &ether_type);
648
649                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
650 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
651                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
652 #endif
653                         bond_mode_alb_arp_recv(eth_h, offset, internals);
654                 }
655 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
656                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
657                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
658 #endif
659         }
660
661         return nb_recv_pkts;
662 }
663
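/*
 * Tx burst for round-robin: spread the burst packet by packet over the
 * active slaves, remember (in a static index) where the next burst should
 * start, and move untransmitted packets to the tail of bufs.
 */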
664 static uint16_t
665 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
666                 uint16_t nb_pkts)
667 {
668         struct bond_dev_private *internals;
669         struct bond_tx_queue *bd_tx_q;
670
671         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
672         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
673
674         uint16_t num_of_slaves;
675         uint16_t slaves[RTE_MAX_ETHPORTS];
676
677         uint16_t num_tx_total = 0, num_tx_slave;
678
679         static int slave_idx = 0;
680         int i, cslave_idx = 0, tx_fail_total = 0;
681
682         bd_tx_q = (struct bond_tx_queue *)queue;
683         internals = bd_tx_q->dev_private;
684
685         /* Copy slave list to protect against slave up/down changes during tx
686          * bursting */
687         num_of_slaves = internals->active_slave_count;
688         memcpy(slaves, internals->active_slaves,
689                         sizeof(internals->active_slaves[0]) * num_of_slaves);
690
691         if (num_of_slaves < 1)
692                 return num_tx_total;
693
694         /* Populate per-slave mbuf arrays with the packets to be sent on each slave */
695         for (i = 0; i < nb_pkts; i++) {
696                 cslave_idx = (slave_idx + i) % num_of_slaves;
697                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
698         }
699
700         /* increment current slave index so the next call to tx burst starts on the
701          * next slave */
702         slave_idx = ++cslave_idx;
703
704         /* Send packet burst on each slave device */
705         for (i = 0; i < num_of_slaves; i++) {
706                 if (slave_nb_pkts[i] > 0) {
707                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
708                                         slave_bufs[i], slave_nb_pkts[i]);
709
710                         /* if tx burst fails move packets to end of bufs */
711                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
712                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
713
714                                 tx_fail_total += tx_fail_slave;
715
716                                 memcpy(&bufs[nb_pkts - tx_fail_total],
717                                                 &slave_bufs[i][num_tx_slave],
718                                                 tx_fail_slave * sizeof(bufs[0]));
719                         }
720                         num_tx_total += num_tx_slave;
721                 }
722         }
723
724         return num_tx_total;
725 }
726
727 static uint16_t
728 bond_ethdev_tx_burst_active_backup(void *queue,
729                 struct rte_mbuf **bufs, uint16_t nb_pkts)
730 {
731         struct bond_dev_private *internals;
732         struct bond_tx_queue *bd_tx_q;
733
734         bd_tx_q = (struct bond_tx_queue *)queue;
735         internals = bd_tx_q->dev_private;
736
737         if (internals->active_slave_count < 1)
738                 return 0;
739
740         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
741                         bufs, nb_pkts);
742 }
743
744 static inline uint16_t
745 ether_hash(struct ether_hdr *eth_hdr)
746 {
747         unaligned_uint16_t *word_src_addr =
748                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
749         unaligned_uint16_t *word_dst_addr =
750                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
751
752         return (word_src_addr[0] ^ word_dst_addr[0]) ^
753                         (word_src_addr[1] ^ word_dst_addr[1]) ^
754                         (word_src_addr[2] ^ word_dst_addr[2]);
755 }
756
757 static inline uint32_t
758 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
759 {
760         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
761 }
762
763 static inline uint32_t
764 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
765 {
766         unaligned_uint32_t *word_src_addr =
767                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
768         unaligned_uint32_t *word_dst_addr =
769                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
770
771         return (word_src_addr[0] ^ word_dst_addr[0]) ^
772                         (word_src_addr[1] ^ word_dst_addr[1]) ^
773                         (word_src_addr[2] ^ word_dst_addr[2]) ^
774                         (word_src_addr[3] ^ word_dst_addr[3]);
775 }
776
777
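/*
 * The burst_xmit_*_hash() helpers below implement the balance/802.3ad
 * transmit policies (layer 2, layer 2+3 and layer 3+4): each fills
 * slaves[i] with the output slave chosen for buf[i] by folding the relevant
 * header fields into a hash and taking it modulo slave_count.
 */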
778 void
779 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
780                 uint8_t slave_count, uint16_t *slaves)
781 {
782         struct ether_hdr *eth_hdr;
783         uint32_t hash;
784         int i;
785
786         for (i = 0; i < nb_pkts; i++) {
787                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
788
789                 hash = ether_hash(eth_hdr);
790
791                 slaves[i] = (hash ^= hash >> 8) % slave_count;
792         }
793 }
794
795 void
796 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
797                 uint8_t slave_count, uint16_t *slaves)
798 {
799         uint16_t i;
800         struct ether_hdr *eth_hdr;
801         uint16_t proto;
802         size_t vlan_offset;
803         uint32_t hash, l3hash;
804
805         for (i = 0; i < nb_pkts; i++) {
806                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
807                 l3hash = 0;
808
809                 proto = eth_hdr->ether_type;
810                 hash = ether_hash(eth_hdr);
811
812                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
813
814                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
815                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
816                                         ((char *)(eth_hdr + 1) + vlan_offset);
817                         l3hash = ipv4_hash(ipv4_hdr);
818
819                 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
820                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
821                                         ((char *)(eth_hdr + 1) + vlan_offset);
822                         l3hash = ipv6_hash(ipv6_hdr);
823                 }
824
825                 hash = hash ^ l3hash;
826                 hash ^= hash >> 16;
827                 hash ^= hash >> 8;
828
829                 slaves[i] = hash % slave_count;
830         }
831 }
832
833 void
834 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
835                 uint8_t slave_count, uint16_t *slaves)
836 {
837         struct ether_hdr *eth_hdr;
838         uint16_t proto;
839         size_t vlan_offset;
840         int i;
841
842         struct udp_hdr *udp_hdr;
843         struct tcp_hdr *tcp_hdr;
844         uint32_t hash, l3hash, l4hash;
845
846         for (i = 0; i < nb_pkts; i++) {
847                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
848                 proto = eth_hdr->ether_type;
849                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
850                 l3hash = 0;
851                 l4hash = 0;
852
853                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
854                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
855                                         ((char *)(eth_hdr + 1) + vlan_offset);
856                         size_t ip_hdr_offset;
857
858                         l3hash = ipv4_hash(ipv4_hdr);
859
860                         /* there is no L4 header in fragmented packet */
861                         if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
862                                                                 == 0)) {
863                                 ip_hdr_offset = (ipv4_hdr->version_ihl
864                                         & IPV4_HDR_IHL_MASK) *
865                                         IPV4_IHL_MULTIPLIER;
866
867                                 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
868                                         tcp_hdr = (struct tcp_hdr *)
869                                                 ((char *)ipv4_hdr +
870                                                         ip_hdr_offset);
871                                         l4hash = HASH_L4_PORTS(tcp_hdr);
872                                 } else if (ipv4_hdr->next_proto_id ==
873                                                                 IPPROTO_UDP) {
874                                         udp_hdr = (struct udp_hdr *)
875                                                 ((char *)ipv4_hdr +
876                                                         ip_hdr_offset);
877                                         l4hash = HASH_L4_PORTS(udp_hdr);
878                                 }
879                         }
880                 } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
881                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
882                                         ((char *)(eth_hdr + 1) + vlan_offset);
883                         l3hash = ipv6_hash(ipv6_hdr);
884
885                         if (ipv6_hdr->proto == IPPROTO_TCP) {
886                                 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
887                                 l4hash = HASH_L4_PORTS(tcp_hdr);
888                         } else if (ipv6_hdr->proto == IPPROTO_UDP) {
889                                 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
890                                 l4hash = HASH_L4_PORTS(udp_hdr);
891                         }
892                 }
893
894                 hash = l3hash ^ l4hash;
895                 hash ^= hash >> 16;
896                 hash ^= hash >> 8;
897
898                 slaves[i] = hash % slave_count;
899         }
900 }
901
902 struct bwg_slave {
903         uint64_t bwg_left_int;
904         uint64_t bwg_left_remainder;
905         uint8_t slave;
906 };
907
908 void
909 bond_tlb_activate_slave(struct bond_dev_private *internals) {
910         int i;
911
912         for (i = 0; i < internals->active_slave_count; i++) {
913                 tlb_last_obytets[internals->active_slaves[i]] = 0;
914         }
915 }
916
917 static int
918 bandwidth_cmp(const void *a, const void *b)
919 {
920         const struct bwg_slave *bwg_a = a;
921         const struct bwg_slave *bwg_b = b;
922         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
923         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
924                         (int64_t)bwg_a->bwg_left_remainder;
925         if (diff > 0)
926                 return 1;
927         else if (diff < 0)
928                 return -1;
929         else if (diff2 > 0)
930                 return 1;
931         else if (diff2 < 0)
932                 return -1;
933         else
934                 return 0;
935 }
936
937 static void
938 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
939                 struct bwg_slave *bwg_slave)
940 {
941         struct rte_eth_link link_status;
942
943         rte_eth_link_get_nowait(port_id, &link_status);
944         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
945         if (link_bwg == 0)
946                 return;
947         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
948         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
949         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
950 }
951
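/*
 * Periodic alarm callback for TLB (mode 5): estimate the bandwidth left on
 * each active slave from its link speed and the bytes transmitted since the
 * last measurement, then sort tlb_slaves_order so the least loaded slave is
 * tried first by the Tx path. Re-arms itself every REORDER_PERIOD_MS.
 */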
952 static void
953 bond_ethdev_update_tlb_slave_cb(void *arg)
954 {
955         struct bond_dev_private *internals = arg;
956         struct rte_eth_stats slave_stats;
957         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
958         uint8_t slave_count;
959         uint64_t tx_bytes;
960
961         uint8_t update_stats = 0;
962         uint8_t i, slave_id;
963
964         internals->slave_update_idx++;
965
966
967         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
968                 update_stats = 1;
969
970         for (i = 0; i < internals->active_slave_count; i++) {
971                 slave_id = internals->active_slaves[i];
972                 rte_eth_stats_get(slave_id, &slave_stats);
973                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
974                 bandwidth_left(slave_id, tx_bytes,
975                                 internals->slave_update_idx, &bwg_array[i]);
976                 bwg_array[i].slave = slave_id;
977
978                 if (update_stats) {
979                         tlb_last_obytets[slave_id] = slave_stats.obytes;
980                 }
981         }
982
983         if (update_stats == 1)
984                 internals->slave_update_idx = 0;
985
986         slave_count = i;
987         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
988         for (i = 0; i < slave_count; i++)
989                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
990
991         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
992                         (struct bond_dev_private *)internals);
993 }
994
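/*
 * Tx burst for TLB (mode 5): walk the slaves in the order computed by the
 * callback above, rewrite the source MAC of packets still carrying the
 * primary slave's address so they egress with the chosen slave's address,
 * and stop as soon as the whole burst has been accepted.
 */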
995 static uint16_t
996 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
997 {
998         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
999         struct bond_dev_private *internals = bd_tx_q->dev_private;
1000
1001         struct rte_eth_dev *primary_port =
1002                         &rte_eth_devices[internals->primary_port];
1003         uint16_t num_tx_total = 0;
1004         uint16_t i, j;
1005
1006         uint16_t num_of_slaves = internals->active_slave_count;
1007         uint16_t slaves[RTE_MAX_ETHPORTS];
1008
1009         struct ether_hdr *ether_hdr;
1010         struct ether_addr primary_slave_addr;
1011         struct ether_addr active_slave_addr;
1012
1013         if (num_of_slaves < 1)
1014                 return num_tx_total;
1015
1016         memcpy(slaves, internals->tlb_slaves_order,
1017                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
1018
1019
1020         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
1021
1022         if (nb_pkts > 3) {
1023                 for (i = 0; i < 3; i++)
1024                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
1025         }
1026
1027         for (i = 0; i < num_of_slaves; i++) {
1028                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
1029                 for (j = num_tx_total; j < nb_pkts; j++) {
1030                         if (j + 3 < nb_pkts)
1031                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1032
1033                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1034                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
1035                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
1036 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1037                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1038 #endif
1039                 }
1040
1041                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1042                                 bufs + num_tx_total, nb_pkts - num_tx_total);
1043
1044                 if (num_tx_total == nb_pkts)
1045                         break;
1046         }
1047
1048         return num_tx_total;
1049 }
1050
1051 void
1052 bond_tlb_disable(struct bond_dev_private *internals)
1053 {
1054         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1055 }
1056
1057 void
1058 bond_tlb_enable(struct bond_dev_private *internals)
1059 {
1060         bond_ethdev_update_tlb_slave_cb(internals);
1061 }
1062
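/*
 * Tx burst for ALB (mode 6): ARP packets are steered by
 * bond_mode_alb_arp_xmit() and get their source MAC rewritten to the
 * selected slave, ARP update packets are generated when the client table
 * has changed, and all remaining traffic falls back to the TLB policy.
 */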
1063 static uint16_t
1064 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1065 {
1066         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1067         struct bond_dev_private *internals = bd_tx_q->dev_private;
1068
1069         struct ether_hdr *eth_h;
1070         uint16_t ether_type, offset;
1071
1072         struct client_data *client_info;
1073
1074         /*
1075          * We create transmit buffers for every slave and one additional to send
1076          * through tlb. In the worst case every packet will be sent on one port.
1077          */
1078         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1079         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1080
1081         /*
1082          * We create separate transmit buffers for update packets as they won't
1083          * be counted in num_tx_total.
1084          */
1085         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1086         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1087
1088         struct rte_mbuf *upd_pkt;
1089         size_t pkt_size;
1090
1091         uint16_t num_send, num_not_send = 0;
1092         uint16_t num_tx_total = 0;
1093         uint16_t slave_idx;
1094
1095         int i, j;
1096
1097         /* Search tx buffer for ARP packets and forward them to alb */
1098         for (i = 0; i < nb_pkts; i++) {
1099                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1100                 ether_type = eth_h->ether_type;
1101                 offset = get_vlan_offset(eth_h, &ether_type);
1102
1103                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1104                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1105
1106                         /* Change src mac in eth header */
1107                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1108
1109                         /* Add packet to slave tx buffer */
1110                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1111                         slave_bufs_pkts[slave_idx]++;
1112                 } else {
1113                         /* If packet is not ARP, send it with TLB policy */
1114                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1115                                         bufs[i];
1116                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1117                 }
1118         }
1119
1120         /* Update connected client ARP tables */
1121         if (internals->mode6.ntt) {
1122                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1123                         client_info = &internals->mode6.client_table[i];
1124
1125                         if (client_info->in_use) {
1126                                 /* Allocate new packet to send ARP update on current slave */
1127                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1128                                 if (upd_pkt == NULL) {
1129                                         RTE_BOND_LOG(ERR,
1130                                                      "Failed to allocate ARP packet from pool");
1131                                         continue;
1132                                 }
1133                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1134                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
1135                                 upd_pkt->data_len = pkt_size;
1136                                 upd_pkt->pkt_len = pkt_size;
1137
1138                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1139                                                 internals);
1140
1141                                 /* Add packet to update tx buffer */
1142                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1143                                 update_bufs_pkts[slave_idx]++;
1144                         }
1145                 }
1146                 internals->mode6.ntt = 0;
1147         }
1148
1149         /* Send ARP packets on proper slaves */
1150         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1151                 if (slave_bufs_pkts[i] > 0) {
1152                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1153                                         slave_bufs[i], slave_bufs_pkts[i]);
1154                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1155                                 bufs[nb_pkts - 1 - num_not_send - j] =
1156                                                 slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
1157                         }
1158
1159                         num_tx_total += num_send;
1160                         num_not_send += slave_bufs_pkts[i] - num_send;
1161
1162 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1163         /* Print TX stats including update packets */
1164                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
1165                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1166                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1167                         }
1168 #endif
1169                 }
1170         }
1171
1172         /* Send update packets on proper slaves */
1173         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1174                 if (update_bufs_pkts[i] > 0) {
1175                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1176                                         update_bufs_pkts[i]);
1177                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
1178                                 rte_pktmbuf_free(update_bufs[i][j]);
1179                         }
1180 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1181                         for (j = 0; j < update_bufs_pkts[i]; j++) {
1182                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1183                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1184                         }
1185 #endif
1186                 }
1187         }
1188
1189         /* Send non-ARP packets using tlb policy */
1190         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1191                 num_send = bond_ethdev_tx_burst_tlb(queue,
1192                                 slave_bufs[RTE_MAX_ETHPORTS],
1193                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1194
1195                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1196                         bufs[nb_pkts - 1 - num_not_send - j] =
1197                                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
1198                 }
1199
1200                 num_tx_total += num_send;
1201         }
1202
1203         return num_tx_total;
1204 }
1205
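/*
 * Tx burst for balance mode: hash every mbuf onto an active slave using the
 * configured xmit policy, send one burst per slave, and compact any
 * untransmitted mbufs to the tail of bufs.
 */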
1206 static uint16_t
1207 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1208                 uint16_t nb_bufs)
1209 {
1210         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1211         struct bond_dev_private *internals = bd_tx_q->dev_private;
1212
1213         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1214         uint16_t slave_count;
1215
1216         /* Array to sort mbufs for transmission on each slave into */
1217         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1218         /* Number of mbufs for transmission on each slave */
1219         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1220         /* Mapping array generated by hash function to map mbufs to slaves */
1221         uint16_t bufs_slave_port_idxs[nb_bufs];
1222
1223         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1224         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1225
1226         uint16_t i, j;
1227
1228         if (unlikely(nb_bufs == 0))
1229                 return 0;
1230
1231         /* Copy slave list to protect against slave up/down changes during tx
1232          * bursting */
1233         slave_count = internals->active_slave_count;
1234         if (unlikely(slave_count < 1))
1235                 return 0;
1236
1237         memcpy(slave_port_ids, internals->active_slaves,
1238                         sizeof(slave_port_ids[0]) * slave_count);
1239
1240         /*
1241          * Populate the per-slave mbuf arrays with the packets to be sent on
1242          * each slave, chosen by a hash based on the xmit policy
1243          */
1244         internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1245                         bufs_slave_port_idxs);
1246
1247         for (i = 0; i < nb_bufs; i++) {
1248                 /* Populate slave mbuf arrays with mbufs for that slave. */
1249                 uint16_t slave_idx = bufs_slave_port_idxs[i];
1250
1251                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1252         }
1253
1254         /* Send packet burst on each slave device */
1255         for (i = 0; i < slave_count; i++) {
1256                 if (slave_nb_bufs[i] == 0)
1257                         continue;
1258
1259                 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1260                                 bd_tx_q->queue_id, slave_bufs[i],
1261                                 slave_nb_bufs[i]);
1262
1263                 total_tx_count += slave_tx_count;
1264
1265                 /* If tx burst fails move packets to end of bufs */
1266                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1267                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
1268                                         slave_tx_count;
1269                         total_tx_fail_count += slave_tx_fail_count[i];
1270
1271                         /*
1272                          * Shift bufs to beginning of array to allow reordering
1273                          * later
1274                          */
1275                         for (j = 0; j < slave_tx_fail_count[i]; j++) {
1276                                 slave_bufs[i][j] =
1277                                         slave_bufs[i][slave_tx_count + j];
1278                         }
1279                 }
1280         }
1281
1282         /*
1283          * If there are tx burst failures we move those packets to the end of
1284          * bufs to preserve the expected PMD behaviour that all untransmitted
1285          * packets are at the end of the input mbuf array
1286          */
1287         if (unlikely(total_tx_fail_count > 0)) {
1288                 int bufs_idx = nb_bufs - total_tx_fail_count;
1289
1290                 for (i = 0; i < slave_count; i++) {
1291                         if (slave_tx_fail_count[i] > 0) {
1292                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
1293                                         bufs[bufs_idx++] = slave_bufs[i][j];
1294                         }
1295                 }
1296         }
1297
1298         return total_tx_count;
1299 }
1300
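/*
 * Tx burst for 802.3ad without dedicated queues: data packets are hashed
 * over the slaves in DISTRIBUTING state as in balance mode, and any LACP
 * control frames queued on each slave's tx_ring by the mode 4 state machine
 * are transmitted afterwards.
 */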
1301 static uint16_t
1302 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1303                 uint16_t nb_bufs)
1304 {
1305         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1306         struct bond_dev_private *internals = bd_tx_q->dev_private;
1307
1308         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1309         uint16_t slave_count;
1310
1311         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1312         uint16_t dist_slave_count;
1313
1314         /* 2-D array to sort mbufs for transmission on each slave into */
1315         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1316         /* Number of mbufs for transmission on each slave */
1317         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1318         /* Mapping array generated by hash function to map mbufs to slaves */
1319         uint16_t bufs_slave_port_idxs[nb_bufs];
1320
1321         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1322         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1323
1324         uint16_t i, j;
1325
1326         if (unlikely(nb_bufs == 0))
1327                 return 0;
1328
1329         /* Copy slave list to protect against slave up/down changes during tx
1330          * bursting */
1331         slave_count = internals->active_slave_count;
1332         if (unlikely(slave_count < 1))
1333                 return 0;
1334
1335         memcpy(slave_port_ids, internals->active_slaves,
1336                         sizeof(slave_port_ids[0]) * slave_count);
1337
1338         dist_slave_count = 0;
1339         for (i = 0; i < slave_count; i++) {
1340                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1341
1342                 if (ACTOR_STATE(port, DISTRIBUTING))
1343                         dist_slave_port_ids[dist_slave_count++] =
1344                                         slave_port_ids[i];
1345         }
1346
1347         if (likely(dist_slave_count > 0)) {
1348
1349                 /*
1350                  * Populate the per-slave mbuf arrays with the packets to be sent
1351                  * on each slave, chosen by a hash based on the xmit policy
1352                  */
1353                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1354                                 bufs_slave_port_idxs);
1355
1356                 for (i = 0; i < nb_bufs; i++) {
1357                         /*
1358                          * Populate slave mbuf arrays with mbufs for that
1359                          * slave
1360                          */
1361                         uint16_t slave_idx = bufs_slave_port_idxs[i];
1362
1363                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1364                                         bufs[i];
1365                 }
1366
1367
1368                 /* Send packet burst on each slave device */
1369                 for (i = 0; i < dist_slave_count; i++) {
1370                         if (slave_nb_bufs[i] == 0)
1371                                 continue;
1372
1373                         slave_tx_count = rte_eth_tx_burst(
1374                                         dist_slave_port_ids[i],
1375                                         bd_tx_q->queue_id, slave_bufs[i],
1376                                         slave_nb_bufs[i]);
1377
1378                         total_tx_count += slave_tx_count;
1379
1380                         /* If tx burst fails move packets to end of bufs */
1381                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1382                                 slave_tx_fail_count[i] = slave_nb_bufs[i] -
1383                                                 slave_tx_count;
1384                                 total_tx_fail_count += slave_tx_fail_count[i];
1385
1386                                 /*
1387                                  * Shift bufs to beginning of array to allow
1388                                  * reordering later
1389                                  */
1390                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
1391                                         slave_bufs[i][j] =
1392                                                 slave_bufs[i]
1393                                                         [slave_tx_count
1394                                                         + j];
1395                         }
1396                 }
1397
1398                 /*
1399                  * If there were tx burst failures, move the failed packets
1400                  * to the end of bufs to preserve the expected PMD behaviour
1401                  * that untransmitted packets sit at the end of the array
1402                  */
1403                 if (unlikely(total_tx_fail_count > 0)) {
1404                         int bufs_idx = nb_bufs - total_tx_fail_count;
1405
1406                         for (i = 0; i < slave_count; i++) {
1407                                 if (slave_tx_fail_count[i] > 0) {
1408                                         for (j = 0;
1409                                                 j < slave_tx_fail_count[i];
1410                                                 j++) {
1411                                                 bufs[bufs_idx++] =
1412                                                         slave_bufs[i][j];
1413                                         }
1414                                 }
1415                         }
1416                 }
1417         }
1418
1419         /* Check for LACP control packets and send if available */
1420         for (i = 0; i < slave_count; i++) {
1421                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1422                 struct rte_mbuf *ctrl_pkt = NULL;
1423
1424                 if (likely(rte_ring_empty(port->tx_ring)))
1425                         continue;
1426
1427                 if (rte_ring_dequeue(port->tx_ring,
1428                                      (void **)&ctrl_pkt) != -ENOENT) {
1429                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1430                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1431                         /*
1432                          * re-enqueue LAG control plane packets to buffering
1433                          * ring if transmission fails so the packet isn't lost.
1434                          */
1435                         if (slave_tx_count != 1)
1436                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1437                 }
1438         }
1439
1440         return total_tx_count;
1441 }
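/*
 * Illustrative sketch (not part of the driver): the tx burst handlers above
 * keep the ethdev contract that mbufs which could not be transmitted are
 * left at the tail of the caller's array.  Assuming a hypothetical
 * application with a bonded port `bond_port` and an mbuf array `pkts`, the
 * caller side might look like:
 *
 *	uint16_t sent = rte_eth_tx_burst(bond_port, 0, pkts, nb_pkts);
 *	uint16_t k;
 *
 *	for (k = sent; k < nb_pkts; k++)
 *		rte_pktmbuf_free(pkts[k]);	// or re-queue for a later retry
 */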
1442
1443 static uint16_t
1444 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1445                 uint16_t nb_pkts)
1446 {
1447         struct bond_dev_private *internals;
1448         struct bond_tx_queue *bd_tx_q;
1449
1450         uint8_t tx_failed_flag = 0, num_of_slaves;
1451         uint16_t slaves[RTE_MAX_ETHPORTS];
1452
1453         uint16_t max_nb_of_tx_pkts = 0;
1454
1455         int slave_tx_total[RTE_MAX_ETHPORTS];
1456         int i, most_successful_tx_slave = -1;
1457
1458         bd_tx_q = (struct bond_tx_queue *)queue;
1459         internals = bd_tx_q->dev_private;
1460
1461         /* Copy slave list to protect against slave up/down changes during tx
1462          * bursting */
1463         num_of_slaves = internals->active_slave_count;
1464         memcpy(slaves, internals->active_slaves,
1465                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1466
1467         if (num_of_slaves < 1)
1468                 return 0;
1469
1470         /* Increment reference count on mbufs */
1471         for (i = 0; i < nb_pkts; i++)
1472                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1473
1474         /* Transmit burst on each active slave */
1475         for (i = 0; i < num_of_slaves; i++) {
1476                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1477                                         bufs, nb_pkts);
1478
1479                 if (unlikely(slave_tx_total[i] < nb_pkts))
1480                         tx_failed_flag = 1;
1481
1482                 /* record the value and slave index for the slave which transmits the
1483                  * maximum number of packets */
1484                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1485                         max_nb_of_tx_pkts = slave_tx_total[i];
1486                         most_successful_tx_slave = i;
1487                 }
1488         }
1489
1490         /* if slaves fail to transmit packets from burst, the calling application
1491          * is not expected to know about multiple references to packets so we must
1492          * handle failures of all packets except those of the most successful slave
1493          */
1494         if (unlikely(tx_failed_flag))
1495                 for (i = 0; i < num_of_slaves; i++)
1496                         if (i != most_successful_tx_slave)
1497                                 while (slave_tx_total[i] < nb_pkts)
1498                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1499
1500         return max_nb_of_tx_pkts;
1501 }
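/*
 * Illustrative note (not part of the driver): in broadcast mode every mbuf
 * is sent on each active slave, so its reference count is raised by
 * (num_of_slaves - 1) before transmission.  Assuming the usual initial
 * reference count of one, an mbuf broadcast over 3 slaves enters the burst
 * with refcnt 3 and is only fully released once every slave has either
 * transmitted it or it has been freed on that slave's behalf above.
 */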
1502
1503 void
1504 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1505 {
1506         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1507
1508         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1509                 /**
1510                  * If in mode 4 then save the link properties of the first
1511                  * slave; all subsequent slaves must match these properties
1512                  */
1513                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1514
1515                 bond_link->link_autoneg = slave_link->link_autoneg;
1516                 bond_link->link_duplex = slave_link->link_duplex;
1517                 bond_link->link_speed = slave_link->link_speed;
1518         } else {
1519                 /**
1520                  * In any other mode the link properties are set to default
1521                  * values of AUTONEG/DUPLEX
1522                  */
1523                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1524                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1525         }
1526 }
1527
1528 int
1529 link_properties_valid(struct rte_eth_dev *ethdev,
1530                 struct rte_eth_link *slave_link)
1531 {
1532         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1533
1534         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1535                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1536
1537                 if (bond_link->link_duplex != slave_link->link_duplex ||
1538                         bond_link->link_autoneg != slave_link->link_autoneg ||
1539                         bond_link->link_speed != slave_link->link_speed)
1540                         return -1;
1541         }
1542
1543         return 0;
1544 }
1545
1546 int
1547 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1548 {
1549         struct ether_addr *mac_addr;
1550
1551         if (eth_dev == NULL) {
1552                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1553                 return -1;
1554         }
1555
1556         if (dst_mac_addr == NULL) {
1557                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1558                 return -1;
1559         }
1560
1561         mac_addr = eth_dev->data->mac_addrs;
1562
1563         ether_addr_copy(mac_addr, dst_mac_addr);
1564         return 0;
1565 }
1566
1567 int
1568 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1569 {
1570         struct ether_addr *mac_addr;
1571
1572         if (eth_dev == NULL) {
1573                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1574                 return -1;
1575         }
1576
1577         if (new_mac_addr == NULL) {
1578                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1579                 return -1;
1580         }
1581
1582         mac_addr = eth_dev->data->mac_addrs;
1583
1584         /* If the new MAC is different from the current MAC then update */
1585         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1586                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1587
1588         return 0;
1589 }
1590
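/*
 * Propagate the bonded device's MAC address to its slaves.  In round-robin,
 * balance and broadcast modes every slave is given the bonded MAC; in
 * 802.3ad the mode 4 logic performs the update; in the remaining modes only
 * the current primary carries the bonded MAC while the other slaves keep
 * their persisted (original) addresses.
 */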
1591 int
1592 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1593 {
1594         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1595         int i;
1596
1597         /* Update slave devices MAC addresses */
1598         if (internals->slave_count < 1)
1599                 return -1;
1600
1601         switch (internals->mode) {
1602         case BONDING_MODE_ROUND_ROBIN:
1603         case BONDING_MODE_BALANCE:
1604         case BONDING_MODE_BROADCAST:
1605                 for (i = 0; i < internals->slave_count; i++) {
1606                         if (rte_eth_dev_default_mac_addr_set(
1607                                         internals->slaves[i].port_id,
1608                                         bonded_eth_dev->data->mac_addrs)) {
1609                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1610                                                 internals->slaves[i].port_id);
1611                                 return -1;
1612                         }
1613                 }
1614                 break;
1615         case BONDING_MODE_8023AD:
1616                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1617                 break;
1618         case BONDING_MODE_ACTIVE_BACKUP:
1619         case BONDING_MODE_TLB:
1620         case BONDING_MODE_ALB:
1621         default:
1622                 for (i = 0; i < internals->slave_count; i++) {
1623                         if (internals->slaves[i].port_id ==
1624                                         internals->current_primary_port) {
1625                                 if (rte_eth_dev_default_mac_addr_set(
1626                                                 internals->current_primary_port,
1627                                                 bonded_eth_dev->data->mac_addrs)) {
1628                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1629                                                         internals->current_primary_port);
1630                                         return -1;
1631                                 }
1632                         } else {
1633                                 if (rte_eth_dev_default_mac_addr_set(
1634                                                 internals->slaves[i].port_id,
1635                                                 &internals->slaves[i].persisted_mac_addr)) {
1636                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1637                                                         internals->slaves[i].port_id);
1638                                         return -1;
1639                                 }
1640                         }
1641                 }
1642         }
1643
1644         return 0;
1645 }
1646
1647 int
1648 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1649 {
1650         struct bond_dev_private *internals;
1651
1652         internals = eth_dev->data->dev_private;
1653
1654         switch (mode) {
1655         case BONDING_MODE_ROUND_ROBIN:
1656                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1657                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1658                 break;
1659         case BONDING_MODE_ACTIVE_BACKUP:
1660                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1661                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1662                 break;
1663         case BONDING_MODE_BALANCE:
1664                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1665                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1666                 break;
1667         case BONDING_MODE_BROADCAST:
1668                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1669                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1670                 break;
1671         case BONDING_MODE_8023AD:
1672                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1673                         return -1;
1674
1675                 if (internals->mode4.dedicated_queues.enabled == 0) {
1676                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1677                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1678                         RTE_BOND_LOG(WARNING,
1679                                 "Using mode 4, the application must call the TX "
1680                                 "and RX burst functions at least every 100ms.");
1681                 } else {
1682                         /* Use flow director's optimization */
1683                         eth_dev->rx_pkt_burst =
1684                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1685                         eth_dev->tx_pkt_burst =
1686                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1687                 }
1688                 break;
1689         case BONDING_MODE_TLB:
1690                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1691                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1692                 break;
1693         case BONDING_MODE_ALB:
1694                 if (bond_mode_alb_enable(eth_dev) != 0)
1695                         return -1;
1696
1697                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1698                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1699                 break;
1700         default:
1701                 return -1;
1702         }
1703
1704         internals->mode = mode;
1705
1706         return 0;
1707 }
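/*
 * Illustrative sketch (not part of the driver): an application normally
 * selects the bonding mode through the public API rather than by calling
 * bond_ethdev_mode_set() directly.  Assuming hypothetical slave port ids
 * slave_port_a/slave_port_b, one possible sequence is:
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_8023AD, rte_socket_id());
 *
 *	if (bond_port >= 0) {
 *		rte_eth_bond_slave_add(bond_port, slave_port_a);
 *		rte_eth_bond_slave_add(bond_port, slave_port_b);
 *	}
 */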
1708
1709
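/*
 * Create (once) the slow-path mempool for a mode 4 slave and, when
 * dedicated queues are enabled, set up the extra rx/tx queue pair used
 * exclusively for LACPDUs on that slave.
 */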
1710 static int
1711 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1712                 struct rte_eth_dev *slave_eth_dev)
1713 {
1714         int errval = 0;
1715         struct bond_dev_private *internals = (struct bond_dev_private *)
1716                 bonded_eth_dev->data->dev_private;
1717         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1718
1719         if (port->slow_pool == NULL) {
1720                 char mem_name[256];
1721                 int slave_id = slave_eth_dev->data->port_id;
1722
1723                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1724                                 slave_id);
1725                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1726                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1727                         slave_eth_dev->data->numa_node);
1728
1729                 /* Any memory allocation failure in initialization is critical because
1730                  * resources cannot be freed, so reinitialization is impossible. */
1731                 if (port->slow_pool == NULL) {
1732                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1733                                 slave_id, mem_name, rte_strerror(rte_errno));
1734                 }
1735         }
1736
1737         if (internals->mode4.dedicated_queues.enabled == 1) {
1738                 /* Configure slow Rx queue */
1739
1740                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1741                                 internals->mode4.dedicated_queues.rx_qid, 128,
1742                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1743                                 NULL, port->slow_pool);
1744                 if (errval != 0) {
1745                         RTE_BOND_LOG(ERR,
1746                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1747                                         slave_eth_dev->data->port_id,
1748                                         internals->mode4.dedicated_queues.rx_qid,
1749                                         errval);
1750                         return errval;
1751                 }
1752
1753                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1754                                 internals->mode4.dedicated_queues.tx_qid, 512,
1755                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1756                                 NULL);
1757                 if (errval != 0) {
1758                         RTE_BOND_LOG(ERR,
1759                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1760                                 slave_eth_dev->data->port_id,
1761                                 internals->mode4.dedicated_queues.tx_qid,
1762                                 errval);
1763                         return errval;
1764                 }
1765         }
1766         return 0;
1767 }
1768
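/*
 * Fully (re)configure a slave to mirror the bonded device: stop it, copy
 * the relevant rx offload and RSS configuration, apply the bonded MTU, set
 * up the same number of rx/tx queues (plus the dedicated LACP queues in
 * mode 4 when enabled), restart it and, where applicable, synchronize the
 * RSS RETA and the initial link status.
 */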
1769 int
1770 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1771                 struct rte_eth_dev *slave_eth_dev)
1772 {
1773         struct bond_rx_queue *bd_rx_q;
1774         struct bond_tx_queue *bd_tx_q;
1775         uint16_t nb_rx_queues;
1776         uint16_t nb_tx_queues;
1777
1778         int errval;
1779         uint16_t q_id;
1780         struct rte_flow_error flow_error;
1781
1782         struct bond_dev_private *internals = (struct bond_dev_private *)
1783                 bonded_eth_dev->data->dev_private;
1784
1785         /* Stop slave */
1786         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1787
1788         /* Enable interrupts on slave device if supported */
1789         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1790                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1791
1792         /* If RSS is enabled for bonding, try to enable it for slaves  */
1793         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1794                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1795                                 != 0) {
1796                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1797                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1798                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1799                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1800                 } else {
1801                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1802                 }
1803
1804                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1805                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1806                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1807                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1808         }
1809
1810         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1811                         DEV_RX_OFFLOAD_VLAN_FILTER)
1812                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1813                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1814         else
1815                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1816                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1817
1818         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1819         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1820
1821         if (internals->mode == BONDING_MODE_8023AD) {
1822                 if (internals->mode4.dedicated_queues.enabled == 1) {
1823                         nb_rx_queues++;
1824                         nb_tx_queues++;
1825                 }
1826         }
1827
1828         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1829                                      bonded_eth_dev->data->mtu);
1830         if (errval != 0 && errval != -ENOTSUP) {
1831                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1832                                 slave_eth_dev->data->port_id, errval);
1833                 return errval;
1834         }
1835
1836         /* Configure device */
1837         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1838                         nb_rx_queues, nb_tx_queues,
1839                         &(slave_eth_dev->data->dev_conf));
1840         if (errval != 0) {
1841                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1842                                 slave_eth_dev->data->port_id, errval);
1843                 return errval;
1844         }
1845
1846         /* Setup Rx Queues */
1847         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1848                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1849
1850                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1851                                 bd_rx_q->nb_rx_desc,
1852                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1853                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1854                 if (errval != 0) {
1855                         RTE_BOND_LOG(ERR,
1856                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1857                                         slave_eth_dev->data->port_id, q_id, errval);
1858                         return errval;
1859                 }
1860         }
1861
1862         /* Setup Tx Queues */
1863         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1864                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1865
1866                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1867                                 bd_tx_q->nb_tx_desc,
1868                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1869                                 &bd_tx_q->tx_conf);
1870                 if (errval != 0) {
1871                         RTE_BOND_LOG(ERR,
1872                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1873                                 slave_eth_dev->data->port_id, q_id, errval);
1874                         return errval;
1875                 }
1876         }
1877
1878         if (internals->mode == BONDING_MODE_8023AD &&
1879                         internals->mode4.dedicated_queues.enabled == 1) {
1880                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1881                 if (errval != 0)
1882                         return errval;
1883
1884                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1885                                 slave_eth_dev->data->port_id) != 0) {
1886                         RTE_BOND_LOG(ERR,
1887                                 "bond_ethdev_8023ad_flow_verify: port=%d",
1888                                 slave_eth_dev->data->port_id);
1889                         return -1;
1890                 }
1891
1892                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1893                         rte_flow_destroy(slave_eth_dev->data->port_id,
1894                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1895                                         &flow_error);
1896
1897                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1898                                 slave_eth_dev->data->port_id);
1899         }
1900
1901         /* Start device */
1902         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1903         if (errval != 0) {
1904                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1905                                 slave_eth_dev->data->port_id, errval);
1906                 return -1;
1907         }
1908
1909         /* If RSS is enabled for bonding, synchronize RETA */
1910         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1911                 int i;
1912                 struct bond_dev_private *internals;
1913
1914                 internals = bonded_eth_dev->data->dev_private;
1915
1916                 for (i = 0; i < internals->slave_count; i++) {
1917                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1918                                 errval = rte_eth_dev_rss_reta_update(
1919                                                 slave_eth_dev->data->port_id,
1920                                                 &internals->reta_conf[0],
1921                                                 internals->slaves[i].reta_size);
1922                                 if (errval != 0) {
1923                                         RTE_BOND_LOG(WARNING,
1924                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1925                                                      " RSS Configuration for bonding may be inconsistent.",
1926                                                      slave_eth_dev->data->port_id, errval);
1927                                 }
1928                                 break;
1929                         }
1930                 }
1931         }
1932
1933         /* If lsc interrupt is set, check initial slave's link status */
1934         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1935                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1936                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1937                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1938                         NULL);
1939         }
1940
1941         return 0;
1942 }
1943
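/*
 * Remove a slave from the bonded device's slave array (and from each
 * rte_flow's per-slave flow table), compacting the arrays, then reset the
 * slave ethdev so it must be reconfigured before reuse.
 */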
1944 void
1945 slave_remove(struct bond_dev_private *internals,
1946                 struct rte_eth_dev *slave_eth_dev)
1947 {
1948         uint8_t i;
1949
1950         for (i = 0; i < internals->slave_count; i++)
1951                 if (internals->slaves[i].port_id ==
1952                                 slave_eth_dev->data->port_id)
1953                         break;
1954
1955         if (i < (internals->slave_count - 1)) {
1956                 struct rte_flow *flow;
1957
1958                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1959                                 sizeof(internals->slaves[0]) *
1960                                 (internals->slave_count - i - 1));
1961                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1962                         memmove(&flow->flows[i], &flow->flows[i + 1],
1963                                 sizeof(flow->flows[0]) *
1964                                 (internals->slave_count - i - 1));
1965                         flow->flows[internals->slave_count - 1] = NULL;
1966                 }
1967         }
1968
1969         internals->slave_count--;
1970
1971         /* force reconfiguration of slave interfaces */
1972         _rte_eth_dev_reset(slave_eth_dev);
1973 }
1974
1975 static void
1976 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1977
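/*
 * Record a new slave in the bonded device's slave table: save its port id,
 * flag it for link-status polling if it cannot generate LSC interrupts,
 * and persist its original MAC address.
 */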
1978 void
1979 slave_add(struct bond_dev_private *internals,
1980                 struct rte_eth_dev *slave_eth_dev)
1981 {
1982         struct bond_slave_details *slave_details =
1983                         &internals->slaves[internals->slave_count];
1984
1985         slave_details->port_id = slave_eth_dev->data->port_id;
1986         slave_details->last_link_status = 0;
1987
1988         /* Mark slave devices that don't support interrupts so we can
1989          * compensate when we start the bond
1990          */
1991         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1992                 slave_details->link_status_poll_enabled = 1;
1993         }
1994
1995         slave_details->link_status_wait_to_complete = 0;
1996         /* clean tlb_last_obytes when adding port for bonding device */
1997         /* Persist the slave's original MAC address for later restoration/use */
1998                         sizeof(struct ether_addr));
1999 }
2000
2001 void
2002 bond_ethdev_primary_set(struct bond_dev_private *internals,
2003                 uint16_t slave_port_id)
2004 {
2005         int i;
2006
2007         if (internals->active_slave_count < 1)
2008                 internals->current_primary_port = slave_port_id;
2009         else
2010                 /* Search bonded device slave ports for new proposed primary port */
2011                 for (i = 0; i < internals->active_slave_count; i++) {
2012                         if (internals->active_slaves[i] == slave_port_id)
2013                                 internals->current_primary_port = slave_port_id;
2014                 }
2015 }
2016
2017 static void
2018 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2019
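/*
 * dev_start handler for the bonded device: derive the bonded MAC from the
 * primary slave when the user has not defined one, push MAC and promiscuous
 * state to the slaves, reconfigure and start every slave, and kick off link
 * status polling and the mode-specific (802.3ad/TLB/ALB) machinery.
 */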
2020 static int
2021 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2022 {
2023         struct bond_dev_private *internals;
2024         int i;
2025
2026         /* slave eth dev will be started by bonded device */
2027         if (check_for_bonded_ethdev(eth_dev)) {
2028                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2029                                 eth_dev->data->port_id);
2030                 return -1;
2031         }
2032
2033         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2034         eth_dev->data->dev_started = 1;
2035
2036         internals = eth_dev->data->dev_private;
2037
2038         if (internals->slave_count == 0) {
2039                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2040                 goto out_err;
2041         }
2042
2043         if (internals->user_defined_mac == 0) {
2044                 struct ether_addr *new_mac_addr = NULL;
2045
2046                 for (i = 0; i < internals->slave_count; i++)
2047                         if (internals->slaves[i].port_id == internals->primary_port)
2048                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2049
2050                 if (new_mac_addr == NULL)
2051                         goto out_err;
2052
2053                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2054                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2055                                         eth_dev->data->port_id);
2056                         goto out_err;
2057                 }
2058         }
2059
2060         /* Update all slave devices MACs*/
2061         if (mac_address_slaves_update(eth_dev) != 0)
2062                 goto out_err;
2063
2064         /* If bonded device is configured in promiscuous mode then re-apply config */
2065         if (internals->promiscuous_en)
2066                 bond_ethdev_promiscuous_enable(eth_dev);
2067
2068         if (internals->mode == BONDING_MODE_8023AD) {
2069                 if (internals->mode4.dedicated_queues.enabled == 1) {
2070                         internals->mode4.dedicated_queues.rx_qid =
2071                                         eth_dev->data->nb_rx_queues;
2072                         internals->mode4.dedicated_queues.tx_qid =
2073                                         eth_dev->data->nb_tx_queues;
2074                 }
2075         }
2076
2077
2078         /* Reconfigure each slave device if starting bonded device */
2079         for (i = 0; i < internals->slave_count; i++) {
2080                 struct rte_eth_dev *slave_ethdev =
2081                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2082                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2083                         RTE_BOND_LOG(ERR,
2084                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2085                                 eth_dev->data->port_id,
2086                                 internals->slaves[i].port_id);
2087                         goto out_err;
2088                 }
2089                 /* We will need to poll for link status if any slave doesn't
2090                  * support interrupts
2091                  */
2092                 if (internals->slaves[i].link_status_poll_enabled)
2093                         internals->link_status_polling_enabled = 1;
2094         }
2095
2096         /* start polling if needed */
2097         if (internals->link_status_polling_enabled) {
2098                 rte_eal_alarm_set(
2099                         internals->link_status_polling_interval_ms * 1000,
2100                         bond_ethdev_slave_link_status_change_monitor,
2101                         (void *)&rte_eth_devices[internals->port_id]);
2102         }
2103
2104         if (internals->user_defined_primary_port)
2105                 bond_ethdev_primary_set(internals, internals->primary_port);
2106
2107         if (internals->mode == BONDING_MODE_8023AD)
2108                 bond_mode_8023ad_start(eth_dev);
2109
2110         if (internals->mode == BONDING_MODE_TLB ||
2111                         internals->mode == BONDING_MODE_ALB)
2112                 bond_tlb_enable(internals);
2113
2114         return 0;
2115
2116 out_err:
2117         eth_dev->data->dev_started = 0;
2118         return -1;
2119 }
2120
2121 static void
2122 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2123 {
2124         uint8_t i;
2125
2126         if (dev->data->rx_queues != NULL) {
2127                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2128                         rte_free(dev->data->rx_queues[i]);
2129                         dev->data->rx_queues[i] = NULL;
2130                 }
2131                 dev->data->nb_rx_queues = 0;
2132         }
2133
2134         if (dev->data->tx_queues != NULL) {
2135                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2136                         rte_free(dev->data->tx_queues[i]);
2137                         dev->data->tx_queues[i] = NULL;
2138                 }
2139                 dev->data->nb_tx_queues = 0;
2140         }
2141 }
2142
2143 void
2144 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2145 {
2146         struct bond_dev_private *internals = eth_dev->data->dev_private;
2147         uint8_t i;
2148
2149         if (internals->mode == BONDING_MODE_8023AD) {
2150                 struct port *port;
2151                 void *pkt = NULL;
2152
2153                 bond_mode_8023ad_stop(eth_dev);
2154
2155                 /* Discard all messages to/from mode 4 state machines */
2156                 for (i = 0; i < internals->active_slave_count; i++) {
2157                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2158
2159                         RTE_ASSERT(port->rx_ring != NULL);
2160                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2161                                 rte_pktmbuf_free(pkt);
2162
2163                         RTE_ASSERT(port->tx_ring != NULL);
2164                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2165                                 rte_pktmbuf_free(pkt);
2166                 }
2167         }
2168
2169         if (internals->mode == BONDING_MODE_TLB ||
2170                         internals->mode == BONDING_MODE_ALB) {
2171                 bond_tlb_disable(internals);
2172                 for (i = 0; i < internals->active_slave_count; i++)
2173                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2174         }
2175
2176         internals->active_slave_count = 0;
2177         internals->link_status_polling_enabled = 0;
2178         for (i = 0; i < internals->slave_count; i++)
2179                 internals->slaves[i].last_link_status = 0;
2180
2181         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2182         eth_dev->data->dev_started = 0;
2183 }
2184
2185 void
2186 bond_ethdev_close(struct rte_eth_dev *dev)
2187 {
2188         struct bond_dev_private *internals = dev->data->dev_private;
2189         uint8_t bond_port_id = internals->port_id;
2190         int skipped = 0;
2191         struct rte_flow_error ferror;
2192
2193         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2194         while (internals->slave_count != skipped) {
2195                 uint16_t port_id = internals->slaves[skipped].port_id;
2196
2197                 rte_eth_dev_stop(port_id);
2198
2199                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2200                         RTE_BOND_LOG(ERR,
2201                                      "Failed to remove port %d from bonded device %s",
2202                                      port_id, dev->device->name);
2203                         skipped++;
2204                 }
2205         }
2206         bond_flow_ops.flush(dev, &ferror);
2207         bond_ethdev_free_queues(dev);
2208         rte_bitmap_reset(internals->vlan_filter_bmp);
2209 }
2210
2211 /* forward declaration */
2212 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2213
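/*
 * dev_infos_get handler: the reported max_rx_queues/max_tx_queues are the
 * minimum across all slaves (e.g. slaves advertising 16 and 8 queues give a
 * bonded maximum of 8), reduced by one more in 802.3ad mode when dedicated
 * LACP queues are enabled.
 */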
2214 static void
2215 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2216 {
2217         struct bond_dev_private *internals = dev->data->dev_private;
2218
2219         uint16_t max_nb_rx_queues = UINT16_MAX;
2220         uint16_t max_nb_tx_queues = UINT16_MAX;
2221
2222         dev_info->max_mac_addrs = 1;
2223
2224         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2225                         internals->candidate_max_rx_pktlen :
2226                         ETHER_MAX_JUMBO_FRAME_LEN;
2227
2228         /* The maximum number of tx/rx queues that the bonded device can
2229          * support is the minimum across the bonded slaves, as all slaves must
2230          * be capable of supporting the same number of tx/rx queues.
2231          */
2232         if (internals->slave_count > 0) {
2233                 struct rte_eth_dev_info slave_info;
2234                 uint8_t idx;
2235
2236                 for (idx = 0; idx < internals->slave_count; idx++) {
2237                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2238                                         &slave_info);
2239
2240                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2241                                 max_nb_rx_queues = slave_info.max_rx_queues;
2242
2243                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2244                                 max_nb_tx_queues = slave_info.max_tx_queues;
2245                 }
2246         }
2247
2248         dev_info->max_rx_queues = max_nb_rx_queues;
2249         dev_info->max_tx_queues = max_nb_tx_queues;
2250
2251         /**
2252          * If dedicated hw queues enabled for link bonding device in LACP mode
2253          * then we need to reduce the maximum number of data path queues by 1.
2254          */
2255         if (internals->mode == BONDING_MODE_8023AD &&
2256                 internals->mode4.dedicated_queues.enabled == 1) {
2257                 dev_info->max_rx_queues--;
2258                 dev_info->max_tx_queues--;
2259         }
2260
2261         dev_info->min_rx_bufsize = 0;
2262
2263         dev_info->rx_offload_capa = internals->rx_offload_capa;
2264         dev_info->tx_offload_capa = internals->tx_offload_capa;
2265         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2266         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2267         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2268
2269         dev_info->reta_size = internals->reta_size;
2270 }
2271
2272 static int
2273 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2274 {
2275         int res;
2276         uint16_t i;
2277         struct bond_dev_private *internals = dev->data->dev_private;
2278
2279         /* don't do this while a slave is being added */
2280         rte_spinlock_lock(&internals->lock);
2281
2282         if (on)
2283                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2284         else
2285                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2286
2287         for (i = 0; i < internals->slave_count; i++) {
2288                 uint16_t port_id = internals->slaves[i].port_id;
2289
2290                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2291                 if (res == -ENOTSUP)
2292                         RTE_BOND_LOG(WARNING,
2293                                      "Setting VLAN filter on slave port %u not supported.",
2294                                      port_id);
2295         }
2296
2297         rte_spinlock_unlock(&internals->lock);
2298         return 0;
2299 }
2300
2301 static int
2302 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2303                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2304                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2305 {
2306         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2307                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2308                                         0, dev->data->numa_node);
2309         if (bd_rx_q == NULL)
2310                 return -1;
2311
2312         bd_rx_q->queue_id = rx_queue_id;
2313         bd_rx_q->dev_private = dev->data->dev_private;
2314
2315         bd_rx_q->nb_rx_desc = nb_rx_desc;
2316
2317         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2318         bd_rx_q->mb_pool = mb_pool;
2319
2320         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2321
2322         return 0;
2323 }
2324
2325 static int
2326 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2327                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2328                 const struct rte_eth_txconf *tx_conf)
2329 {
2330         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2331                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2332                                         0, dev->data->numa_node);
2333
2334         if (bd_tx_q == NULL)
2335                 return -1;
2336
2337         bd_tx_q->queue_id = tx_queue_id;
2338         bd_tx_q->dev_private = dev->data->dev_private;
2339
2340         bd_tx_q->nb_tx_desc = nb_tx_desc;
2341         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2342
2343         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2344
2345         return 0;
2346 }
2347
2348 static void
2349 bond_ethdev_rx_queue_release(void *queue)
2350 {
2351         if (queue == NULL)
2352                 return;
2353
2354         rte_free(queue);
2355 }
2356
2357 static void
2358 bond_ethdev_tx_queue_release(void *queue)
2359 {
2360         if (queue == NULL)
2361                 return;
2362
2363         rte_free(queue);
2364 }
2365
2366 static void
2367 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2368 {
2369         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2370         struct bond_dev_private *internals;
2371
2372         /* Default value for polling slave found is true as we don't want to
2373          * disable the polling thread if we cannot get the lock */
2374         int i, polling_slave_found = 1;
2375
2376         if (cb_arg == NULL)
2377                 return;
2378
2379         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2380         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2381
2382         if (!bonded_ethdev->data->dev_started ||
2383                 !internals->link_status_polling_enabled)
2384                 return;
2385
2386         /* If device is currently being configured then don't check slaves link
2387          * status, wait until next period */
2388         if (rte_spinlock_trylock(&internals->lock)) {
2389                 if (internals->slave_count > 0)
2390                         polling_slave_found = 0;
2391
2392                 for (i = 0; i < internals->slave_count; i++) {
2393                         if (!internals->slaves[i].link_status_poll_enabled)
2394                                 continue;
2395
2396                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2397                         polling_slave_found = 1;
2398
2399                         /* Update slave link status */
2400                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2401                                         internals->slaves[i].link_status_wait_to_complete);
2402
2403                         /* if link status has changed since last checked then call lsc
2404                          * event callback */
2405                         if (slave_ethdev->data->dev_link.link_status !=
2406                                         internals->slaves[i].last_link_status) {
2407                                 internals->slaves[i].last_link_status =
2408                                                 slave_ethdev->data->dev_link.link_status;
2409
2410                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2411                                                 RTE_ETH_EVENT_INTR_LSC,
2412                                                 &bonded_ethdev->data->port_id,
2413                                                 NULL);
2414                         }
2415                 }
2416                 rte_spinlock_unlock(&internals->lock);
2417         }
2418
2419         if (polling_slave_found)
2420                 /* Set alarm to continue monitoring link status of slave ethdev's */
2421                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2422                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2423 }
2424
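/*
 * link_update handler: the bonded link is reported UP whenever at least one
 * slave is active.  The advertised speed depends on the mode: broadcast
 * uses the minimum slave speed, active-backup uses the current primary's
 * speed, and the remaining modes report the sum of the active slaves'
 * speeds (802.3ad additionally mirrors the negotiated autoneg/duplex
 * properties).
 */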
2425 static int
2426 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2427 {
2428         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2429
2430         struct bond_dev_private *bond_ctx;
2431         struct rte_eth_link slave_link;
2432
2433         uint32_t idx;
2434
2435         bond_ctx = ethdev->data->dev_private;
2436
2437         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2438
2439         if (ethdev->data->dev_started == 0 ||
2440                         bond_ctx->active_slave_count == 0) {
2441                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2442                 return 0;
2443         }
2444
2445         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2446
2447         if (wait_to_complete)
2448                 link_update = rte_eth_link_get;
2449         else
2450                 link_update = rte_eth_link_get_nowait;
2451
2452         switch (bond_ctx->mode) {
2453         case BONDING_MODE_BROADCAST:
2454                 /**
2455                  * Setting link speed to UINT32_MAX to ensure we pick up the
2456                  * value of the first active slave
2457                  */
2458                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2459
2460                 /**
2461                  * The bonded link speed is the minimum of all the slaves' link
2462                  * speeds, as packet loss will occur on the slowest slave if
2463                  * transmission at a higher rate is attempted
2464                  */
2465                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2466                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2467
2468                         if (slave_link.link_speed <
2469                                         ethdev->data->dev_link.link_speed)
2470                                 ethdev->data->dev_link.link_speed =
2471                                                 slave_link.link_speed;
2472                 }
2473                 break;
2474         case BONDING_MODE_ACTIVE_BACKUP:
2475                 /* Current primary slave */
2476                 link_update(bond_ctx->current_primary_port, &slave_link);
2477
2478                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2479                 break;
2480         case BONDING_MODE_8023AD:
2481                 ethdev->data->dev_link.link_autoneg =
2482                                 bond_ctx->mode4.slave_link.link_autoneg;
2483                 ethdev->data->dev_link.link_duplex =
2484                                 bond_ctx->mode4.slave_link.link_duplex;
2485                 /* fall through to update link speed */
2486         case BONDING_MODE_ROUND_ROBIN:
2487         case BONDING_MODE_BALANCE:
2488         case BONDING_MODE_TLB:
2489         case BONDING_MODE_ALB:
2490         default:
2491                 /**
2492                  * In these modes the maximum theoretical link speed is the sum
2493                  * of the link speeds of all the slaves
2494                  */
2495                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2496
2497                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2498                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2499
2500                         ethdev->data->dev_link.link_speed +=
2501                                         slave_link.link_speed;
2502                 }
2503         }
2504
2505
2506         return 0;
2507 }
2508
2509
2510 static int
2511 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2512 {
2513         struct bond_dev_private *internals = dev->data->dev_private;
2514         struct rte_eth_stats slave_stats;
2515         int i, j;
2516
2517         for (i = 0; i < internals->slave_count; i++) {
2518                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2519
2520                 stats->ipackets += slave_stats.ipackets;
2521                 stats->opackets += slave_stats.opackets;
2522                 stats->ibytes += slave_stats.ibytes;
2523                 stats->obytes += slave_stats.obytes;
2524                 stats->imissed += slave_stats.imissed;
2525                 stats->ierrors += slave_stats.ierrors;
2526                 stats->oerrors += slave_stats.oerrors;
2527                 stats->rx_nombuf += slave_stats.rx_nombuf;
2528
2529                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2530                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2531                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2532                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2533                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2534                         stats->q_errors[j] += slave_stats.q_errors[j];
2535                 }
2536
2537         }
2538
2539         return 0;
2540 }
2541
2542 static void
2543 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2544 {
2545         struct bond_dev_private *internals = dev->data->dev_private;
2546         int i;
2547
2548         for (i = 0; i < internals->slave_count; i++)
2549                 rte_eth_stats_reset(internals->slaves[i].port_id);
2550 }
2551
2552 static void
2553 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2554 {
2555         struct bond_dev_private *internals = eth_dev->data->dev_private;
2556         int i;
2557
2558         internals->promiscuous_en = 1;
2559
2560         switch (internals->mode) {
2561         /* Promiscuous mode is propagated to all slaves */
2562         case BONDING_MODE_ROUND_ROBIN:
2563         case BONDING_MODE_BALANCE:
2564         case BONDING_MODE_BROADCAST:
2565                 for (i = 0; i < internals->slave_count; i++)
2566                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2567                 break;
2568         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2569         case BONDING_MODE_8023AD:
2570                 break;
2571         /* Promiscuous mode is propagated only to primary slave */
2572         case BONDING_MODE_ACTIVE_BACKUP:
2573         case BONDING_MODE_TLB:
2574         case BONDING_MODE_ALB:
2575         default:
2576                 rte_eth_promiscuous_enable(internals->current_primary_port);
2577         }
2578 }
2579
2580 static void
2581 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2582 {
2583         struct bond_dev_private *internals = dev->data->dev_private;
2584         int i;
2585
2586         internals->promiscuous_en = 0;
2587
2588         switch (internals->mode) {
2589         /* Promiscuous mode is propagated to all slaves */
2590         case BONDING_MODE_ROUND_ROBIN:
2591         case BONDING_MODE_BALANCE:
2592         case BONDING_MODE_BROADCAST:
2593                 for (i = 0; i < internals->slave_count; i++)
2594                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2595                 break;
2596         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2597         case BONDING_MODE_8023AD:
2598                 break;
2599         /* Promiscuous mode is propagated only to primary slave */
2600         case BONDING_MODE_ACTIVE_BACKUP:
2601         case BONDING_MODE_TLB:
2602         case BONDING_MODE_ALB:
2603         default:
2604                 rte_eth_promiscuous_disable(internals->current_primary_port);
2605         }
2606 }
2607
2608 static void
2609 bond_ethdev_delayed_lsc_propagation(void *arg)
2610 {
2611         if (arg == NULL)
2612                 return;
2613
2614         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2615                         RTE_ETH_EVENT_INTR_LSC, NULL);
2616 }
2617
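/*
 * LSC callback invoked on slave link changes (from the slave PMDs or from
 * the polling alarm): activate or deactivate the slave, elect a new primary
 * if needed, refresh the bonded link properties and, when the bonded link
 * itself changes state, propagate the event to the application either
 * immediately or after the configured up/down delay.
 */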
2618 int
2619 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2620                 void *param, void *ret_param __rte_unused)
2621 {
2622         struct rte_eth_dev *bonded_eth_dev;
2623         struct bond_dev_private *internals;
2624         struct rte_eth_link link;
2625         int rc = -1;
2626
2627         int i, valid_slave = 0;
2628         uint8_t active_pos;
2629         uint8_t lsc_flag = 0;
2630
2631         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2632                 return rc;
2633
2634         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
2635
2636         if (check_for_bonded_ethdev(bonded_eth_dev))
2637                 return rc;
2638
2639         internals = bonded_eth_dev->data->dev_private;
2640
2641         /* If the device isn't started don't handle interrupts */
2642         if (!bonded_eth_dev->data->dev_started)
2643                 return rc;
2644
2645         /* verify that port_id is a valid slave of bonded port */
2646         for (i = 0; i < internals->slave_count; i++) {
2647                 if (internals->slaves[i].port_id == port_id) {
2648                         valid_slave = 1;
2649                         break;
2650                 }
2651         }
2652
2653         if (!valid_slave)
2654                 return rc;
2655
2656         /* Synchronize lsc callback parallel calls either by real link event
2657          * from the slaves PMDs or by the bonding PMD itself.
2658          */
2659         rte_spinlock_lock(&internals->lsc_lock);
2660
2661         /* Search for port in active port list */
2662         active_pos = find_slave_by_id(internals->active_slaves,
2663                         internals->active_slave_count, port_id);
2664
2665         rte_eth_link_get_nowait(port_id, &link);
2666         if (link.link_status) {
2667                 if (active_pos < internals->active_slave_count) {
2668                         rte_spinlock_unlock(&internals->lsc_lock);
2669                         return rc;
2670                 }
2671
2672                 /* if no active slave ports then set this port to be primary port */
2673                 if (internals->active_slave_count < 1) {
2674                         /* If first active slave, then change link status */
2675                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2676                         internals->current_primary_port = port_id;
2677                         lsc_flag = 1;
2678
2679                         mac_address_slaves_update(bonded_eth_dev);
2680                 }
2681
2682                 activate_slave(bonded_eth_dev, port_id);
2683
2684                 /* If user has defined the primary port then default to using it */
2685                 if (internals->user_defined_primary_port &&
2686                                 internals->primary_port == port_id)
2687                         bond_ethdev_primary_set(internals, port_id);
2688         } else {
2689                 if (active_pos == internals->active_slave_count) {
2690                         rte_spinlock_unlock(&internals->lsc_lock);
2691                         return rc;
2692                 }
2693
2694                 /* Remove from active slave list */
2695                 deactivate_slave(bonded_eth_dev, port_id);
2696
2697                 if (internals->active_slave_count < 1)
2698                         lsc_flag = 1;
2699
2700                 /* Update primary id: take the first active slave from the list, or
2701                  * fall back to the configured primary port if none are active */
2702                 if (port_id == internals->current_primary_port) {
2703                         if (internals->active_slave_count > 0)
2704                                 bond_ethdev_primary_set(internals,
2705                                                 internals->active_slaves[0]);
2706                         else
2707                                 internals->current_primary_port = internals->primary_port;
2708                 }
2709         }
2710
2711         /**
2712          * Update bonded device link properties after any change to active
2713          * slaves
2714          */
2715         bond_ethdev_link_update(bonded_eth_dev, 0);
2716
2717         if (lsc_flag) {
2718                 /* Cancel any possible outstanding interrupts if delays are enabled */
2719                 if (internals->link_up_delay_ms > 0 ||
2720                         internals->link_down_delay_ms > 0)
2721                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2722                                         bonded_eth_dev);
2723
2724                 if (bonded_eth_dev->data->dev_link.link_status) {
2725                         if (internals->link_up_delay_ms > 0)
2726                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2727                                                 bond_ethdev_delayed_lsc_propagation,
2728                                                 (void *)bonded_eth_dev);
2729                         else
2730                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2731                                                 RTE_ETH_EVENT_INTR_LSC,
2732                                                 NULL);
2733
2734                 } else {
2735                         if (internals->link_down_delay_ms > 0)
2736                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2737                                                 bond_ethdev_delayed_lsc_propagation,
2738                                                 (void *)bonded_eth_dev);
2739                         else
2740                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2741                                                 RTE_ETH_EVENT_INTR_LSC,
2742                                                 NULL);
2743                 }
2744         }
2745
2746         rte_spinlock_unlock(&internals->lsc_lock);
2747
2748         return 0;
2749 }
2750
2751 static int
2752 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2753                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2754 {
2755         unsigned i, j;
2756         int result = 0;
2757         int slave_reta_size;
2758         unsigned reta_count;
2759         struct bond_dev_private *internals = dev->data->dev_private;
2760
2761         if (reta_size != internals->reta_size)
2762                 return -EINVAL;
2763
2764          /* Copy RETA table */
2765         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2766
2767         for (i = 0; i < reta_count; i++) {
2768                 internals->reta_conf[i].mask = reta_conf[i].mask;
2769                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2770                         if ((reta_conf[i].mask >> j) & 0x01)
2771                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2772         }
2773
2774         /* Fill rest of array */
2775         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2776                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2777                                 sizeof(internals->reta_conf[0]) * reta_count);
2778
2779         /* Propagate RETA over slaves */
2780         for (i = 0; i < internals->slave_count; i++) {
2781                 slave_reta_size = internals->slaves[i].reta_size;
2782                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2783                                 &internals->reta_conf[0], slave_reta_size);
2784                 if (result < 0)
2785                         return result;
2786         }
2787
2788         return 0;
2789 }
2790
2791 static int
2792 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2793                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2794 {
2795         int i, j;
2796         struct bond_dev_private *internals = dev->data->dev_private;
2797
2798         if (reta_size != internals->reta_size)
2799                 return -EINVAL;
2800
2801          /* Copy RETA table */
2802         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2803                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2804                         if ((reta_conf[i].mask >> j) & 0x01)
2805                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2806
2807         return 0;
2808 }
2809
2810 static int
2811 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2812                 struct rte_eth_rss_conf *rss_conf)
2813 {
2814         int i, result = 0;
2815         struct bond_dev_private *internals = dev->data->dev_private;
2816         struct rte_eth_rss_conf bond_rss_conf;
2817
2818         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2819
2820         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2821
2822         if (bond_rss_conf.rss_hf != 0)
2823                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2824
2825         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2826                         sizeof(internals->rss_key)) {
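                     /* No key length given: fall back to the default 40-byte RSS hash key length */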
2827                 if (bond_rss_conf.rss_key_len == 0)
2828                         bond_rss_conf.rss_key_len = 40;
2829                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2830                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2831                                 internals->rss_key_len);
2832         }
2833
2834         for (i = 0; i < internals->slave_count; i++) {
2835                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2836                                 &bond_rss_conf);
2837                 if (result < 0)
2838                         return result;
2839         }
2840
2841         return 0;
2842 }
2843
2844 static int
2845 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2846                 struct rte_eth_rss_conf *rss_conf)
2847 {
2848         struct bond_dev_private *internals = dev->data->dev_private;
2849
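             /* Report the bonded device's RSS settings; copy the key only if the caller supplied a buffer */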
2850         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2851         rss_conf->rss_key_len = internals->rss_key_len;
2852         if (rss_conf->rss_key)
2853                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2854
2855         return 0;
2856 }
2857
2858 static int
2859 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2860 {
2861         struct rte_eth_dev *slave_eth_dev;
2862         struct bond_dev_private *internals = dev->data->dev_private;
2863         int ret, i;
2864
2865         rte_spinlock_lock(&internals->lock);
2866
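             /* First verify that every slave supports MTU configuration before changing any of them */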
2867         for (i = 0; i < internals->slave_count; i++) {
2868                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2869                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2870                         rte_spinlock_unlock(&internals->lock);
2871                         return -ENOTSUP;
2872                 }
2873         }
2874         for (i = 0; i < internals->slave_count; i++) {
2875                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2876                 if (ret < 0) {
2877                         rte_spinlock_unlock(&internals->lock);
2878                         return ret;
2879                 }
2880         }
2881
2882         rte_spinlock_unlock(&internals->lock);
2883         return 0;
2884 }
2885
2886 static int
2887 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2888 {
2889         if (mac_address_set(dev, addr)) {
2890                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2891                 return -EINVAL;
2892         }
2893
2894         return 0;
2895 }
2896
2897 static int
2898 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2899                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2900 {
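             /* Only the generic flow API is supported; hand back the bonding rte_flow operations */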
2901         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2902                 *(const void **)arg = &bond_flow_ops;
2903                 return 0;
2904         }
2905         return -ENOTSUP;
2906 }
2907
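     /* ethdev operations exposed by the bonding PMD */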
2908 const struct eth_dev_ops default_dev_ops = {
2909         .dev_start            = bond_ethdev_start,
2910         .dev_stop             = bond_ethdev_stop,
2911         .dev_close            = bond_ethdev_close,
2912         .dev_configure        = bond_ethdev_configure,
2913         .dev_infos_get        = bond_ethdev_info,
2914         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2915         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2916         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2917         .rx_queue_release     = bond_ethdev_rx_queue_release,
2918         .tx_queue_release     = bond_ethdev_tx_queue_release,
2919         .link_update          = bond_ethdev_link_update,
2920         .stats_get            = bond_ethdev_stats_get,
2921         .stats_reset          = bond_ethdev_stats_reset,
2922         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2923         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2924         .reta_update          = bond_ethdev_rss_reta_update,
2925         .reta_query           = bond_ethdev_rss_reta_query,
2926         .rss_hash_update      = bond_ethdev_rss_hash_update,
2927         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
2928         .mtu_set              = bond_ethdev_mtu_set,
2929         .mac_addr_set         = bond_ethdev_mac_address_set,
2930         .filter_ctrl          = bond_filter_ctrl
2931 };
2932
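     /* Allocate and initialize a bonded ethdev for the given vdev in the requested mode */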
2933 static int
2934 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2935 {
2936         const char *name = rte_vdev_device_name(dev);
2937         uint8_t socket_id = dev->device.numa_node;
2938         struct bond_dev_private *internals = NULL;
2939         struct rte_eth_dev *eth_dev = NULL;
2940         uint32_t vlan_filter_bmp_size;
2941
2942         /* now do all data allocation - for the eth_dev structure and
2943          * internal (private) data
2944          */
2945
2946         /* reserve an ethdev entry */
2947         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2948         if (eth_dev == NULL) {
2949                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2950                 goto err;
2951         }
2952
2953         internals = eth_dev->data->dev_private;
2954         eth_dev->data->nb_rx_queues = (uint16_t)1;
2955         eth_dev->data->nb_tx_queues = (uint16_t)1;
2956
2957         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2958                         socket_id);
2959         if (eth_dev->data->mac_addrs == NULL) {
2960                 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2961                 goto err;
2962         }
2963
2964         eth_dev->dev_ops = &default_dev_ops;
2965         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2966
2967         rte_spinlock_init(&internals->lock);
2968         rte_spinlock_init(&internals->lsc_lock);
2969
2970         internals->port_id = eth_dev->data->port_id;
2971         internals->mode = BONDING_MODE_INVALID;
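             /* No primary port selected yet: use an out-of-range port id as a placeholder */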
2972         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2973         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2974         internals->burst_xmit_hash = burst_xmit_l2_hash;
2975         internals->user_defined_mac = 0;
2976
2977         internals->link_status_polling_enabled = 0;
2978
2979         internals->link_status_polling_interval_ms =
2980                 DEFAULT_POLLING_INTERVAL_10_MS;
2981         internals->link_down_delay_ms = 0;
2982         internals->link_up_delay_ms = 0;
2983
2984         internals->slave_count = 0;
2985         internals->active_slave_count = 0;
2986         internals->rx_offload_capa = 0;
2987         internals->tx_offload_capa = 0;
2988         internals->rx_queue_offload_capa = 0;
2989         internals->tx_queue_offload_capa = 0;
2990         internals->candidate_max_rx_pktlen = 0;
2991         internals->max_rx_pktlen = 0;
2992
2993         /* Initially allow to choose any offload type */
2994         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2995
2996         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2997         memset(internals->slaves, 0, sizeof(internals->slaves));
2998
2999         TAILQ_INIT(&internals->flow_list);
3000         internals->flow_isolated_valid = 0;
3001
3002         /* Set mode 4 default configuration */
3003         bond_mode_8023ad_setup(eth_dev, NULL);
3004         if (bond_ethdev_mode_set(eth_dev, mode)) {
3005                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3006                                  eth_dev->data->port_id, mode);
3007                 goto err;
3008         }
3009
3010         vlan_filter_bmp_size =
3011                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3012         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3013                                                    RTE_CACHE_LINE_SIZE);
3014         if (internals->vlan_filter_bmpmem == NULL) {
3015                 RTE_BOND_LOG(ERR,
3016                              "Failed to allocate vlan bitmap for bonded device %u",
3017                              eth_dev->data->port_id);
3018                 goto err;
3019         }
3020
3021         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3022                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3023         if (internals->vlan_filter_bmp == NULL) {
3024                 RTE_BOND_LOG(ERR,
3025                              "Failed to init vlan bitmap for bonded device %u",
3026                              eth_dev->data->port_id);
3027                 rte_free(internals->vlan_filter_bmpmem);
3028                 goto err;
3029         }
3030
3031         return eth_dev->data->port_id;
3032
3033 err:
3034         rte_free(internals);
3035         if (eth_dev != NULL) {
3036                 rte_free(eth_dev->data->mac_addrs);
3037                 rte_eth_dev_release_port(eth_dev);
3038         }
3039         return -1;
3040 }
3041
3042 static int
3043 bond_probe(struct rte_vdev_device *dev)
3044 {
3045         const char *name;
3046         struct bond_dev_private *internals;
3047         struct rte_kvargs *kvlist;
3048         uint8_t bonding_mode, socket_id;
3049         int  arg_count, port_id;
3050         uint8_t agg_mode;
3051         struct rte_eth_dev *eth_dev;
3052
3053         if (!dev)
3054                 return -EINVAL;
3055
3056         name = rte_vdev_device_name(dev);
3057         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3058
3059         if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
3060             strlen(rte_vdev_device_args(dev)) == 0) {
3061                 eth_dev = rte_eth_dev_attach_secondary(name);
3062                 if (!eth_dev) {
3063                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3064                         return -1;
3065                 }
3066                 /* TODO: request info from primary to set up Rx and Tx */
3067                 eth_dev->dev_ops = &default_dev_ops;
3068                 rte_eth_dev_probing_finish(eth_dev);
3069                 return 0;
3070         }
3071
3072         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3073                 pmd_bond_init_valid_arguments);
3074         if (kvlist == NULL)
3075                 return -1;
3076
3077         /* Parse link bonding mode */
3078         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3079                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3080                                 &bond_ethdev_parse_slave_mode_kvarg,
3081                                 &bonding_mode) != 0) {
3082                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3083                                         name);
3084                         goto parse_error;
3085                 }
3086         } else {
3087                 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3088                                 "device %s", name);
3089                 goto parse_error;
3090         }
3091
3092         /* Parse socket id to create bonding device on */
3093         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3094         if (arg_count == 1) {
3095                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3096                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3097                                 != 0) {
3098                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3099                                         "bonded device %s", name);
3100                         goto parse_error;
3101                 }
3102         } else if (arg_count > 1) {
3103                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3104                                 "bonded device %s", name);
3105                 goto parse_error;
3106         } else {
3107                 socket_id = rte_socket_id();
3108         }
3109
3110         dev->device.numa_node = socket_id;
3111
3112         /* Create link bonding eth device */
3113         port_id = bond_alloc(dev, bonding_mode);
3114         if (port_id < 0) {
3115                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3116                                 "socket %u.", name, bonding_mode, socket_id);
3117                 goto parse_error;
3118         }
3119         internals = rte_eth_devices[port_id].data->dev_private;
3120         internals->kvlist = kvlist;
3121
3122
3123         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3124                 if (rte_kvargs_process(kvlist,
3125                                 PMD_BOND_AGG_MODE_KVARG,
3126                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3127                                 &agg_mode) != 0) {
3128                         RTE_BOND_LOG(ERR,
3129                                         "Failed to parse agg selection mode for bonded device %s",
3130                                         name);
3131                         goto parse_error;
3132                 }
3133
3134                 if (internals->mode == BONDING_MODE_8023AD)
3135                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3136                                         agg_mode);
3137         } else {
3138                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3139         }
3140
3141         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3142         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3143                         "socket %u.", name, port_id, bonding_mode, socket_id);
3144         return 0;
3145
3146 parse_error:
3147         rte_kvargs_free(kvlist);
3148
3149         return -1;
3150 }
3151
3152 static int
3153 bond_remove(struct rte_vdev_device *dev)
3154 {
3155         struct rte_eth_dev *eth_dev;
3156         struct bond_dev_private *internals;
3157         const char *name;
3158
3159         if (!dev)
3160                 return -EINVAL;
3161
3162         name = rte_vdev_device_name(dev);
3163         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3164
3165         /* now free all data allocation - for the eth_dev structure
3166          * and internal (private) data
3167          */
3168
3169         /* find an ethdev entry */
3170         eth_dev = rte_eth_dev_allocated(name);
3171         if (eth_dev == NULL)
3172                 return -ENODEV;
3173
3174         RTE_ASSERT(eth_dev->device == &dev->device);
3175
3176         internals = eth_dev->data->dev_private;
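             /* Refuse to remove the device while slaves are still attached */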
3177         if (internals->slave_count != 0)
3178                 return -EBUSY;
3179
3180         if (eth_dev->data->dev_started == 1) {
3181                 bond_ethdev_stop(eth_dev);
3182                 bond_ethdev_close(eth_dev);
3183         }
3184
3185         eth_dev->dev_ops = NULL;
3186         eth_dev->rx_pkt_burst = NULL;
3187         eth_dev->tx_pkt_burst = NULL;
3188
3189         internals = eth_dev->data->dev_private;
3190         /* Try to release the mempool used in mode 6. If the bonded
3191          * device is not in mode 6, freeing a NULL pointer is not a problem.
3192          */
3193         rte_mempool_free(internals->mode6.mempool);
3194         rte_bitmap_free(internals->vlan_filter_bmp);
3195         rte_free(internals->vlan_filter_bmpmem);
3196         rte_free(eth_dev->data->dev_private);
3197         rte_free(eth_dev->data->mac_addrs);
3198
3199         rte_eth_dev_release_port(eth_dev);
3200
3201         return 0;
3202 }
3203
3204 /* this part resolves the slave port ids after all the other pdevs and
3205  * vdevs have been allocated */
3206 static int
3207 bond_ethdev_configure(struct rte_eth_dev *dev)
3208 {
3209         const char *name = dev->device->name;
3210         struct bond_dev_private *internals = dev->data->dev_private;
3211         struct rte_kvargs *kvlist = internals->kvlist;
3212         int arg_count;
3213         uint16_t port_id = dev - rte_eth_devices;
3214         uint8_t agg_mode;
3215
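             /* Default 40-byte RSS hash key used when RSS is enabled but the application supplies none */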
3216         static const uint8_t default_rss_key[40] = {
3217                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3218                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3219                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3220                 0xBE, 0xAC, 0x01, 0xFA
3221         };
3222
3223         unsigned i, j;
3224
3225         /* If RSS is enabled, fill table and key with default values */
3226         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3227                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
3228                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
3229                 memcpy(internals->rss_key, default_rss_key, 40);
3230
3231                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3232                         internals->reta_conf[i].mask = ~0LL;
3233                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3234                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
3235                 }
3236         }
3237
3238         /* set the max_rx_pktlen */
3239         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3240
3241         /*
3242          * if no kvlist, it means that this bonded device has been created
3243          * through the bonding api.
3244          */
3245         if (!kvlist)
3246                 return 0;
3247
3248         /* Parse MAC address for bonded device */
3249         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3250         if (arg_count == 1) {
3251                 struct ether_addr bond_mac;
3252
3253                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3254                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3255                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3256                                      name);
3257                         return -1;
3258                 }
3259
3260                 /* Set MAC address */
3261                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3262                         RTE_BOND_LOG(ERR,
3263                                      "Failed to set mac address on bonded device %s",
3264                                      name);
3265                         return -1;
3266                 }
3267         } else if (arg_count > 1) {
3268                 RTE_BOND_LOG(ERR,
3269                              "MAC address can be specified only once for bonded device %s",
3270                              name);
3271                 return -1;
3272         }
3273
3274         /* Parse/set balance mode transmit policy */
3275         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3276         if (arg_count == 1) {
3277                 uint8_t xmit_policy;
3278
3279                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3280                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3281                     0) {
3282                         RTE_BOND_LOG(INFO,
3283                                      "Invalid xmit policy specified for bonded device %s",
3284                                      name);
3285                         return -1;
3286                 }
3287
3288                 /* Set balance mode transmit policy */
3289                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3290                         RTE_BOND_LOG(ERR,
3291                                      "Failed to set balance xmit policy on bonded device %s",
3292                                      name);
3293                         return -1;
3294                 }
3295         } else if (arg_count > 1) {
3296                 RTE_BOND_LOG(ERR,
3297                              "Transmit policy can be specified only once for bonded device %s",
3298                              name);
3299                 return -1;
3300         }
3301
3302         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3303                 if (rte_kvargs_process(kvlist,
3304                                        PMD_BOND_AGG_MODE_KVARG,
3305                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3306                                        &agg_mode) != 0) {
3307                         RTE_BOND_LOG(ERR,
3308                                      "Failed to parse agg selection mode for bonded device %s",
3309                                      name);
3310                 }
3311                 if (internals->mode == BONDING_MODE_8023AD)
3312                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3313                                                               agg_mode);
3314         }
3315
3316         /* Parse/add slave ports to bonded device */
3317         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3318                 struct bond_ethdev_slave_ports slave_ports;
3319                 unsigned i;
3320
3321                 memset(&slave_ports, 0, sizeof(slave_ports));
3322
3323                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3324                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3325                         RTE_BOND_LOG(ERR,
3326                                      "Failed to parse slave ports for bonded device %s",
3327                                      name);
3328                         return -1;
3329                 }
3330
3331                 for (i = 0; i < slave_ports.slave_count; i++) {
3332                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3333                                 RTE_BOND_LOG(ERR,
3334                                              "Failed to add port %d as slave to bonded device %s",
3335                                              slave_ports.slaves[i], name);
3336                         }
3337                 }
3338
3339         } else {
3340                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3341                 return -1;
3342         }
3343
3344         /* Parse/set primary slave port id*/
3345         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3346         if (arg_count == 1) {
3347                 uint16_t primary_slave_port_id;
3348
3349                 if (rte_kvargs_process(kvlist,
3350                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3351                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3352                                        &primary_slave_port_id) < 0) {
3353                         RTE_BOND_LOG(INFO,
3354                                      "Invalid primary slave port id specified for bonded device %s",
3355                                      name);
3356                         return -1;
3357                 }
3358
3359                 /* Set primary slave port id */
3360                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3361                     != 0) {
3362                         RTE_BOND_LOG(ERR,
3363                                      "Failed to set primary slave port %d on bonded device %s",
3364                                      primary_slave_port_id, name);
3365                         return -1;
3366                 }
3367         } else if (arg_count > 1) {
3368                 RTE_BOND_LOG(INFO,
3369                              "Primary slave can be specified only once for bonded device %s",
3370                              name);
3371                 return -1;
3372         }
3373
3374         /* Parse link status monitor polling interval */
3375         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3376         if (arg_count == 1) {
3377                 uint32_t lsc_poll_interval_ms;
3378
3379                 if (rte_kvargs_process(kvlist,
3380                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3381                                        &bond_ethdev_parse_time_ms_kvarg,
3382                                        &lsc_poll_interval_ms) < 0) {
3383                         RTE_BOND_LOG(INFO,
3384                                      "Invalid lsc polling interval value specified for bonded"
3385                                      " device %s", name);
3386                         return -1;
3387                 }
3388
3389                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3390                     != 0) {
3391                         RTE_BOND_LOG(ERR,
3392                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3393                                      lsc_poll_interval_ms, name);
3394                         return -1;
3395                 }
3396         } else if (arg_count > 1) {
3397                 RTE_BOND_LOG(INFO,
3398                              "LSC polling interval can be specified only once for bonded"
3399                              " device %s", name);
3400                 return -1;
3401         }
3402
3403         /* Parse link up interrupt propagation delay */
3404         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3405         if (arg_count == 1) {
3406                 uint32_t link_up_delay_ms;
3407
3408                 if (rte_kvargs_process(kvlist,
3409                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3410                                        &bond_ethdev_parse_time_ms_kvarg,
3411                                        &link_up_delay_ms) < 0) {
3412                         RTE_BOND_LOG(INFO,
3413                                      "Invalid link up propagation delay value specified for"
3414                                      " bonded device %s", name);
3415                         return -1;
3416                 }
3417
3418                 /* Set link up propagation delay */
3419                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3420                     != 0) {
3421                         RTE_BOND_LOG(ERR,
3422                                      "Failed to set link up propagation delay (%u ms) on bonded"
3423                                      " device %s", link_up_delay_ms, name);
3424                         return -1;
3425                 }
3426         } else if (arg_count > 1) {
3427                 RTE_BOND_LOG(INFO,
3428                              "Link up propagation delay can be specified only once for"
3429                              " bonded device %s", name);
3430                 return -1;
3431         }
3432
3433         /* Parse link down interrupt propagation delay */
3434         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3435         if (arg_count == 1) {
3436                 uint32_t link_down_delay_ms;
3437
3438                 if (rte_kvargs_process(kvlist,
3439                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3440                                        &bond_ethdev_parse_time_ms_kvarg,
3441                                        &link_down_delay_ms) < 0) {
3442                         RTE_BOND_LOG(INFO,
3443                                      "Invalid link down propagation delay value specified for"
3444                                      " bonded device %s", name);
3445                         return -1;
3446                 }
3447
3448                 /* Set link down propagation delay */
3449                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3450                     != 0) {
3451                         RTE_BOND_LOG(ERR,
3452                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3453                                      link_down_delay_ms, name);
3454                         return -1;
3455                 }
3456         } else if (arg_count > 1) {
3457                 RTE_BOND_LOG(INFO,
3458                              "Link down propagation delay can be specified only once for bonded device %s",
3459                              name);
3460                 return -1;
3461         }
3462
3463         return 0;
3464 }
3465
3466 struct rte_vdev_driver pmd_bond_drv = {
3467         .probe = bond_probe,
3468         .remove = bond_remove,
3469 };
3470
3471 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3472 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3473
3474 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3475         "slave=<ifc> "
3476         "primary=<ifc> "
3477         "mode=[0-6] "
3478         "xmit_policy=[l2 | l23 | l34] "
3479         "agg_mode=[count | stable | bandwidth] "
3480         "socket_id=<int> "
3481         "mac=<mac addr> "
3482         "lsc_poll_period_ms=<int> "
3483         "up_delay=<int> "
3484         "down_delay=<int>");
3485
3486 int bond_logtype;
3487
3488 RTE_INIT(bond_init_log);
3489 static void
3490 bond_init_log(void)
3491 {
3492         bond_logtype = rte_log_register("pmd.net.bond");
3493         if (bond_logtype >= 0)
3494                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3495 }