bond: add mode 4
lib/librte_pmd_bond/rte_eth_bond_pmd.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_dev.h>
#include <rte_alarm.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

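/* Round-robin/balance/broadcast RX: polls each active slave in turn,
 * appending received packets to bufs until all slaves have been polled
 * or nb_pkts packets have been gathered. */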
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;

        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
                if (num_rx_slave) {
                        num_rx_total += num_rx_slave;
                        nb_pkts -= num_rx_slave;
                }
        }

        return num_rx_total;
}

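/* Active-backup RX: traffic is only ever received on the current
 * primary slave, so a single burst from it suffices. */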
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

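/* Mode 4 (802.3ad) RX: receives from every active slave, diverts slow
 * protocol (LACP/marker) frames to the mode 4 state machines, and drops
 * frames from slaves that are not collecting or, outside promiscuous
 * mode, frames not addressed to the bonded MAC. */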
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint8_t slaves[RTE_MAX_ETHPORTS];
        uint8_t slave_count;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {
                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        /* Remove packet from array if it is a slow packet, the slave is
                         * not in collecting state, or the bonding interface is not in
                         * promiscuous mode and the packet address does not match. */
                        if (unlikely(hdr->ether_type == ether_type_slow_be ||
                                !collecting || (!promisc &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
                                                bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
        }

        return num_rx_total;
}

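/* Round-robin TX: spreads the burst over the active slaves, starting
 * with the slave after the one used by the previous call; packets a
 * slave fails to send are moved to the tail of bufs. */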
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate the per-slave mbuf arrays with the packets to be sent on each slave */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* increment current slave index so the next call to tx burst starts on the
         * next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

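/* Active-backup TX: all traffic is transmitted on the current primary
 * slave. */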
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

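/* XOR-fold hash helpers shared by the balance and 802.3ad transmit
 * policies; callers reduce the result modulo the slave count. */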
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        uint16_t *word_src_addr = (uint16_t *)eth_hdr->s_addr.addr_bytes;
        uint16_t *word_dst_addr = (uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr);
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        uint32_t *word_src_addr = (uint32_t *)&(ipv6_hdr->src_addr[0]);
        uint32_t *word_dst_addr = (uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

static inline uint32_t
udp_hash(struct udp_hdr *hdr)
{
        return hdr->src_port ^ hdr->dst_port;
}

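/* Selects the output slave for a packet based on the configured
 * transmit policy: layer 2 (MAC addresses), layer 2+3 (MAC + IP
 * addresses) or layer 3+4 (IP addresses + UDP ports). */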
static inline uint16_t
xmit_slave_hash(const struct rte_mbuf *buf, uint8_t slave_count, uint8_t policy)
{
        struct ether_hdr *eth_hdr;
        struct udp_hdr *udp_hdr;
        size_t eth_offset = 0;
        uint32_t hash = 0;

        if (slave_count == 1)
                return 0;

        switch (policy) {
        case BALANCE_XMIT_POLICY_LAYER2:
                eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

                hash = ether_hash(eth_hdr);
                hash ^= hash >> 8;
                return hash % slave_count;

        case BALANCE_XMIT_POLICY_LAYER23:
                eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

                if (buf->ol_flags & PKT_RX_VLAN_PKT)
                        eth_offset = sizeof(struct ether_hdr) + sizeof(struct vlan_hdr);
                else
                        eth_offset = sizeof(struct ether_hdr);

                if (buf->ol_flags & PKT_RX_IPV4_HDR) {
                        struct ipv4_hdr *ipv4_hdr;
                        ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(buf,
                                        unsigned char *) + eth_offset);

                        hash = ether_hash(eth_hdr) ^ ipv4_hash(ipv4_hdr);

                } else {
                        struct ipv6_hdr *ipv6_hdr;

                        ipv6_hdr = (struct ipv6_hdr *)(rte_pktmbuf_mtod(buf,
                                        unsigned char *) + eth_offset);

                        hash = ether_hash(eth_hdr) ^ ipv6_hash(ipv6_hdr);
                }
                break;

        case BALANCE_XMIT_POLICY_LAYER34:
                if (buf->ol_flags & PKT_RX_VLAN_PKT)
                        eth_offset = sizeof(struct ether_hdr) + sizeof(struct vlan_hdr);
                else
                        eth_offset = sizeof(struct ether_hdr);

                if (buf->ol_flags & PKT_RX_IPV4_HDR) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        (rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset);

                        if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)
                                                (rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset +
                                                                sizeof(struct ipv4_hdr));
                                hash = ipv4_hash(ipv4_hdr) ^ udp_hash(udp_hdr);
                        } else {
                                hash = ipv4_hash(ipv4_hdr);
                        }
                } else {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        (rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset);

                        if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)
                                                (rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset +
                                                                sizeof(struct ipv6_hdr));
                                hash = ipv6_hash(ipv6_hdr) ^ udp_hash(udp_hdr);
                        } else {
                                hash = ipv6_hash(ipv6_hdr);
                        }
                }
                break;
        }

        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

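/* Balance TX: buckets packets per slave via xmit_slave_hash() according
 * to the configured policy, then bursts each bucket; any unsent packets
 * are compacted to the tail of bufs. */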
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

        int i, op_slave_id;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate the per-slave mbuf arrays with the packets to be sent on each slave */
        for (i = 0; i < nb_pkts; i++) {
                /* Select output slave using hash based on xmit policy */
                op_slave_id = xmit_slave_hash(bufs[i], num_of_slaves,
                                internals->balance_xmit_policy);

                /* Populate slave mbuf arrays with mbufs for that slave */
                slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += slave_tx_fail_count;
                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                slave_tx_fail_count * sizeof(bufs[0]));
                        }

                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

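/* Mode 4 (802.3ad) TX: first drains pending slow protocol frames from
 * each slave's control tx_ring, then hashes the data burst across the
 * slaves currently in DISTRIBUTING state. Unsent slow frames are freed;
 * unsent data frames are moved to the tail of bufs and excluded from
 * the return value. */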
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];
        /* positions in the slaves array, not port IDs */
        uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
        uint8_t distributing_count;

        uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
        uint16_t i, j, op_slave_idx;
        const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

        /* Reserve extra room for slow protocol packets in 802.3ad mode. */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
        void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS];

        /* Total amount of packets in slave_bufs */
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
        /* Slow packets placed in each slave */
        uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

        distributing_count = 0;
        for (i = 0; i < num_of_slaves; i++) {
                struct port *port = &mode_8023ad_ports[slaves[i]];

                slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
                                slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
                slave_nb_pkts[i] = slave_slow_nb_pkts[i];

                for (j = 0; j < slave_slow_nb_pkts[i]; j++)
                        slave_bufs[i][j] = slow_pkts[j];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        distributing_offsets[distributing_count++] = i;
        }

        if (likely(distributing_count > 0)) {
                /* Populate the per-slave mbuf arrays with the packets to be sent */
                for (i = 0; i < nb_pkts; i++) {
                        /* Select output slave using hash based on xmit policy */
                        op_slave_idx = xmit_slave_hash(bufs[i], distributing_count,
                                        internals->balance_xmit_policy);

                        /* Populate slave mbuf arrays with mbufs for that slave. Use only
                         * slaves that are currently distributing. */
                        uint8_t slave_offset = distributing_offsets[op_slave_idx];
                        slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
                        slave_nb_pkts[slave_offset]++;
                }
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] == 0)
                        continue;

                num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                slave_bufs[i], slave_nb_pkts[i]);

                /* If tx burst fails drop slow packets */
                for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
                        rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

                num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
                num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                        uint16_t j = nb_pkts - num_tx_fail_total;
                        for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
                                bufs[j] = slave_bufs[i][num_tx_slave];
                }
        }

        return num_tx_total;
}

#ifdef RTE_MBUF_REFCNT
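/* Broadcast TX: increments each mbuf's reference count and transmits
 * the full burst on every active slave. As the caller cannot know about
 * the extra references, failed packets are freed here for every slave
 * except the most successful one. */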
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t tx_failed_flag = 0, num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t max_nb_of_tx_pkts = 0;

        int slave_tx_total[RTE_MAX_ETHPORTS];
        int i, most_successful_tx_slave = -1;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return 0;

        /* Increment reference count on mbufs */
        for (i = 0; i < nb_pkts; i++)
                rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

        /* Transmit burst on each active slave */
        for (i = 0; i < num_of_slaves; i++) {
                slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        bufs, nb_pkts);

                if (unlikely(slave_tx_total[i] < nb_pkts))
                        tx_failed_flag = 1;

                /* record the value and slave index for the slave which transmits the
                 * maximum number of packets */
                if (slave_tx_total[i] > max_nb_of_tx_pkts) {
                        max_nb_of_tx_pkts = slave_tx_total[i];
                        most_successful_tx_slave = i;
                }
        }

        /* if slaves fail to transmit packets from burst, the calling application
         * is not expected to know about multiple references to packets so we must
         * handle failures of all packets except those of the most successful slave
         */
        if (unlikely(tx_failed_flag))
                for (i = 0; i < num_of_slaves; i++)
                        if (i != most_successful_tx_slave)
                                while (slave_tx_total[i] < nb_pkts)
                                        rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

        return max_nb_of_tx_pkts;
}
#endif

void
link_properties_set(struct rte_eth_dev *bonded_eth_dev,
                struct rte_eth_link *slave_dev_link)
{
        struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        if (slave_dev_link->link_status &&
                bonded_eth_dev->data->dev_started) {
                bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
                bonded_dev_link->link_speed = slave_dev_link->link_speed;

                internals->link_props_set = 1;
        }
}

void
link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
{
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        memset(&(bonded_eth_dev->data->dev_link), 0,
                        sizeof(bonded_eth_dev->data->dev_link));

        internals->link_props_set = 0;
}

int
link_properties_valid(struct rte_eth_link *bonded_dev_link,
                struct rte_eth_link *slave_dev_link)
{
        if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
                bonded_dev_link->link_speed != slave_dev_link->link_speed)
                return -1;

        return 0;
}

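/* Helpers for reading and writing a device's primary MAC address
 * (data->mac_addrs), with NULL-pointer validation of the arguments. */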
int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
        struct ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
                return -1;
        }

        if (dst_mac_addr == NULL) {
                RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
                return -1;
        }

        /* Only dereference eth_dev after the NULL check above */
        mac_addr = eth_dev->data->mac_addrs;

        ether_addr_copy(mac_addr, dst_mac_addr);
        return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
        struct ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
                return -1;
        }

        if (new_mac_addr == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
                return -1;
        }

        /* Only dereference eth_dev after the NULL check above */
        mac_addr = eth_dev->data->mac_addrs;

        /* If new MAC is different from current MAC then update */
        if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
                memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

        return 0;
}

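/* Pushes the appropriate MAC address to every slave for the current
 * mode: round-robin/balance/broadcast share the bonded MAC, mode 4
 * delegates to its own handler, and active-backup applies the bonded
 * MAC to the primary only, restoring persisted MACs elsewhere. */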
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
        int i;

        /* Update slave devices MAC addresses */
        if (internals->slave_count < 1)
                return -1;

        switch (internals->mode) {
        case BONDING_MODE_ROUND_ROBIN:
        case BONDING_MODE_BALANCE:
#ifdef RTE_MBUF_REFCNT
        case BONDING_MODE_BROADCAST:
#endif
                for (i = 0; i < internals->slave_count; i++) {
                        if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
                                        bonded_eth_dev->data->mac_addrs)) {
                                RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                internals->slaves[i].port_id);
                                return -1;
                        }
                }
                break;
        case BONDING_MODE_8023AD:
                bond_mode_8023ad_mac_address_update(bonded_eth_dev);
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
        default:
                for (i = 0; i < internals->slave_count; i++) {
                        if (internals->slaves[i].port_id ==
                                        internals->current_primary_port) {
                                /* Index with current_primary_port, the port tested above */
                                if (mac_address_set(
                                                &rte_eth_devices[internals->current_primary_port],
                                                bonded_eth_dev->data->mac_addrs)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->current_primary_port);
                                        return -1;
                                }
                        } else {
                                if (mac_address_set(
                                                &rte_eth_devices[internals->slaves[i].port_id],
                                                &internals->slaves[i].persisted_mac_addr)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->slaves[i].port_id);
                                        return -1;
                                }
                        }
                }
        }

        return 0;
}

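/* Installs the RX/TX burst handlers implementing the requested bonding
 * mode. Note that mode 4 requires the application to call the TX and RX
 * burst functions at least every 100ms (see the warning below). */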
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
        struct bond_dev_private *internals;

        internals = eth_dev->data->dev_private;

        switch (mode) {
        case BONDING_MODE_ROUND_ROBIN:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
                break;
        case BONDING_MODE_BALANCE:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
#ifdef RTE_MBUF_REFCNT
        case BONDING_MODE_BROADCAST:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
#endif
        case BONDING_MODE_8023AD:
                if (bond_mode_8023ad_enable(eth_dev) != 0)
                        return -1;

                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
                RTE_BOND_LOG(WARNING,
                                "Using mode 4, it is necessary to do TX burst and RX burst "
                                "at least every 100ms.");
                break;
        default:
                return -1;
        }

        internals->mode = mode;

        return 0;
}

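/* Applies the bonded device's configuration to a slave: stops it,
 * mirrors the RX/TX queue setup, then starts it again. */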
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
                struct rte_eth_dev *slave_eth_dev)
{
        struct bond_rx_queue *bd_rx_q;
        struct bond_tx_queue *bd_tx_q;

        int errval, q_id;

        /* Stop slave */
        rte_eth_dev_stop(slave_eth_dev->data->port_id);

        /* Enable interrupts on slave device if supported */
        if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
                slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

        /* Configure device */
        errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
                        bonded_eth_dev->data->nb_rx_queues,
                        bonded_eth_dev->data->nb_tx_queues,
                        &(slave_eth_dev->data->dev_conf));
        if (errval != 0) {
                RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                return errval;
        }

        /* Setup Rx Queues */
        for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
                bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

                errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
                                bd_rx_q->nb_rx_desc,
                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                                &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                        "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
                                        slave_eth_dev->data->port_id, q_id, errval);
                        return errval;
                }
        }

        /* Setup Tx Queues */
        for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
                bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

                errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
                                bd_tx_q->nb_tx_desc,
                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                                &bd_tx_q->tx_conf);
                if (errval != 0) {
                        RTE_BOND_LOG(ERR,
                                        "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
                                        slave_eth_dev->data->port_id, q_id, errval);
                        return errval;
                }
        }

        /* Start device */
        errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
        if (errval != 0) {
                RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
                                slave_eth_dev->data->port_id, errval);
                return -1;
        }

        return 0;
}

void
slave_remove(struct bond_dev_private *internals,
                struct rte_eth_dev *slave_eth_dev)
{
        int i, found = 0;

        for (i = 0; i < internals->slave_count; i++) {
                if (internals->slaves[i].port_id == slave_eth_dev->data->port_id)
                        found = 1;

                if (found && i < (internals->slave_count - 1))
                        memcpy(&internals->slaves[i], &internals->slaves[i+1],
                                        sizeof(internals->slaves[i]));
        }

        /* Only shrink the slave list if the port was actually found */
        if (found)
                internals->slave_count--;
}

static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

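/* Records the new slave's details; if the slave cannot generate LSC
 * interrupts, enables periodic link status polling via an EAL alarm. */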
void
slave_add(struct bond_dev_private *internals,
                struct rte_eth_dev *slave_eth_dev)
{
        struct bond_slave_details *slave_details =
                        &internals->slaves[internals->slave_count];

        slave_details->port_id = slave_eth_dev->data->port_id;
        slave_details->last_link_status = 0;

        /* If the slave device doesn't support interrupts then we need to enable
         * polling to monitor link status */
        if (!(slave_eth_dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
                slave_details->link_status_poll_enabled = 1;

                if (!internals->link_status_polling_enabled) {
                        internals->link_status_polling_enabled = 1;

                        rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
                                        bond_ethdev_slave_link_status_change_monitor,
                                        (void *)&rte_eth_devices[internals->port_id]);
                }
        }

        slave_details->link_status_wait_to_complete = 0;

        memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
                        sizeof(struct ether_addr));
}

void
bond_ethdev_primary_set(struct bond_dev_private *internals,
                uint8_t slave_port_id)
{
        int i;

        if (internals->active_slave_count < 1)
                internals->current_primary_port = slave_port_id;
        else
                /* Search bonded device slave ports for new proposed primary port */
                for (i = 0; i < internals->active_slave_count; i++) {
                        if (internals->active_slaves[i] == slave_port_id)
                                internals->current_primary_port = slave_port_id;
                }
}

static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);

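/* Starts the bonded device: inherits the MAC address from the primary
 * slave unless the user set one, pushes MAC and promiscuous settings to
 * the slaves, and reconfigures and restarts each slave device. */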
static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
        struct bond_dev_private *internals;
        int i;

        /* slave eth devs will be started by the bonded device */
        if (valid_bonded_ethdev(eth_dev)) {
                RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
                                eth_dev->data->port_id);
                return -1;
        }

        eth_dev->data->dev_link.link_status = 0;
        eth_dev->data->dev_started = 1;

        internals = eth_dev->data->dev_private;

        if (internals->slave_count == 0) {
                RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
                return -1;
        }

        if (internals->user_defined_mac == 0) {
                struct ether_addr *new_mac_addr = NULL;

                for (i = 0; i < internals->slave_count; i++)
                        if (internals->slaves[i].port_id == internals->primary_port)
                                new_mac_addr = &internals->slaves[i].persisted_mac_addr;

                if (new_mac_addr == NULL)
                        return -1;

                if (mac_address_set(eth_dev, new_mac_addr) != 0) {
                        RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
                                        eth_dev->data->port_id);
                        return -1;
                }
        }

        /* Update all slave devices' MACs */
        if (mac_address_slaves_update(eth_dev) != 0)
                return -1;

        /* If bonded device is configured in promiscuous mode then re-apply config */
        if (internals->promiscuous_en)
                bond_ethdev_promiscuous_enable(eth_dev);

        /* Reconfigure each slave device if starting bonded device */
        for (i = 0; i < internals->slave_count; i++) {
                if (slave_configure(eth_dev,
                                &(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
                        RTE_BOND_LOG(ERR,
                                        "bonded port (%d) failed to reconfigure slave device (%d)",
                                        eth_dev->data->port_id, internals->slaves[i].port_id);
                        return -1;
                }
        }

        if (internals->user_defined_primary_port)
                bond_ethdev_primary_set(internals, internals->primary_port);

        if (internals->mode == BONDING_MODE_8023AD)
                bond_mode_8023ad_start(eth_dev);

        return 0;
}

static void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
        struct bond_dev_private *internals = eth_dev->data->dev_private;
        uint8_t i;

        if (internals->mode == BONDING_MODE_8023AD) {
                struct port *port;
                void *pkt = NULL;

                bond_mode_8023ad_stop(eth_dev);

                /* Discard all messages to/from mode 4 state machines */
                for (i = 0; i < internals->slave_count; i++) {
                        port = &mode_8023ad_ports[internals->slaves[i].port_id];

                        RTE_VERIFY(port->rx_ring != NULL);
                        while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
                                rte_pktmbuf_free(pkt);

                        RTE_VERIFY(port->tx_ring != NULL);
                        while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
                                rte_pktmbuf_free(pkt);
                }
        }

        internals->active_slave_count = 0;
        internals->link_status_polling_enabled = 0;

        eth_dev->data->dev_link.link_status = 0;
        eth_dev->data->dev_started = 0;
}

static void
bond_ethdev_close(struct rte_eth_dev *dev __rte_unused)
{
}

/* forward declaration */
static int bond_ethdev_configure(struct rte_eth_dev *dev);

static void
bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
        dev_info->driver_name = driver_name;
        dev_info->max_mac_addrs = 1;

        dev_info->max_rx_pktlen = (uint32_t)2048;

        dev_info->max_rx_queues = (uint16_t)128;
        dev_info->max_tx_queues = (uint16_t)512;

        dev_info->min_rx_bufsize = 0;
        dev_info->pci_dev = dev->pci_dev;
}

static int
bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
                uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
                const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
                        rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
                                        0, dev->pci_dev->numa_node);
        if (bd_rx_q == NULL)
                return -1;

        bd_rx_q->queue_id = rx_queue_id;
        bd_rx_q->dev_private = dev->data->dev_private;

        bd_rx_q->nb_rx_desc = nb_rx_desc;

        memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
        bd_rx_q->mb_pool = mb_pool;

        dev->data->rx_queues[rx_queue_id] = bd_rx_q;

        return 0;
}

static int
bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
                uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
                const struct rte_eth_txconf *tx_conf)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
                        rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
                                        0, dev->pci_dev->numa_node);

        if (bd_tx_q == NULL)
                return -1;

        bd_tx_q->queue_id = tx_queue_id;
        bd_tx_q->dev_private = dev->data->dev_private;

        bd_tx_q->nb_tx_desc = nb_tx_desc;
        memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));

        dev->data->tx_queues[tx_queue_id] = bd_tx_q;

        return 0;
}

static void
bond_ethdev_rx_queue_release(void *queue)
{
        if (queue == NULL)
                return;

        rte_free(queue);
}

static void
bond_ethdev_tx_queue_release(void *queue)
{
        if (queue == NULL)
                return;

        rte_free(queue);
}

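/* Periodic alarm callback: polls link status of slaves that lack LSC
 * interrupt support and re-arms itself while at least one such slave
 * remains (or when the lock could not be taken). */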
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
        struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
        struct bond_dev_private *internals;

        /* Default value for polling slave found is true as we don't want to
         * disable the polling thread if we cannot get the lock */
        int i, polling_slave_found = 1;

        if (cb_arg == NULL)
                return;

        bonded_ethdev = (struct rte_eth_dev *)cb_arg;
        internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;

        if (!bonded_ethdev->data->dev_started ||
                !internals->link_status_polling_enabled)
                return;

        /* If device is currently being configured then don't check slaves link
         * status, wait until next period */
        if (rte_spinlock_trylock(&internals->lock)) {
                if (internals->slave_count > 0)
                        polling_slave_found = 0;

                for (i = 0; i < internals->slave_count; i++) {
                        if (!internals->slaves[i].link_status_poll_enabled)
                                continue;

                        slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
                        polling_slave_found = 1;

                        /* Update slave link status */
                        (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
                                        internals->slaves[i].link_status_wait_to_complete);

                        /* if link status has changed since last checked then call lsc
                         * event callback */
                        if (slave_ethdev->data->dev_link.link_status !=
                                        internals->slaves[i].last_link_status) {
                                internals->slaves[i].last_link_status =
                                                slave_ethdev->data->dev_link.link_status;

                                bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
                                                RTE_ETH_EVENT_INTR_LSC,
                                                &bonded_ethdev->data->port_id);
                        }
                }
                rte_spinlock_unlock(&internals->lock);
        }

        if (polling_slave_found)
                /* Set alarm to continue monitoring link status of slave ethdevs */
                rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
                                bond_ethdev_slave_link_status_change_monitor, cb_arg);
}

static int
bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
                int wait_to_complete)
{
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        if (!bonded_eth_dev->data->dev_started ||
                internals->active_slave_count == 0) {
                bonded_eth_dev->data->dev_link.link_status = 0;
                return 0;
        } else {
                struct rte_eth_dev *slave_eth_dev;
                int i, link_up = 0;

                for (i = 0; i < internals->active_slave_count; i++) {
                        slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];

                        (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
                                        wait_to_complete);
                        if (slave_eth_dev->data->dev_link.link_status == 1) {
                                link_up = 1;
                                break;
                        }
                }

                bonded_eth_dev->data->dev_link.link_status = link_up;
        }

        return 0;
}

static void
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        struct bond_dev_private *internals = dev->data->dev_private;
        struct rte_eth_stats slave_stats;

        int i;

        /* clear bonded stats before populating from slaves */
        memset(stats, 0, sizeof(*stats));

        for (i = 0; i < internals->slave_count; i++) {
                rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);

                stats->ipackets += slave_stats.ipackets;
                stats->opackets += slave_stats.opackets;
                stats->ibytes += slave_stats.ibytes;
                stats->obytes += slave_stats.obytes;
                stats->ierrors += slave_stats.ierrors;
                stats->oerrors += slave_stats.oerrors;
                stats->imcasts += slave_stats.imcasts;
                stats->rx_nombuf += slave_stats.rx_nombuf;
                stats->fdirmatch += slave_stats.fdirmatch;
                stats->fdirmiss += slave_stats.fdirmiss;
                stats->tx_pause_xon += slave_stats.tx_pause_xon;
                stats->rx_pause_xon += slave_stats.rx_pause_xon;
                stats->tx_pause_xoff += slave_stats.tx_pause_xoff;
                stats->rx_pause_xoff += slave_stats.rx_pause_xoff;
        }
}

static void
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
        struct bond_dev_private *internals = dev->data->dev_private;
        int i;

        for (i = 0; i < internals->slave_count; i++)
                rte_eth_stats_reset(internals->slaves[i].port_id);
}

static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
        struct bond_dev_private *internals = eth_dev->data->dev_private;
        int i;

        internals->promiscuous_en = 1;

        switch (internals->mode) {
        /* Promiscuous mode is propagated to all slaves */
        case BONDING_MODE_ROUND_ROBIN:
        case BONDING_MODE_BALANCE:
#ifdef RTE_MBUF_REFCNT
        case BONDING_MODE_BROADCAST:
#endif
                for (i = 0; i < internals->slave_count; i++)
                        rte_eth_promiscuous_enable(internals->slaves[i].port_id);
                break;
        /* In mode 4, promiscuous mode is managed when a slave is added/removed */
        case BONDING_MODE_8023AD:
                break;
        /* Promiscuous mode is propagated only to the primary slave */
        case BONDING_MODE_ACTIVE_BACKUP:
        default:
                rte_eth_promiscuous_enable(internals->current_primary_port);
        }
}

static void
bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
        struct bond_dev_private *internals = dev->data->dev_private;
        int i;

        internals->promiscuous_en = 0;

        switch (internals->mode) {
        /* Promiscuous mode is propagated to all slaves */
        case BONDING_MODE_ROUND_ROBIN:
        case BONDING_MODE_BALANCE:
#ifdef RTE_MBUF_REFCNT
        case BONDING_MODE_BROADCAST:
#endif
                for (i = 0; i < internals->slave_count; i++)
                        rte_eth_promiscuous_disable(internals->slaves[i].port_id);
                break;
        /* In mode 4, promiscuous mode is managed when a slave is added/removed */
        case BONDING_MODE_8023AD:
                break;
        /* Promiscuous mode is propagated only to the primary slave */
        case BONDING_MODE_ACTIVE_BACKUP:
        default:
                rte_eth_promiscuous_disable(internals->current_primary_port);
        }
}

static void
bond_ethdev_delayed_lsc_propagation(void *arg)
{
        if (arg == NULL)
                return;

        _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
                        RTE_ETH_EVENT_INTR_LSC);
}

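/* LSC event handler for slave ports: activates or deactivates the
 * slave, updates the current primary port and the bonded device's link
 * status and properties, and propagates the LSC event to the
 * application, optionally delayed by the configured up/down delays. */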
1288 void
1289 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
1290                 void *param)
1291 {
1292         struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
1293         struct bond_dev_private *internals;
1294         struct rte_eth_link link;
1295
1296         int i, valid_slave = 0;
1297         uint8_t active_pos;
1298         uint8_t lsc_flag = 0;
1299
1300         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
1301                 return;
1302
1303         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
1304         slave_eth_dev = &rte_eth_devices[port_id];
1305
1306         if (valid_bonded_ethdev(bonded_eth_dev))
1307                 return;
1308
1309         internals = bonded_eth_dev->data->dev_private;
1310
1311         /* If the device isn't started don't handle interrupts */
1312         if (!bonded_eth_dev->data->dev_started)
1313                 return;
1314
1315         /* verify that port_id is a valid slave of bonded port */
1316         for (i = 0; i < internals->slave_count; i++) {
1317                 if (internals->slaves[i].port_id == port_id) {
1318                         valid_slave = 1;
1319                         break;
1320                 }
1321         }
1322
1323         if (!valid_slave)
1324                 return;
1325
1326         /* Search for port in active port list */
1327         active_pos = find_slave_by_id(internals->active_slaves,
1328                         internals->active_slave_count, port_id);
1329
1330         rte_eth_link_get_nowait(port_id, &link);
1331         if (link.link_status) {
1332                 if (active_pos < internals->active_slave_count)
1333                         return;
1334
1335                 /* if no active slave ports then set this port to be primary port */
1336                 if (internals->active_slave_count < 1) {
1337                         /* If first active slave, then change link status */
1338                         bonded_eth_dev->data->dev_link.link_status = 1;
1339                         internals->current_primary_port = port_id;
1340                         lsc_flag = 1;
1341
1342                         /* Inherit eth dev link properties from first active slave */
1343                         link_properties_set(bonded_eth_dev,
1344                                         &(slave_eth_dev->data->dev_link));
1345                 }
1346
1347                 activate_slave(bonded_eth_dev, port_id);
1348
1349                 /* If user has defined the primary port then default to using it */
1350                 if (internals->user_defined_primary_port &&
1351                                 internals->primary_port == port_id)
1352                         bond_ethdev_primary_set(internals, port_id);
1353         } else {
1354                 if (active_pos == internals->active_slave_count)
1355                         return;
1356
1357                 /* Remove from active slave list */
1358                 deactivate_slave(bonded_eth_dev, port_id);
1359
1360                 /* No active slaves, change link status to down and reset other
1361                  * link properties */
1362                 if (internals->active_slave_count < 1) {
1363                         lsc_flag = 1;
1364                         bonded_eth_dev->data->dev_link.link_status = 0;
1365
1366                         link_properties_reset(bonded_eth_dev);
1367                 }
1368
1369                 /* Update primary id, take first active slave from list or if none
1370                  * available fall back to the configured primary port */
1371                 if (port_id == internals->current_primary_port) {
1372                         if (internals->active_slave_count > 0)
1373                                 bond_ethdev_primary_set(internals,
1374                                                 internals->active_slaves[0]);
1375                         else
1376                                 internals->current_primary_port = internals->primary_port;
1377                 }
1378         }
1379
1380         if (lsc_flag) {
1381                 /* Cancel any possible outstanding interrupts if delays are enabled */
1382                 if (internals->link_up_delay_ms > 0 ||
1383                         internals->link_down_delay_ms > 0)
1384                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
1385                                         bonded_eth_dev);
1386
1387                 if (bonded_eth_dev->data->dev_link.link_status) {
1388                         if (internals->link_up_delay_ms > 0)
1389                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
1390                                                 bond_ethdev_delayed_lsc_propagation,
1391                                                 (void *)bonded_eth_dev);
1392                         else
1393                                 _rte_eth_dev_callback_process(bonded_eth_dev,
1394                                                 RTE_ETH_EVENT_INTR_LSC);
1395
1396                 } else {
1397                         if (internals->link_down_delay_ms > 0)
1398                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
1399                                                 bond_ethdev_delayed_lsc_propagation,
1400                                                 (void *)bonded_eth_dev);
1401                         else
1402                                 _rte_eth_dev_callback_process(bonded_eth_dev,
1403                                                 RTE_ETH_EVENT_INTR_LSC);
1404                 }
1405         }
1406 }
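
/*
 * Illustrative sketch (not part of the original file): the logic above
 * propagates slave link changes to the bonded device, optionally after the
 * configured up/down delays. An application can observe the propagated
 * events by registering its own LSC callback on the bonded port through the
 * standard ethdev API; the handler and registration below are hypothetical.
 */
static void
example_bond_lsc_handler(uint8_t port_id, enum rte_eth_event_type type,
		void *param)
{
	struct rte_eth_link link;

	(void)param;

	if (type != RTE_ETH_EVENT_INTR_LSC)
		return;

	rte_eth_link_get_nowait(port_id, &link);
	RTE_LOG(INFO, USER1, "bonded port %u link is now %s\n",
			port_id, link.link_status ? "up" : "down");
}
/*
 * Registered with e.g.:
 * rte_eth_dev_callback_register(bonded_port_id, RTE_ETH_EVENT_INTR_LSC,
 *		example_bond_lsc_handler, NULL);
 */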
1407
1408 struct eth_dev_ops default_dev_ops = {
1409                 .dev_start = bond_ethdev_start,
1410                 .dev_stop = bond_ethdev_stop,
1411                 .dev_close = bond_ethdev_close,
1412                 .dev_configure = bond_ethdev_configure,
1413                 .dev_infos_get = bond_ethdev_info,
1414                 .rx_queue_setup = bond_ethdev_rx_queue_setup,
1415                 .tx_queue_setup = bond_ethdev_tx_queue_setup,
1416                 .rx_queue_release = bond_ethdev_rx_queue_release,
1417                 .tx_queue_release = bond_ethdev_tx_queue_release,
1418                 .link_update = bond_ethdev_link_update,
1419                 .stats_get = bond_ethdev_stats_get,
1420                 .stats_reset = bond_ethdev_stats_reset,
1421                 .promiscuous_enable = bond_ethdev_promiscuous_enable,
1422                 .promiscuous_disable = bond_ethdev_promiscuous_disable
1423 };
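
/*
 * Illustrative sketch (not part of the original file): generic rte_eth_*
 * calls on the bonded port dispatch through the ops table above, so an
 * application drives the bonded device like any physical port. The helper
 * and port id below are hypothetical.
 */
static void
example_query_bonded_port(uint8_t bonded_port_id)
{
	struct rte_eth_dev_info dev_info;

	/* Dispatches to bond_ethdev_info via .dev_infos_get */
	rte_eth_dev_info_get(bonded_port_id, &dev_info);

	/* Dispatches to bond_ethdev_promiscuous_enable */
	rte_eth_promiscuous_enable(bonded_port_id);
}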
1424
1425 static int
1426 bond_init(const char *name, const char *params)
1427 {
1428         struct bond_dev_private *internals;
1429         struct rte_kvargs *kvlist;
1430         uint8_t bonding_mode, socket_id;
1431         int arg_count, port_id;
1432
1433         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
1434
1435         kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
1436         if (kvlist == NULL)
1437                 return -1;
1438
1439         /* Parse link bonding mode */
1440         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
1441                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
1442                                 &bond_ethdev_parse_slave_mode_kvarg, &bonding_mode) != 0) {
1443                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n", name);
1444                         return -1;
1445                 }
1446         } else {
1447                 RTE_LOG(ERR, EAL,
1448                                 "Mode must be specified exactly once for bonded device %s\n",
1449                                 name);
1450                 return -1;
1451         }
1452
1453         /* Parse socket id to create bonding device on */
1454         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
1455         if (arg_count == 1) {
1456                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
1457                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id) != 0) {
1458                         RTE_LOG(ERR, EAL,
1459                                         "Invalid socket id specified for bonded device %s\n",
1460                                         name);
1461                         return -1;
1462                 }
1463         } else if (arg_count > 1) {
1464                 RTE_LOG(ERR, EAL,
1465                                 "Socket id can be specified only once for bonded device %s\n",
1466                                 name);
1467                 return -1;
1468         } else {
1469                 socket_id = rte_socket_id();
1470         }
1471
1472         /* Create link bonding eth device */
1473         port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
1474         if (port_id < 0) {
1475                 RTE_LOG(ERR, EAL,
1476                                 "Failed to create bonded device %s in mode %u on socket %u.\n",
1477                                 name, bonding_mode, socket_id);
1478                 return -1;
1479         }
1480         internals = rte_eth_devices[port_id].data->dev_private;
1481         internals->kvlist = kvlist;
1482
1483         RTE_LOG(INFO, EAL,
1484                         "Create bonded device %s on port %d in mode %u on socket %u.\n",
1485                         name, port_id, bonding_mode, socket_id);
1486         return 0;
1487 }
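
/*
 * Usage sketch (not part of the original file): bond_init() above is driven
 * by EAL --vdev arguments. Assuming the kvarg key strings defined in the
 * bonding headers are "mode", "slave" and "socket_id", a bonded device could
 * be created at startup with e.g.:
 *
 *   ./app -c 0xf -n 4 --vdev 'eth_bond0,mode=1,slave=0000:02:00.0,socket_id=0'
 *
 * The remaining kvargs (primary slave, MAC address, xmit policy, delays) are
 * resolved later in bond_ethdev_configure() below.
 */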
1488
1489 /* this part resolves the slave port ids after all the other pdevs and vdevs
1490  * have been allocated */
1491 static int
1492 bond_ethdev_configure(struct rte_eth_dev *dev)
1493 {
1494         char *name = dev->data->name;
1495         struct bond_dev_private *internals = dev->data->dev_private;
1496         struct rte_kvargs *kvlist = internals->kvlist;
1497         int arg_count, port_id = dev - rte_eth_devices;
1498
1499         /*
1500          * if no kvlist, it means that this bonded device has been created
1501          * through the bonding API.
1502          */
1503         if (!kvlist)
1504                 return 0;
1505
1506         /* Parse MAC address for bonded device */
1507         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
1508         if (arg_count == 1) {
1509                 struct ether_addr bond_mac;
1510
1511                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
1512                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
1513                         RTE_LOG(ERR, EAL, "Invalid MAC address for bonded device %s\n",
1514                                         name);
1515                         return -1;
1516                 }
1517
1518                 /* Set MAC address */
1519                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
1520                         RTE_LOG(ERR, EAL,
1521                                         "Failed to set MAC address on bonded device %s\n",
1522                                         name);
1523                         return -1;
1524                 }
1525         } else if (arg_count > 1) {
1526                 RTE_LOG(ERR, EAL,
1527                                 "MAC address can be specified only once for bonded device %s\n",
1528                                 name);
1529                 return -1;
1530         }
1531
1532         /* Parse/set balance mode transmit policy */
1533         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
1534         if (arg_count == 1) {
1535                 uint8_t xmit_policy;
1536
1537                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
1538                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
1539                                                 0) {
1540                         RTE_LOG(ERR, EAL,
1541                                         "Invalid xmit policy specified for bonded device %s\n",
1542                                         name);
1543                         return -1;
1544                 }
1545
1546                 /* Set balance mode transmit policy */
1547                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
1548                         RTE_LOG(ERR, EAL,
1549                                         "Failed to set balance xmit policy on bonded device %s\n",
1550                                         name);
1551                         return -1;
1552                 }
1553         } else if (arg_count > 1) {
1554                 RTE_LOG(ERR, EAL,
1555                                 "Transmit policy can be specified only once for bonded device"
1556                                 " %s\n", name);
1557                 return -1;
1558         }
1559
1560         /* Parse/add slave ports to bonded device */
1561         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
1562                 struct bond_ethdev_slave_ports slave_ports;
1563                 unsigned i;
1564
1565                 memset(&slave_ports, 0, sizeof(slave_ports));
1566
1567                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
1568                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
1569                         RTE_LOG(ERR, EAL,
1570                                         "Failed to parse slave ports for bonded device %s\n",
1571                                         name);
1572                         return -1;
1573                 }
1574
1575                 for (i = 0; i < slave_ports.slave_count; i++) {
1576                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
1577                                 RTE_LOG(ERR, EAL,
1578                                                 "Failed to add port %d as slave to bonded device %s\n",
1579                                                 slave_ports.slaves[i], name);
1580                         }
1581                 }
1582
1583         } else {
1584                 RTE_LOG(ERR, EAL, "No slaves specified for bonded device %s\n", name);
1585                 return -1;
1586         }
1587
1588         /* Parse/set primary slave port id */
1589         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
1590         if (arg_count == 1) {
1591                 uint8_t primary_slave_port_id;
1592
1593                 if (rte_kvargs_process(kvlist,
1594                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
1595                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
1596                                 &primary_slave_port_id) < 0) {
1597                         RTE_LOG(ERR, EAL,
1598                                         "Invalid primary slave port id specified for bonded device"
1599                                         " %s\n", name);
1600                         return -1;
1601                 }
1602
1603                 /* Set primary slave port id */
1604                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
1605                                 != 0) {
1606                         RTE_LOG(ERR, EAL,
1607                                         "Failed to set primary slave port %d on bonded device %s\n",
1608                                         primary_slave_port_id, name);
1609                         return -1;
1610                 }
1611         } else if (arg_count > 1) {
1612                 RTE_LOG(ERR, EAL,
1613                                 "Primary slave can be specified only once for bonded device"
1614                                 " %s\n", name);
1615                 return -1;
1616         }
1617
1618         /* Parse link status monitor polling interval */
1619         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
1620         if (arg_count == 1) {
1621                 uint32_t lsc_poll_interval_ms;
1622
1623                 if (rte_kvargs_process(kvlist,
1624                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
1625                                 &bond_ethdev_parse_time_ms_kvarg,
1626                                 &lsc_poll_interval_ms) < 0) {
1627                         RTE_LOG(ERR, EAL,
1628                                         "Invalid lsc polling interval value specified for bonded"
1629                                         " device %s\n", name);
1630                         return -1;
1631                 }
1632
1633                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
1634                                 != 0) {
1635                         RTE_LOG(ERR, EAL,
1636                                         "Failed to set lsc monitor polling interval (%u ms) on"
1637                                         " bonded device %s\n", lsc_poll_interval_ms, name);
1638                         return -1;
1639                 }
1640         } else if (arg_count > 1) {
1641                 RTE_LOG(ERR, EAL,
1642                                 "LSC polling interval can be specified only once for bonded"
1643                                 " device %s\n", name);
1644                 return -1;
1645         }
1646
1647         /* Parse link up interrupt propagation delay */
1648         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
1649         if (arg_count == 1) {
1650                 uint32_t link_up_delay_ms;
1651
1652                 if (rte_kvargs_process(kvlist,
1653                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
1654                                 &bond_ethdev_parse_time_ms_kvarg,
1655                                 &link_up_delay_ms) < 0) {
1656                         RTE_LOG(ERR, EAL,
1657                                         "Invalid link up propagation delay value specified for"
1658                                         " bonded device %s\n", name);
1659                         return -1;
1660                 }
1661
1662                 /* Set link up propagation delay */
1663                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
1664                                 != 0) {
1665                         RTE_LOG(ERR, EAL,
1666                                         "Failed to set link up propagation delay (%u ms) on bonded"
1667                                         " device %s\n", link_up_delay_ms, name);
1668                         return -1;
1669                 }
1670         } else if (arg_count > 1) {
1671                 RTE_LOG(ERR, EAL,
1672                                 "Link up propagation delay can be specified only once for"
1673                                 " bonded device %s\n", name);
1674                 return -1;
1675         }
1676
1677         /* Parse link down interrupt propagation delay */
1678         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
1679         if (arg_count == 1) {
1680                 uint32_t link_down_delay_ms;
1681
1682                 if (rte_kvargs_process(kvlist,
1683                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
1684                                 &bond_ethdev_parse_time_ms_kvarg,
1685                                 &link_down_delay_ms) < 0) {
1686                         RTE_LOG(ERR, EAL,
1687                                         "Invalid link down propagation delay value specified for"
1688                                         " bonded device %s\n", name);
1689                         return -1;
1690                 }
1691
1692                 /* Set link down propagation delay */
1693                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
1694                                 != 0) {
1695                         RTE_LOG(ERR, EAL,
1696                                         "Failed to set link down propagation delay (%u ms) on"
1697                                         " bonded device %s\n", link_down_delay_ms, name);
1698                         return -1;
1699                 }
1700         } else if (arg_count > 1) {
1701                 RTE_LOG(ERR, EAL,
1702                                 "Link down propagation delay can be specified only once for"
1703                                 " bonded device %s\n", name);
1704                 return -1;
1705         }
1706
1707         return 0;
1708 }
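
/*
 * Illustrative sketch (not part of the original file): the same configuration
 * that bond_ethdev_configure() derives from kvargs can be performed at run
 * time through the bonding API calls used above. The helper name and port
 * ids below are hypothetical; the mode constant comes from rte_eth_bond.h.
 */
static int
example_bond_setup_via_api(uint8_t slave_a, uint8_t slave_b)
{
	int port_id;

	port_id = rte_eth_bond_create("eth_bond_example",
			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
	if (port_id < 0)
		return -1;

	if (rte_eth_bond_slave_add(port_id, slave_a) != 0 ||
			rte_eth_bond_slave_add(port_id, slave_b) != 0)
		return -1;

	if (rte_eth_bond_primary_set(port_id, slave_a) != 0)
		return -1;

	/* Propagate link-up changes only after they are stable for 500 ms */
	if (rte_eth_bond_link_up_prop_delay_set(port_id, 500) != 0)
		return -1;

	return port_id;
}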
1709
1710 static struct rte_driver bond_drv = {
1711         .name = "eth_bond",
1712         .type = PMD_VDEV,
1713         .init = bond_init,
1714 };
1715
1716 PMD_REGISTER_DRIVER(bond_drv);