/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_dev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"
#define REORDER_PERIOD_MS 10

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
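
/*
 * Skip over up to two stacked (QinQ) VLAN headers: returns the byte offset
 * from the end of the Ethernet header to the encapsulated payload and
 * rewrites *proto to the inner EtherType. Typical use when locating the L3
 * header (a minimal sketch; "m" stands for any received mbuf):
 *
 *	struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
 *	uint16_t proto = eth->ether_type;
 *	struct ipv4_hdr *ip = (struct ipv4_hdr *)
 *			((char *)(eth + 1) + get_vlan_offset(eth, &proto));
 */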
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}
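
/*
 * Default RX path (used by round robin and balance modes): poll each active
 * slave's queue in turn, appending into bufs[] until nb_pkts mbufs have been
 * gathered or every active slave has been polled once.
 */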
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_slave = 0;
	uint16_t num_rx_total = 0;

	int i;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
	}

	return num_rx_total;
}
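
/*
 * Active-backup RX: only the current primary slave is polled; traffic
 * arriving on backup slaves is ignored until a fail-over promotes one.
 */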
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}
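
/*
 * 802.3ad (mode 4) RX: receive from every active slave, then filter the
 * burst in place. Slow protocol frames (LACPDUs) are handed to the mode 4
 * state machines; frames from slaves that are not collecting, or that are
 * not addressed to the bond (when not in promiscuous mode), are dropped.
 */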
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_addr bond_mac;

	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint8_t slaves[RTE_MAX_ETHPORTS];
	uint8_t slave_count;

	uint8_t collecting;  /* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t i, j, k;

	rte_eth_macaddr_get(internals->port_id, &bond_mac);
	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {
			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			/* Remove packet from array if it is a slow packet, or the slave
			 * is not in collecting state, or the bonding interface is not in
			 * promiscuous mode and the packet address does not match. */
			if (unlikely(hdr->ether_type == ether_type_slow_be ||
				!collecting || (!promisc &&
					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
						bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
	}

	return num_rx_total;
}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}

	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
	return;
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}
#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint8_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
static void
update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX packets number for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update its stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)	\
	RTE_LOG(DEBUG, PMD, \
		"%s " \
		"port:%d " \
		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"DstIP:%s " \
		"%s " \
		"%d\n", \
		info, \
		port, \
		eth_h->s_addr.addr_bytes[0], \
		eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], \
		eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], \
		eth_h->s_addr.addr_bytes[5], \
		src_ip, \
		eth_h->d_addr.addr_bytes[0], \
		eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], \
		eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], \
		eth_h->d_addr.addr_bytes[5], \
		dst_ip, \
		arp_op, \
		++burstnumber)
#endif
static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	snprintf(buf, 16, "%s", info);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}
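
/*
 * Round robin TX: spread the burst across the active slaves one mbuf at a
 * time, continuing from where the previous call left off; packets a slave
 * could not accept are compacted to the tail of bufs[].
 */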
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate each slave's mbuf array with the packets to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on
	 * the next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}
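
/*
 * The xmit_l*_hash() helpers below implement the balance-mode transmit
 * policies (layer 2, layer 2+3 and layer 3+4). The configured policy is
 * invoked through internals->xmit_hash to pick an output slave, e.g. (a
 * sketch of the call made in bond_ethdev_tx_burst_balance() further down):
 *
 *	op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
 */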
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}
static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}
uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

	uint32_t hash = ether_hash(eth_hdr);

	return (hash ^= hash >> 8) % slave_count;
}
uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
	uint32_t hash, l3hash = 0;

	hash = ether_hash(eth_hdr);

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv4_hash(ipv4_hdr);

	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);
	}

	hash = hash ^ l3hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}
uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

	struct udp_hdr *udp_hdr = NULL;
	struct tcp_hdr *tcp_hdr = NULL;
	uint32_t hash, l3hash = 0, l4hash = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		size_t ip_hdr_offset;

		l3hash = ipv4_hash(ipv4_hdr);

		/* there is no L4 header in fragmented packet */
		if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
			ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

			if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}
	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);

		if (ipv6_hdr->proto == IPPROTO_TCP) {
			tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(tcp_hdr);
		} else if (ipv6_hdr->proto == IPPROTO_UDP) {
			udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(udp_hdr);
		}
	}

	hash = l3hash ^ l4hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}
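
/*
 * Mode 5 (TLB) bookkeeping: a periodic alarm callback reorders the slaves
 * by the bandwidth they have left (bwg_slave below), so that transmit
 * bursts are steered to the least loaded slave first.
 */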
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint8_t slave;
};
void
bond_tlb_activate_slave(struct bond_dev_private *internals)
{
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}
static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;

	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}
static void
bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint8_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint8_t i, slave_id;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats)
			tlb_last_obytets[slave_id] = slave_stats.obytes;
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}
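
/*
 * TLB TX: walk the slaves in tlb_slaves_order (least loaded first),
 * rewriting the source MAC of locally originated frames to the
 * transmitting slave's address before handing off each sub-burst.
 */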
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint8_t i, j;

	uint8_t num_of_slaves = internals->active_slave_count;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
			sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}
void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}
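
/*
 * ALB (mode 6) TX: ARP packets are steered via the mode 6 client table so
 * each client keeps talking to one slave, pending ARP update packets are
 * flushed when the table is marked dirty (mode6.ntt), and all other
 * traffic falls back to the TLB policy above.
 */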
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint8_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
		num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
	}

	return num_tx_total;
}
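
/*
 * Balance (mode 2) TX: hash each packet with the configured transmit
 * policy to pick its output slave, then burst each slave's sub-array;
 * packets a slave could not accept are compacted to the tail of bufs[].
 */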
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

	int i, op_slave_id;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate each slave's mbuf array with the packets to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		/* Select output slave using hash based on xmit policy */
		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

		/* Populate slave mbuf arrays with mbufs for that slave */
		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += slave_tx_fail_count;
				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						slave_tx_fail_count * sizeof(bufs[0]));
			}

			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
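
/*
 * 802.3ad (mode 4) TX: before user traffic is distributed, any slow
 * packets (LACPDUs) queued by the state machines on each slave's tx_ring
 * are drained and placed at the front of that slave's buffer; user packets
 * are then hashed only across slaves that are currently distributing.
 */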
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, j, op_slave_idx;
	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

	/* Allocate additional packets in case 8023AD mode. */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

	/* Total amount of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
	/* Slow packets placed in each slave */
	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];

		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
		slave_nb_pkts[i] = slave_slow_nb_pkts[i];

		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
			slave_bufs[i][j] = slow_pkts[j];

		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate slaves mbuf with the packets which are to be sent on it */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave. Use only
			 * slaves that are currently distributing. */
			uint8_t slave_offset = distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		/* If tx burst fails drop slow packets */
		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}
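
/*
 * Broadcast (mode 3) TX: every packet is sent on every active slave. The
 * mbuf reference count is bumped once per extra slave so that each
 * transmit consumes its own reference.
 */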
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t tx_failed_flag = 0, num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}
void
link_properties_set(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_link *slave_dev_link)
{
	struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	if (slave_dev_link->link_status &&
		bonded_eth_dev->data->dev_started) {
		bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
		bonded_dev_link->link_speed = slave_dev_link->link_speed;

		internals->link_props_set = 1;
	}
}

void
link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	memset(&(bonded_eth_dev->data->dev_link), 0,
			sizeof(bonded_eth_dev->data->dev_link));

	internals->link_props_set = 0;
}

int
link_properties_valid(struct rte_eth_link *bonded_dev_link,
		struct rte_eth_link *slave_dev_link)
{
	if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
		bonded_dev_link->link_speed != slave_dev_link->link_speed)
		return -1;

	return 0;
}
int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
		return -1;
	}

	if (dst_mac_addr == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	ether_addr_copy(mac_addr, dst_mac_addr);
	return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (new_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	/* If new MAC is different to current MAC then update */
	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

	return 0;
}
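
/*
 * Propagate the bonded device's MAC to the slaves. In the broadcast-style
 * modes every slave takes the bond's MAC; in mode 4 the update is delegated
 * to the 802.3ad code; in the remaining modes only the current primary
 * carries the bond's MAC while the other slaves keep their own persisted
 * addresses.
 */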
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	int i;

	/* Update slave devices MAC addresses */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++) {
			if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (mac_address_set(&rte_eth_devices[internals->primary_port],
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					return -1;
				}
			} else {
				if (mac_address_set(
						&rte_eth_devices[internals->slaves[i].port_id],
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
					return -1;
				}
			}
		}
	}

	return 0;
}
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_8023AD:
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
		RTE_LOG(WARNING, PMD,
				"Using mode 4, it is necessary to do TX burst and RX burst "
				"at least every 100ms.\n");
		break;
	case BONDING_MODE_TLB:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_ALB:
		if (bond_mode_alb_enable(eth_dev) != 0)
			return -1;

		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}
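
/*
 * Bring a slave in line with the bonded device: stop it, mirror the bond's
 * RSS and interrupt configuration, reconfigure it with the bond's queue
 * counts, recreate its RX/TX queues, restart it, and finally sync its RETA
 * and initial link state.
 */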
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;

	int errval;
	uint16_t q_id;

	/* Stop slave */
	rte_eth_dev_stop(slave_eth_dev->data->port_id);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* If RSS is enabled for bonding, try to enable it for slaves */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
		if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
				!= 0) {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
		} else {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
		}

		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
	}

	/* Configure device */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			bonded_eth_dev->data->nb_rx_queues,
			bonded_eth_dev->data->nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Setup Rx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	/* If RSS is enabled for bonding, synchronize RETA */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		int i;
		struct bond_dev_private *internals;

		internals = bonded_eth_dev->data->dev_private;

		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
				errval = rte_eth_dev_rss_reta_update(
						slave_eth_dev->data->port_id,
						&internals->reta_conf[0],
						internals->slaves[i].reta_size);
				if (errval != 0) {
					RTE_LOG(WARNING, PMD,
							"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
							" RSS Configuration for bonding may be inconsistent.\n",
							slave_eth_dev->data->port_id, errval);
				}
				break;
			}
		}
	}

	/* If lsc interrupt is set, check initial slave's link status */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
				RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);

	return 0;
}
void
slave_remove(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	uint8_t i;

	for (i = 0; i < internals->slave_count; i++)
		if (internals->slaves[i].port_id ==
				slave_eth_dev->data->port_id)
			break;

	if (i < (internals->slave_count - 1))
		memmove(&internals->slaves[i], &internals->slaves[i + 1],
				sizeof(internals->slaves[0]) *
				(internals->slave_count - i - 1));

	internals->slave_count--;
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* If slave device doesn't support interrupts then we need to enable
	 * polling to monitor link status */
	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
		slave_details->link_status_poll_enabled = 1;

		if (!internals->link_status_polling_enabled) {
			internals->link_status_polling_enabled = 1;

			rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
					bond_ethdev_slave_link_status_change_monitor,
					(void *)&rte_eth_devices[internals->port_id]);
		}
	}

	slave_details->link_status_wait_to_complete = 0;
	/* Persist the slave's own MAC so it can be restored when the slave
	 * leaves the bond or stops being the primary */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct ether_addr));
}
void
bond_ethdev_primary_set(struct bond_dev_private *internals,
		uint8_t slave_port_id)
{
	int i;

	if (internals->active_slave_count < 1)
		internals->current_primary_port = slave_port_id;
	else
		/* Search bonded device slave ports for new proposed primary port */
		for (i = 0; i < internals->active_slave_count; i++) {
			if (internals->active_slaves[i] == slave_port_id)
				internals->current_primary_port = slave_port_id;
		}
}
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);

static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (check_for_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	eth_dev->data->dev_link.link_status = 0;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		return -1;
	}

	if (internals->user_defined_mac == 0) {
		struct ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			return -1;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			return -1;
		}
	}

	/* Update all slave devices MACs */
	if (mac_address_slaves_update(eth_dev) != 0)
		return -1;

	/* If bonded device is configured in promiscuous mode then re-apply config */
	if (internals->promiscuous_en)
		bond_ethdev_promiscuous_enable(eth_dev);

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		if (slave_configure(eth_dev,
				&(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
			RTE_BOND_LOG(ERR,
					"bonded port (%d) failed to reconfigure slave device (%d)",
					eth_dev->data->port_id, internals->slaves[i].port_id);
			return -1;
		}
	}

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB)
		bond_tlb_enable(internals);

	return 0;
}
static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
	uint8_t i;

	if (dev->data->rx_queues != NULL) {
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			rte_free(dev->data->rx_queues[i]);
			dev->data->rx_queues[i] = NULL;
		}
		dev->data->nb_rx_queues = 0;
	}

	if (dev->data->tx_queues != NULL) {
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			rte_free(dev->data->tx_queues[i]);
			dev->data->tx_queues[i] = NULL;
		}
		dev->data->nb_tx_queues = 0;
	}
}
void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	uint8_t i;

	if (internals->mode == BONDING_MODE_8023AD) {
		struct port *port;
		void *pkt = NULL;

		bond_mode_8023ad_stop(eth_dev);

		/* Discard all messages to/from mode 4 state machines */
		for (i = 0; i < internals->active_slave_count; i++) {
			port = &mode_8023ad_ports[internals->active_slaves[i]];

			RTE_VERIFY(port->rx_ring != NULL);
			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);

			RTE_VERIFY(port->tx_ring != NULL);
			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);
		}
	}

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB) {
		bond_tlb_disable(internals);
		for (i = 0; i < internals->active_slave_count; i++)
			tlb_last_obytets[internals->active_slaves[i]] = 0;
	}

	internals->active_slave_count = 0;
	internals->link_status_polling_enabled = 0;

	eth_dev->data->dev_link.link_status = 0;
	eth_dev->data->dev_started = 0;
}
void
bond_ethdev_close(struct rte_eth_dev *dev)
{
	bond_ethdev_free_queues(dev);
}

/* forward declaration */
static int bond_ethdev_configure(struct rte_eth_dev *dev);
static void
bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	dev_info->max_mac_addrs = 1;

	dev_info->max_rx_pktlen = (uint32_t)2048;

	dev_info->max_rx_queues = (uint16_t)128;
	dev_info->max_tx_queues = (uint16_t)512;

	dev_info->min_rx_bufsize = 0;
	dev_info->pci_dev = NULL;

	dev_info->rx_offload_capa = internals->rx_offload_capa;
	dev_info->tx_offload_capa = internals->tx_offload_capa;
	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;

	dev_info->reta_size = internals->reta_size;
}
static int
bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
					0, dev->data->numa_node);
	if (bd_rx_q == NULL)
		return -1;

	bd_rx_q->queue_id = rx_queue_id;
	bd_rx_q->dev_private = dev->data->dev_private;

	bd_rx_q->nb_rx_desc = nb_rx_desc;

	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
	bd_rx_q->mb_pool = mb_pool;

	dev->data->rx_queues[rx_queue_id] = bd_rx_q;

	return 0;
}

static int
bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_txconf *tx_conf)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
					0, dev->data->numa_node);

	if (bd_tx_q == NULL)
		return -1;

	bd_tx_q->queue_id = tx_queue_id;
	bd_tx_q->dev_private = dev->data->dev_private;

	bd_tx_q->nb_tx_desc = nb_tx_desc;
	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));

	dev->data->tx_queues[tx_queue_id] = bd_tx_q;

	return 0;
}
static void
bond_ethdev_rx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}

static void
bond_ethdev_tx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
	struct bond_dev_private *internals;

	/* Default value for polling slave found is true as we don't want to
	 * disable the polling thread if we cannot get the lock */
	int i, polling_slave_found = 1;

	if (cb_arg == NULL)
		return;

	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;

	if (!bonded_ethdev->data->dev_started ||
		!internals->link_status_polling_enabled)
		return;

	/* If device is currently being configured then don't check slaves link
	 * status, wait until next period */
	if (rte_spinlock_trylock(&internals->lock)) {
		if (internals->slave_count > 0)
			polling_slave_found = 0;

		for (i = 0; i < internals->slave_count; i++) {
			if (!internals->slaves[i].link_status_poll_enabled)
				continue;

			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
			polling_slave_found = 1;

			/* Update slave link status */
			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
					internals->slaves[i].link_status_wait_to_complete);

			/* if link status has changed since last checked then call lsc
			 * event callback */
			if (slave_ethdev->data->dev_link.link_status !=
					internals->slaves[i].last_link_status) {
				internals->slaves[i].last_link_status =
						slave_ethdev->data->dev_link.link_status;

				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
						RTE_ETH_EVENT_INTR_LSC,
						&bonded_ethdev->data->port_id);
			}
		}
		rte_spinlock_unlock(&internals->lock);
	}

	if (polling_slave_found)
		/* Set alarm to continue monitoring link status of slave ethdev's */
		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
				bond_ethdev_slave_link_status_change_monitor, cb_arg);
}
static int
bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
		int wait_to_complete)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	if (!bonded_eth_dev->data->dev_started ||
		internals->active_slave_count == 0) {
		bonded_eth_dev->data->dev_link.link_status = 0;
		return 0;
	} else {
		struct rte_eth_dev *slave_eth_dev;
		int i, link_up = 0;

		for (i = 0; i < internals->active_slave_count; i++) {
			slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];

			(*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
					wait_to_complete);
			if (slave_eth_dev->data->dev_link.link_status == 1) {
				link_up = 1;
				break;
			}
		}
		bonded_eth_dev->data->dev_link.link_status = link_up;
	}

	return 0;
}
static void
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_stats slave_stats;
	int i, j;

	for (i = 0; i < internals->slave_count; i++) {
		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);

		stats->ipackets += slave_stats.ipackets;
		stats->opackets += slave_stats.opackets;
		stats->ibytes += slave_stats.ibytes;
		stats->obytes += slave_stats.obytes;
		stats->imissed += slave_stats.imissed;
		stats->ierrors += slave_stats.ierrors;
		stats->oerrors += slave_stats.oerrors;
		stats->imcasts += slave_stats.imcasts;
		stats->rx_nombuf += slave_stats.rx_nombuf;

		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
			stats->q_opackets[j] += slave_stats.q_opackets[j];
			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
			stats->q_obytes[j] += slave_stats.q_obytes[j];
			stats->q_errors[j] += slave_stats.q_errors[j];
		}
	}
}

static void
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	for (i = 0; i < internals->slave_count; i++)
		rte_eth_stats_reset(internals->slaves[i].port_id);
}
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	int i;

	internals->promiscuous_en = 1;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_enable(internals->slaves[i].port_id);
		break;
	/* In mode 4 promiscuous mode is managed when a slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to the primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_enable(internals->current_primary_port);
	}
}

static void
bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	internals->promiscuous_en = 0;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_disable(internals->slaves[i].port_id);
		break;
	/* In mode 4 promiscuous mode is managed when a slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to the primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_disable(internals->current_primary_port);
	}
}
static void
bond_ethdev_delayed_lsc_propagation(void *arg)
{
	if (arg == NULL)
		return;

	_rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
			RTE_ETH_EVENT_INTR_LSC);
}
void
bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
		void *param)
{
	struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
	struct bond_dev_private *internals;
	struct rte_eth_link link;

	int i, valid_slave = 0;
	uint8_t active_pos;
	uint8_t lsc_flag = 0;

	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
		return;

	bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
	slave_eth_dev = &rte_eth_devices[port_id];

	if (check_for_bonded_ethdev(bonded_eth_dev))
		return;

	internals = bonded_eth_dev->data->dev_private;

	/* If the device isn't started don't handle interrupts */
	if (!bonded_eth_dev->data->dev_started)
		return;

	/* verify that port_id is a valid slave of bonded port */
	for (i = 0; i < internals->slave_count; i++) {
		if (internals->slaves[i].port_id == port_id) {
			valid_slave = 1;
			break;
		}
	}

	if (!valid_slave)
		return;

	/* Search for port in active port list */
	active_pos = find_slave_by_id(internals->active_slaves,
			internals->active_slave_count, port_id);

	rte_eth_link_get_nowait(port_id, &link);
	if (link.link_status) {
		if (active_pos < internals->active_slave_count)
			return;

		/* if no active slave ports then set this port to be primary port */
		if (internals->active_slave_count < 1) {
			/* If first active slave, then change link status */
			bonded_eth_dev->data->dev_link.link_status = 1;
			internals->current_primary_port = port_id;
			lsc_flag = 1;

			mac_address_slaves_update(bonded_eth_dev);

			/* Inherit eth dev link properties from first active slave */
			link_properties_set(bonded_eth_dev,
					&(slave_eth_dev->data->dev_link));
		}

		activate_slave(bonded_eth_dev, port_id);

		/* If user has defined the primary port then default to using it */
		if (internals->user_defined_primary_port &&
				internals->primary_port == port_id)
			bond_ethdev_primary_set(internals, port_id);
	} else {
		if (active_pos == internals->active_slave_count)
			return;

		/* Remove from active slave list */
		deactivate_slave(bonded_eth_dev, port_id);

		/* No active slaves, change link status to down and reset other
		 * link properties */
		if (internals->active_slave_count < 1) {
			lsc_flag = 1;
			bonded_eth_dev->data->dev_link.link_status = 0;

			link_properties_reset(bonded_eth_dev);
		}

		/* Update primary id, take first active slave from list or if none
		 * available set to -1 */
		if (port_id == internals->current_primary_port) {
			if (internals->active_slave_count > 0)
				bond_ethdev_primary_set(internals,
						internals->active_slaves[0]);
			else
				internals->current_primary_port = internals->primary_port;
		}
	}

	if (lsc_flag) {
		/* Cancel any possible outstanding interrupts if delays are enabled */
		if (internals->link_up_delay_ms > 0 ||
				internals->link_down_delay_ms > 0)
			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
					bonded_eth_dev);

		if (bonded_eth_dev->data->dev_link.link_status) {
			if (internals->link_up_delay_ms > 0)
				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC);
		} else {
			if (internals->link_down_delay_ms > 0)
				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC);
		}
	}
}
static int
bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	unsigned i, j;
	int result = 0;
	int slave_reta_size;
	unsigned reta_count;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	reta_count = reta_size / RTE_RETA_GROUP_SIZE;

	for (i = 0; i < reta_count; i++) {
		internals->reta_conf[i].mask = reta_conf[i].mask;
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
	}

	/* Fill rest of array */
	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
				sizeof(internals->reta_conf[0]) * reta_count);

	/* Propagate RETA over slaves */
	for (i = 0; i < (unsigned)internals->slave_count; i++) {
		slave_reta_size = internals->slaves[i].reta_size;
		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
				&internals->reta_conf[0], slave_reta_size);
		if (result < 0)
			return result;
	}

	return 0;
}

static int
bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	unsigned i, j;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];

	return 0;
}

static int
bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	int i, result = 0;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_rss_conf bond_rss_conf;

	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));

	/* Mask out hash functions the bonded device cannot support */
	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
	if (bond_rss_conf.rss_hf != 0)
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;

	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
			sizeof(internals->rss_key)) {
		/* A key length of zero selects the default 40-byte key */
		if (bond_rss_conf.rss_key_len == 0)
			bond_rss_conf.rss_key_len = 40;
		internals->rss_key_len = bond_rss_conf.rss_key_len;
		memcpy(internals->rss_key, bond_rss_conf.rss_key,
				internals->rss_key_len);
	}

	/* Propagate the new hash configuration to every slave */
	for (i = 0; i < internals->slave_count; i++) {
		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
				&bond_rss_conf);
		if (result < 0)
			return result;
	}

	return 0;
}
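
/*
 * Illustrative sketch, not part of the original driver: changing the RSS
 * hash functions and key on the bonded port at runtime. The 40-byte key
 * length matches this PMD's default; the key contents and bond_port_id are
 * arbitrary example values.
 */
static int
example_bond_rss_hash_setup(uint8_t bond_port_id)
{
	static uint8_t example_rss_key[40] = { 0x6D }; /* arbitrary key bytes */
	struct rte_eth_rss_conf rss_conf = {
		.rss_key = example_rss_key,
		.rss_key_len = sizeof(example_rss_key),
		/* Restrict hashing to TCP flows; bond_ethdev_rss_hash_update()
		 * masks this against what the bonded device supports */
		.rss_hf = ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV6_TCP,
	};

	return rte_eth_dev_rss_hash_update(bond_port_id, &rss_conf);
}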

static int
bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	rss_conf->rss_key_len = internals->rss_key_len;
	if (rss_conf->rss_key)
		memcpy(rss_conf->rss_key, internals->rss_key,
				internals->rss_key_len);

	return 0;
}

struct eth_dev_ops default_dev_ops = {
	.dev_start = bond_ethdev_start,
	.dev_stop = bond_ethdev_stop,
	.dev_close = bond_ethdev_close,
	.dev_configure = bond_ethdev_configure,
	.dev_infos_get = bond_ethdev_info,
	.rx_queue_setup = bond_ethdev_rx_queue_setup,
	.tx_queue_setup = bond_ethdev_tx_queue_setup,
	.rx_queue_release = bond_ethdev_rx_queue_release,
	.tx_queue_release = bond_ethdev_tx_queue_release,
	.link_update = bond_ethdev_link_update,
	.stats_get = bond_ethdev_stats_get,
	.stats_reset = bond_ethdev_stats_reset,
	.promiscuous_enable = bond_ethdev_promiscuous_enable,
	.promiscuous_disable = bond_ethdev_promiscuous_disable,
	.reta_update = bond_ethdev_rss_reta_update,
	.reta_query = bond_ethdev_rss_reta_query,
	.rss_hash_update = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get = bond_ethdev_rss_hash_conf_get
};
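
/*
 * Illustrative note, not part of the original driver: the generic ethdev API
 * dispatches through this ops table, so e.g.
 *
 *	struct rte_eth_rss_conf conf = { .rss_key = NULL };
 *	rte_eth_dev_rss_hash_conf_get(bond_port_id, &conf);
 *
 * on a bonded port lands in bond_ethdev_rss_hash_conf_get() above
 * (bond_port_id being a hypothetical bonded port id).
 */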

static int
bond_init(const char *name, const char *params)
{
	struct bond_dev_private *internals;
	struct rte_kvargs *kvlist;
	uint8_t bonding_mode, socket_id;
	int arg_count, port_id;

	RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);

	kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
	if (kvlist == NULL)
		return -1;

	/* Parse link bonding mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
				&bond_ethdev_parse_slave_mode_kvarg,
				&bonding_mode) != 0) {
			RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
					name);
			goto parse_error;
		}
	} else {
		RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
				"device %s\n", name);
		goto parse_error;
	}

	/* Parse socket id on which to create the bonding device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
	if (arg_count == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
				!= 0) {
			RTE_LOG(ERR, EAL, "Invalid socket id specified for "
					"bonded device %s\n", name);
			goto parse_error;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
				"bonded device %s\n", name);
		goto parse_error;
	} else {
		/* Default to the socket of the calling thread */
		socket_id = rte_socket_id();
	}

	/* Create link bonding eth device */
	port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
	if (port_id < 0) {
		RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
				"socket %u.\n", name, bonding_mode, socket_id);
		goto parse_error;
	}
	internals = rte_eth_devices[port_id].data->dev_private;
	internals->kvlist = kvlist;

	RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
			"socket %u.\n", name, port_id, bonding_mode, socket_id);
	return 0;

parse_error:
	rte_kvargs_free(kvlist);
	return -1;
}

static int
bond_uninit(const char *name)
{
	int ret;

	if (name == NULL)
		return -EINVAL;

	RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);

	/* Free link bonding eth device */
	ret = rte_eth_bond_free(name);
	if (ret < 0)
		RTE_LOG(ERR, EAL, "Failed to free %s\n", name);

	return ret;
}
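
/*
 * Illustrative note, not part of the original driver: the kvargs consumed by
 * bond_init() and bond_ethdev_configure() normally come from an EAL --vdev
 * string; the exact key names live in rte_eth_bond_private.h. A typical
 * invocation (key names shown here as assumptions) looks like:
 *
 *	--vdev 'eth_bond0,mode=1,slave=0000:02:00.0,slave=0000:02:00.1,
 *		primary=0000:02:00.0,socket_id=0'
 *
 * The mode and at least one slave are required; the MAC address, transmit
 * policy, LSC polling interval and link up/down delays are optional and are
 * handled one by one in bond_ethdev_configure() below.
 */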

/* This step resolves the slave port ids, so it must run only after all the
 * other physical and virtual devices have been allocated */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	char *name = dev->data->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	int arg_count;
	uint8_t port_id = dev - rte_eth_devices;

	static const uint8_t default_rss_key[40] = {
		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
		0xBE, 0xAC, 0x01, 0xFA
	};

	unsigned i, j;

	/* If RSS is enabled, fill table and key with default values */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
		memcpy(internals->rss_key, default_rss_key,
				sizeof(default_rss_key));

		/* Spread all RX queues round-robin across the whole table */
		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] =
						j % dev->data->nb_rx_queues;
		}
	}

	/* If there is no kvlist, this bonded device was created through the
	 * bonding API and there are no device arguments to parse */
	if (kvlist == NULL)
		return 0;

	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg,
				&bond_mac) < 0) {
			RTE_LOG(INFO, EAL, "Invalid mac address for bonded "
					"device %s\n", name);
			return -1;
		}
		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
			RTE_LOG(ERR, EAL, "Failed to set mac address on bonded "
					"device %s\n", name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL, "MAC address can be specified only once for "
				"bonded device %s\n", name);
		return -1;
	}

	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg,
				&xmit_policy) != 0) {
			RTE_LOG(INFO, EAL, "Invalid xmit policy specified for "
					"bonded device %s\n", name);
			return -1;
		}
		/* Set balance mode transmit policy */
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
			RTE_LOG(ERR, EAL, "Failed to set balance xmit policy on "
					"bonded device %s\n", name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL, "Transmit policy can be specified only once for "
				"bonded device %s\n", name);
		return -1;
	}

	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
			RTE_LOG(ERR, EAL, "Failed to parse slave ports for bonded device %s\n",
					name);
			return -1;
		}

		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0)
				RTE_LOG(ERR, EAL, "Failed to add port %d as slave to bonded device %s\n",
						slave_ports.slaves[i], name);
		}
	} else {
		RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
		return -1;
	}

	/* Parse/set primary slave port id */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint8_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
			RTE_LOG(INFO, EAL, "Invalid primary slave port id specified for bonded device %s\n",
					name);
			return -1;
		}

		/* Set primary slave port id */
		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id) != 0) {
			RTE_LOG(ERR, EAL, "Failed to set primary slave port %d on bonded device %s\n",
					primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL, "Primary slave can be specified only once for bonded device %s\n",
				name);
		return -1;
	}

	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_LOG(INFO, EAL, "Invalid lsc polling interval value specified for bonded"
					" device %s\n", name);
			return -1;
		}

		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_LOG(ERR, EAL, "Failed to set lsc monitor polling interval (%u ms) on"
					" bonded device %s\n", lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL, "LSC polling interval can be specified only once for bonded"
				" device %s\n", name);
		return -1;
	}

	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_LOG(INFO, EAL, "Invalid link up propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL, "Failed to set link up propagation delay (%u ms) on bonded"
					" device %s\n", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL, "Link up propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_LOG(INFO, EAL, "Invalid link down propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL, "Failed to set link down propagation delay (%u ms) on"
					" bonded device %s\n", link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL, "Link down propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	return 0;
}

static struct rte_driver bond_drv = {
	.name = "eth_bond",
	.type = PMD_VDEV,
	.init = bond_init,
	.uninit = bond_uninit,
};

PMD_REGISTER_DRIVER(bond_drv);
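
/*
 * Illustrative sketch, not part of the original driver: the same wiring that
 * bond_ethdev_configure() derives from kvargs can be done directly through
 * the bonding API when no --vdev arguments are used. The device name, slave
 * port ids (0 and 1) and mode below are hypothetical.
 */
static int
example_bond_api_setup(void)
{
	int bond_port;

	bond_port = rte_eth_bond_create("eth_bond_test",
			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
	if (bond_port < 0)
		return bond_port;

	/* Attach two previously probed ports as slaves */
	if (rte_eth_bond_slave_add(bond_port, 0) != 0 ||
			rte_eth_bond_slave_add(bond_port, 1) != 0)
		return -1;

	/* Prefer slave 0 whenever its link is up */
	return rte_eth_bond_primary_set(bond_port, 0);
}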