4 * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <netinet/in.h>
37 #include <rte_malloc.h>
38 #include <rte_ethdev.h>
42 #include <rte_devargs.h>
43 #include <rte_kvargs.h>
45 #include <rte_alarm.h>
46 #include <rte_cycles.h>
48 #include "rte_eth_bond.h"
49 #include "rte_eth_bond_private.h"
50 #include "rte_eth_bond_8023ad_private.h"
/* Interval (ms) between TLB slave-reordering alarm callbacks. */
52 #define REORDER_PERIOD_MS 10
/* Fold L4 source/destination ports into one hash value; works for both
 * tcp_hdr and udp_hdr since both expose src_port/dst_port. */
54 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
56 /* Table for statistics in mode 5 TLB */
/* Snapshot of each slave's output-byte counter from the previous reorder
 * period, indexed by slave port id (see bond_ethdev_update_tlb_slave_cb). */
57 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
/*
 * get_vlan_offset - skip up to two stacked VLAN tags following the
 * Ethernet header.  On exit *proto holds the encapsulated EtherType
 * (big endian) and the computed offset is 0, 4 or 8 bytes past the
 * Ethernet header where the payload begins.
 * NOTE(review): the return statement falls outside this excerpt;
 * presumably it returns vlan_offset - confirm against the full file.
 */
60 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
62 size_t vlan_offset = 0;
64 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
	/* First VLAN tag sits immediately after the Ethernet header. */
65 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
67 vlan_offset = sizeof(struct vlan_hdr);
68 *proto = vlan_hdr->eth_proto;
	/* QinQ: a second tag may follow the first. */
70 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
71 vlan_hdr = vlan_hdr + 1;
72 *proto = vlan_hdr->eth_proto;
73 vlan_offset += sizeof(struct vlan_hdr);
/*
 * Round-robin (mode 0) RX: poll every active slave in order, appending
 * what each delivers into bufs[] until nb_pkts packets are gathered or
 * all slaves have been polled once.
 */
80 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
82 struct bond_dev_private *internals;
84 uint16_t num_rx_slave = 0;
85 uint16_t num_rx_total = 0;
89 /* Cast to structure, containing bonded device's port id and queue id */
90 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
92 internals = bd_rx_q->dev_private;
95 for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
96 /* Offset of pointer to *bufs increases as packets are received
97 * from other slaves */
98 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
99 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
	/* Shrink the remaining budget by what this slave delivered. */
101 num_rx_total += num_rx_slave;
102 nb_pkts -= num_rx_slave;
/*
 * Active/backup (mode 1) RX: receive only from the current primary
 * slave; backup slaves are ignored until fail-over promotes one.
 */
110 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
113 struct bond_dev_private *internals;
115 /* Cast to structure, containing bonded device's port id and queue id */
116 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
118 internals = bd_rx_q->dev_private;
120 return rte_eth_rx_burst(internals->current_primary_port,
121 bd_rx_q->queue_id, bufs, nb_pkts);
/*
 * 802.3ad (mode 4) RX: collect packets from each active slave, then
 * filter out of bufs[]: LACP slow-protocol frames (handed to the mode 4
 * state machine), frames from slaves not in COLLECTING state, and -
 * unless the bond is promiscuous - frames whose destination MAC is not
 * the bond's.  Removed frames are compacted out of bufs[] in place.
 */
125 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
128 /* Cast to structure, containing bonded device's port id and queue id */
129 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
130 struct bond_dev_private *internals = bd_rx_q->dev_private;
131 struct ether_addr bond_mac;
133 struct ether_hdr *hdr;
135 const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
136 uint16_t num_rx_total = 0; /* Total number of received packets */
137 uint8_t slaves[RTE_MAX_ETHPORTS];
140 uint8_t collecting; /* current slave collecting status */
141 const uint8_t promisc = internals->promiscuous_en;
144 rte_eth_macaddr_get(internals->port_id, &bond_mac);
145 /* Copy slave list to protect against slave up/down changes during rx
147 slave_count = internals->active_slave_count;
148 memcpy(slaves, internals->active_slaves,
149 sizeof(internals->active_slaves[0]) * slave_count);
151 for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
153 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);
155 /* Read packets from this slave */
156 num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
157 &bufs[num_rx_total], nb_pkts - num_rx_total);
	/* Prefetch the first couple of fresh mbufs before inspecting them. */
159 for (k = j; k < 2 && k < num_rx_total; k++)
160 rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
162 /* Handle slow protocol packets. */
163 while (j < num_rx_total) {
164 if (j + 3 < num_rx_total)
165 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
167 hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
168 /* Remove packet from array if it is slow packet or slave is not
169 * in collecting state or bonding interface is not in promiscuous
170 * mode and packet address does not match. */
171 if (unlikely(hdr->ether_type == ether_type_slow_be ||
172 !collecting || (!promisc &&
173 !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
	/* LACP/slow frame: ownership passes to the mode 4 handler;
	 * otherwise the frame is simply dropped. */
175 if (hdr->ether_type == ether_type_slow_be) {
176 bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
179 rte_pktmbuf_free(bufs[j]);
181 /* Packet is managed by mode 4 or dropped, shift the array */
183 if (j < num_rx_total) {
184 memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
/* Global burst counters used by the mode 6 (ALB) debug tracing below;
 * their addresses also act as RX/TX direction tags in update_client_stats. */
195 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
196 uint32_t burstnumberRX;
197 uint32_t burstnumberTX;
199 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
/*
 * arp_op_name - write a human-readable name for an ARP opcode into buf.
 * Buffer sizes are taken from the string literals themselves; the caller
 * must provide room for the longest name ("Peer Identify Request").
 */
202 arp_op_name(uint16_t arp_op, char *buf)
206 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
209 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
211 case ARP_OP_REVREQUEST:
212 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
213 "Reverse ARP Request");
215 case ARP_OP_REVREPLY:
216 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
217 "Reverse ARP Reply");
219 case ARP_OP_INVREQUEST:
220 snprintf(buf, sizeof("Peer Identify Request"), "%s",
221 "Peer Identify Request");
223 case ARP_OP_INVREPLY:
224 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
225 "Peer Identify Reply");
	/* Unrecognised opcode. */
230 snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
/* Maximum length of a dotted-quad IPv4 string, including the NUL. */
234 #define MaxIPv4String 16
/*
 * ipv4_addr_to_dot - format a big-endian IPv4 address as "a.b.c.d"
 * into buf (at most buf_size bytes, NUL-terminated by snprintf).
 */
236 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
240 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
241 snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
242 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
246 #define MAX_CLIENTS_NUMBER 128
/* Number of entries currently in use in client_stats[]. */
247 uint8_t active_clients;
/* Per-client RX/TX packet counters for mode 6 (ALB) debug statistics,
 * keyed by (ipv4_addr, port) - see update_client_stats(). */
248 struct client_stats_t {
251 uint32_t ipv4_rx_packets;
252 uint32_t ipv4_tx_packets;
254 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
/*
 * update_client_stats - bump the RX or TX packet counter for the client
 * identified by (addr, port).  Direction is encoded by passing the
 * address of burstnumberRX (RX) or burstnumberTX (TX) as TXorRXindicator.
 * Unknown clients are appended at index active_clients.
 */
257 update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
261 for (; i < MAX_CLIENTS_NUMBER; i++) {
262 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
263 /* Just update RX packets number for this client */
264 if (TXorRXindicator == &burstnumberRX)
265 client_stats[i].ipv4_rx_packets++;
267 client_stats[i].ipv4_tx_packets++;
271 /* We have a new client. Insert him to the table, and increment stats */
272 if (TXorRXindicator == &burstnumberRX)
273 client_stats[active_clients].ipv4_rx_packets++;
275 client_stats[active_clients].ipv4_tx_packets++;
276 client_stats[active_clients].ipv4_addr = addr;
277 client_stats[active_clients].port = port;
/*
 * MODE6_DEBUG - log one traced packet at PMD DEBUG level: an info tag,
 * source/destination MAC and IP strings, ARP op name, slave port and the
 * current burst counter.  The macro body is split across backslash
 * continuation lines; do not insert anything between them.
 */
282 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
283 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
284 RTE_LOG(DEBUG, PMD, \
287 "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
289 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
295 eth_h->s_addr.addr_bytes[0], \
296 eth_h->s_addr.addr_bytes[1], \
297 eth_h->s_addr.addr_bytes[2], \
298 eth_h->s_addr.addr_bytes[3], \
299 eth_h->s_addr.addr_bytes[4], \
300 eth_h->s_addr.addr_bytes[5], \
302 eth_h->d_addr.addr_bytes[0], \
303 eth_h->d_addr.addr_bytes[1], \
304 eth_h->d_addr.addr_bytes[2], \
305 eth_h->d_addr.addr_bytes[3], \
306 eth_h->d_addr.addr_bytes[4], \
307 eth_h->d_addr.addr_bytes[5], \
/*
 * mode6_debug - inspect one Ethernet frame for ALB (mode 6) tracing:
 * skips any VLAN tags, formats IPv4 addresses (and, with full ALB debug,
 * ARP addresses and op name), emits a MODE6_DEBUG log line and feeds the
 * per-client statistics table.
 * Fix: restored "&ether_type" - the source had been mangled by HTML
 * entity decoding ("&eth" -> U+00F0), which made it reference an
 * undeclared identifier.
 */
314 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
315 uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
317 struct ipv4_hdr *ipv4_h;
318 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
319 struct arp_hdr *arp_h;
326 uint16_t ether_type = eth_h->ether_type;
327 uint16_t offset = get_vlan_offset(eth_h, &ether_type);
329 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
330 snprintf(buf, 16, "%s", info);
333 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
	/* L3 header starts after the Ethernet header plus VLAN tags. */
334 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
335 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
336 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
337 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
338 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
340 update_client_stats(ipv4_h->src_addr, port, burstnumber);
342 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
343 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
344 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
345 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
346 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
347 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
348 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
/*
 * ALB (mode 6) RX: receive a burst via the round-robin RX path, then
 * hand every ARP frame to the ALB module so it can learn/answer client
 * mappings; optionally traces ARP and IPv4 frames when ALB debug is on.
 * Fix: restored "&ether_type" - the source had been mangled by HTML
 * entity decoding ("&eth" -> U+00F0).
 */
355 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
357 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
358 struct bond_dev_private *internals = bd_tx_q->dev_private;
359 struct ether_hdr *eth_h;
360 uint16_t ether_type, offset;
361 uint16_t nb_recv_pkts;
364 nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
366 for (i = 0; i < nb_recv_pkts; i++) {
367 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
368 ether_type = eth_h->ether_type;
369 offset = get_vlan_offset(eth_h, &ether_type);
371 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
372 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
373 mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
375 bond_mode_alb_arp_recv(eth_h, offset, internals);
377 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
378 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
379 mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
/*
 * Round-robin (mode 0) TX: spread the burst over all active slaves,
 * one packet per slave in rotation, then transmit each slave's batch.
 * Packets a slave fails to send are moved to the tail of bufs[] so the
 * caller can retry them.
 * NOTE(review): slave_idx is function-static shared state - rotation
 * position is global across queues/threads; confirm single-threaded
 * TX per device is assumed here.
 */
387 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
390 struct bond_dev_private *internals;
391 struct bond_tx_queue *bd_tx_q;
393 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
394 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
396 uint8_t num_of_slaves;
397 uint8_t slaves[RTE_MAX_ETHPORTS];
399 uint16_t num_tx_total = 0, num_tx_slave;
401 static int slave_idx = 0;
402 int i, cslave_idx = 0, tx_fail_total = 0;
404 bd_tx_q = (struct bond_tx_queue *)queue;
405 internals = bd_tx_q->dev_private;
407 /* Copy slave list to protect against slave up/down changes during tx
409 num_of_slaves = internals->active_slave_count;
410 memcpy(slaves, internals->active_slaves,
411 sizeof(internals->active_slaves[0]) * num_of_slaves);
413 if (num_of_slaves < 1)
416 /* Populate slaves mbuf with which packets are to be sent on it */
417 for (i = 0; i < nb_pkts; i++) {
418 cslave_idx = (slave_idx + i) % num_of_slaves;
419 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
422 /* increment current slave index so the next call to tx burst starts on the
424 slave_idx = ++cslave_idx;
426 /* Send packet burst on each slave device */
427 for (i = 0; i < num_of_slaves; i++) {
428 if (slave_nb_pkts[i] > 0) {
429 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
430 slave_bufs[i], slave_nb_pkts[i]);
432 /* if tx burst fails move packets to end of bufs */
433 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
434 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
436 tx_fail_total += tx_fail_slave;
438 memcpy(&bufs[nb_pkts - tx_fail_total],
439 &slave_bufs[i][num_tx_slave],
440 tx_fail_slave * sizeof(bufs[0]));
442 num_tx_total += num_tx_slave;
/*
 * Active/backup (mode 1) TX: transmit the whole burst on the current
 * primary slave only; with no active slaves nothing can be sent.
 */
450 bond_ethdev_tx_burst_active_backup(void *queue,
451 struct rte_mbuf **bufs, uint16_t nb_pkts)
453 struct bond_dev_private *internals;
454 struct bond_tx_queue *bd_tx_q;
456 bd_tx_q = (struct bond_tx_queue *)queue;
457 internals = bd_tx_q->dev_private;
459 if (internals->active_slave_count < 1)
462 return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
/*
 * ether_hash - XOR-fold the 6-byte source and destination MACs, read as
 * three 16-bit words each, into a single 16-bit value (L2 flow hash).
 * Uses unaligned_uint16_t since MAC addresses need not be 2-byte aligned.
 */
466 static inline uint16_t
467 ether_hash(struct ether_hdr *eth_hdr)
469 unaligned_uint16_t *word_src_addr =
470 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
471 unaligned_uint16_t *word_dst_addr =
472 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
474 return (word_src_addr[0] ^ word_dst_addr[0]) ^
475 (word_src_addr[1] ^ word_dst_addr[1]) ^
476 (word_src_addr[2] ^ word_dst_addr[2]);
/* ipv4_hash - XOR of (big-endian) source and destination IPv4 addresses. */
479 static inline uint32_t
480 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
482 return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr);
/*
 * ipv6_hash - XOR-fold the 128-bit source and destination IPv6 addresses,
 * read as four 32-bit words each, into one 32-bit value.  Unaligned reads
 * because the addresses follow a 14-byte Ethernet header.
 */
485 static inline uint32_t
486 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
488 unaligned_uint32_t *word_src_addr =
489 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
490 unaligned_uint32_t *word_dst_addr =
491 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
493 return (word_src_addr[0] ^ word_dst_addr[0]) ^
494 (word_src_addr[1] ^ word_dst_addr[1]) ^
495 (word_src_addr[2] ^ word_dst_addr[2]) ^
496 (word_src_addr[3] ^ word_dst_addr[3]);
/*
 * xmit_l2_hash - layer-2 transmit policy: map a packet to a slave index
 * in [0, slave_count) from the MAC-pair hash, folded once by 8 bits.
 */
500 xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
502 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
504 uint32_t hash = ether_hash(eth_hdr);
506 return (hash ^= hash >> 8) % slave_count;
/*
 * xmit_l23_hash - layer-2+3 transmit policy: combine the MAC-pair hash
 * with an IPv4 or IPv6 address hash (0 for non-IP traffic) and map the
 * result to a slave index in [0, slave_count).  VLAN tags are skipped
 * before locating the L3 header.
 */
510 xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
512 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
513 uint16_t proto = eth_hdr->ether_type;
514 size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
515 uint32_t hash, l3hash = 0;
517 hash = ether_hash(eth_hdr);
519 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
520 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
521 ((char *)(eth_hdr + 1) + vlan_offset);
522 l3hash = ipv4_hash(ipv4_hdr);
524 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
525 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
526 ((char *)(eth_hdr + 1) + vlan_offset);
527 l3hash = ipv6_hash(ipv6_hdr);
	/* Mix L2 and L3 contributions before reducing to a slave index. */
530 hash = hash ^ l3hash;
534 return hash % slave_count;
/*
 * xmit_l34_hash - layer-3+4 transmit policy: combine an IP address hash
 * with a TCP/UDP port hash and map to a slave index in [0, slave_count).
 * VLAN tags are skipped; for IPv4 the variable IHL is honoured when
 * locating the L4 header, for IPv6 the L4 header is taken to follow the
 * fixed 40-byte header (extension headers are not walked here).
 */
538 xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
540 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
541 uint16_t proto = eth_hdr->ether_type;
542 size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
544 struct udp_hdr *udp_hdr = NULL;
545 struct tcp_hdr *tcp_hdr = NULL;
546 uint32_t hash, l3hash = 0, l4hash = 0;
548 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
549 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
550 ((char *)(eth_hdr + 1) + vlan_offset);
551 size_t ip_hdr_offset;
553 l3hash = ipv4_hash(ipv4_hdr);
	/* IHL is in 32-bit words; convert to a byte offset. */
555 ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
558 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
559 tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
561 l4hash = HASH_L4_PORTS(tcp_hdr);
562 } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
563 udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
565 l4hash = HASH_L4_PORTS(udp_hdr);
567 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
568 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
569 ((char *)(eth_hdr + 1) + vlan_offset);
570 l3hash = ipv6_hash(ipv6_hdr);
572 if (ipv6_hdr->proto == IPPROTO_TCP) {
573 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
574 l4hash = HASH_L4_PORTS(tcp_hdr);
575 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
576 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
577 l4hash = HASH_L4_PORTS(udp_hdr);
581 hash = l3hash ^ l4hash;
585 return hash % slave_count;
/* Remaining-bandwidth estimate for one slave in the current reorder
 * period (integer part and remainder), as filled by bandwidth_left()
 * and sorted by bandwidth_cmp(). */
589 uint64_t bwg_left_int;
590 uint64_t bwg_left_remainder;
/*
 * bond_tlb_activate_slave - reset the TLB byte-count snapshots for all
 * active slaves so the next reorder period starts from a clean baseline.
 */
595 bond_tlb_activate_slave(struct bond_dev_private *internals) {
598 for (i = 0; i < internals->active_slave_count; i++) {
599 tlb_last_obytets[internals->active_slaves[i]] = 0;
/*
 * bandwidth_cmp - qsort comparator for struct bwg_slave: orders by
 * bwg_left_int (b minus a, i.e. descending remaining bandwidth), with
 * bwg_left_remainder computed as the tie-breaker.
 * NOTE(review): the return statements fall outside this excerpt.
 */
604 bandwidth_cmp(const void *a, const void *b)
606 const struct bwg_slave *bwg_a = a;
607 const struct bwg_slave *bwg_b = b;
608 int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
609 int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
610 (int64_t)bwg_a->bwg_left_remainder;
/*
 * bandwidth_left - estimate the bandwidth still available on a slave:
 * convert the link speed (Mbps) to bytes, scale by the number of elapsed
 * update periods, subtract the observed load and store quotient and
 * remainder into *bwg_slave for later sorting.
 */
624 bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
625 struct bwg_slave *bwg_slave)
627 struct rte_eth_link link_status;
629 rte_eth_link_get(port_id, &link_status);
	/* Mbps -> bytes per second. */
630 uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
633 link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
634 bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
635 bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
/*
 * bond_ethdev_update_tlb_slave_cb - periodic alarm callback for TLB
 * (mode 5): sample each active slave's TX byte counter, estimate its
 * remaining bandwidth, sort slaves by that estimate into
 * tlb_slaves_order, then re-arm itself REORDER_PERIOD_MS later.
 */
639 bond_ethdev_update_tlb_slave_cb(void *arg)
641 struct bond_dev_private *internals = arg;
642 struct rte_eth_stats slave_stats;
643 struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
647 uint8_t update_stats = 0;
650 internals->slave_update_idx++;
	/* Refresh the stored byte-count snapshots once per full period. */
653 if (internals->slave_update_idx >= REORDER_PERIOD_MS)
656 for (i = 0; i < internals->active_slave_count; i++) {
657 slave_id = internals->active_slaves[i];
658 rte_eth_stats_get(slave_id, &slave_stats);
	/* Bytes transmitted since the last snapshot = current load. */
659 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
660 bandwidth_left(slave_id, tx_bytes,
661 internals->slave_update_idx, &bwg_array[i]);
662 bwg_array[i].slave = slave_id;
665 tlb_last_obytets[slave_id] = slave_stats.obytes;
669 if (update_stats == 1)
670 internals->slave_update_idx = 0;
	/* Most idle slave first. */
673 qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
674 for (i = 0; i < slave_count; i++)
675 internals->tlb_slaves_order[i] = bwg_array[i].slave;
677 rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
678 (struct bond_dev_private *)internals);
/*
 * TLB (mode 5) TX: walk slaves in tlb_slaves_order (most idle first);
 * for each, rewrite the source MAC of pending frames from the bond's
 * primary MAC to that slave's MAC, then transmit as many as it will
 * take, spilling the remainder to the next slave.
 * Fix: restored "&ether_hdr->s_addr" (twice) - the source had been
 * mangled by HTML entity decoding ("&eth" -> U+00F0).
 */
682 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
684 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
685 struct bond_dev_private *internals = bd_tx_q->dev_private;
687 struct rte_eth_dev *primary_port =
688 &rte_eth_devices[internals->primary_port];
689 uint16_t num_tx_total = 0;
692 uint8_t num_of_slaves = internals->active_slave_count;
693 uint8_t slaves[RTE_MAX_ETHPORTS];
695 struct ether_hdr *ether_hdr;
696 struct ether_addr primary_slave_addr;
697 struct ether_addr active_slave_addr;
699 if (num_of_slaves < 1)
702 memcpy(slaves, internals->tlb_slaves_order,
703 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
706 ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
	/* Warm the cache for the first few frames before the main loop. */
709 for (i = 0; i < 3; i++)
710 rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
713 for (i = 0; i < num_of_slaves; i++) {
714 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
715 for (j = num_tx_total; j < nb_pkts; j++) {
717 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
719 ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
	/* Only frames still carrying the bond (primary) MAC are rewritten. */
720 if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
721 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
722 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
723 mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
727 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
728 bufs + num_tx_total, nb_pkts - num_tx_total);
730 if (num_tx_total == nb_pkts)
/* Stop the periodic TLB reordering callback for this bonded device. */
738 bond_tlb_disable(struct bond_dev_private *internals)
740 rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
/* Kick off TLB slave reordering; the callback re-arms itself thereafter. */
744 bond_tlb_enable(struct bond_dev_private *internals)
746 bond_ethdev_update_tlb_slave_cb(internals);
/*
 * ALB (mode 6) TX: route ARP frames through the ALB module (which picks
 * the slave and we then stamp that slave's MAC as source), queue all
 * other traffic for the TLB policy, and - when the client table is
 * dirty (mode6.ntt) - synthesise and send gratuitous ARP updates to
 * known clients.  Unsent frames are compacted to the tail of bufs[].
 * Fix: restored "&ether_type" and "&eth_h->s_addr" - the source had
 * been mangled by HTML entity decoding ("&eth" -> U+00F0).
 */
750 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
752 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
753 struct bond_dev_private *internals = bd_tx_q->dev_private;
755 struct ether_hdr *eth_h;
756 uint16_t ether_type, offset;
758 struct client_data *client_info;
761 * We create transmit buffers for every slave and one additional to send
762 * through tlb. In worst case every packet will be send on one port.
764 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
765 uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
768 * We create separate transmit buffers for update packets as they wont be
769 * counted in num_tx_total.
771 struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
772 uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
774 struct rte_mbuf *upd_pkt;
777 uint16_t num_send, num_not_send = 0;
778 uint16_t num_tx_total = 0;
783 /* Search tx buffer for ARP packets and forward them to alb */
784 for (i = 0; i < nb_pkts; i++) {
785 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
786 ether_type = eth_h->ether_type;
787 offset = get_vlan_offset(eth_h, &ether_type);
789 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
790 slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
792 /* Change src mac in eth header */
793 rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
795 /* Add packet to slave tx buffer */
796 slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
797 slave_bufs_pkts[slave_idx]++;
799 /* If packet is not ARP, send it with TLB policy */
800 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
802 slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
806 /* Update connected client ARP tables */
807 if (internals->mode6.ntt) {
808 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
809 client_info = &internals->mode6.client_table[i];
811 if (client_info->in_use) {
812 /* Allocate new packet to send ARP update on current slave */
813 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
814 if (upd_pkt == NULL) {
815 RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
	/* Ethernet + ARP plus any VLAN tags the client was learned with. */
818 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
819 + client_info->vlan_count * sizeof(struct vlan_hdr);
820 upd_pkt->data_len = pkt_size;
821 upd_pkt->pkt_len = pkt_size;
823 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
826 /* Add packet to update tx buffer */
827 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
828 update_bufs_pkts[slave_idx]++;
831 internals->mode6.ntt = 0;
834 /* Send ARP packets on proper slaves */
835 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
836 if (slave_bufs_pkts[i] > 0) {
837 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
838 slave_bufs[i], slave_bufs_pkts[i]);
	/* Return unsent ARP frames to the caller at the tail of bufs[]. */
839 for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
840 bufs[nb_pkts - 1 - num_not_send - j] =
841 slave_bufs[i][nb_pkts - 1 - j];
844 num_tx_total += num_send;
845 num_not_send += slave_bufs_pkts[i] - num_send;
847 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
848 /* Print TX stats including update packets */
849 for (j = 0; j < slave_bufs_pkts[i]; j++) {
850 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
851 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
857 /* Send update packets on proper slaves */
858 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
859 if (update_bufs_pkts[i] > 0) {
860 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
861 update_bufs_pkts[i]);
	/* Update packets we own; free the ones the slave did not take. */
862 for (j = num_send; j < update_bufs_pkts[i]; j++) {
863 rte_pktmbuf_free(update_bufs[i][j]);
865 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
866 for (j = 0; j < update_bufs_pkts[i]; j++) {
867 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
868 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
874 /* Send non-ARP packets using tlb policy */
875 if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
876 num_send = bond_ethdev_tx_burst_tlb(queue,
877 slave_bufs[RTE_MAX_ETHPORTS],
878 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
880 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
881 bufs[nb_pkts - 1 - num_not_send - j] =
882 slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
885 num_tx_total += num_send;
886 num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
/*
 * Balance (mode 2) TX: distribute the burst over active slaves using
 * the device's configured xmit_hash policy (l2 / l23 / l34), then
 * transmit each slave's batch.  Packets a slave fails to send are moved
 * to the tail of bufs[] so the caller can retry them.
 */
893 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
896 struct bond_dev_private *internals;
897 struct bond_tx_queue *bd_tx_q;
899 uint8_t num_of_slaves;
900 uint8_t slaves[RTE_MAX_ETHPORTS];
902 uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
906 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
907 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
909 bd_tx_q = (struct bond_tx_queue *)queue;
910 internals = bd_tx_q->dev_private;
912 /* Copy slave list to protect against slave up/down changes during tx
914 num_of_slaves = internals->active_slave_count;
915 memcpy(slaves, internals->active_slaves,
916 sizeof(internals->active_slaves[0]) * num_of_slaves);
918 if (num_of_slaves < 1)
921 /* Populate slaves mbuf with the packets which are to be sent on it */
922 for (i = 0; i < nb_pkts; i++) {
923 /* Select output slave using hash based on xmit policy */
924 op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
926 /* Populate slave mbuf arrays with mbufs for that slave */
927 slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
930 /* Send packet burst on each slave device */
931 for (i = 0; i < num_of_slaves; i++) {
932 if (slave_nb_pkts[i] > 0) {
933 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
934 slave_bufs[i], slave_nb_pkts[i]);
936 /* if tx burst fails move packets to end of bufs */
937 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
938 int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
940 tx_fail_total += slave_tx_fail_count;
941 memcpy(&bufs[nb_pkts - tx_fail_total],
942 &slave_bufs[i][num_tx_slave],
943 slave_tx_fail_count * sizeof(bufs[0]));
946 num_tx_total += num_tx_slave;
/*
 * 802.3ad (mode 4) TX: first drain each slave's LACP slow-packet ring
 * into that slave's batch (slow frames go out ahead of data), then hash
 * the data packets across only the slaves currently in DISTRIBUTING
 * state, and transmit each batch.  On partial TX, unsent slow frames
 * are freed (this PMD owns them) and unsent data frames are moved to
 * the tail of bufs[] for the caller.  Returns the number of caller
 * packets sent (slow frames excluded).
 */
954 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
957 struct bond_dev_private *internals;
958 struct bond_tx_queue *bd_tx_q;
960 uint8_t num_of_slaves;
961 uint8_t slaves[RTE_MAX_ETHPORTS];
962 /* positions in slaves, not ID */
963 uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
964 uint8_t distributing_count;
966 uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
967 uint16_t i, j, op_slave_idx;
968 const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
970 /* Allocate additional packets in case 8023AD mode. */
971 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
972 void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
974 /* Total amount of packets in slave_bufs */
975 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
976 /* Slow packets placed in each slave */
977 uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
979 bd_tx_q = (struct bond_tx_queue *)queue;
980 internals = bd_tx_q->dev_private;
982 /* Copy slave list to protect against slave up/down changes during tx
984 num_of_slaves = internals->active_slave_count;
985 if (num_of_slaves < 1)
988 memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
990 distributing_count = 0;
991 for (i = 0; i < num_of_slaves; i++) {
992 struct port *port = &mode_8023ad_ports[slaves[i]];
	/* Pending LACP frames for this slave are queued first. */
994 slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
995 slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
996 slave_nb_pkts[i] = slave_slow_nb_pkts[i];
998 for (j = 0; j < slave_slow_nb_pkts[i]; j++)
999 slave_bufs[i][j] = slow_pkts[j];
1001 if (ACTOR_STATE(port, DISTRIBUTING))
1002 distributing_offsets[distributing_count++] = i;
1005 if (likely(distributing_count > 0)) {
1006 /* Populate slaves mbuf with the packets which are to be sent on it */
1007 for (i = 0; i < nb_pkts; i++) {
1008 /* Select output slave using hash based on xmit policy */
1009 op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
1011 /* Populate slave mbuf arrays with mbufs for that slave. Use only
1012 * slaves that are currently distributing. */
1013 uint8_t slave_offset = distributing_offsets[op_slave_idx];
1014 slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
1015 slave_nb_pkts[slave_offset]++;
1019 /* Send packet burst on each slave device */
1020 for (i = 0; i < num_of_slaves; i++) {
1021 if (slave_nb_pkts[i] == 0)
1024 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1025 slave_bufs[i], slave_nb_pkts[i]);
1027 /* If tx burst fails drop slow packets */
1028 for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
1029 rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
	/* Caller-visible count excludes the slow frames sent first. */
1031 num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
1032 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
1034 /* If tx burst fails move packets to end of bufs */
1035 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1036 uint16_t j = nb_pkts - num_tx_fail_total;
1037 for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
1038 bufs[j] = slave_bufs[i][num_tx_slave];
1042 return num_tx_total;
/*
 * Broadcast (mode 3) TX: transmit every packet on every active slave.
 * Each mbuf's refcount is raised by (slaves - 1) beforehand; on partial
 * failure, surplus references held for all but the most successful
 * slave are dropped, since the caller only knows about one reference
 * per packet.  Returns the per-slave maximum number of packets sent.
 */
1046 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1049 struct bond_dev_private *internals;
1050 struct bond_tx_queue *bd_tx_q;
1052 uint8_t tx_failed_flag = 0, num_of_slaves;
1053 uint8_t slaves[RTE_MAX_ETHPORTS];
1055 uint16_t max_nb_of_tx_pkts = 0;
1057 int slave_tx_total[RTE_MAX_ETHPORTS];
1058 int i, most_successful_tx_slave = -1;
1060 bd_tx_q = (struct bond_tx_queue *)queue;
1061 internals = bd_tx_q->dev_private;
1063 /* Copy slave list to protect against slave up/down changes during tx
1065 num_of_slaves = internals->active_slave_count;
1066 memcpy(slaves, internals->active_slaves,
1067 sizeof(internals->active_slaves[0]) * num_of_slaves);
1069 if (num_of_slaves < 1)
1072 /* Increment reference count on mbufs */
1073 for (i = 0; i < nb_pkts; i++)
1074 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1076 /* Transmit burst on each active slave */
1077 for (i = 0; i < num_of_slaves; i++) {
1078 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1081 if (unlikely(slave_tx_total[i] < nb_pkts))
1084 /* record the value and slave index for the slave which transmits the
1085 * maximum number of packets */
1086 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1087 max_nb_of_tx_pkts = slave_tx_total[i];
1088 most_successful_tx_slave = i;
1092 /* if slaves fail to transmit packets from burst, the calling application
1093 * is not expected to know about multiple references to packets so we must
1094 * handle failures of all packets except those of the most successful slave
1096 if (unlikely(tx_failed_flag))
1097 for (i = 0; i < num_of_slaves; i++)
1098 if (i != most_successful_tx_slave)
1099 while (slave_tx_total[i] < nb_pkts)
1100 rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1102 return max_nb_of_tx_pkts;
/*
 * link_properties_set - adopt a slave's link speed/duplex as the bonded
 * device's link properties, but only if the slave link is up and the
 * bonded device has been started; records that the properties are set.
 */
1106 link_properties_set(struct rte_eth_dev *bonded_eth_dev,
1107 struct rte_eth_link *slave_dev_link)
1109 struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
1110 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1112 if (slave_dev_link->link_status &&
1113 bonded_eth_dev->data->dev_started) {
1114 bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
1115 bonded_dev_link->link_speed = slave_dev_link->link_speed;
1117 internals->link_props_set = 1;
/*
 * link_properties_reset - zero the bonded device's link properties and
 * clear the link_props_set flag.
 */
1122 link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
1124 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1126 memset(&(bonded_eth_dev->data->dev_link), 0,
1127 sizeof(bonded_eth_dev->data->dev_link));
1129 internals->link_props_set = 0;
/*
 * link_properties_valid - check whether a slave's link duplex and speed
 * match the bonded device's; the mismatch branch is taken when either
 * property differs.
 */
1133 link_properties_valid(struct rte_eth_link *bonded_dev_link,
1134 struct rte_eth_link *slave_dev_link)
1136 if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
1137 bonded_dev_link->link_speed != slave_dev_link->link_speed)
/*
 * mac_address_get - copy a device's primary MAC address into
 * *dst_mac_addr, logging an error on NULL device or destination.
 */
1144 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1146 struct ether_addr *mac_addr;
1148 if (eth_dev == NULL) {
1149 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1153 if (dst_mac_addr == NULL) {
1154 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1158 mac_addr = eth_dev->data->mac_addrs;
1160 ether_addr_copy(mac_addr, dst_mac_addr);
1165 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1167 struct ether_addr *mac_addr;
1169 if (eth_dev == NULL) {
1170 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1174 if (new_mac_addr == NULL) {
1175 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1179 mac_addr = eth_dev->data->mac_addrs;
1181 /* If new MAC is different to current MAC then update */
1182 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1183 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1189 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1191 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1194 /* Update slave devices MAC addresses */
1195 if (internals->slave_count < 1)
1198 switch (internals->mode) {
1199 case BONDING_MODE_ROUND_ROBIN:
1200 case BONDING_MODE_BALANCE:
1201 case BONDING_MODE_BROADCAST:
1202 for (i = 0; i < internals->slave_count; i++) {
1203 if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
1204 bonded_eth_dev->data->mac_addrs)) {
1205 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1206 internals->slaves[i].port_id);
1211 case BONDING_MODE_8023AD:
1212 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1214 case BONDING_MODE_ACTIVE_BACKUP:
1215 case BONDING_MODE_TLB:
1216 case BONDING_MODE_ALB:
1218 for (i = 0; i < internals->slave_count; i++) {
1219 if (internals->slaves[i].port_id ==
1220 internals->current_primary_port) {
1221 if (mac_address_set(&rte_eth_devices[internals->primary_port],
1222 bonded_eth_dev->data->mac_addrs)) {
1223 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1224 internals->current_primary_port);
1228 if (mac_address_set(
1229 &rte_eth_devices[internals->slaves[i].port_id],
1230 &internals->slaves[i].persisted_mac_addr)) {
1231 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1232 internals->slaves[i].port_id);
1243 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1245 struct bond_dev_private *internals;
1247 internals = eth_dev->data->dev_private;
1250 case BONDING_MODE_ROUND_ROBIN:
1251 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1252 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1254 case BONDING_MODE_ACTIVE_BACKUP:
1255 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1256 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1258 case BONDING_MODE_BALANCE:
1259 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1260 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1262 case BONDING_MODE_BROADCAST:
1263 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1264 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1266 case BONDING_MODE_8023AD:
1267 if (bond_mode_8023ad_enable(eth_dev) != 0)
1270 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1271 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1272 RTE_LOG(WARNING, PMD,
1273 "Using mode 4, it is necessary to do TX burst and RX burst "
1274 "at least every 100ms.\n");
1276 case BONDING_MODE_TLB:
1277 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1278 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1280 case BONDING_MODE_ALB:
1281 if (bond_mode_alb_enable(eth_dev) != 0)
1284 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1285 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1291 internals->mode = mode;
/*
 * slave_configure(): stop a slave port, mirror the bonded device's
 * configuration onto it (LSC interrupt, RSS key/hash, queue counts, rx/tx
 * queue parameters), restart it, then sync RETA and seed the initial link
 * status.  Returns 0 on success or the propagated error value.
 *
 * NOTE(review): the lines below are a lossy extraction — the leading numbers
 * are the original file's line numbers and some declarations / error-return
 * lines are missing from this view.  Code left byte-identical.
 */
1297 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1298 struct rte_eth_dev *slave_eth_dev)
1300 struct bond_rx_queue *bd_rx_q;
1301 struct bond_tx_queue *bd_tx_q;
/* Slave must be stopped before it can be reconfigured */
1307 rte_eth_dev_stop(slave_eth_dev->data->port_id);
1309 /* Enable interrupts on slave device if supported */
1310 if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
1311 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1313 /* If RSS is enabled for bonding, try to enable it for slaves */
1314 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
/* Propagate the bonded RSS key when one is set; otherwise let the
 * slave PMD use its default key (rss_key = NULL). */
1315 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1317 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1318 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1319 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1320 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1322 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1325 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1326 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1327 slave_eth_dev->data->dev_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
1330 /* Configure device */
1331 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1332 bonded_eth_dev->data->nb_rx_queues,
1333 bonded_eth_dev->data->nb_tx_queues,
1334 &(slave_eth_dev->data->dev_conf));
1336 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1337 slave_eth_dev->data->port_id, errval);
1341 /* Setup Rx Queues */
/* Each slave queue reuses the bonded queue's descriptor count, config
 * and mbuf pool so traffic can be moved between ports transparently. */
1342 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1343 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1345 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1346 bd_rx_q->nb_rx_desc,
1347 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1348 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1351 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1352 slave_eth_dev->data->port_id, q_id, errval);
1357 /* Setup Tx Queues */
1358 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1359 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1361 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1362 bd_tx_q->nb_tx_desc,
1363 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1367 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1368 slave_eth_dev->data->port_id, q_id, errval);
/* Bring the slave back up with the new configuration */
1374 errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1376 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1377 slave_eth_dev->data->port_id, errval);
1381 /* If RSS is enabled for bonding, synchronize RETA */
1382 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1384 struct bond_dev_private *internals;
1386 internals = bonded_eth_dev->data->dev_private;
1388 for (i = 0; i < internals->slave_count; i++) {
1389 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1390 errval = rte_eth_dev_rss_reta_update(
1391 slave_eth_dev->data->port_id,
1392 &internals->reta_conf[0],
1393 internals->slaves[i].reta_size);
/* RETA sync failure is non-fatal: warn only */
1395 RTE_LOG(WARNING, PMD,
1396 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1397 " RSS Configuration for bonding may be inconsistent.\n",
1398 slave_eth_dev->data->port_id, errval);
1405 /* If lsc interrupt is set, check initial slave's link status */
1406 if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
1407 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1408 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);
1414 slave_remove(struct bond_dev_private *internals,
1415 struct rte_eth_dev *slave_eth_dev)
1419 for (i = 0; i < internals->slave_count; i++)
1420 if (internals->slaves[i].port_id ==
1421 slave_eth_dev->data->port_id)
1424 if (i < (internals->slave_count - 1))
1425 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1426 sizeof(internals->slaves[0]) *
1427 (internals->slave_count - i - 1));
1429 internals->slave_count--;
1433 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1436 slave_add(struct bond_dev_private *internals,
1437 struct rte_eth_dev *slave_eth_dev)
1439 struct bond_slave_details *slave_details =
1440 &internals->slaves[internals->slave_count];
1442 slave_details->port_id = slave_eth_dev->data->port_id;
1443 slave_details->last_link_status = 0;
1445 /* If slave device doesn't support interrupts then we need to enabled
1446 * polling to monitor link status */
1447 if (!(slave_eth_dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
1448 slave_details->link_status_poll_enabled = 1;
1450 if (!internals->link_status_polling_enabled) {
1451 internals->link_status_polling_enabled = 1;
1453 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1454 bond_ethdev_slave_link_status_change_monitor,
1455 (void *)&rte_eth_devices[internals->port_id]);
1459 slave_details->link_status_wait_to_complete = 0;
1460 /* clean tlb_last_obytes when adding port for bonding device */
1461 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1462 sizeof(struct ether_addr));
1466 bond_ethdev_primary_set(struct bond_dev_private *internals,
1467 uint8_t slave_port_id)
1471 if (internals->active_slave_count < 1)
1472 internals->current_primary_port = slave_port_id;
1474 /* Search bonded device slave ports for new proposed primary port */
1475 for (i = 0; i < internals->active_slave_count; i++) {
1476 if (internals->active_slaves[i] == slave_port_id)
1477 internals->current_primary_port = slave_port_id;
1482 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
/*
 * bond_ethdev_start(): dev_ops start hook for the bonded device.  Resolves
 * the MAC to use (primary slave's persisted MAC unless user-defined),
 * propagates MAC / promiscuous settings, reconfigures and restarts every
 * slave, then applies primary-port choice and mode-specific start hooks
 * (802.3ad, TLB/ALB).
 *
 * NOTE(review): lossy extraction — leading numbers are the original file's
 * line numbers; error-return lines and braces are missing from this view.
 * Code left byte-identical.
 */
1485 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1487 struct bond_dev_private *internals;
1490 /* slave eth dev will be started by bonded device */
1491 if (valid_bonded_ethdev(eth_dev)) {
1492 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1493 eth_dev->data->port_id);
/* Link reported down until at least one slave comes up */
1497 eth_dev->data->dev_link.link_status = 0;
1498 eth_dev->data->dev_started = 1;
1500 internals = eth_dev->data->dev_private;
1502 if (internals->slave_count == 0) {
1503 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
/* Without a user-defined MAC, inherit the primary slave's persisted MAC */
1507 if (internals->user_defined_mac == 0) {
1508 struct ether_addr *new_mac_addr = NULL;
1510 for (i = 0; i < internals->slave_count; i++)
1511 if (internals->slaves[i].port_id == internals->primary_port)
1512 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1514 if (new_mac_addr == NULL)
1517 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1518 RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1519 eth_dev->data->port_id);
1524 /* Update all slave devices MACs*/
1525 if (mac_address_slaves_update(eth_dev) != 0)
1528 /* If bonded device is configure in promiscuous mode then re-apply config */
1529 if (internals->promiscuous_en)
1530 bond_ethdev_promiscuous_enable(eth_dev);
1532 /* Reconfigure each slave device if starting bonded device */
1533 for (i = 0; i < internals->slave_count; i++) {
1534 if (slave_configure(eth_dev,
1535 &(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
1537 "bonded port (%d) failed to reconfigure slave device (%d)",
1538 eth_dev->data->port_id, internals->slaves[i].port_id);
1543 if (internals->user_defined_primary_port)
1544 bond_ethdev_primary_set(internals, internals->primary_port);
/* Mode-specific start hooks */
1546 if (internals->mode == BONDING_MODE_8023AD)
1547 bond_mode_8023ad_start(eth_dev);
1549 if (internals->mode == BONDING_MODE_TLB ||
1550 internals->mode == BONDING_MODE_ALB)
1551 bond_tlb_enable(internals);
1557 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1561 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1562 rte_free(dev->data->rx_queues[i]);
1563 dev->data->rx_queues[i] = NULL;
1565 dev->data->nb_rx_queues = 0;
1567 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1568 rte_free(dev->data->tx_queues[i]);
1569 dev->data->tx_queues[i] = NULL;
1571 dev->data->nb_tx_queues = 0;
/*
 * bond_ethdev_stop(): dev_ops stop hook.  Runs mode-specific teardown
 * (drains 802.3ad rings, disables TLB and clears its byte counters), then
 * clears active-slave state, polling flag and link/started status.
 *
 * NOTE(review): lossy extraction — leading numbers are original file line
 * numbers; local declarations (e.g. the ring message pointer and port) and
 * braces are missing from this view.  Code left byte-identical.
 */
1575 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1577 struct bond_dev_private *internals = eth_dev->data->dev_private;
1580 if (internals->mode == BONDING_MODE_8023AD) {
1584 bond_mode_8023ad_stop(eth_dev);
1586 /* Discard all messages to/from mode 4 state machines */
1587 for (i = 0; i < internals->active_slave_count; i++) {
1588 port = &mode_8023ad_ports[internals->active_slaves[i]];
/* Drain and free any queued LACPDUs in both directions */
1590 RTE_VERIFY(port->rx_ring != NULL);
1591 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
1592 rte_pktmbuf_free(pkt);
1594 RTE_VERIFY(port->tx_ring != NULL);
1595 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
1596 rte_pktmbuf_free(pkt);
1600 if (internals->mode == BONDING_MODE_TLB ||
1601 internals->mode == BONDING_MODE_ALB) {
1602 bond_tlb_disable(internals);
/* Reset per-slave TX byte statistics used by the TLB scheduler */
1603 for (i = 0; i < internals->active_slave_count; i++)
1604 tlb_last_obytets[internals->active_slaves[i]] = 0;
1607 internals->active_slave_count = 0;
1608 internals->link_status_polling_enabled = 0;
1610 eth_dev->data->dev_link.link_status = 0;
1611 eth_dev->data->dev_started = 0;
/**
 * dev_ops close hook: release all bonded-device queue resources.
 */
static void
bond_ethdev_close(struct rte_eth_dev *dev)
{
	bond_ethdev_free_queues(dev);
}
1620 /* forward declaration */
1621 static int bond_ethdev_configure(struct rte_eth_dev *dev);
1624 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1626 struct bond_dev_private *internals = dev->data->dev_private;
1628 dev_info->max_mac_addrs = 1;
1630 dev_info->max_rx_pktlen = (uint32_t)2048;
1632 dev_info->max_rx_queues = (uint16_t)128;
1633 dev_info->max_tx_queues = (uint16_t)512;
1635 dev_info->min_rx_bufsize = 0;
1636 dev_info->pci_dev = dev->pci_dev;
1638 dev_info->rx_offload_capa = internals->rx_offload_capa;
1639 dev_info->tx_offload_capa = internals->tx_offload_capa;
1640 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
1642 dev_info->reta_size = internals->reta_size;
1646 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1647 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
1648 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
1650 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
1651 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
1652 0, dev->pci_dev->numa_node);
1653 if (bd_rx_q == NULL)
1656 bd_rx_q->queue_id = rx_queue_id;
1657 bd_rx_q->dev_private = dev->data->dev_private;
1659 bd_rx_q->nb_rx_desc = nb_rx_desc;
1661 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
1662 bd_rx_q->mb_pool = mb_pool;
1664 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
1670 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1671 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
1672 const struct rte_eth_txconf *tx_conf)
1674 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
1675 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
1676 0, dev->pci_dev->numa_node);
1678 if (bd_tx_q == NULL)
1681 bd_tx_q->queue_id = tx_queue_id;
1682 bd_tx_q->dev_private = dev->data->dev_private;
1684 bd_tx_q->nb_tx_desc = nb_tx_desc;
1685 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
1687 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
1693 bond_ethdev_rx_queue_release(void *queue)
1702 bond_ethdev_tx_queue_release(void *queue)
/*
 * bond_ethdev_slave_link_status_change_monitor(): periodic alarm callback
 * that polls link status of slaves whose PMDs lack LSC interrupts, fires the
 * lsc callback on transitions, and re-arms itself while any polled slave
 * remains.
 *
 * NOTE(review): lossy extraction — leading numbers are original file line
 * numbers; early-return lines and braces are missing.  Code left
 * byte-identical.
 */
1711 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
1713 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
1714 struct bond_dev_private *internals;
1716 /* Default value for polling slave found is true as we don't want to
1717 * disable the polling thread if we cannot get the lock */
1718 int i, polling_slave_found = 1;
1723 bonded_ethdev = (struct rte_eth_dev *)cb_arg;
1724 internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
/* Stop re-arming once the device is stopped or polling disabled */
1726 if (!bonded_ethdev->data->dev_started ||
1727 !internals->link_status_polling_enabled)
1730 /* If device is currently being configured then don't check slaves link
1731 * status, wait until next period */
1732 if (rte_spinlock_trylock(&internals->lock)) {
1733 if (internals->slave_count > 0)
1734 polling_slave_found = 0;
1736 for (i = 0; i < internals->slave_count; i++) {
/* Skip slaves that report link status via interrupts */
1737 if (!internals->slaves[i].link_status_poll_enabled)
1740 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
1741 polling_slave_found = 1;
1743 /* Update slave link status */
1744 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
1745 internals->slaves[i].link_status_wait_to_complete);
1747 /* if link status has changed since last checked then call lsc
1749 if (slave_ethdev->data->dev_link.link_status !=
1750 internals->slaves[i].last_link_status) {
1751 internals->slaves[i].last_link_status =
1752 slave_ethdev->data->dev_link.link_status;
1754 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
1755 RTE_ETH_EVENT_INTR_LSC,
1756 &bonded_ethdev->data->port_id);
1759 rte_spinlock_unlock(&internals->lock);
1762 if (polling_slave_found)
1763 /* Set alarm to continue monitoring link status of slave ethdev's */
1764 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1765 bond_ethdev_slave_link_status_change_monitor, cb_arg);
1769 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
1770 int wait_to_complete)
1772 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1774 if (!bonded_eth_dev->data->dev_started ||
1775 internals->active_slave_count == 0) {
1776 bonded_eth_dev->data->dev_link.link_status = 0;
1779 struct rte_eth_dev *slave_eth_dev;
1782 for (i = 0; i < internals->active_slave_count; i++) {
1783 slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
1785 (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
1787 if (slave_eth_dev->data->dev_link.link_status == 1) {
1793 bonded_eth_dev->data->dev_link.link_status = link_up;
1800 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1802 struct bond_dev_private *internals = dev->data->dev_private;
1803 struct rte_eth_stats slave_stats;
1806 for (i = 0; i < internals->slave_count; i++) {
1807 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
1809 stats->ipackets += slave_stats.ipackets;
1810 stats->opackets += slave_stats.opackets;
1811 stats->ibytes += slave_stats.ibytes;
1812 stats->obytes += slave_stats.obytes;
1813 stats->ierrors += slave_stats.ierrors;
1814 stats->oerrors += slave_stats.oerrors;
1815 stats->imcasts += slave_stats.imcasts;
1816 stats->rx_nombuf += slave_stats.rx_nombuf;
1817 stats->fdirmatch += slave_stats.fdirmatch;
1818 stats->fdirmiss += slave_stats.fdirmiss;
1819 stats->tx_pause_xon += slave_stats.tx_pause_xon;
1820 stats->rx_pause_xon += slave_stats.rx_pause_xon;
1821 stats->tx_pause_xoff += slave_stats.tx_pause_xoff;
1822 stats->rx_pause_xoff += slave_stats.rx_pause_xoff;
1827 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
1829 struct bond_dev_private *internals = dev->data->dev_private;
1832 for (i = 0; i < internals->slave_count; i++)
1833 rte_eth_stats_reset(internals->slaves[i].port_id);
1837 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
1839 struct bond_dev_private *internals = eth_dev->data->dev_private;
1842 internals->promiscuous_en = 1;
1844 switch (internals->mode) {
1845 /* Promiscuous mode is propagated to all slaves */
1846 case BONDING_MODE_ROUND_ROBIN:
1847 case BONDING_MODE_BALANCE:
1848 case BONDING_MODE_BROADCAST:
1849 for (i = 0; i < internals->slave_count; i++)
1850 rte_eth_promiscuous_enable(internals->slaves[i].port_id);
1852 /* In mode4 promiscus mode is managed when slave is added/removed */
1853 case BONDING_MODE_8023AD:
1855 /* Promiscuous mode is propagated only to primary slave */
1856 case BONDING_MODE_ACTIVE_BACKUP:
1857 case BONDING_MODE_TLB:
1858 case BONDING_MODE_ALB:
1860 rte_eth_promiscuous_enable(internals->current_primary_port);
1865 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
1867 struct bond_dev_private *internals = dev->data->dev_private;
1870 internals->promiscuous_en = 0;
1872 switch (internals->mode) {
1873 /* Promiscuous mode is propagated to all slaves */
1874 case BONDING_MODE_ROUND_ROBIN:
1875 case BONDING_MODE_BALANCE:
1876 case BONDING_MODE_BROADCAST:
1877 for (i = 0; i < internals->slave_count; i++)
1878 rte_eth_promiscuous_disable(internals->slaves[i].port_id);
1880 /* In mode4 promiscus mode is set managed when slave is added/removed */
1881 case BONDING_MODE_8023AD:
1883 /* Promiscuous mode is propagated only to primary slave */
1884 case BONDING_MODE_ACTIVE_BACKUP:
1885 case BONDING_MODE_TLB:
1886 case BONDING_MODE_ALB:
1888 rte_eth_promiscuous_disable(internals->current_primary_port);
1893 bond_ethdev_delayed_lsc_propagation(void *arg)
1898 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
1899 RTE_ETH_EVENT_INTR_LSC);
/*
 * bond_ethdev_lsc_event_callback(): handles a slave link-status-change event.
 * Validates the event, (de)activates the slave, inherits/resets link
 * properties, updates the primary port, and propagates the LSC event to
 * applications either immediately or via a delayed alarm.
 *
 * NOTE(review): lossy extraction — leading numbers are original file line
 * numbers; early-return lines, local declarations (e.g. active_pos) and
 * braces are missing.  Code left byte-identical.
 */
1903 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
1906 struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
1907 struct bond_dev_private *internals;
1908 struct rte_eth_link link;
1910 int i, valid_slave = 0;
1912 uint8_t lsc_flag = 0;
1914 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
/* param carries the bonded device's port id */
1917 bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
1918 slave_eth_dev = &rte_eth_devices[port_id];
1920 if (valid_bonded_ethdev(bonded_eth_dev))
1923 internals = bonded_eth_dev->data->dev_private;
1925 /* If the device isn't started don't handle interrupts */
1926 if (!bonded_eth_dev->data->dev_started)
1929 /* verify that port_id is a valid slave of bonded port */
1930 for (i = 0; i < internals->slave_count; i++) {
1931 if (internals->slaves[i].port_id == port_id) {
1940 /* Search for port in active port list */
1941 active_pos = find_slave_by_id(internals->active_slaves,
1942 internals->active_slave_count, port_id);
1944 rte_eth_link_get_nowait(port_id, &link);
1945 if (link.link_status) {
/* Link came UP: ignore if already active */
1946 if (active_pos < internals->active_slave_count)
1949 /* if no active slave ports then set this port to be primary port */
1950 if (internals->active_slave_count < 1) {
1951 /* If first active slave, then change link status */
1952 bonded_eth_dev->data->dev_link.link_status = 1;
1953 internals->current_primary_port = port_id;
1956 mac_address_slaves_update(bonded_eth_dev);
1958 /* Inherit eth dev link properties from first active slave */
1959 link_properties_set(bonded_eth_dev,
1960 &(slave_eth_dev->data->dev_link));
1963 activate_slave(bonded_eth_dev, port_id);
1965 /* If user has defined the primary port then default to using it */
1966 if (internals->user_defined_primary_port &&
1967 internals->primary_port == port_id)
1968 bond_ethdev_primary_set(internals, port_id);
/* Link went DOWN: ignore if not currently active */
1970 if (active_pos == internals->active_slave_count)
1973 /* Remove from active slave list */
1974 deactivate_slave(bonded_eth_dev, port_id);
1976 /* No active slaves, change link status to down and reset other
1977 * link properties */
1978 if (internals->active_slave_count < 1) {
1980 bonded_eth_dev->data->dev_link.link_status = 0;
1982 link_properties_reset(bonded_eth_dev);
1985 /* Update primary id, take first active slave from list or if none
1986 * available set to -1 */
1987 if (port_id == internals->current_primary_port) {
1988 if (internals->active_slave_count > 0)
1989 bond_ethdev_primary_set(internals,
1990 internals->active_slaves[0]);
1992 internals->current_primary_port = internals->primary_port;
1997 /* Cancel any possible outstanding interrupts if delays are enabled */
1998 if (internals->link_up_delay_ms > 0 ||
1999 internals->link_down_delay_ms > 0)
2000 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
/* Propagate event now, or schedule it after the configured delay */
2003 if (bonded_eth_dev->data->dev_link.link_status) {
2004 if (internals->link_up_delay_ms > 0)
2005 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2006 bond_ethdev_delayed_lsc_propagation,
2007 (void *)bonded_eth_dev);
2009 _rte_eth_dev_callback_process(bonded_eth_dev,
2010 RTE_ETH_EVENT_INTR_LSC);
2013 if (internals->link_down_delay_ms > 0)
2014 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2015 bond_ethdev_delayed_lsc_propagation,
2016 (void *)bonded_eth_dev);
2018 _rte_eth_dev_callback_process(bonded_eth_dev,
2019 RTE_ETH_EVENT_INTR_LSC);
/*
 * bond_ethdev_rss_reta_update(): dev_ops reta_update hook.  Copies the
 * caller's RETA into the cached bonded table (replicating the pattern to
 * fill the full array), then pushes it to every slave at each slave's own
 * RETA size.
 *
 * NOTE(review): lossy extraction — leading numbers are original file line
 * numbers; loop-index declarations and error-return lines are missing.
 * Code left byte-identical.
 */
2025 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2026 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2030 int slave_reta_size;
2031 unsigned reta_count;
2032 struct bond_dev_private *internals = dev->data->dev_private;
/* Caller must use the bonded device's advertised RETA size */
2034 if (reta_size != internals->reta_size)
2037 /* Copy RETA table */
2038 reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2040 for (i = 0; i < reta_count; i++) {
2041 internals->reta_conf[i].mask = reta_conf[i].mask;
2042 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2043 if ((reta_conf[i].mask >> j) & 0x01)
2044 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2047 /* Fill rest of array */
/* Replicate the user-supplied pattern so slaves with larger RETAs get a
 * consistent table */
2048 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2049 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2050 sizeof(internals->reta_conf[0]) * reta_count);
2052 /* Propagate RETA over slaves */
2053 for (i = 0; i < internals->slave_count; i++) {
2054 slave_reta_size = internals->slaves[i].reta_size;
2055 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2056 &internals->reta_conf[0], slave_reta_size);
2065 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2066 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2069 struct bond_dev_private *internals = dev->data->dev_private;
2071 if (reta_size != internals->reta_size)
2074 /* Copy RETA table */
2075 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2076 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2077 if ((reta_conf[i].mask >> j) & 0x01)
2078 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2084 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2085 struct rte_eth_rss_conf *rss_conf)
2088 struct bond_dev_private *internals = dev->data->dev_private;
2089 struct rte_eth_rss_conf bond_rss_conf;
2091 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2093 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2095 if (bond_rss_conf.rss_hf != 0)
2096 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2098 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2099 sizeof(internals->rss_key)) {
2100 if (bond_rss_conf.rss_key_len == 0)
2101 bond_rss_conf.rss_key_len = 40;
2102 internals->rss_key_len = bond_rss_conf.rss_key_len;
2103 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2104 internals->rss_key_len);
2107 for (i = 0; i < internals->slave_count; i++) {
2108 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2118 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2119 struct rte_eth_rss_conf *rss_conf)
2121 struct bond_dev_private *internals = dev->data->dev_private;
2123 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2124 rss_conf->rss_key_len = internals->rss_key_len;
2125 if (rss_conf->rss_key)
2126 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
/*
 * dev_ops table for the bonded virtual device: maps the generic ethdev
 * operations onto the bond_ethdev_* implementations above.
 * NOTE(review): lossy extraction — leading numbers are original file line
 * numbers; the struct terminator is not visible here.  Code left
 * byte-identical.
 */
2131 struct eth_dev_ops default_dev_ops = {
2132 .dev_start = bond_ethdev_start,
2133 .dev_stop = bond_ethdev_stop,
2134 .dev_close = bond_ethdev_close,
2135 .dev_configure = bond_ethdev_configure,
2136 .dev_infos_get = bond_ethdev_info,
2137 .rx_queue_setup = bond_ethdev_rx_queue_setup,
2138 .tx_queue_setup = bond_ethdev_tx_queue_setup,
2139 .rx_queue_release = bond_ethdev_rx_queue_release,
2140 .tx_queue_release = bond_ethdev_tx_queue_release,
2141 .link_update = bond_ethdev_link_update,
2142 .stats_get = bond_ethdev_stats_get,
2143 .stats_reset = bond_ethdev_stats_reset,
2144 .promiscuous_enable = bond_ethdev_promiscuous_enable,
2145 .promiscuous_disable = bond_ethdev_promiscuous_disable,
2146 .reta_update = bond_ethdev_rss_reta_update,
2147 .reta_query = bond_ethdev_rss_reta_query,
2148 .rss_hash_update = bond_ethdev_rss_hash_update,
2149 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get
/*
 * bond_init(): vdev driver init for "eth_bond" devices.  Parses the kvargs
 * string for mode and socket id, creates the bonded ethdev, and stashes the
 * kvlist in the device's private data for bond_ethdev_configure() to finish
 * parsing later (slaves may not exist yet at init time).
 *
 * NOTE(review): lossy extraction — leading numbers are original file line
 * numbers; error-path gotos/returns and braces are missing.  Code left
 * byte-identical.
 */
2153 bond_init(const char *name, const char *params)
2155 struct bond_dev_private *internals;
2156 struct rte_kvargs *kvlist;
2157 uint8_t bonding_mode, socket_id;
2158 int arg_count, port_id;
2160 RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2162 kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
2166 /* Parse link bonding mode */
2167 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2168 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2169 &bond_ethdev_parse_slave_mode_kvarg,
2170 &bonding_mode) != 0) {
2171 RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2176 RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
2177 "device %s\n", name);
2181 /* Parse socket id to create bonding device on */
2182 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2183 if (arg_count == 1) {
2184 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2185 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2187 RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
2188 "bonded device %s\n", name);
2191 } else if (arg_count > 1) {
2192 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
2193 "bonded device %s\n", name);
/* Default: the socket the calling lcore runs on */
2196 socket_id = rte_socket_id();
2199 /* Create link bonding eth device */
2200 port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
2202 RTE_LOG(ERR, EAL, "Failed to create socket %s in mode %u on "
2203 "socket %u.\n", name, bonding_mode, socket_id);
/* Keep the kvlist so bond_ethdev_configure() can parse the rest later */
2206 internals = rte_eth_devices[port_id].data->dev_private;
2207 internals->kvlist = kvlist;
2209 RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2210 "socket %u.\n", name, port_id, bonding_mode, socket_id);
2214 rte_kvargs_free(kvlist);
2220 bond_uninit(const char *name)
2227 RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2229 /* free link bonding eth device */
2230 ret = rte_eth_bond_free(name);
2232 RTE_LOG(ERR, EAL, "Failed to free %s\n", name);
2237 /* this part will resolve the slave portids after all the other pdev and vdev
2238 * have been allocated */
2240 bond_ethdev_configure(struct rte_eth_dev *dev)
2242 char *name = dev->data->name;
2243 struct bond_dev_private *internals = dev->data->dev_private;
2244 struct rte_kvargs *kvlist = internals->kvlist;
2246 uint8_t port_id = dev - rte_eth_devices;
2248 static const uint8_t default_rss_key[40] = {
2249 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2250 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2251 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2252 0xBE, 0xAC, 0x01, 0xFA
2257 /* If RSS is enabled, fill table and key with default values */
/*
 * NOTE(review): extracted fragment of the bonded-device configure path
 * (presumably bond_ethdev_configure — the function header and several
 * intermediate lines such as error returns and closing braces are not
 * visible in this excerpt).  Code left byte-identical; comments only.
 */
/* If the application requested RSS, install the driver-owned key and
 * spread the redirection table round-robin across the RX queues. */
2258 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2259 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
/* NOTE(review): rss_key_len is set to 0 while a key pointer is supplied —
 * confirm slaves treat 0 as "use default length" rather than "no key". */
2260 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
/* NOTE(review): magic constant 40 — presumably the RSS key size in bytes;
 * should likely be sizeof(internals->rss_key). Confirm against the
 * declarations of internals->rss_key and default_rss_key. */
2261 memcpy(internals->rss_key, default_rss_key, 40);
2263 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2264 internals->reta_conf[i].mask = ~0LL;
/* Even round-robin queue assignment within each RETA group. */
2265 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2266 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2271 * if no kvlist, it means that this bonded device has been created
2272 * through the bonding api.
/*
 * Each kvarg below follows the same pattern: rte_kvargs_count() must be
 * exactly 1 (0 = not supplied, >1 = duplicate -> error), then
 * rte_kvargs_process() parses the value and the matching
 * rte_eth_bond_*_set() API applies it to the bonded port.
 */
2277 /* Parse MAC address for bonded device */
2278 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2279 if (arg_count == 1) {
2280 struct ether_addr bond_mac;
2282 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2283 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2284 RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
2289 /* Set MAC address */
2290 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
2292 "Failed to set mac address on bonded device %s\n",
2296 } else if (arg_count > 1) {
2298 "MAC address can be specified only once for bonded device %s\n",
2303 /* Parse/set balance mode transmit policy */
2304 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
2305 if (arg_count == 1) {
2306 uint8_t xmit_policy;
2308 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
2309 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
2312 "Invalid xmit policy specified for bonded device %s\n",
2317 /* Set balance mode transmit policy*/
2318 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
2320 "Failed to set balance xmit policy on bonded device %s\n",
2324 } else if (arg_count > 1) {
2326 "Transmit policy can be specified only once for bonded device"
2331 /* Parse/add slave ports to bonded device */
2332 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
2333 struct bond_ethdev_slave_ports slave_ports;
2336 memset(&slave_ports, 0, sizeof(slave_ports));
/* Slaves may appear multiple times; the parse callback accumulates all
 * occurrences into slave_ports before the add loop below. */
2338 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
2339 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
2341 "Failed to parse slave ports for bonded device %s\n",
2346 for (i = 0; i < slave_ports.slave_count; i++) {
2347 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
2349 "Failed to add port %d as slave to bonded device %s\n",
2350 slave_ports.slaves[i], name);
2355 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
2359 /* Parse/set primary slave port id*/
2360 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
2361 if (arg_count == 1) {
2362 uint8_t primary_slave_port_id;
2364 if (rte_kvargs_process(kvlist,
2365 PMD_BOND_PRIMARY_SLAVE_KVARG,
2366 &bond_ethdev_parse_primary_slave_port_id_kvarg,
2367 &primary_slave_port_id) < 0) {
2369 "Invalid primary slave port id specified for bonded device"
2374 /* Set primary slave port id */
2375 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
2378 "Failed to set primary slave port %d on bonded device %s\n",
2379 primary_slave_port_id, name);
2382 } else if (arg_count > 1) {
2384 "Primary slave can be specified only once for bonded device"
2389 /* Parse link status monitor polling interval */
2390 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
2391 if (arg_count == 1) {
2392 uint32_t lsc_poll_interval_ms;
2394 if (rte_kvargs_process(kvlist,
2395 PMD_BOND_LSC_POLL_PERIOD_KVARG,
2396 &bond_ethdev_parse_time_ms_kvarg,
2397 &lsc_poll_interval_ms) < 0) {
2399 "Invalid lsc polling interval value specified for bonded"
2400 " device %s\n", name);
2404 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
2407 "Failed to set lsc monitor polling interval (%u ms) on"
2408 " bonded device %s\n", lsc_poll_interval_ms, name);
2411 } else if (arg_count > 1) {
2413 "LSC polling interval can be specified only once for bonded"
2414 " device %s\n", name);
2418 /* Parse link up interrupt propagation delay */
2419 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
2420 if (arg_count == 1) {
2421 uint32_t link_up_delay_ms;
2423 if (rte_kvargs_process(kvlist,
2424 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
2425 &bond_ethdev_parse_time_ms_kvarg,
2426 &link_up_delay_ms) < 0) {
2428 "Invalid link up propagation delay value specified for"
2429 " bonded device %s\n", name);
2433 /* Set link up propagation delay */
2434 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
2437 "Failed to set link up propagation delay (%u ms) on bonded"
2438 " device %s\n", link_up_delay_ms, name);
2441 } else if (arg_count > 1) {
2443 "Link up propagation delay can be specified only once for"
2444 " bonded device %s\n", name);
2448 /* Parse link down interrupt propagation delay */
2449 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
2450 if (arg_count == 1) {
2451 uint32_t link_down_delay_ms;
2453 if (rte_kvargs_process(kvlist,
2454 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
2455 &bond_ethdev_parse_time_ms_kvarg,
2456 &link_down_delay_ms) < 0) {
2458 "Invalid link down propagation delay value specified for"
2459 " bonded device %s\n", name);
2463 /* Set link down propagation delay */
2464 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
2467 "Failed to set link down propagation delay (%u ms) on"
2468 " bonded device %s\n", link_down_delay_ms, name);
2471 } else if (arg_count > 1) {
2473 "Link down propagation delay can be specified only once for"
2474 " bonded device %s\n", name);
/*
 * EAL virtual (vdev) driver descriptor for the link bonding PMD;
 * .uninit tears a bonded device down when the vdev is removed.
 * NOTE(review): initializer lines between 2481 and 2485 (e.g. .name,
 * .type, .init) and the closing brace are not visible in this excerpt.
 */
2481 static struct rte_driver bond_drv = {
2485 .uninit = bond_uninit,
/* Register the driver with EAL at load time. */
2488 PMD_REGISTER_DRIVER(bond_drv);