/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"
#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
static inline size_t
get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
		rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
		struct rte_vlan_hdr *vlan_hdr =
			(struct rte_vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct rte_vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct rte_vlan_hdr);
		}
	}
	return vlan_offset;
}
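/*
 * Usage sketch (illustrative values, not part of the driver): for a frame
 * with a single VLAN tag the caller passes in the outer ether_type and
 * gets back the offset past the tag(s):
 *
 *	uint16_t proto = eth_hdr->ether_type;	// 0x8100 on the wire
 *	size_t off = get_vlan_offset(eth_hdr, &proto);
 *	// off == 4 (one rte_vlan_hdr), proto == inner ether_type
 *
 * For a QinQ frame (outer 0x88A8, inner 0x8100) off becomes 8.
 */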
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_total = 0;
	uint16_t slave_count;
	uint16_t active_slave;
	int i;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	internals = bd_rx_q->dev_private;
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave =
			rte_eth_rx_burst(internals->active_slaves[active_slave],
					 bd_rx_q->queue_id,
					 bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;
	return num_rx_total;
}
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be =
		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}
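/*
 * Classification sketch (assumed example values): an untagged frame whose
 * ether_type is the IEEE slow-protocols type (RTE_ETHER_TYPE_SLOW, 0x8809)
 * and whose subtype byte is SLOW_SUBTYPE_LACP or SLOW_SUBTYPE_MARKER is
 * treated as LACP control traffic:
 *
 *	if (is_lacp_packets(hdr->ether_type, subtype, mbuf))
 *		// steer to the mode-4 state machine, not to the application
 *
 * A VLAN tag on the mbuf makes the check fail, since LACPDUs are always
 * sent untagged.
 */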
/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = bond_dev->data->dev_private;

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret != 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities don't allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}
int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = bond_dev->data->dev_private;
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify that all slaves in the bond support flow director */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = bond_dev->data->dev_private;
	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}
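/*
 * The flow created above is conceptually the rule
 *
 *	pattern: eth type == RTE_ETHER_TYPE_SLOW (mask 0xFFFF)
 *	action:  QUEUE index == internals->mode4.dedicated_queues.rx_qid
 *
 * so that, when hardware filtering is available, LACPDUs land on the
 * dedicated control queue and the data-path rx burst never has to
 * inspect them.
 */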
static inline uint16_t
rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
		bool dedicated_rxq)
{
	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct rte_eth_dev *bonded_eth_dev =
					&rte_eth_devices[internals->port_id];
	struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
	struct rte_ether_hdr *hdr;

	const uint16_t ether_type_slow_be =
		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;	/* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t subtype;
	uint16_t i;
	uint16_t j;
	uint16_t k;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
					 COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {
			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove packet from array if:
			 * - it is a slow packet but no dedicated rxq is present,
			 * - slave is not in collecting state,
			 * - bonding interface is not in promiscuous mode and
			 *   packet is not multicast and address does not match,
			 */
			if (unlikely(
				(!dedicated_rxq &&
				 is_lacp_packets(hdr->ether_type, subtype,
						 bufs[j])) ||
				!collecting ||
				(!promisc &&
				 !rte_is_multicast_ether_addr(&hdr->d_addr) &&
				 !rte_is_same_ether_addr(bond_mac,
						     &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
					    internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	return rx_burst_8023ad(queue, bufs, nb_pkts, false);
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	return rx_burst_8023ad(queue, bufs, nb_pkts, true);
}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
	switch (arp_op) {
	case RTE_ARP_OP_REQUEST:
		strlcpy(buf, "ARP Request", buf_len);
		return;
	case RTE_ARP_OP_REPLY:
		strlcpy(buf, "ARP Reply", buf_len);
		return;
	case RTE_ARP_OP_REVREQUEST:
		strlcpy(buf, "Reverse ARP Request", buf_len);
		return;
	case RTE_ARP_OP_REVREPLY:
		strlcpy(buf, "Reverse ARP Reply", buf_len);
		return;
	case RTE_ARP_OP_INVREQUEST:
		strlcpy(buf, "Peer Identify Request", buf_len);
		return;
	case RTE_ARP_OP_INVREPLY:
		strlcpy(buf, "Peer Identify Reply", buf_len);
		return;
	default:
		break;
	}
	strlcpy(buf, "Unknown", buf_len);
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}
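/*
 * Worked example (assumed input): for the big-endian address 0x0A000001
 * ("10.0.0.1" on the wire),
 *
 *	ipv4_addr_to_dot(RTE_BE32(0x0A000001), buf, MaxIPv4String);
 *	// buf == "10.0.0.1"
 */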
#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX packets number for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update the stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
		info,							\
		port,							\
		eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
		src_ip,							\
		eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
		dst_ip,							\
		arp_op, ++burstnumber)
#endif
static void
mode6_debug(const char __attribute__((unused)) *info,
	struct rte_ether_hdr *eth_h, uint16_t port,
	uint32_t __attribute__((unused)) *burstnumber)
{
	struct rte_ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct rte_arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	strlcpy(buf, info, 16);
#endif

	if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
		ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
		arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
				ArpOp, sizeof(ArpOp));
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct rte_ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate the per-slave mbuf arrays with the packets to send on each slave */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
				       &slave_bufs[i][num_tx_slave],
				       tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}
static inline uint16_t
ether_hash(struct rte_ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}
static inline uint32_t
ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}
static inline uint32_t
ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}
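/*
 * These helpers trade hash quality for speed: a plain XOR fold of the
 * header words. E.g. (illustrative) for src 10.0.0.1 and dst 10.0.0.2,
 * ipv4_hash() yields 0x0A000001 ^ 0x0A000002 == 0x00000003. XOR is
 * symmetric, so both directions of a flow hash identically and stay on
 * the same slave.
 */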
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct rte_ether_hdr *eth_hdr;
	uint32_t hash;
	int i;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);

		hash = ether_hash(eth_hdr);

		slaves[i] = (hash ^= hash >> 8) % slave_count;
	}
}
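/*
 * Selection sketch (assuming two active slaves): the folded L2 hash is
 * reduced modulo slave_count, so a burst fans out across the slaves while
 * any given source/destination MAC pair always maps to the same slave:
 *
 *	uint16_t out[4];
 *	burst_xmit_l2_hash(pkts, 4, 2, out);	// out[i] is 0 or 1
 */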
void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	uint16_t i;
	struct rte_ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	uint32_t hash, l3hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
		l3hash = 0;

		proto = eth_hdr->ether_type;
		hash = ether_hash(eth_hdr);

		vlan_offset = get_vlan_offset(eth_hdr, &proto);

		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv4_hash(ipv4_hdr);

		} else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);
		}

		hash = hash ^ l3hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}
void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct rte_ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	int i;

	struct rte_udp_hdr *udp_hdr;
	struct rte_tcp_hdr *tcp_hdr;
	uint32_t hash, l3hash, l4hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
		size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
		proto = eth_hdr->ether_type;
		vlan_offset = get_vlan_offset(eth_hdr, &proto);
		l3hash = 0;
		l4hash = 0;

		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			size_t ip_hdr_offset;

			l3hash = ipv4_hash(ipv4_hdr);

			/* there is no L4 header in a fragmented packet */
			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
								== 0)) {
				ip_hdr_offset = (ipv4_hdr->version_ihl
					& RTE_IPV4_HDR_IHL_MASK) *
					RTE_IPV4_IHL_MULTIPLIER;

				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
					tcp_hdr = (struct rte_tcp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
							< pkt_end)
						l4hash = HASH_L4_PORTS(tcp_hdr);
				} else if (ipv4_hdr->next_proto_id ==
								IPPROTO_UDP) {
					udp_hdr = (struct rte_udp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					if ((size_t)udp_hdr + sizeof(*udp_hdr)
							< pkt_end)
						l4hash = HASH_L4_PORTS(udp_hdr);
				}
			}
		} else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);

			if (ipv6_hdr->proto == IPPROTO_TCP) {
				tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
				udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}

		hash = l3hash ^ l4hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}
static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
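/*
 * Example (assumed 10G slave): link_speed = 10000 (Mbps) gives
 * link_bwg = 10000 * 1000000 / 8 bytes/s before scaling by the reorder
 * window; bwg_left_int/bwg_left_remainder then express how much of that
 * budget the measured load leaves, and bandwidth_cmp() sorts the least
 * loaded slave first for bond_ethdev_tx_burst_tlb().
 */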
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint16_t slave_id;
	uint16_t i;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct rte_ether_hdr *ether_hdr;
	struct rte_ether_addr primary_slave_addr;
	struct rte_ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j],
					struct rte_ether_hdr *);
			if (rte_is_same_ether_addr(&ether_hdr->s_addr,
					&primary_slave_addr))
				rte_ether_addr_copy(&active_slave_addr,
						&ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}
void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_BOND_LOG(ERR,
						     "Failed to allocate ARP packet from pool");
					continue;
				}
				pkt_size = sizeof(struct rte_ether_hdr) +
					sizeof(struct rte_arp_hdr) +
					client_info->vlan_count *
					sizeof(struct rte_vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
	/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
							struct rte_ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
							struct rte_ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}
static inline uint16_t
tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
		 uint16_t *slave_port_ids, uint16_t slave_count)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	/* Array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	/*
	 * Populate the per-slave mbuf arrays with the packets to be sent on
	 * each slave, selecting the output slave via a hash based on the
	 * xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
			       &slave_bufs[i][slave_tx_count],
			       slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}
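/*
 * Caller-side sketch (hypothetical names): the failure path above keeps
 * the standard tx_burst contract by compacting unsent mbufs to the tail
 * of bufs[]:
 *
 *	uint16_t sent = tx_burst_balance(q, bufs, n, ids, cnt);
 *	// bufs[sent .. n-1] are still owned by the caller and may be
 *	// retried or freed
 */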
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting
	 */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);
	return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
				slave_count);
}
static inline uint16_t
tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
		bool dedicated_txq)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	uint16_t slave_tx_count;

	uint16_t i;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	if (dedicated_txq)
		goto skip_tx_ring;

	/* Check for LACP control packets and send if available */
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
		struct rte_mbuf *ctrl_pkt = NULL;

		if (likely(rte_ring_empty(port->tx_ring)))
			continue;

		if (rte_ring_dequeue(port->tx_ring,
				     (void **)&ctrl_pkt) != -ENOENT) {
			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
					bd_tx_q->queue_id, &ctrl_pkt, 1);
			/*
			 * re-enqueue LAG control plane packets to buffering
			 * ring if transmission fails so the packet isn't lost.
			 */
			if (slave_tx_count != 1)
				rte_ring_enqueue(port->tx_ring, ctrl_pkt);
		}
	}

skip_tx_ring:
	if (unlikely(nb_bufs == 0))
		return 0;

	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (unlikely(dist_slave_count < 1))
		return 0;

	return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
				dist_slave_count);
}
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	return tx_burst_8023ad(queue, bufs, nb_bufs, false);
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	return tx_burst_8023ad(queue, bufs, nb_bufs, true);
}
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint8_t tx_failed_flag = 0;
	uint16_t num_of_slaves;

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}
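/*
 * Refcount sketch (assuming three active slaves): every mbuf is queued
 * once per slave, so its reference count is bumped by
 * num_of_slaves - 1 == 2 before the bursts. Each successful transmit
 * releases one reference; the frees above reconcile slaves that sent
 * fewer packets than the most successful one, so no mbuf leaks and none
 * is double-freed.
 */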
static void
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		/**
		 * If in mode 4 then save the link properties of the first
		 * slave, all subsequent slaves must match these properties
		 */
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		bond_link->link_autoneg = slave_link->link_autoneg;
		bond_link->link_duplex = slave_link->link_duplex;
		bond_link->link_speed = slave_link->link_speed;
	} else {
		/**
		 * In any other mode the link properties are set to default
		 * values of AUTONEG/DUPLEX
		 */
		ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
		ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
	}
}
static int
link_properties_valid(struct rte_eth_dev *ethdev,
		struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		if (bond_link->link_duplex != slave_link->link_duplex ||
			bond_link->link_autoneg != slave_link->link_autoneg ||
			bond_link->link_speed != slave_link->link_speed)
			return -1;
	}

	return 0;
}
int
mac_address_get(struct rte_eth_dev *eth_dev,
		struct rte_ether_addr *dst_mac_addr)
{
	struct rte_ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (dst_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	rte_ether_addr_copy(mac_addr, dst_mac_addr);
	return 0;
}
int
mac_address_set(struct rte_eth_dev *eth_dev,
		struct rte_ether_addr *new_mac_addr)
{
	struct rte_ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (new_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	/* If the new MAC differs from the current MAC then update */
	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

	return 0;
}

static const struct rte_ether_addr null_mac_addr;
/*
 * Add additional MAC addresses to the slave
 */
int
slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
		uint16_t slave_port_id)
{
	int i, ret;
	struct rte_ether_addr *mac_addr;

	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
			break;

		ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
		if (ret < 0) {
			/* rollback */
			for (i--; i > 0; i--)
				rte_eth_dev_mac_addr_remove(slave_port_id,
					&bonded_eth_dev->data->mac_addrs[i]);
			return ret;
		}
	}

	return 0;
}
/*
 * Remove additional MAC addresses from the slave
 */
int
slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
		uint16_t slave_port_id)
{
	int i, rc, ret;
	struct rte_ether_addr *mac_addr;

	rc = 0;
	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
			break;

		ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
		/* save only the first error */
		if (ret < 0 && rc == 0)
			rc = ret;
	}

	return rc;
}
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	int i;

	/* Update slave devices MAC addresses */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++) {
			if (rte_eth_dev_default_mac_addr_set(
					internals->slaves[i].port_id,
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (rte_eth_dev_default_mac_addr_set(
						internals->primary_port,
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					return -1;
				}
			} else {
				if (rte_eth_dev_default_mac_addr_set(
						internals->slaves[i].port_id,
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
					return -1;
				}
			}
		}
	}

	return 0;
}
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_8023AD:
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		if (internals->mode4.dedicated_queues.enabled == 0) {
			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
			RTE_BOND_LOG(WARNING,
				"Using mode 4, it is necessary to do TX burst "
				"and RX burst at least every 100ms.");
		} else {
			/* Use flow director's optimization */
			eth_dev->rx_pkt_burst =
					bond_ethdev_rx_burst_8023ad_fast_queue;
			eth_dev->tx_pkt_burst =
					bond_ethdev_tx_burst_8023ad_fast_queue;
		}
		break;
	case BONDING_MODE_TLB:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_ALB:
		if (bond_mode_alb_enable(eth_dev) != 0)
			return -1;

		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}
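/*
 * Application-side sketch (hypothetical device name and slave id, error
 * handling elided) of how a mode ends up in bond_ethdev_mode_set() via
 * the public API:
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_8023AD, rte_socket_id());
 *	rte_eth_bond_slave_add(bond_port, slave_port_id);
 *	rte_eth_bond_mode_set(bond_port, BONDING_MODE_8023AD);
 */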
static int
slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	int errval = 0;
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];

	if (port->slow_pool == NULL) {
		char mem_name[256];
		int slave_id = slave_eth_dev->data->port_id;

		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
				slave_id);
		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			slave_eth_dev->data->numa_node);

		/* Any memory allocation failure in initialization is critical because
		 * resources can't be freed, so reinitialization is impossible. */
		if (port->slow_pool == NULL) {
			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
				slave_id, mem_name, rte_strerror(rte_errno));
		}
	}

	if (internals->mode4.dedicated_queues.enabled == 1) {
		/* Configure slow Rx queue */

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.rx_qid, 128,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL, port->slow_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.rx_qid,
					errval);
			return errval;
		}

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid, 512,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid,
				errval);
			return errval;
		}
	}
	return 0;
}
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;
	uint16_t nb_rx_queues;
	uint16_t nb_tx_queues;

	int errval;
	uint16_t q_id;
	struct rte_flow_error flow_error;

	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	/* Stop slave */
	rte_eth_dev_stop(slave_eth_dev->data->port_id);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* If RSS is enabled for bonding, try to enable it for slaves */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
		if (internals->rss_key_len != 0) {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
					internals->rss_key_len;
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
					internals->rss_key;
		} else {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
		}

		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
	}

	if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
			DEV_RX_OFFLOAD_VLAN_FILTER)
		slave_eth_dev->data->dev_conf.rxmode.offloads |=
				DEV_RX_OFFLOAD_VLAN_FILTER;
	else
		slave_eth_dev->data->dev_conf.rxmode.offloads &=
				~DEV_RX_OFFLOAD_VLAN_FILTER;

	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			nb_rx_queues++;
			nb_tx_queues++;
		}
	}

	errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
				     bonded_eth_dev->data->mtu);
	if (errval != 0 && errval != -ENOTSUP) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Configure device */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			nb_rx_queues, nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Setup Rx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		errval = slave_configure_slow_queue(bonded_eth_dev,
				slave_eth_dev);
		if (errval != 0)
			return errval;

		if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
				slave_eth_dev->data->port_id) != 0) {
			RTE_BOND_LOG(ERR,
				"bond_ethdev_8023ad_flow_verify: port=%d failed",
				slave_eth_dev->data->port_id);
			return -1;
		}

		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
			rte_flow_destroy(slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
					&flow_error);

		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
				slave_eth_dev->data->port_id);
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	/* If RSS is enabled for bonding, synchronize RETA */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		int i;
		struct bond_dev_private *internals;

		internals = bonded_eth_dev->data->dev_private;

		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
				errval = rte_eth_dev_rss_reta_update(
						slave_eth_dev->data->port_id,
						&internals->reta_conf[0],
						internals->slaves[i].reta_size);
				if (errval != 0)
					RTE_BOND_LOG(WARNING,
						     "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
						     " RSS Configuration for bonding may be inconsistent.",
						     slave_eth_dev->data->port_id, errval);
				break;
			}
		}
	}

	/* If lsc interrupt is set, check initial slave's link status */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
			NULL);
	}

	return 0;
}
void
slave_remove(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	uint16_t i;

	for (i = 0; i < internals->slave_count; i++)
		if (internals->slaves[i].port_id ==
				slave_eth_dev->data->port_id)
			break;

	if (i < (internals->slave_count - 1)) {
		struct rte_flow *flow;

		memmove(&internals->slaves[i], &internals->slaves[i + 1],
				sizeof(internals->slaves[0]) *
				(internals->slave_count - i - 1));
		TAILQ_FOREACH(flow, &internals->flow_list, next) {
			memmove(&flow->flows[i], &flow->flows[i + 1],
				sizeof(flow->flows[0]) *
				(internals->slave_count - i - 1));
			flow->flows[internals->slave_count - 1] = NULL;
		}
	}

	internals->slave_count--;

	/* force reconfiguration of slave interfaces */
	_rte_eth_dev_reset(slave_eth_dev);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* Mark slave devices that don't support interrupts so we can
	 * compensate when we start the bond
	 */
	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
		slave_details->link_status_poll_enabled = 1;
	}

	slave_details->link_status_wait_to_complete = 0;
	/* clean tlb_last_obytes when adding port for bonding device */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct rte_ether_addr));
}
void
bond_ethdev_primary_set(struct bond_dev_private *internals,
		uint16_t slave_port_id)
{
	int i;

	if (internals->active_slave_count < 1)
		internals->current_primary_port = slave_port_id;
	else
		/* Search bonded device slave ports for new proposed primary port */
		for (i = 0; i < internals->active_slave_count; i++) {
			if (internals->active_slaves[i] == slave_port_id)
				internals->current_primary_port = slave_port_id;
		}
}
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (check_for_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		goto out_err;
	}

	if (internals->user_defined_mac == 0) {
		struct rte_ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			goto out_err;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			goto out_err;
		}
	}

	/* If bonded device is configured in promiscuous mode then re-apply config */
	if (internals->promiscuous_en)
		bond_ethdev_promiscuous_enable(eth_dev);

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			internals->mode4.dedicated_queues.rx_qid =
					eth_dev->data->nb_rx_queues;
			internals->mode4.dedicated_queues.tx_qid =
					eth_dev->data->nb_tx_queues;
		}
	}

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		struct rte_eth_dev *slave_ethdev =
				&(rte_eth_devices[internals->slaves[i].port_id]);
		if (slave_configure(eth_dev, slave_ethdev) != 0) {
			RTE_BOND_LOG(ERR,
				"bonded port (%d) failed to reconfigure slave device (%d)",
				eth_dev->data->port_id,
				internals->slaves[i].port_id);
			goto out_err;
		}
		/* We will need to poll for link status if any slave doesn't
		 * support interrupts
		 */
		if (internals->slaves[i].link_status_poll_enabled)
			internals->link_status_polling_enabled = 1;
	}

	/* start polling if needed */
	if (internals->link_status_polling_enabled) {
		rte_eal_alarm_set(
			internals->link_status_polling_interval_ms * 1000,
			bond_ethdev_slave_link_status_change_monitor,
			(void *)&rte_eth_devices[internals->port_id]);
	}

	/* Update all slave devices MACs */
	if (mac_address_slaves_update(eth_dev) != 0)
		goto out_err;

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB)
		bond_tlb_enable(internals);

	return 0;

out_err:
	eth_dev->data->dev_started = 0;
	return -1;
}
static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
	uint16_t i;

	if (dev->data->rx_queues != NULL) {
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			rte_free(dev->data->rx_queues[i]);
			dev->data->rx_queues[i] = NULL;
		}
		dev->data->nb_rx_queues = 0;
	}

	if (dev->data->tx_queues != NULL) {
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			rte_free(dev->data->tx_queues[i]);
			dev->data->tx_queues[i] = NULL;
		}
		dev->data->nb_tx_queues = 0;
	}
}
void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	uint16_t i;

	if (internals->mode == BONDING_MODE_8023AD) {
		struct port *port;
		void *pkt = NULL;

		bond_mode_8023ad_stop(eth_dev);

		/* Discard all messages to/from mode 4 state machines */
		for (i = 0; i < internals->active_slave_count; i++) {
			port = &bond_mode_8023ad_ports[internals->active_slaves[i]];

			RTE_ASSERT(port->rx_ring != NULL);
			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);

			RTE_ASSERT(port->tx_ring != NULL);
			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);
		}
	}

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB) {
		bond_tlb_disable(internals);
		for (i = 0; i < internals->active_slave_count; i++)
			tlb_last_obytets[internals->active_slaves[i]] = 0;
	}

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 0;

	internals->link_status_polling_enabled = 0;
	for (i = 0; i < internals->slave_count; i++) {
		uint16_t slave_id = internals->slaves[i].port_id;
		if (find_slave_by_id(internals->active_slaves,
				internals->active_slave_count, slave_id) !=
						internals->active_slave_count) {
			internals->slaves[i].last_link_status = 0;
			rte_eth_dev_stop(slave_id);
			deactivate_slave(eth_dev, slave_id);
		}
	}
}
static void
bond_ethdev_close(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	uint16_t bond_port_id = internals->port_id;
	int skipped = 0;
	struct rte_flow_error ferror;

	RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
	while (internals->slave_count != skipped) {
		uint16_t port_id = internals->slaves[skipped].port_id;

		rte_eth_dev_stop(port_id);

		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
			RTE_BOND_LOG(ERR,
				     "Failed to remove port %d from bonded device %s",
				     port_id, dev->device->name);
			skipped++;
		}
	}
	bond_flow_ops.flush(dev, &ferror);
	bond_ethdev_free_queues(dev);
	rte_bitmap_reset(internals->vlan_filter_bmp);
}
/* forward declaration */
static int bond_ethdev_configure(struct rte_eth_dev *dev);
2101 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2103 struct bond_dev_private *internals = dev->data->dev_private;
2105 uint16_t max_nb_rx_queues = UINT16_MAX;
2106 uint16_t max_nb_tx_queues = UINT16_MAX;
2107 uint16_t max_rx_desc_lim = UINT16_MAX;
2108 uint16_t max_tx_desc_lim = UINT16_MAX;
2110 dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2112 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2113 internals->candidate_max_rx_pktlen :
2114 RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2116 /* Max number of tx/rx queues that the bonded device can support is the
2117 * minimum values of the bonded slaves, as all slaves must be capable
2118 * of supporting the same number of tx/rx queues.
2120 if (internals->slave_count > 0) {
2121 struct rte_eth_dev_info slave_info;
2124 for (idx = 0; idx < internals->slave_count; idx++) {
2125 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2128 if (slave_info.max_rx_queues < max_nb_rx_queues)
2129 max_nb_rx_queues = slave_info.max_rx_queues;
2131 if (slave_info.max_tx_queues < max_nb_tx_queues)
2132 max_nb_tx_queues = slave_info.max_tx_queues;
2134 if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2135 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2137 if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2138 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2142 dev_info->max_rx_queues = max_nb_rx_queues;
2143 dev_info->max_tx_queues = max_nb_tx_queues;
2145 memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2146 sizeof(dev_info->default_rxconf));
2147 memcpy(&dev_info->default_txconf, &internals->default_txconf,
2148 sizeof(dev_info->default_txconf));
2150 dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2151 dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2154 * If dedicated hw queues enabled for link bonding device in LACP mode
2155 * then we need to reduce the maximum number of data path queues by 1.
2157 if (internals->mode == BONDING_MODE_8023AD &&
2158 internals->mode4.dedicated_queues.enabled == 1) {
2159 dev_info->max_rx_queues--;
2160 dev_info->max_tx_queues--;
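/* The queue pair reserved above is the dedicated control queue that
 * carries LACPDUs separately from the data path. */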
2163 dev_info->min_rx_bufsize = 0;
2165 dev_info->rx_offload_capa = internals->rx_offload_capa;
2166 dev_info->tx_offload_capa = internals->tx_offload_capa;
2167 dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2168 dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2169 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2171 dev_info->reta_size = internals->reta_size;
2175 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2179 struct bond_dev_private *internals = dev->data->dev_private;
2181 /* don't do this while a slave is being added */
2182 rte_spinlock_lock(&internals->lock);
2185 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2187 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2189 for (i = 0; i < internals->slave_count; i++) {
2190 uint16_t port_id = internals->slaves[i].port_id;
2192 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2194 RTE_BOND_LOG(WARNING,
2195 "Setting VLAN filter on slave port %u not supported.",
2199 rte_spinlock_unlock(&internals->lock);
2204 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2205 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2206 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2208 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2209 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2210 0, dev->data->numa_node);
2211 if (bd_rx_q == NULL)
2214 bd_rx_q->queue_id = rx_queue_id;
2215 bd_rx_q->dev_private = dev->data->dev_private;
2217 bd_rx_q->nb_rx_desc = nb_rx_desc;
2219 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2220 bd_rx_q->mb_pool = mb_pool;
2222 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2228 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2229 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2230 const struct rte_eth_txconf *tx_conf)
2232 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2233 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2234 0, dev->data->numa_node);
2236 if (bd_tx_q == NULL)
2239 bd_tx_q->queue_id = tx_queue_id;
2240 bd_tx_q->dev_private = dev->data->dev_private;
2242 bd_tx_q->nb_tx_desc = nb_tx_desc;
2243 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2245 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
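/*
 * Illustrative usage (not part of this driver; bond_port_id, port_conf
 * and mbuf_pool are hypothetical application objects):
 *
 *	rte_eth_dev_configure(bond_port_id, 1, 1, &port_conf);
 *	rte_eth_rx_queue_setup(bond_port_id, 0, 128, rte_socket_id(),
 *			NULL, mbuf_pool);
 *	rte_eth_tx_queue_setup(bond_port_id, 0, 512, rte_socket_id(), NULL);
 *
 * These calls land in the two queue-setup handlers above, which only
 * record the queue parameters; the stored settings are applied to each
 * slave when the slaves themselves are configured.
 */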
2251 bond_ethdev_rx_queue_release(void *queue)
2260 bond_ethdev_tx_queue_release(void *queue)
2269 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2271 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2272 struct bond_dev_private *internals;
2274 /* polling_slave_found defaults to true (1) because we don't want to
2275 * disable the polling thread if we cannot get the lock */
2276 int i, polling_slave_found = 1;
2281 bonded_ethdev = cb_arg;
2282 internals = bonded_ethdev->data->dev_private;
2284 if (!bonded_ethdev->data->dev_started ||
2285 !internals->link_status_polling_enabled)
2288 /* If the device is currently being configured then don't check the
2289 * slaves' link status; wait until the next period */
2290 if (rte_spinlock_trylock(&internals->lock)) {
2291 if (internals->slave_count > 0)
2292 polling_slave_found = 0;
2294 for (i = 0; i < internals->slave_count; i++) {
2295 if (!internals->slaves[i].link_status_poll_enabled)
2298 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2299 polling_slave_found = 1;
2301 /* Update slave link status */
2302 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2303 internals->slaves[i].link_status_wait_to_complete);
2305 /* if link status has changed since last checked then call lsc
2306 * event callback */
2307 if (slave_ethdev->data->dev_link.link_status !=
2308 internals->slaves[i].last_link_status) {
2309 internals->slaves[i].last_link_status =
2310 slave_ethdev->data->dev_link.link_status;
2312 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2313 RTE_ETH_EVENT_INTR_LSC,
2314 &bonded_ethdev->data->port_id,
2318 rte_spinlock_unlock(&internals->lock);
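/* Re-arm the alarm so monitoring continues, but only while at least one
 * slave is still being polled; otherwise polling simply stops. */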
2321 if (polling_slave_found)
2322 /* Set alarm to continue monitoring link status of slave ethdevs */
2323 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2324 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2328 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2330 void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2332 struct bond_dev_private *bond_ctx;
2333 struct rte_eth_link slave_link;
2337 bond_ctx = ethdev->data->dev_private;
2339 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2341 if (ethdev->data->dev_started == 0 ||
2342 bond_ctx->active_slave_count == 0) {
2343 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2347 ethdev->data->dev_link.link_status = ETH_LINK_UP;
2349 if (wait_to_complete)
2350 link_update = rte_eth_link_get;
2352 link_update = rte_eth_link_get_nowait;
2354 switch (bond_ctx->mode) {
2355 case BONDING_MODE_BROADCAST:
2357 * Setting link speed to UINT32_MAX to ensure we pick up the
2358 * value of the first active slave
2360 ethdev->data->dev_link.link_speed = UINT32_MAX;
2363 * link speed is the minimum of all the slaves' link speeds, as
2364 * packet loss will occur on a slave if transmission at rates
2365 * greater than this is attempted
2367 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2368 link_update(bond_ctx->active_slaves[idx], &slave_link);
2370 if (slave_link.link_speed <
2371 ethdev->data->dev_link.link_speed)
2372 ethdev->data->dev_link.link_speed =
2373 slave_link.link_speed;
2376 case BONDING_MODE_ACTIVE_BACKUP:
2377 /* Current primary slave */
2378 link_update(bond_ctx->current_primary_port, &slave_link);
2380 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2382 case BONDING_MODE_8023AD:
2383 ethdev->data->dev_link.link_autoneg =
2384 bond_ctx->mode4.slave_link.link_autoneg;
2385 ethdev->data->dev_link.link_duplex =
2386 bond_ctx->mode4.slave_link.link_duplex;
2387 /* fall through to update link speed */
2388 case BONDING_MODE_ROUND_ROBIN:
2389 case BONDING_MODE_BALANCE:
2390 case BONDING_MODE_TLB:
2391 case BONDING_MODE_ALB:
2394 * In these modes the maximum theoretical link speed is the sum
2395 * of the link speeds of all the slaves
2396 */
2397 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2399 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2400 link_update(bond_ctx->active_slaves[idx], &slave_link);
2402 ethdev->data->dev_link.link_speed +=
2403 slave_link.link_speed;
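/*
 * For example, two active 10G slaves in balance mode yield an aggregate
 * link_speed of 20000 Mb/s (ETH_SPEED_NUM_20G) on the bonded port.
 */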
2413 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2415 struct bond_dev_private *internals = dev->data->dev_private;
2416 struct rte_eth_stats slave_stats;
2419 for (i = 0; i < internals->slave_count; i++) {
2420 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2422 stats->ipackets += slave_stats.ipackets;
2423 stats->opackets += slave_stats.opackets;
2424 stats->ibytes += slave_stats.ibytes;
2425 stats->obytes += slave_stats.obytes;
2426 stats->imissed += slave_stats.imissed;
2427 stats->ierrors += slave_stats.ierrors;
2428 stats->oerrors += slave_stats.oerrors;
2429 stats->rx_nombuf += slave_stats.rx_nombuf;
2431 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2432 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2433 stats->q_opackets[j] += slave_stats.q_opackets[j];
2434 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2435 stats->q_obytes[j] += slave_stats.q_obytes[j];
2436 stats->q_errors[j] += slave_stats.q_errors[j];
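/*
 * Per-queue counters are accumulated index-by-index, i.e. q_ipackets[j]
 * on the bonded port is the sum of queue j across all slaves rather than
 * a counter for a distinct bonded-port queue.
 */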
2445 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2447 struct bond_dev_private *internals = dev->data->dev_private;
2450 for (i = 0; i < internals->slave_count; i++)
2451 rte_eth_stats_reset(internals->slaves[i].port_id);
2455 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2457 struct bond_dev_private *internals = eth_dev->data->dev_private;
2460 internals->promiscuous_en = 1;
2462 switch (internals->mode) {
2463 /* Promiscuous mode is propagated to all slaves */
2464 case BONDING_MODE_ROUND_ROBIN:
2465 case BONDING_MODE_BALANCE:
2466 case BONDING_MODE_BROADCAST:
2467 for (i = 0; i < internals->slave_count; i++)
2468 rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2470 /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2471 case BONDING_MODE_8023AD:
2473 /* Promiscuous mode is propagated only to primary slave */
2474 case BONDING_MODE_ACTIVE_BACKUP:
2475 case BONDING_MODE_TLB:
2476 case BONDING_MODE_ALB:
2478 /* Do not touch promisc mode when there are no slaves (no primary port) */
2479 if (internals->slave_count == 0)
2481 rte_eth_promiscuous_enable(internals->current_primary_port);
2486 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2488 struct bond_dev_private *internals = dev->data->dev_private;
2491 internals->promiscuous_en = 0;
2493 switch (internals->mode) {
2494 /* Promiscuous mode is propagated to all slaves */
2495 case BONDING_MODE_ROUND_ROBIN:
2496 case BONDING_MODE_BALANCE:
2497 case BONDING_MODE_BROADCAST:
2498 for (i = 0; i < internals->slave_count; i++)
2499 rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2501 /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2502 case BONDING_MODE_8023AD:
2504 /* Promiscuous mode is propagated only to primary slave */
2505 case BONDING_MODE_ACTIVE_BACKUP:
2506 case BONDING_MODE_TLB:
2507 case BONDING_MODE_ALB:
2509 /* Do not touch promisc mode when there are no slaves (no primary port) */
2510 if (internals->slave_count == 0)
2512 rte_eth_promiscuous_disable(internals->current_primary_port);
2517 bond_ethdev_delayed_lsc_propagation(void *arg)
2522 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2523 RTE_ETH_EVENT_INTR_LSC, NULL);
2527 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2528 void *param, void *ret_param __rte_unused)
2530 struct rte_eth_dev *bonded_eth_dev;
2531 struct bond_dev_private *internals;
2532 struct rte_eth_link link;
2535 uint8_t lsc_flag = 0;
2536 int valid_slave = 0;
2537 uint16_t active_pos;
2540 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2543 bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2545 if (check_for_bonded_ethdev(bonded_eth_dev))
2548 internals = bonded_eth_dev->data->dev_private;
2550 /* If the device isn't started don't handle interrupts */
2551 if (!bonded_eth_dev->data->dev_started)
2554 /* verify that port_id is a valid slave of bonded port */
2555 for (i = 0; i < internals->slave_count; i++) {
2556 if (internals->slaves[i].port_id == port_id) {
2565 /* Synchronize lsc callback parallel calls either by real link event
2566 * from the slaves PMDs or by the bonding PMD itself.
2568 rte_spinlock_lock(&internals->lsc_lock);
2570 /* Search for port in active port list */
2571 active_pos = find_slave_by_id(internals->active_slaves,
2572 internals->active_slave_count, port_id);
2574 rte_eth_link_get_nowait(port_id, &link);
2575 if (link.link_status) {
2576 if (active_pos < internals->active_slave_count)
2579 /* check link state properties if bonded link is up */
2580 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2581 if (link_properties_valid(bonded_eth_dev, &link) != 0)
2582 RTE_BOND_LOG(ERR, "Invalid link properties "
2583 "for slave %d in bonding mode %d",
2584 port_id, internals->mode);
2586 /* inherit slave link properties */
2587 link_properties_set(bonded_eth_dev, &link);
2590 /* If no active slave ports then set this port to be
2591 * the primary port */
2593 if (internals->active_slave_count < 1) {
2594 /* If first active slave, then change link status */
2595 bonded_eth_dev->data->dev_link.link_status =
2597 internals->current_primary_port = port_id;
2600 mac_address_slaves_update(bonded_eth_dev);
2603 activate_slave(bonded_eth_dev, port_id);
2605 /* If the user has defined the primary port then default to
2606 * using it.
2607 */
2608 if (internals->user_defined_primary_port &&
2609 internals->primary_port == port_id)
2610 bond_ethdev_primary_set(internals, port_id);
2612 if (active_pos == internals->active_slave_count)
2615 /* Remove from active slave list */
2616 deactivate_slave(bonded_eth_dev, port_id);
2618 if (internals->active_slave_count < 1)
2621 /* Update primary id: take the first active slave from the list, or
2622 * fall back to the configured primary port if none is available */
2623 if (port_id == internals->current_primary_port) {
2624 if (internals->active_slave_count > 0)
2625 bond_ethdev_primary_set(internals,
2626 internals->active_slaves[0]);
2628 internals->current_primary_port = internals->primary_port;
2634 * Update bonded device link properties after any change to the set of
2635 * active slaves
2636 */
2637 bond_ethdev_link_update(bonded_eth_dev, 0);
2640 /* Cancel any possible outstanding interrupts if delays are enabled */
2641 if (internals->link_up_delay_ms > 0 ||
2642 internals->link_down_delay_ms > 0)
2643 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2646 if (bonded_eth_dev->data->dev_link.link_status) {
2647 if (internals->link_up_delay_ms > 0)
2648 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2649 bond_ethdev_delayed_lsc_propagation,
2650 (void *)bonded_eth_dev);
2652 _rte_eth_dev_callback_process(bonded_eth_dev,
2653 RTE_ETH_EVENT_INTR_LSC,
2657 if (internals->link_down_delay_ms > 0)
2658 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2659 bond_ethdev_delayed_lsc_propagation,
2660 (void *)bonded_eth_dev);
2662 _rte_eth_dev_callback_process(bonded_eth_dev,
2663 RTE_ETH_EVENT_INTR_LSC,
2668 rte_spinlock_unlock(&internals->lsc_lock);
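/*
 * Note on the propagation above: when link_up_delay_ms or
 * link_down_delay_ms is non-zero the LSC event is deferred through
 * rte_eal_alarm_set() and later delivered by
 * bond_ethdev_delayed_lsc_propagation(); otherwise it is forwarded to
 * the application callbacks immediately.
 */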
2674 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2675 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2679 int slave_reta_size;
2680 unsigned reta_count;
2681 struct bond_dev_private *internals = dev->data->dev_private;
2683 if (reta_size != internals->reta_size)
2686 /* Copy RETA table */
2687 reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2689 for (i = 0; i < reta_count; i++) {
2690 internals->reta_conf[i].mask = reta_conf[i].mask;
2691 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2692 if ((reta_conf[i].mask >> j) & 0x01)
2693 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2696 /* Fill rest of array */
2697 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2698 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2699 sizeof(internals->reta_conf[0]) * reta_count);
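/*
 * Example: with a reta_size of 128 (two 64-entry groups), the two groups
 * written above are replicated across the remaining groups so the same
 * pattern fills all of internals->reta_conf[].
 */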
2701 /* Propagate RETA over slaves */
2702 for (i = 0; i < internals->slave_count; i++) {
2703 slave_reta_size = internals->slaves[i].reta_size;
2704 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2705 &internals->reta_conf[0], slave_reta_size);
2714 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2715 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2718 struct bond_dev_private *internals = dev->data->dev_private;
2720 if (reta_size != internals->reta_size)
2723 /* Copy RETA table */
2724 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2725 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2726 if ((reta_conf[i].mask >> j) & 0x01)
2727 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2733 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2734 struct rte_eth_rss_conf *rss_conf)
2737 struct bond_dev_private *internals = dev->data->dev_private;
2738 struct rte_eth_rss_conf bond_rss_conf;
2740 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2742 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2744 if (bond_rss_conf.rss_hf != 0)
2745 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2747 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2748 sizeof(internals->rss_key)) {
2749 if (bond_rss_conf.rss_key_len == 0)
2750 bond_rss_conf.rss_key_len = 40;
2751 internals->rss_key_len = bond_rss_conf.rss_key_len;
2752 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2753 internals->rss_key_len);
2756 for (i = 0; i < internals->slave_count; i++) {
2757 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2767 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2768 struct rte_eth_rss_conf *rss_conf)
2770 struct bond_dev_private *internals = dev->data->dev_private;
2772 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2773 rss_conf->rss_key_len = internals->rss_key_len;
2774 if (rss_conf->rss_key)
2775 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
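/*
 * Illustrative application-side update (bond_port_id is hypothetical;
 * a NULL rss_key leaves the current key unchanged):
 *
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = NULL,
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *	};
 *	rte_eth_dev_rss_hash_update(bond_port_id, &conf);
 *
 * The handler above masks rss_hf against the hash types supported by
 * every slave and then propagates the configuration to each slave port.
 */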
2781 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2783 struct rte_eth_dev *slave_eth_dev;
2784 struct bond_dev_private *internals = dev->data->dev_private;
2787 rte_spinlock_lock(&internals->lock);
2789 for (i = 0; i < internals->slave_count; i++) {
2790 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2791 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2792 rte_spinlock_unlock(&internals->lock);
2796 for (i = 0; i < internals->slave_count; i++) {
2797 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2799 rte_spinlock_unlock(&internals->lock);
2804 rte_spinlock_unlock(&internals->lock);
2809 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
2810 struct rte_ether_addr *addr)
2812 if (mac_address_set(dev, addr)) {
2813 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2821 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2822 enum rte_filter_type type, enum rte_filter_op op, void *arg)
2824 if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2825 *(const void **)arg = &bond_flow_ops;
2832 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
2833 struct rte_ether_addr *mac_addr,
2834 __rte_unused uint32_t index, uint32_t vmdq)
2836 struct rte_eth_dev *slave_eth_dev;
2837 struct bond_dev_private *internals = dev->data->dev_private;
2840 rte_spinlock_lock(&internals->lock);
2842 for (i = 0; i < internals->slave_count; i++) {
2843 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2844 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2845 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2851 for (i = 0; i < internals->slave_count; i++) {
2852 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2856 for (i--; i >= 0; i--)
2857 rte_eth_dev_mac_addr_remove(
2858 internals->slaves[i].port_id, mac_addr);
2865 rte_spinlock_unlock(&internals->lock);
2870 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2872 struct rte_eth_dev *slave_eth_dev;
2873 struct bond_dev_private *internals = dev->data->dev_private;
2876 rte_spinlock_lock(&internals->lock);
2878 for (i = 0; i < internals->slave_count; i++) {
2879 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2880 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2884 struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
2886 for (i = 0; i < internals->slave_count; i++)
2887 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2891 rte_spinlock_unlock(&internals->lock);
2894 const struct eth_dev_ops default_dev_ops = {
2895 .dev_start = bond_ethdev_start,
2896 .dev_stop = bond_ethdev_stop,
2897 .dev_close = bond_ethdev_close,
2898 .dev_configure = bond_ethdev_configure,
2899 .dev_infos_get = bond_ethdev_info,
2900 .vlan_filter_set = bond_ethdev_vlan_filter_set,
2901 .rx_queue_setup = bond_ethdev_rx_queue_setup,
2902 .tx_queue_setup = bond_ethdev_tx_queue_setup,
2903 .rx_queue_release = bond_ethdev_rx_queue_release,
2904 .tx_queue_release = bond_ethdev_tx_queue_release,
2905 .link_update = bond_ethdev_link_update,
2906 .stats_get = bond_ethdev_stats_get,
2907 .stats_reset = bond_ethdev_stats_reset,
2908 .promiscuous_enable = bond_ethdev_promiscuous_enable,
2909 .promiscuous_disable = bond_ethdev_promiscuous_disable,
2910 .reta_update = bond_ethdev_rss_reta_update,
2911 .reta_query = bond_ethdev_rss_reta_query,
2912 .rss_hash_update = bond_ethdev_rss_hash_update,
2913 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get,
2914 .mtu_set = bond_ethdev_mtu_set,
2915 .mac_addr_set = bond_ethdev_mac_address_set,
2916 .mac_addr_add = bond_ethdev_mac_addr_add,
2917 .mac_addr_remove = bond_ethdev_mac_addr_remove,
2918 .filter_ctrl = bond_filter_ctrl
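/*
 * Generic ethdev calls on the bonded port dispatch through this ops
 * table: e.g. rte_eth_dev_info_get() lands in bond_ethdev_info() and
 * rte_eth_dev_set_mtu() in bond_ethdev_mtu_set().
 */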
2922 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2924 const char *name = rte_vdev_device_name(dev);
2925 uint8_t socket_id = dev->device.numa_node;
2926 struct bond_dev_private *internals = NULL;
2927 struct rte_eth_dev *eth_dev = NULL;
2928 uint32_t vlan_filter_bmp_size;
2930 /* now do all data allocation - for eth_dev structure, dummy pci driver
2931 * and internal (private) data
2934 /* reserve an ethdev entry */
2935 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2936 if (eth_dev == NULL) {
2937 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2941 internals = eth_dev->data->dev_private;
2942 eth_dev->data->nb_rx_queues = (uint16_t)1;
2943 eth_dev->data->nb_tx_queues = (uint16_t)1;
2945 /* Allocate memory for storing MAC addresses */
2946 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
2947 BOND_MAX_MAC_ADDRS, 0, socket_id);
2948 if (eth_dev->data->mac_addrs == NULL) {
2950 "Failed to allocate %u bytes needed to store MAC addresses",
2951 RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
2955 eth_dev->dev_ops = &default_dev_ops;
2956 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2958 rte_spinlock_init(&internals->lock);
2959 rte_spinlock_init(&internals->lsc_lock);
2961 internals->port_id = eth_dev->data->port_id;
2962 internals->mode = BONDING_MODE_INVALID;
2963 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2964 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2965 internals->burst_xmit_hash = burst_xmit_l2_hash;
2966 internals->user_defined_mac = 0;
2968 internals->link_status_polling_enabled = 0;
2970 internals->link_status_polling_interval_ms =
2971 DEFAULT_POLLING_INTERVAL_10_MS;
2972 internals->link_down_delay_ms = 0;
2973 internals->link_up_delay_ms = 0;
2975 internals->slave_count = 0;
2976 internals->active_slave_count = 0;
2977 internals->rx_offload_capa = 0;
2978 internals->tx_offload_capa = 0;
2979 internals->rx_queue_offload_capa = 0;
2980 internals->tx_queue_offload_capa = 0;
2981 internals->candidate_max_rx_pktlen = 0;
2982 internals->max_rx_pktlen = 0;
2984 /* Initially allow any RSS hash offload type to be chosen */
2985 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2987 memset(&internals->default_rxconf, 0,
2988 sizeof(internals->default_rxconf));
2989 memset(&internals->default_txconf, 0,
2990 sizeof(internals->default_txconf));
2992 memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
2993 memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
2995 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2996 memset(internals->slaves, 0, sizeof(internals->slaves));
2998 TAILQ_INIT(&internals->flow_list);
2999 internals->flow_isolated_valid = 0;
3001 /* Set mode 4 default configuration */
3002 bond_mode_8023ad_setup(eth_dev, NULL);
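/* A NULL conf here selects the driver's built-in mode 4 defaults */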
3003 if (bond_ethdev_mode_set(eth_dev, mode)) {
3004 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3005 eth_dev->data->port_id, mode);
3009 vlan_filter_bmp_size =
3010 rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3011 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3012 RTE_CACHE_LINE_SIZE);
3013 if (internals->vlan_filter_bmpmem == NULL) {
3015 "Failed to allocate vlan bitmap for bonded device %u",
3016 eth_dev->data->port_id);
3020 internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3021 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3022 if (internals->vlan_filter_bmp == NULL) {
3024 "Failed to init vlan bitmap for bonded device %u",
3025 eth_dev->data->port_id);
3026 rte_free(internals->vlan_filter_bmpmem);
3030 return eth_dev->data->port_id;
3033 rte_free(internals);
3034 if (eth_dev != NULL)
3035 eth_dev->data->dev_private = NULL;
3036 rte_eth_dev_release_port(eth_dev);
3041 bond_probe(struct rte_vdev_device *dev)
3044 struct bond_dev_private *internals;
3045 struct rte_kvargs *kvlist;
3046 uint8_t bonding_mode, socket_id, agg_mode;
3047 int arg_count, port_id;
3049 struct rte_eth_dev *eth_dev;
3054 name = rte_vdev_device_name(dev);
3055 RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3057 if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3058 eth_dev = rte_eth_dev_attach_secondary(name);
3060 RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3063 /* TODO: request info from primary to set up Rx and Tx */
3064 eth_dev->dev_ops = &default_dev_ops;
3065 eth_dev->device = &dev->device;
3066 rte_eth_dev_probing_finish(eth_dev);
3070 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3071 pmd_bond_init_valid_arguments);
3075 /* Parse link bonding mode */
3076 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3077 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3078 &bond_ethdev_parse_slave_mode_kvarg,
3079 &bonding_mode) != 0) {
3080 RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3085 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3090 /* Parse socket id to create bonding device on */
3091 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3092 if (arg_count == 1) {
3093 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3094 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3096 RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3097 "bonded device %s", name);
3100 } else if (arg_count > 1) {
3101 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3102 "bonded device %s", name);
3105 socket_id = rte_socket_id();
3108 dev->device.numa_node = socket_id;
3110 /* Create link bonding eth device */
3111 port_id = bond_alloc(dev, bonding_mode);
3113 RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on "
3114 "socket %u.", name, bonding_mode, socket_id);
3117 internals = rte_eth_devices[port_id].data->dev_private;
3118 internals->kvlist = kvlist;
3120 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3121 if (rte_kvargs_process(kvlist,
3122 PMD_BOND_AGG_MODE_KVARG,
3123 &bond_ethdev_parse_slave_agg_mode_kvarg,
3126 "Failed to parse agg selection mode for bonded device %s",
3131 if (internals->mode == BONDING_MODE_8023AD)
3132 internals->mode4.agg_selection = agg_mode;
3134 internals->mode4.agg_selection = AGG_STABLE;
3137 rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3138 RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3139 "socket %u.", name, port_id, bonding_mode, socket_id);
3143 rte_kvargs_free(kvlist);
3149 bond_remove(struct rte_vdev_device *dev)
3151 struct rte_eth_dev *eth_dev;
3152 struct bond_dev_private *internals;
3158 name = rte_vdev_device_name(dev);
3159 RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3161 /* now free all data allocation - for eth_dev structure,
3162 * dummy pci driver and internal (private) data
3165 /* find an ethdev entry */
3166 eth_dev = rte_eth_dev_allocated(name);
3167 if (eth_dev == NULL)
3170 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3171 return rte_eth_dev_release_port(eth_dev);
3173 RTE_ASSERT(eth_dev->device == &dev->device);
3175 internals = eth_dev->data->dev_private;
3176 if (internals->slave_count != 0)
3179 if (eth_dev->data->dev_started == 1) {
3180 bond_ethdev_stop(eth_dev);
3181 bond_ethdev_close(eth_dev);
3184 eth_dev->dev_ops = NULL;
3185 eth_dev->rx_pkt_burst = NULL;
3186 eth_dev->tx_pkt_burst = NULL;
3188 internals = eth_dev->data->dev_private;
3189 /* Try to release the mempool used in mode 6. If the bonded
3190 * device is not in mode 6, freeing NULL is not a problem.
3192 rte_mempool_free(internals->mode6.mempool);
3193 rte_bitmap_free(internals->vlan_filter_bmp);
3194 rte_free(internals->vlan_filter_bmpmem);
3196 rte_eth_dev_release_port(eth_dev);
3201 /* this part will resolve the slave port ids after all the other pdevs
3202 * and vdevs have been allocated */
3204 bond_ethdev_configure(struct rte_eth_dev *dev)
3206 const char *name = dev->device->name;
3207 struct bond_dev_private *internals = dev->data->dev_private;
3208 struct rte_kvargs *kvlist = internals->kvlist;
3210 uint16_t port_id = dev - rte_eth_devices;
3213 static const uint8_t default_rss_key[40] = {
3214 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3215 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3216 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3217 0xBE, 0xAC, 0x01, 0xFA
3223 * If RSS is enabled, fill the table with default values and
3224 * set the key to the value specified in the port RSS configuration.
3225 * Fall back to the default RSS key if no key is specified.
3227 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3228 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3229 internals->rss_key_len =
3230 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3231 memcpy(internals->rss_key,
3232 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3233 internals->rss_key_len);
3235 internals->rss_key_len = sizeof(default_rss_key);
3236 memcpy(internals->rss_key, default_rss_key,
3237 internals->rss_key_len);
3240 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3241 internals->reta_conf[i].mask = ~0LL;
3242 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3243 internals->reta_conf[i].reta[j] =
3244 (i * RTE_RETA_GROUP_SIZE + j) %
3245 dev->data->nb_rx_queues;
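/*
 * E.g. with 4 Rx queues the default RETA built here maps entries to
 * queues 0,1,2,3,0,1,2,3,... spreading flows round-robin across queues.
 */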
3249 /* set the max_rx_pktlen */
3250 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3253 * If there is no kvlist, this bonded device was created
3254 * through the bonding API.
3259 /* Parse MAC address for bonded device */
3260 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3261 if (arg_count == 1) {
3262 struct rte_ether_addr bond_mac;
3264 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3265 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3266 RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3271 /* Set MAC address */
3272 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3274 "Failed to set mac address on bonded device %s",
3278 } else if (arg_count > 1) {
3280 "MAC address can be specified only once for bonded device %s",
3285 /* Parse/set balance mode transmit policy */
3286 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3287 if (arg_count == 1) {
3288 uint8_t xmit_policy;
3290 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3291 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3294 "Invalid xmit policy specified for bonded device %s",
3299 /* Set balance mode transmit policy */
3300 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3302 "Failed to set balance xmit policy on bonded device %s",
3306 } else if (arg_count > 1) {
3308 "Transmit policy can be specified only once for bonded device %s",
3313 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3314 if (rte_kvargs_process(kvlist,
3315 PMD_BOND_AGG_MODE_KVARG,
3316 &bond_ethdev_parse_slave_agg_mode_kvarg,
3319 "Failed to parse agg selection mode for bonded device %s",
3322 if (internals->mode == BONDING_MODE_8023AD) {
3323 int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3327 "Invalid args for agg selection set for bonded device %s",
3334 /* Parse/add slave ports to bonded device */
3335 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3336 struct bond_ethdev_slave_ports slave_ports;
3339 memset(&slave_ports, 0, sizeof(slave_ports));
3341 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3342 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3344 "Failed to parse slave ports for bonded device %s",
3349 for (i = 0; i < slave_ports.slave_count; i++) {
3350 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3352 "Failed to add port %d as slave to bonded device %s",
3353 slave_ports.slaves[i], name);
3358 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3362 /* Parse/set primary slave port id */
3363 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3364 if (arg_count == 1) {
3365 uint16_t primary_slave_port_id;
3367 if (rte_kvargs_process(kvlist,
3368 PMD_BOND_PRIMARY_SLAVE_KVARG,
3369 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3370 &primary_slave_port_id) < 0) {
3372 "Invalid primary slave port id specified for bonded device %s",
3377 /* Set the primary slave port */
3378 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3381 "Failed to set primary slave port %d on bonded device %s",
3382 primary_slave_port_id, name);
3385 } else if (arg_count > 1) {
3387 "Primary slave can be specified only once for bonded device %s",
3392 /* Parse link status monitor polling interval */
3393 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3394 if (arg_count == 1) {
3395 uint32_t lsc_poll_interval_ms;
3397 if (rte_kvargs_process(kvlist,
3398 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3399 &bond_ethdev_parse_time_ms_kvarg,
3400 &lsc_poll_interval_ms) < 0) {
3402 "Invalid lsc polling interval value specified for bonded"
3403 " device %s", name);
3407 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3410 "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3411 lsc_poll_interval_ms, name);
3414 } else if (arg_count > 1) {
3416 "LSC polling interval can be specified only once for bonded"
3417 " device %s", name);
3421 /* Parse link up interrupt propagation delay */
3422 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3423 if (arg_count == 1) {
3424 uint32_t link_up_delay_ms;
3426 if (rte_kvargs_process(kvlist,
3427 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3428 &bond_ethdev_parse_time_ms_kvarg,
3429 &link_up_delay_ms) < 0) {
3431 "Invalid link up propagation delay value specified for"
3432 " bonded device %s", name);
3436 /* Set link up propagation delay */
3437 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3440 "Failed to set link up propagation delay (%u ms) on bonded"
3441 " device %s", link_up_delay_ms, name);
3444 } else if (arg_count > 1) {
3446 "Link up propagation delay can be specified only once for"
3447 " bonded device %s", name);
3451 /* Parse link down interrupt propagation delay */
3452 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3453 if (arg_count == 1) {
3454 uint32_t link_down_delay_ms;
3456 if (rte_kvargs_process(kvlist,
3457 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3458 &bond_ethdev_parse_time_ms_kvarg,
3459 &link_down_delay_ms) < 0) {
3461 "Invalid link down propagation delay value specified for"
3462 " bonded device %s", name);
3466 /* Set link down propagation delay */
3467 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3470 "Failed to set link down propagation delay (%u ms) on bonded device %s",
3471 link_down_delay_ms, name);
3474 } else if (arg_count > 1) {
3476 "Link down propagation delay can be specified only once for bonded device %s",
3484 static struct rte_vdev_driver pmd_bond_drv = {
3485 .probe = bond_probe,
3486 .remove = bond_remove,
3489 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3490 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3492 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3496 "xmit_policy=[l2 | l23 | l34] "
3497 "agg_mode=[count | stable | bandwidth] "
3500 "lsc_poll_period_ms=<int> "
3502 "down_delay=<int>");
3506 RTE_INIT(bond_init_log)
3508 bond_logtype = rte_log_register("pmd.net.bond");
3509 if (bond_logtype >= 0)
3510 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);