/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"
#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
static inline size_t
get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
		rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
		struct rte_vlan_hdr *vlan_hdr =
			(struct rte_vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct rte_vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		/* A second tag may follow an outer VLAN/QinQ tag */
		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct rte_vlan_hdr);
		}
	}
	return vlan_offset;
}
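
/*
 * Usage sketch (illustrative, not part of the driver): callers read the
 * Ethernet type, let get_vlan_offset() skip up to two VLAN tags, then
 * locate the L3 header:
 *
 *	uint16_t proto = eth_hdr->ether_type;
 *	size_t off = get_vlan_offset(eth_hdr, &proto);
 *	if (proto == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
 *		struct rte_ipv4_hdr *ip4 = (struct rte_ipv4_hdr *)
 *				((char *)(eth_hdr + 1) + off);
 *		...
 *	}
 */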
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_total = 0;
	uint16_t slave_count;
	uint16_t active_slave;
	int i;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	internals = bd_rx_q->dev_private;
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave =
			rte_eth_rx_burst(internals->active_slaves[active_slave],
					 bd_rx_q->queue_id,
					 bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;
	return num_rx_total;
}
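
/*
 * The saved starting index advances on every burst, so over successive
 * calls each active slave gets an equal chance to fill the array first.
 * A minimal polling sketch on the bonded port (bond_port_id and queue_id
 * are the application's own values):
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t nb = rte_eth_rx_burst(bond_port_id, queue_id, pkts, 32);
 */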
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	/* In active-backup mode only the current primary receives traffic */
	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be =
		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}
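
/*
 * A frame counts as LACP here only when it carries no VLAN tag, its
 * ethertype is the IEEE slow-protocols type (0x8809) and the subtype is a
 * LACPDU or a marker PDU.
 */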
/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = bond_dev->data->dev_private;

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret != 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities don't allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}
int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = bond_dev->data->dev_private;
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify that all slaves in the bonding device support flow director */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = bond_dev->data->dev_private;
	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}
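
/*
 * The rule created above steers every frame with the slow-protocols
 * ethertype (0x8809) into the dedicated LACP Rx queue, keeping control
 * traffic off the data-path queues. Roughly equivalent testpmd syntax
 * (illustrative only):
 *
 *	flow create <slave_port> ingress pattern eth type is 0x8809 / end
 *		actions queue index <rx_qid> / end
 */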
static inline uint16_t
rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
		bool dedicated_rxq)
{
	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct rte_eth_dev *bonded_eth_dev =
			&rte_eth_devices[internals->port_id];
	struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
	struct rte_ether_hdr *hdr;

	const uint16_t ether_type_slow_be =
		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;	/* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t subtype;
	uint16_t i;
	uint16_t j;
	uint16_t k;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
				COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {
			/* If packet is not pure L2 and is known, skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove packet from array if:
			 * - it is slow packet but no dedicated rxq is present,
			 * - slave is not in collecting state,
			 * - bonding interface is not in promiscuous mode and
			 *   packet is not multicast and address does not match,
			 */
			if (unlikely(
				(!dedicated_rxq &&
				 is_lacp_packets(hdr->ether_type, subtype,
						bufs[j])) ||
				!collecting ||
				(!promisc &&
				 !rte_is_multicast_ether_addr(&hdr->d_addr) &&
				 !rte_is_same_ether_addr(bond_mac,
						&hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
						internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	return rx_burst_8023ad(queue, bufs, nb_pkts, false);
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	return rx_burst_8023ad(queue, bufs, nb_pkts, true);
}
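
/*
 * Both mode-4 Rx paths share rx_burst_8023ad(); the "fast queue" variant
 * runs when a dedicated hardware queue already filters slow-protocol
 * frames, so the per-packet LACP check can be skipped on the hot path.
 */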
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
	switch (arp_op) {
	case RTE_ARP_OP_REQUEST:
		strlcpy(buf, "ARP Request", buf_len);
		return;
	case RTE_ARP_OP_REPLY:
		strlcpy(buf, "ARP Reply", buf_len);
		return;
	case RTE_ARP_OP_REVREQUEST:
		strlcpy(buf, "Reverse ARP Request", buf_len);
		return;
	case RTE_ARP_OP_REVREPLY:
		strlcpy(buf, "Reverse ARP Reply", buf_len);
		return;
	case RTE_ARP_OP_INVREQUEST:
		strlcpy(buf, "Peer Identify Request", buf_len);
		return;
	case RTE_ARP_OP_INVREPLY:
		strlcpy(buf, "Peer Identify Reply", buf_len);
		return;
	default:
		break;
	}
	strlcpy(buf, "Unknown", buf_len);
	return;
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}
#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX packets number for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and increment stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}
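
/*
 * The table holds at most MAX_CLIENTS_NUMBER entries and only feeds the
 * mode-6 debug statistics; new clients are appended and never aged out.
 */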
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
		info,							\
		port,							\
		eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
		src_ip,							\
		eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
		dst_ip,							\
		arp_op, ++burstnumber)
#endif
static void
mode6_debug(const char __attribute__((unused)) *info,
	struct rte_ether_hdr *eth_h, uint16_t port,
	uint32_t __attribute__((unused)) *burstnumber)
{
	struct rte_ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct rte_arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	strlcpy(buf, info, 16);
#endif

	if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
		ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
		arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
				ArpOp, sizeof(ArpOp));
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct rte_ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate the slave mbuf arrays with the packets to send on each slave */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
				       &slave_bufs[i][num_tx_slave],
				       tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
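
/*
 * On a partial transmit the unsent mbufs are compacted to the tail of
 * bufs[], preserving the rte_eth_tx_burst() contract: the caller still
 * owns (and must free or retry) every mbuf beyond the returned count.
 */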
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}
static inline uint16_t
ether_hash(struct rte_ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}
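
/*
 * All three helpers fold source and destination together with XOR, so the
 * result is symmetric: both directions of a flow hash to the same slave.
 * Worked example for ipv4_hash() (XOR is endianness-agnostic):
 *
 *	src 192.168.0.1 = 0xC0A80001, dst 192.168.0.2 = 0xC0A80002
 *	hash = 0xC0A80001 ^ 0xC0A80002 = 0x00000003
 */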
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct rte_ether_hdr *eth_hdr;
	uint32_t hash;
	int i;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);

		hash = ether_hash(eth_hdr);

		slaves[i] = (hash ^= hash >> 8) % slave_count;
	}
}
void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	uint16_t i;
	struct rte_ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	uint32_t hash, l3hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
		l3hash = 0;

		proto = eth_hdr->ether_type;
		hash = ether_hash(eth_hdr);

		vlan_offset = get_vlan_offset(eth_hdr, &proto);

		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv4_hash(ipv4_hdr);

		} else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);
		}

		hash = hash ^ l3hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}
void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct rte_ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	int i;

	struct rte_udp_hdr *udp_hdr;
	struct rte_tcp_hdr *tcp_hdr;
	uint32_t hash, l3hash, l4hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
		size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
		proto = eth_hdr->ether_type;
		vlan_offset = get_vlan_offset(eth_hdr, &proto);
		l3hash = 0;
		l4hash = 0;

		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			size_t ip_hdr_offset;

			l3hash = ipv4_hash(ipv4_hdr);

			/* there is no L4 header in fragmented packet */
			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
					== 0)) {
				ip_hdr_offset = (ipv4_hdr->version_ihl
					& RTE_IPV4_HDR_IHL_MASK) *
					RTE_IPV4_IHL_MULTIPLIER;

				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
					tcp_hdr = (struct rte_tcp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
							< pkt_end)
						l4hash = HASH_L4_PORTS(tcp_hdr);
				} else if (ipv4_hdr->next_proto_id ==
						IPPROTO_UDP) {
					udp_hdr = (struct rte_udp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					if ((size_t)udp_hdr + sizeof(*udp_hdr)
							< pkt_end)
						l4hash = HASH_L4_PORTS(udp_hdr);
				}
			}
		} else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);

			if (ipv6_hdr->proto == IPPROTO_TCP) {
				tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
				udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}

		hash = l3hash ^ l4hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}
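
/*
 * These three functions implement the layer-2, layer-2+3 and layer-3+4
 * balance transmit policies; internals->burst_xmit_hash points at the one
 * selected via the xmit policy API (rte_eth_bond_xmit_policy_set()).
 */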
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint16_t slave;
};
void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}
static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
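
/*
 * link_bwg starts as the link rate in bytes per second and is then scaled
 * by the elapsed polling time in ms, so (link_bwg - 1000*load) compares
 * capacity against the observed byte count on a common scale. Quotient and
 * remainder are kept separately so bandwidth_cmp() can order slaves by
 * spare capacity using integer math only.
 */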
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint16_t slave_id;
	uint16_t i;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct rte_ether_hdr *ether_hdr;
	struct rte_ether_addr primary_slave_addr;
	struct rte_ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
			sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j],
					struct rte_ether_hdr *);
			if (rte_is_same_ether_addr(&ether_hdr->s_addr,
					&primary_slave_addr))
				rte_ether_addr_copy(&active_slave_addr,
						&ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}
void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}
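
/*
 * bond_tlb_enable() invokes the reordering callback directly; the callback
 * then re-arms itself with rte_eal_alarm_set() every REORDER_PERIOD_MS
 * until bond_tlb_disable() cancels it.
 */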
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_BOND_LOG(ERR,
						"Failed to allocate ARP packet from pool");
					continue;
				}
				pkt_size = sizeof(struct rte_ether_hdr) +
					sizeof(struct rte_arp_hdr) +
					client_info->vlan_count *
					sizeof(struct rte_vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
						struct rte_ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
						struct rte_ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}
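
/*
 * Mode 6 (ALB) therefore splits traffic three ways: ARP packets are
 * rewritten and pinned per client, driver-generated ARP updates rebalance
 * existing clients, and all remaining traffic falls back to the TLB
 * transmit policy above.
 */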
static inline uint16_t
tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
		uint16_t *slave_port_ids, uint16_t slave_count)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	/* Array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	/*
	 * Populate each slave's mbuf array with the packets to be sent on it,
	 * selecting the output slave using the configured xmit-policy hash
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
			       &slave_bufs[i][slave_tx_count],
			       slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting
	 */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);
	return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
			slave_count);
}
static inline uint16_t
tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
		bool dedicated_txq)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	uint16_t slave_tx_count;

	uint16_t i;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	if (dedicated_txq)
		goto skip_tx_ring;

	/* Check for LACP control packets and send if available */
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
		struct rte_mbuf *ctrl_pkt = NULL;

		if (likely(rte_ring_empty(port->tx_ring)))
			continue;

		if (rte_ring_dequeue(port->tx_ring,
				(void **)&ctrl_pkt) != -ENOENT) {
			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
					bd_tx_q->queue_id, &ctrl_pkt, 1);
			/*
			 * re-enqueue LAG control plane packets to buffering
			 * ring if transmission fails so the packet isn't lost.
			 */
			if (slave_tx_count != 1)
				rte_ring_enqueue(port->tx_ring, ctrl_pkt);
		}
	}

skip_tx_ring:
	if (unlikely(nb_bufs == 0))
		return 0;

	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (unlikely(dist_slave_count < 1))
		return 0;

	return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
			dist_slave_count);
}
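
/*
 * Data packets are only spread across slaves whose 802.3ad actor state is
 * DISTRIBUTING; a slave that is synchronized but not yet distributing
 * still drains LACPDUs from its tx_ring without carrying user traffic.
 */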
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	return tx_burst_8023ad(queue, bufs, nb_bufs, false);
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	return tx_burst_8023ad(queue, bufs, nb_bufs, true);
}
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint8_t tx_failed_flag = 0;
	uint16_t num_of_slaves;

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}
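
/*
 * Broadcast mode clones by reference counting: each mbuf's refcnt is
 * raised by (num_of_slaves - 1) before transmission, so each slave's
 * eventual rte_pktmbuf_free() drops one reference and the buffer survives
 * until the last slave is done with it.
 */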
static void
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		/**
		 * If in mode 4 then save the link properties of the first
		 * slave, all subsequent slaves must match these properties
		 */
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		bond_link->link_autoneg = slave_link->link_autoneg;
		bond_link->link_duplex = slave_link->link_duplex;
		bond_link->link_speed = slave_link->link_speed;
	} else {
		/**
		 * In any other mode the link properties are set to default
		 * values of AUTONEG/DUPLEX
		 */
		ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
		ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
	}
}
static int
link_properties_valid(struct rte_eth_dev *ethdev,
		struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		if (bond_link->link_duplex != slave_link->link_duplex ||
			bond_link->link_autoneg != slave_link->link_autoneg ||
			bond_link->link_speed != slave_link->link_speed)
			return -1;
	}

	return 0;
}
int
mac_address_get(struct rte_eth_dev *eth_dev,
		struct rte_ether_addr *dst_mac_addr)
{
	struct rte_ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (dst_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	rte_ether_addr_copy(mac_addr, dst_mac_addr);
	return 0;
}
int
mac_address_set(struct rte_eth_dev *eth_dev,
		struct rte_ether_addr *new_mac_addr)
{
	struct rte_ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (new_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	/* If new MAC is different to current MAC then update */
	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

	return 0;
}
static const struct rte_ether_addr null_mac_addr;

/*
 * Add additional MAC addresses to the slave
 */
static int
slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
		uint16_t slave_port_id)
{
	int i, ret;
	struct rte_ether_addr *mac_addr;

	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
			break;

		ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
		if (ret < 0) {
			/* rollback */
			for (i--; i > 0; i--)
				rte_eth_dev_mac_addr_remove(slave_port_id,
					&bonded_eth_dev->data->mac_addrs[i]);
			return ret;
		}
	}

	return 0;
}
/*
 * Remove additional MAC addresses from the slave
 */
static int
slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
		uint16_t slave_port_id)
{
	int i, rc, ret;
	struct rte_ether_addr *mac_addr;

	rc = 0;
	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
			break;

		ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
		/* save only the first error */
		if (ret < 0 && rc == 0)
			rc = ret;
	}

	return rc;
}
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	int i;

	/* Update slave devices MAC addresses */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++) {
			if (rte_eth_dev_default_mac_addr_set(
					internals->slaves[i].port_id,
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (rte_eth_dev_default_mac_addr_set(
						internals->primary_port,
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					return -1;
				}
			} else {
				if (rte_eth_dev_default_mac_addr_set(
						internals->slaves[i].port_id,
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
					return -1;
				}
			}
		}
	}

	return 0;
}
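
/*
 * MAC policy summary: modes that transmit on all slaves (round robin,
 * balance, broadcast) force the bonded MAC onto every slave; 802.3ad
 * delegates to the mode-4 state machine; the active-backup family keeps
 * the bonded MAC on the primary only and restores every other slave's
 * persisted address.
 */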
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_8023AD:
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		if (internals->mode4.dedicated_queues.enabled == 0) {
			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
			RTE_BOND_LOG(WARNING,
				"Using mode 4, it is necessary to do TX burst "
				"and RX burst at least every 100ms.");
		} else {
			/* Use flow director's optimization */
			eth_dev->rx_pkt_burst =
					bond_ethdev_rx_burst_8023ad_fast_queue;
			eth_dev->tx_pkt_burst =
					bond_ethdev_tx_burst_8023ad_fast_queue;
		}
		break;
	case BONDING_MODE_TLB:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_ALB:
		if (bond_mode_alb_enable(eth_dev) != 0)
			return -1;

		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}
static int
slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	int errval = 0;
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];

	if (port->slow_pool == NULL) {
		char mem_name[256];
		int slave_id = slave_eth_dev->data->port_id;

		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
				slave_id);
		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			slave_eth_dev->data->numa_node);

		/* Any memory allocation failure in initialization is critical because
		 * resources can't be freed, so reinitialization is impossible. */
		if (port->slow_pool == NULL) {
			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
				slave_id, mem_name, rte_strerror(rte_errno));
		}
	}

	if (internals->mode4.dedicated_queues.enabled == 1) {
		/* Configure slow Rx queue */
		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.rx_qid, 128,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL, port->slow_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.rx_qid,
				errval);
			return errval;
		}

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid, 512,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid,
				errval);
			return errval;
		}
	}
	return 0;
}
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;
	uint16_t nb_rx_queues;
	uint16_t nb_tx_queues;

	int errval;
	uint16_t q_id;
	struct rte_flow_error flow_error;

	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	/* Stop slave */
	rte_eth_dev_stop(slave_eth_dev->data->port_id);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* If RSS is enabled for bonding, try to enable it for slaves */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
		if (internals->rss_key_len != 0) {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
					internals->rss_key_len;
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
					internals->rss_key;
		} else {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
		}

		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
	}

	if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
			DEV_RX_OFFLOAD_VLAN_FILTER)
		slave_eth_dev->data->dev_conf.rxmode.offloads |=
				DEV_RX_OFFLOAD_VLAN_FILTER;
	else
		slave_eth_dev->data->dev_conf.rxmode.offloads &=
				~DEV_RX_OFFLOAD_VLAN_FILTER;

	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;

	/* Reserve an extra queue pair for slow-protocol traffic when
	 * dedicated queues are enabled in 802.3ad mode */
	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			nb_rx_queues++;
			nb_tx_queues++;
		}
	}
	errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
			bonded_eth_dev->data->mtu);
	if (errval != 0 && errval != -ENOTSUP) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Configure device */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			nb_rx_queues, nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Setup Rx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}
	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
				!= 0)
			return errval;

		if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
				slave_eth_dev->data->port_id) != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id, q_id, errval);
			return -1;
		}

		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
			rte_flow_destroy(slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
					&flow_error);

		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
				slave_eth_dev->data->port_id);
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}
	/* If RSS is enabled for bonding, synchronize RETA */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		int i;
		struct bond_dev_private *internals;

		internals = bonded_eth_dev->data->dev_private;

		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
				errval = rte_eth_dev_rss_reta_update(
						slave_eth_dev->data->port_id,
						&internals->reta_conf[0],
						internals->slaves[i].reta_size);
				if (errval != 0) {
					RTE_BOND_LOG(WARNING,
						"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
						" RSS Configuration for bonding may be inconsistent.",
						slave_eth_dev->data->port_id, errval);
				}
				break;
			}
		}
	}

	/* If lsc interrupt is set, check initial slave's link status */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
			NULL);
	}

	return 0;
}
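
/*
 * slave_configure() is called for every slave from bond_ethdev_start(), so
 * restarting the bonded port re-propagates MTU, offloads, RSS settings and
 * queue layout to all slaves before they are restarted.
 */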
void
slave_remove(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	uint16_t i;

	for (i = 0; i < internals->slave_count; i++)
		if (internals->slaves[i].port_id ==
				slave_eth_dev->data->port_id)
			break;

	if (i < (internals->slave_count - 1)) {
		struct rte_flow *flow;

		memmove(&internals->slaves[i], &internals->slaves[i + 1],
				sizeof(internals->slaves[0]) *
				(internals->slave_count - i - 1));
		TAILQ_FOREACH(flow, &internals->flow_list, next) {
			memmove(&flow->flows[i], &flow->flows[i + 1],
				sizeof(flow->flows[0]) *
				(internals->slave_count - i - 1));
			flow->flows[internals->slave_count - 1] = NULL;
		}
	}

	internals->slave_count--;

	/* force reconfiguration of slave interfaces */
	_rte_eth_dev_reset(slave_eth_dev);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* Mark slave devices that don't support interrupts so we can
	 * compensate when we start the bond
	 */
	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
		slave_details->link_status_poll_enabled = 1;
	}

	slave_details->link_status_wait_to_complete = 0;
	/* clean tlb_last_obytets when adding port for bonding device */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct rte_ether_addr));
}
void
bond_ethdev_primary_set(struct bond_dev_private *internals,
		uint16_t slave_port_id)
{
	int i;

	if (internals->active_slave_count < 1)
		internals->current_primary_port = slave_port_id;
	else
		/* Search bonded device slave ports for new proposed primary port */
		for (i = 0; i < internals->active_slave_count; i++) {
			if (internals->active_slaves[i] == slave_port_id)
				internals->current_primary_port = slave_port_id;
		}
}
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (check_for_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		goto out_err;
	}

	if (internals->user_defined_mac == 0) {
		struct rte_ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			goto out_err;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			goto out_err;
		}
	}

	/* If bonded device is configured in promiscuous mode then re-apply config */
	if (internals->promiscuous_en)
		bond_ethdev_promiscuous_enable(eth_dev);

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			internals->mode4.dedicated_queues.rx_qid =
					eth_dev->data->nb_rx_queues;
			internals->mode4.dedicated_queues.tx_qid =
					eth_dev->data->nb_tx_queues;
		}
	}

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		struct rte_eth_dev *slave_ethdev =
				&(rte_eth_devices[internals->slaves[i].port_id]);
		if (slave_configure(eth_dev, slave_ethdev) != 0) {
			RTE_BOND_LOG(ERR,
				"bonded port (%d) failed to reconfigure slave device (%d)",
				eth_dev->data->port_id,
				internals->slaves[i].port_id);
			goto out_err;
		}
		/* We will need to poll for link status if any slave doesn't
		 * support interrupts
		 */
		if (internals->slaves[i].link_status_poll_enabled)
			internals->link_status_polling_enabled = 1;
	}

	/* start polling if needed */
	if (internals->link_status_polling_enabled) {
		rte_eal_alarm_set(
			internals->link_status_polling_interval_ms * 1000,
			bond_ethdev_slave_link_status_change_monitor,
			(void *)&rte_eth_devices[internals->port_id]);
	}

	/* Update all slave devices MACs */
	if (mac_address_slaves_update(eth_dev) != 0)
		goto out_err;

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB)
		bond_tlb_enable(internals);

	return 0;

out_err:
	eth_dev->data->dev_started = 0;
	return -1;
}
static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
	uint16_t i;

	if (dev->data->rx_queues != NULL) {
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			rte_free(dev->data->rx_queues[i]);
			dev->data->rx_queues[i] = NULL;
		}
		dev->data->nb_rx_queues = 0;
	}

	if (dev->data->tx_queues != NULL) {
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			rte_free(dev->data->tx_queues[i]);
			dev->data->tx_queues[i] = NULL;
		}
		dev->data->nb_tx_queues = 0;
	}
}
void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	uint16_t i;

	if (internals->mode == BONDING_MODE_8023AD) {
		struct port *port;
		void *pkt = NULL;

		bond_mode_8023ad_stop(eth_dev);

		/* Discard all messages to/from mode 4 state machines */
		for (i = 0; i < internals->active_slave_count; i++) {
			port = &bond_mode_8023ad_ports[internals->active_slaves[i]];

			RTE_ASSERT(port->rx_ring != NULL);
			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);

			RTE_ASSERT(port->tx_ring != NULL);
			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);
		}
	}

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB) {
		bond_tlb_disable(internals);
		for (i = 0; i < internals->active_slave_count; i++)
			tlb_last_obytets[internals->active_slaves[i]] = 0;
	}

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 0;

	internals->link_status_polling_enabled = 0;
	for (i = 0; i < internals->slave_count; i++) {
		uint16_t slave_id = internals->slaves[i].port_id;
		if (find_slave_by_id(internals->active_slaves,
				internals->active_slave_count, slave_id) !=
						internals->active_slave_count) {
			internals->slaves[i].last_link_status = 0;
			rte_eth_dev_stop(slave_id);
			deactivate_slave(eth_dev, slave_id);
		}
	}
}
void
bond_ethdev_close(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	uint16_t bond_port_id = internals->port_id;
	int skipped = 0;
	struct rte_flow_error ferror;

	RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
	while (internals->slave_count != skipped) {
		uint16_t port_id = internals->slaves[skipped].port_id;

		rte_eth_dev_stop(port_id);

		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
			RTE_BOND_LOG(ERR,
				"Failed to remove port %d from bonded device %s",
				port_id, dev->device->name);
			skipped++;
		}
	}
	bond_flow_ops.flush(dev, &ferror);
	bond_ethdev_free_queues(dev);
	rte_bitmap_reset(internals->vlan_filter_bmp);
}
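
/*
 * Close stops and removes every slave before flushing flow rules and
 * releasing the queues; a slave whose removal fails is skipped and simply
 * remains stopped.
 */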
/* forward declaration */
static int bond_ethdev_configure(struct rte_eth_dev *dev);
2108 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2110 struct bond_dev_private *internals = dev->data->dev_private;
2112 uint16_t max_nb_rx_queues = UINT16_MAX;
2113 uint16_t max_nb_tx_queues = UINT16_MAX;
2114 uint16_t max_rx_desc_lim = UINT16_MAX;
2115 uint16_t max_tx_desc_lim = UINT16_MAX;
2117 dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2119 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2120 internals->candidate_max_rx_pktlen :
2121 RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2123 /* Max number of tx/rx queues that the bonded device can support is the
2124 * minimum values of the bonded slaves, as all slaves must be capable
2125 * of supporting the same number of tx/rx queues.
2127 if (internals->slave_count > 0) {
2128 struct rte_eth_dev_info slave_info;
2131 for (idx = 0; idx < internals->slave_count; idx++) {
2132 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2135 if (slave_info.max_rx_queues < max_nb_rx_queues)
2136 max_nb_rx_queues = slave_info.max_rx_queues;
2138 if (slave_info.max_tx_queues < max_nb_tx_queues)
2139 max_nb_tx_queues = slave_info.max_tx_queues;
2141 if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2142 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2144 if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2145 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2149 dev_info->max_rx_queues = max_nb_rx_queues;
2150 dev_info->max_tx_queues = max_nb_tx_queues;
2152 memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2153 sizeof(dev_info->default_rxconf));
2154 memcpy(&dev_info->default_txconf, &internals->default_txconf,
2155 sizeof(dev_info->default_txconf));
2157 dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2158 dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2161 * If dedicated hw queues are enabled for the link bonding device in LACP mode
2162 * then we need to reduce the maximum number of data path queues by 1.
2164 if (internals->mode == BONDING_MODE_8023AD &&
2165 internals->mode4.dedicated_queues.enabled == 1) {
2166 dev_info->max_rx_queues--;
2167 dev_info->max_tx_queues--;
2170 dev_info->min_rx_bufsize = 0;
2172 dev_info->rx_offload_capa = internals->rx_offload_capa;
2173 dev_info->tx_offload_capa = internals->tx_offload_capa;
2174 dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2175 dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2176 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2178 dev_info->reta_size = internals->reta_size;
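/*
 * Illustrative sketch (not part of the driver): an application querying the
 * bonded port sees the minima computed above, so queue counts and descriptor
 * ring sizes taken from dev_info are safe for every slave. The helper name
 * and the 512-descriptor requirement below are hypothetical.
 */
static __rte_unused int
example_check_bonded_limits(uint16_t bonded_port_id)
{
	struct rte_eth_dev_info dev_info;

	/* Limits reported here are already clamped to the weakest slave */
	rte_eth_dev_info_get(bonded_port_id, &dev_info);

	if (dev_info.max_rx_queues < 1 || dev_info.max_tx_queues < 1)
		return -1;

	/* Descriptor counts must also respect the aggregated limits */
	return dev_info.rx_desc_lim.nb_max >= 512 ? 0 : -1;
}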
2182 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2186 struct bond_dev_private *internals = dev->data->dev_private;
2188 /* don't do this while a slave is being added */
2189 rte_spinlock_lock(&internals->lock);
2192 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2194 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2196 for (i = 0; i < internals->slave_count; i++) {
2197 uint16_t port_id = internals->slaves[i].port_id;
2199 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2201 RTE_BOND_LOG(WARNING,
2202 "Setting VLAN filter on slave port %u not supported.",
2206 rte_spinlock_unlock(&internals->lock);
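/*
 * Illustrative sketch (not part of the driver): enabling a VLAN filter on the
 * bonded port is enough; the handler above propagates it to every slave and
 * records it in the bitmap so slaves added later can be re-programmed. The
 * helper name and the VLAN id 100 are hypothetical.
 */
static __rte_unused int
example_enable_vlan_on_bond(uint16_t bonded_port_id)
{
	/* One call covers all current (and future) slaves */
	return rte_eth_dev_vlan_filter(bonded_port_id, 100, 1);
}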
2211 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2212 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2213 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2215 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2216 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2217 0, dev->data->numa_node);
2218 if (bd_rx_q == NULL)
2221 bd_rx_q->queue_id = rx_queue_id;
2222 bd_rx_q->dev_private = dev->data->dev_private;
2224 bd_rx_q->nb_rx_desc = nb_rx_desc;
2226 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2227 bd_rx_q->mb_pool = mb_pool;
2229 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2235 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2236 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2237 const struct rte_eth_txconf *tx_conf)
2239 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2240 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2241 0, dev->data->numa_node);
2243 if (bd_tx_q == NULL)
2246 bd_tx_q->queue_id = tx_queue_id;
2247 bd_tx_q->dev_private = dev->data->dev_private;
2249 bd_tx_q->nb_tx_desc = nb_tx_desc;
2250 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2252 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
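/*
 * Illustrative sketch (not part of the driver): the setup handlers above only
 * store the queue parameters; they are replayed on each slave when the bonded
 * device is (re)started. A typical application-side setup might look like
 * this; the helper name, mempool, and 256-descriptor sizes are hypothetical.
 */
static __rte_unused int
example_setup_bonded_queues(uint16_t bonded_port_id, struct rte_mempool *mp)
{
	int ret;

	/* NULL rxconf/txconf selects the defaults advertised in dev_info */
	ret = rte_eth_rx_queue_setup(bonded_port_id, 0, 256,
			rte_socket_id(), NULL, mp);
	if (ret != 0)
		return ret;

	return rte_eth_tx_queue_setup(bonded_port_id, 0, 256,
			rte_socket_id(), NULL);
}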
2258 bond_ethdev_rx_queue_release(void *queue)
2267 bond_ethdev_tx_queue_release(void *queue)
2276 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2278 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2279 struct bond_dev_private *internals;
2281 /* Default value for polling slave found is true as we don't want to
2282 * disable the polling thread if we cannot get the lock */
2283 int i, polling_slave_found = 1;
2288 bonded_ethdev = cb_arg;
2289 internals = bonded_ethdev->data->dev_private;
2291 if (!bonded_ethdev->data->dev_started ||
2292 !internals->link_status_polling_enabled)
2295 /* If the device is currently being configured then don't check the slaves'
2296 * link status; wait until the next period */
2297 if (rte_spinlock_trylock(&internals->lock)) {
2298 if (internals->slave_count > 0)
2299 polling_slave_found = 0;
2301 for (i = 0; i < internals->slave_count; i++) {
2302 if (!internals->slaves[i].link_status_poll_enabled)
2305 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2306 polling_slave_found = 1;
2308 /* Update slave link status */
2309 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2310 internals->slaves[i].link_status_wait_to_complete);
2312 /* if link status has changed since last checked then call the lsc callback */
2314 if (slave_ethdev->data->dev_link.link_status !=
2315 internals->slaves[i].last_link_status) {
2316 internals->slaves[i].last_link_status =
2317 slave_ethdev->data->dev_link.link_status;
2319 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2320 RTE_ETH_EVENT_INTR_LSC,
2321 &bonded_ethdev->data->port_id,
2325 rte_spinlock_unlock(&internals->lock);
2328 if (polling_slave_found)
2329 /* Set alarm to continue monitoring link status of slave ethdev's */
2330 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2331 bond_ethdev_slave_link_status_change_monitor, cb_arg);
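/*
 * Illustrative sketch (not part of the driver): the alarm-driven monitor
 * above re-arms itself every link_status_polling_interval_ms, which an
 * application can tune through the public bonding API. The helper name and
 * the 50 ms interval are hypothetical.
 */
static __rte_unused int
example_set_poll_interval(uint16_t bonded_port_id)
{
	/* Poll slave link status every 50 ms instead of the default */
	return rte_eth_bond_link_monitoring_set(bonded_port_id, 50);
}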
2335 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2337 void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2339 struct bond_dev_private *bond_ctx;
2340 struct rte_eth_link slave_link;
2344 bond_ctx = ethdev->data->dev_private;
2346 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2348 if (ethdev->data->dev_started == 0 ||
2349 bond_ctx->active_slave_count == 0) {
2350 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2354 ethdev->data->dev_link.link_status = ETH_LINK_UP;
2356 if (wait_to_complete)
2357 link_update = rte_eth_link_get;
2359 link_update = rte_eth_link_get_nowait;
2361 switch (bond_ctx->mode) {
2362 case BONDING_MODE_BROADCAST:
2364 * Setting link speed to UINT32_MAX to ensure we pick up the
2365 * value of the first active slave
2367 ethdev->data->dev_link.link_speed = UINT32_MAX;
2370 * link speed is the minimum value of all the slaves' link speeds, as
2371 * packet loss will occur on a slave if transmission at rates
2372 * greater than this is attempted
2374 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2375 link_update(bond_ctx->active_slaves[idx], &slave_link);
2377 if (slave_link.link_speed <
2378 ethdev->data->dev_link.link_speed)
2379 ethdev->data->dev_link.link_speed =
2380 slave_link.link_speed;
2383 case BONDING_MODE_ACTIVE_BACKUP:
2384 /* Current primary slave */
2385 link_update(bond_ctx->current_primary_port, &slave_link);
2387 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2389 case BONDING_MODE_8023AD:
2390 ethdev->data->dev_link.link_autoneg =
2391 bond_ctx->mode4.slave_link.link_autoneg;
2392 ethdev->data->dev_link.link_duplex =
2393 bond_ctx->mode4.slave_link.link_duplex;
2394 /* fall through to update link speed */
2395 case BONDING_MODE_ROUND_ROBIN:
2396 case BONDING_MODE_BALANCE:
2397 case BONDING_MODE_TLB:
2398 case BONDING_MODE_ALB:
2401 * In these modes the maximum theoretical link speed is the sum of all the slaves' speeds
2404 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2406 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2407 link_update(bond_ctx->active_slaves[idx], &slave_link);
2409 ethdev->data->dev_link.link_speed +=
2410 slave_link.link_speed;
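/*
 * Illustrative sketch (not part of the driver): reading the bonded port's
 * link returns the aggregate computed above (minimum slave speed for
 * broadcast, the primary's speed for active-backup, the sum of the active
 * slaves otherwise). The helper name is hypothetical.
 */
static __rte_unused uint32_t
example_bonded_speed(uint16_t bonded_port_id)
{
	struct rte_eth_link link;

	rte_eth_link_get_nowait(bonded_port_id, &link);
	return link.link_status == ETH_LINK_UP ? link.link_speed : 0;
}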
2420 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2422 struct bond_dev_private *internals = dev->data->dev_private;
2423 struct rte_eth_stats slave_stats;
2426 for (i = 0; i < internals->slave_count; i++) {
2427 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2429 stats->ipackets += slave_stats.ipackets;
2430 stats->opackets += slave_stats.opackets;
2431 stats->ibytes += slave_stats.ibytes;
2432 stats->obytes += slave_stats.obytes;
2433 stats->imissed += slave_stats.imissed;
2434 stats->ierrors += slave_stats.ierrors;
2435 stats->oerrors += slave_stats.oerrors;
2436 stats->rx_nombuf += slave_stats.rx_nombuf;
2438 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2439 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2440 stats->q_opackets[j] += slave_stats.q_opackets[j];
2441 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2442 stats->q_obytes[j] += slave_stats.q_obytes[j];
2443 stats->q_errors[j] += slave_stats.q_errors[j];
2452 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2454 struct bond_dev_private *internals = dev->data->dev_private;
2457 for (i = 0; i < internals->slave_count; i++)
2458 rte_eth_stats_reset(internals->slaves[i].port_id);
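/*
 * Illustrative sketch (not part of the driver): stats read on the bonded
 * port are the per-slave sums accumulated above, so a single get/reset pair
 * covers every slave. The helper name is hypothetical.
 */
static __rte_unused uint64_t
example_bonded_rx_packets(uint16_t bonded_port_id)
{
	struct rte_eth_stats stats;

	if (rte_eth_stats_get(bonded_port_id, &stats) != 0)
		return 0;

	/* Resetting the bonded port resets every slave, as shown above */
	rte_eth_stats_reset(bonded_port_id);
	return stats.ipackets;
}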
2462 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2464 struct bond_dev_private *internals = eth_dev->data->dev_private;
2467 internals->promiscuous_en = 1;
2469 switch (internals->mode) {
2470 /* Promiscuous mode is propagated to all slaves */
2471 case BONDING_MODE_ROUND_ROBIN:
2472 case BONDING_MODE_BALANCE:
2473 case BONDING_MODE_BROADCAST:
2474 for (i = 0; i < internals->slave_count; i++)
2475 rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2477 /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2478 case BONDING_MODE_8023AD:
2480 /* Promiscuous mode is propagated only to primary slave */
2481 case BONDING_MODE_ACTIVE_BACKUP:
2482 case BONDING_MODE_TLB:
2483 case BONDING_MODE_ALB:
2485 /* Do not touch promisc when there cannot be primary ports */
2486 if (internals->slave_count == 0)
2488 rte_eth_promiscuous_enable(internals->current_primary_port);
2493 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2495 struct bond_dev_private *internals = dev->data->dev_private;
2498 internals->promiscuous_en = 0;
2500 switch (internals->mode) {
2501 /* Promiscuous mode is propagated to all slaves */
2502 case BONDING_MODE_ROUND_ROBIN:
2503 case BONDING_MODE_BALANCE:
2504 case BONDING_MODE_BROADCAST:
2505 for (i = 0; i < internals->slave_count; i++)
2506 rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2508 /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2509 case BONDING_MODE_8023AD:
2511 /* Promiscuous mode is propagated only to primary slave */
2512 case BONDING_MODE_ACTIVE_BACKUP:
2513 case BONDING_MODE_TLB:
2514 case BONDING_MODE_ALB:
2516 /* Do not touch promisc when there cannot be primary ports */
2517 if (internals->slave_count == 0)
2519 rte_eth_promiscuous_disable(internals->current_primary_port);
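/*
 * Illustrative sketch (not part of the driver): callers only toggle the
 * bonded port; the switches above decide whether that reaches all slaves
 * (round-robin/balance/broadcast) or only the current primary. The helper
 * name is hypothetical.
 */
static __rte_unused void
example_toggle_bond_promisc(uint16_t bonded_port_id, int on)
{
	if (on)
		rte_eth_promiscuous_enable(bonded_port_id);
	else
		rte_eth_promiscuous_disable(bonded_port_id);
}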
2524 bond_ethdev_delayed_lsc_propagation(void *arg)
2529 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2530 RTE_ETH_EVENT_INTR_LSC, NULL);
2534 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2535 void *param, void *ret_param __rte_unused)
2537 struct rte_eth_dev *bonded_eth_dev;
2538 struct bond_dev_private *internals;
2539 struct rte_eth_link link;
2542 uint8_t lsc_flag = 0;
2543 int valid_slave = 0;
2544 uint16_t active_pos;
2547 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2550 bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2552 if (check_for_bonded_ethdev(bonded_eth_dev))
2555 internals = bonded_eth_dev->data->dev_private;
2557 /* If the device isn't started don't handle interrupts */
2558 if (!bonded_eth_dev->data->dev_started)
2561 /* verify that port_id is a valid slave of bonded port */
2562 for (i = 0; i < internals->slave_count; i++) {
2563 if (internals->slaves[i].port_id == port_id) {
2572 /* Synchronize parallel lsc callback calls, whether triggered by a real
2573 * link event from the slaves' PMDs or by the bonding PMD itself.
2575 rte_spinlock_lock(&internals->lsc_lock);
2577 /* Search for port in active port list */
2578 active_pos = find_slave_by_id(internals->active_slaves,
2579 internals->active_slave_count, port_id);
2581 rte_eth_link_get_nowait(port_id, &link);
2582 if (link.link_status) {
2583 if (active_pos < internals->active_slave_count)
2586 /* check link state properties if bonded link is up */
2587 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2588 if (link_properties_valid(bonded_eth_dev, &link) != 0)
2589 RTE_BOND_LOG(ERR, "Invalid link properties "
2590 "for slave %d in bonding mode %d",
2591 port_id, internals->mode);
2593 /* inherit slave link properties */
2594 link_properties_set(bonded_eth_dev, &link);
2597 /* If no active slave ports then set this port to be the primary port */
2600 if (internals->active_slave_count < 1) {
2601 /* If first active slave, then change link status */
2602 bonded_eth_dev->data->dev_link.link_status =
2604 internals->current_primary_port = port_id;
2607 mac_address_slaves_update(bonded_eth_dev);
2610 activate_slave(bonded_eth_dev, port_id);
2612 /* If the user has defined the primary port then default to using it */
2615 if (internals->user_defined_primary_port &&
2616 internals->primary_port == port_id)
2617 bond_ethdev_primary_set(internals, port_id);
2619 if (active_pos == internals->active_slave_count)
2622 /* Remove from active slave list */
2623 deactivate_slave(bonded_eth_dev, port_id);
2625 if (internals->active_slave_count < 1)
2628 /* Update primary id: take the first active slave from the list, or if
2629 * none is available fall back to the configured primary port */
2630 if (port_id == internals->current_primary_port) {
2631 if (internals->active_slave_count > 0)
2632 bond_ethdev_primary_set(internals,
2633 internals->active_slaves[0]);
2635 internals->current_primary_port = internals->primary_port;
2641 * Update bonded device link properties after any change to active slaves
2644 bond_ethdev_link_update(bonded_eth_dev, 0);
2647 /* Cancel any possible outstanding interrupts if delays are enabled */
2648 if (internals->link_up_delay_ms > 0 ||
2649 internals->link_down_delay_ms > 0)
2650 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2653 if (bonded_eth_dev->data->dev_link.link_status) {
2654 if (internals->link_up_delay_ms > 0)
2655 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2656 bond_ethdev_delayed_lsc_propagation,
2657 (void *)bonded_eth_dev);
2659 _rte_eth_dev_callback_process(bonded_eth_dev,
2660 RTE_ETH_EVENT_INTR_LSC,
2664 if (internals->link_down_delay_ms > 0)
2665 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2666 bond_ethdev_delayed_lsc_propagation,
2667 (void *)bonded_eth_dev);
2669 _rte_eth_dev_callback_process(bonded_eth_dev,
2670 RTE_ETH_EVENT_INTR_LSC,
2675 rte_spinlock_unlock(&internals->lsc_lock);
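/*
 * Illustrative sketch (not part of the driver): applications can observe the
 * LSC events that the handler above (re)emits for the bonded port by
 * registering a callback of their own. The callback and helper names are
 * hypothetical.
 */
static int
example_bond_lsc_cb(uint16_t port_id, enum rte_eth_event_type type,
		void *cb_arg __rte_unused, void *ret_param __rte_unused)
{
	if (type == RTE_ETH_EVENT_INTR_LSC)
		RTE_BOND_LOG(INFO, "Bonded port %u link changed", port_id);
	return 0;
}

static __rte_unused int
example_register_bond_lsc(uint16_t bonded_port_id)
{
	return rte_eth_dev_callback_register(bonded_port_id,
			RTE_ETH_EVENT_INTR_LSC, example_bond_lsc_cb, NULL);
}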
2681 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2682 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2686 int slave_reta_size;
2687 unsigned reta_count;
2688 struct bond_dev_private *internals = dev->data->dev_private;
2690 if (reta_size != internals->reta_size)
2693 /* Copy RETA table */
2694 reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2696 for (i = 0; i < reta_count; i++) {
2697 internals->reta_conf[i].mask = reta_conf[i].mask;
2698 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2699 if ((reta_conf[i].mask >> j) & 0x01)
2700 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2703 /* Fill rest of array */
2704 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2705 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2706 sizeof(internals->reta_conf[0]) * reta_count);
2708 /* Propagate RETA over slaves */
2709 for (i = 0; i < internals->slave_count; i++) {
2710 slave_reta_size = internals->slaves[i].reta_size;
2711 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2712 &internals->reta_conf[0], slave_reta_size);
2721 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2722 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2725 struct bond_dev_private *internals = dev->data->dev_private;
2727 if (reta_size != internals->reta_size)
2730 /* Copy RETA table */
2731 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2732 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2733 if ((reta_conf[i].mask >> j) & 0x01)
2734 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
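/*
 * Illustrative sketch (not part of the driver): a RETA update on the bonded
 * port must match internals->reta_size exactly; the update handler above then
 * mirrors the table onto every slave. The helper name and the round-robin
 * spread over nb_rx_queues are hypothetical.
 */
static __rte_unused int
example_spread_reta(uint16_t bonded_port_id, uint16_t reta_size,
		uint16_t nb_rx_queues)
{
	struct rte_eth_rss_reta_entry64
		reta_conf[ETH_RSS_RETA_SIZE_512 / RTE_RETA_GROUP_SIZE];
	unsigned int i, j;

	if (reta_size > ETH_RSS_RETA_SIZE_512 || nb_rx_queues == 0)
		return -1;

	/* Round-robin the RX queues across the whole redirection table */
	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++) {
		reta_conf[i].mask = ~0ULL;
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			reta_conf[i].reta[j] =
				(i * RTE_RETA_GROUP_SIZE + j) % nb_rx_queues;
	}

	return rte_eth_dev_rss_reta_update(bonded_port_id, reta_conf,
			reta_size);
}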
2740 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2741 struct rte_eth_rss_conf *rss_conf)
2744 struct bond_dev_private *internals = dev->data->dev_private;
2745 struct rte_eth_rss_conf bond_rss_conf;
2747 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2749 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2751 if (bond_rss_conf.rss_hf != 0)
2752 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2754 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2755 sizeof(internals->rss_key)) {
2756 if (bond_rss_conf.rss_key_len == 0)
2757 bond_rss_conf.rss_key_len = 40;
2758 internals->rss_key_len = bond_rss_conf.rss_key_len;
2759 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2760 internals->rss_key_len);
2763 for (i = 0; i < internals->slave_count; i++) {
2764 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2774 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2775 struct rte_eth_rss_conf *rss_conf)
2777 struct bond_dev_private *internals = dev->data->dev_private;
2779 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2780 rss_conf->rss_key_len = internals->rss_key_len;
2781 if (rss_conf->rss_key)
2782 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
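/*
 * Illustrative sketch (not part of the driver): the update path above accepts
 * a key no longer than internals->rss_key (defaulting the length to 40 bytes)
 * and re-programs every slave. The helper name, key bytes, and ETH_RSS_IP
 * hash selection are hypothetical.
 */
static __rte_unused int
example_set_bond_rss(uint16_t bonded_port_id)
{
	/* Hypothetical 40-byte key; remaining bytes are zero */
	static uint8_t key[40] = { 0x6D, 0x5A };
	struct rte_eth_rss_conf conf = {
		.rss_key = key,
		.rss_key_len = sizeof(key),
		.rss_hf = ETH_RSS_IP,
	};

	return rte_eth_dev_rss_hash_update(bonded_port_id, &conf);
}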
2788 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2790 struct rte_eth_dev *slave_eth_dev;
2791 struct bond_dev_private *internals = dev->data->dev_private;
2794 rte_spinlock_lock(&internals->lock);
2796 for (i = 0; i < internals->slave_count; i++) {
2797 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2798 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2799 rte_spinlock_unlock(&internals->lock);
2803 for (i = 0; i < internals->slave_count; i++) {
2804 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2806 rte_spinlock_unlock(&internals->lock);
2811 rte_spinlock_unlock(&internals->lock);
2816 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
2817 struct rte_ether_addr *addr)
2819 if (mac_address_set(dev, addr)) {
2820 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2828 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2829 enum rte_filter_type type, enum rte_filter_op op, void *arg)
2831 if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2832 *(const void **)arg = &bond_flow_ops;
2839 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
2840 struct rte_ether_addr *mac_addr,
2841 __rte_unused uint32_t index, uint32_t vmdq)
2843 struct rte_eth_dev *slave_eth_dev;
2844 struct bond_dev_private *internals = dev->data->dev_private;
2847 rte_spinlock_lock(&internals->lock);
2849 for (i = 0; i < internals->slave_count; i++) {
2850 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2851 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2852 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2858 for (i = 0; i < internals->slave_count; i++) {
2859 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2863 for (i--; i >= 0; i--)
2864 rte_eth_dev_mac_addr_remove(
2865 internals->slaves[i].port_id, mac_addr);
2872 rte_spinlock_unlock(&internals->lock);
2877 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2879 struct rte_eth_dev *slave_eth_dev;
2880 struct bond_dev_private *internals = dev->data->dev_private;
2883 rte_spinlock_lock(&internals->lock);
2885 for (i = 0; i < internals->slave_count; i++) {
2886 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2887 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2891 struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
2893 for (i = 0; i < internals->slave_count; i++)
2894 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2898 rte_spinlock_unlock(&internals->lock);
2901 const struct eth_dev_ops default_dev_ops = {
2902 .dev_start = bond_ethdev_start,
2903 .dev_stop = bond_ethdev_stop,
2904 .dev_close = bond_ethdev_close,
2905 .dev_configure = bond_ethdev_configure,
2906 .dev_infos_get = bond_ethdev_info,
2907 .vlan_filter_set = bond_ethdev_vlan_filter_set,
2908 .rx_queue_setup = bond_ethdev_rx_queue_setup,
2909 .tx_queue_setup = bond_ethdev_tx_queue_setup,
2910 .rx_queue_release = bond_ethdev_rx_queue_release,
2911 .tx_queue_release = bond_ethdev_tx_queue_release,
2912 .link_update = bond_ethdev_link_update,
2913 .stats_get = bond_ethdev_stats_get,
2914 .stats_reset = bond_ethdev_stats_reset,
2915 .promiscuous_enable = bond_ethdev_promiscuous_enable,
2916 .promiscuous_disable = bond_ethdev_promiscuous_disable,
2917 .reta_update = bond_ethdev_rss_reta_update,
2918 .reta_query = bond_ethdev_rss_reta_query,
2919 .rss_hash_update = bond_ethdev_rss_hash_update,
2920 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get,
2921 .mtu_set = bond_ethdev_mtu_set,
2922 .mac_addr_set = bond_ethdev_mac_address_set,
2923 .mac_addr_add = bond_ethdev_mac_addr_add,
2924 .mac_addr_remove = bond_ethdev_mac_addr_remove,
2925 .filter_ctrl = bond_filter_ctrl
2929 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2931 const char *name = rte_vdev_device_name(dev);
2932 uint8_t socket_id = dev->device.numa_node;
2933 struct bond_dev_private *internals = NULL;
2934 struct rte_eth_dev *eth_dev = NULL;
2935 uint32_t vlan_filter_bmp_size;
2937 /* now do all data allocation - for eth_dev structure, dummy pci driver
2938 * and internal (private) data
2941 /* reserve an ethdev entry */
2942 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2943 if (eth_dev == NULL) {
2944 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2948 internals = eth_dev->data->dev_private;
2949 eth_dev->data->nb_rx_queues = (uint16_t)1;
2950 eth_dev->data->nb_tx_queues = (uint16_t)1;
2952 /* Allocate memory for storing MAC addresses */
2953 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
2954 BOND_MAX_MAC_ADDRS, 0, socket_id);
2955 if (eth_dev->data->mac_addrs == NULL) {
2957 "Failed to allocate %u bytes needed to store MAC addresses",
2958 RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
2962 eth_dev->dev_ops = &default_dev_ops;
2963 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2965 rte_spinlock_init(&internals->lock);
2966 rte_spinlock_init(&internals->lsc_lock);
2968 internals->port_id = eth_dev->data->port_id;
2969 internals->mode = BONDING_MODE_INVALID;
2970 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2971 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2972 internals->burst_xmit_hash = burst_xmit_l2_hash;
2973 internals->user_defined_mac = 0;
2975 internals->link_status_polling_enabled = 0;
2977 internals->link_status_polling_interval_ms =
2978 DEFAULT_POLLING_INTERVAL_10_MS;
2979 internals->link_down_delay_ms = 0;
2980 internals->link_up_delay_ms = 0;
2982 internals->slave_count = 0;
2983 internals->active_slave_count = 0;
2984 internals->rx_offload_capa = 0;
2985 internals->tx_offload_capa = 0;
2986 internals->rx_queue_offload_capa = 0;
2987 internals->tx_queue_offload_capa = 0;
2988 internals->candidate_max_rx_pktlen = 0;
2989 internals->max_rx_pktlen = 0;
2991 /* Initially allow to choose any offload type */
2992 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2994 memset(&internals->default_rxconf, 0,
2995 sizeof(internals->default_rxconf));
2996 memset(&internals->default_txconf, 0,
2997 sizeof(internals->default_txconf));
2999 memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3000 memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3002 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3003 memset(internals->slaves, 0, sizeof(internals->slaves));
3005 TAILQ_INIT(&internals->flow_list);
3006 internals->flow_isolated_valid = 0;
3008 /* Set mode 4 default configuration */
3009 bond_mode_8023ad_setup(eth_dev, NULL);
3010 if (bond_ethdev_mode_set(eth_dev, mode)) {
3011 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3012 eth_dev->data->port_id, mode);
3016 vlan_filter_bmp_size =
3017 rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3018 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3019 RTE_CACHE_LINE_SIZE);
3020 if (internals->vlan_filter_bmpmem == NULL) {
3022 "Failed to allocate vlan bitmap for bonded device %u",
3023 eth_dev->data->port_id);
3027 internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3028 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3029 if (internals->vlan_filter_bmp == NULL) {
3031 "Failed to init vlan bitmap for bonded device %u",
3032 eth_dev->data->port_id);
3033 rte_free(internals->vlan_filter_bmpmem);
3037 return eth_dev->data->port_id;
3040 rte_free(internals);
3041 if (eth_dev != NULL)
3042 eth_dev->data->dev_private = NULL;
3043 rte_eth_dev_release_port(eth_dev);
3048 bond_probe(struct rte_vdev_device *dev)
3051 struct bond_dev_private *internals;
3052 struct rte_kvargs *kvlist;
3053 uint8_t bonding_mode, socket_id;
3054 int arg_count, port_id;
3055 uint8_t agg_mode;
3056 struct rte_eth_dev *eth_dev;
3061 name = rte_vdev_device_name(dev);
3062 RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3064 if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3065 eth_dev = rte_eth_dev_attach_secondary(name);
3067 RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3070 /* TODO: request info from primary to set up Rx and Tx */
3071 eth_dev->dev_ops = &default_dev_ops;
3072 eth_dev->device = &dev->device;
3073 rte_eth_dev_probing_finish(eth_dev);
3077 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3078 pmd_bond_init_valid_arguments);
3082 /* Parse link bonding mode */
3083 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3084 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3085 &bond_ethdev_parse_slave_mode_kvarg,
3086 &bonding_mode) != 0) {
3087 RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3092 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3097 /* Parse socket id to create bonding device on */
3098 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3099 if (arg_count == 1) {
3100 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3101 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3103 RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3104 "bonded device %s", name);
3107 } else if (arg_count > 1) {
3108 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3109 "bonded device %s", name);
3112 socket_id = rte_socket_id();
3115 dev->device.numa_node = socket_id;
3117 /* Create link bonding eth device */
3118 port_id = bond_alloc(dev, bonding_mode);
3120 RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on "
3121 "socket %u.", name, bonding_mode, socket_id);
3124 internals = rte_eth_devices[port_id].data->dev_private;
3125 internals->kvlist = kvlist;
3127 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3128 if (rte_kvargs_process(kvlist,
3129 PMD_BOND_AGG_MODE_KVARG,
3130 &bond_ethdev_parse_slave_agg_mode_kvarg,
3133 "Failed to parse agg selection mode for bonded device %s",
3138 if (internals->mode == BONDING_MODE_8023AD)
3139 internals->mode4.agg_selection = agg_mode;
3141 internals->mode4.agg_selection = AGG_STABLE;
3144 rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3145 RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3146 "socket %u.", name, port_id, bonding_mode, socket_id);
3150 rte_kvargs_free(kvlist);
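/*
 * Illustrative sketch (not part of the driver): the same result as probing a
 * vdev can be obtained from the public bonding API, in which case
 * bond_ethdev_configure() below runs without a kvlist. The device name, mode,
 * and slave port ids 0/1 are hypothetical.
 */
static __rte_unused int
example_create_bond_by_api(void)
{
	int bond_port = rte_eth_bond_create("net_bonding_example",
			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());

	if (bond_port < 0)
		return bond_port;

	/* Hypothetical slave port ids 0 and 1 */
	if (rte_eth_bond_slave_add(bond_port, 0) != 0 ||
			rte_eth_bond_slave_add(bond_port, 1) != 0)
		return -1;

	return bond_port;
}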
3156 bond_remove(struct rte_vdev_device *dev)
3158 struct rte_eth_dev *eth_dev;
3159 struct bond_dev_private *internals;
3165 name = rte_vdev_device_name(dev);
3166 RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3168 /* now free all data allocation - for eth_dev structure,
3169 * dummy pci driver and internal (private) data
3172 /* find an ethdev entry */
3173 eth_dev = rte_eth_dev_allocated(name);
3174 if (eth_dev == NULL)
3177 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3178 return rte_eth_dev_release_port(eth_dev);
3180 RTE_ASSERT(eth_dev->device == &dev->device);
3182 internals = eth_dev->data->dev_private;
3183 if (internals->slave_count != 0)
3186 if (eth_dev->data->dev_started == 1) {
3187 bond_ethdev_stop(eth_dev);
3188 bond_ethdev_close(eth_dev);
3191 eth_dev->dev_ops = NULL;
3192 eth_dev->rx_pkt_burst = NULL;
3193 eth_dev->tx_pkt_burst = NULL;
3195 internals = eth_dev->data->dev_private;
3196 /* Try to release the mempool used in mode 6. If the bonded
3197 * device is not in mode 6, freeing NULL is not a problem.
3199 rte_mempool_free(internals->mode6.mempool);
3200 rte_bitmap_free(internals->vlan_filter_bmp);
3201 rte_free(internals->vlan_filter_bmpmem);
3203 rte_eth_dev_release_port(eth_dev);
3208 /* this part will resolve the slave port ids after all the other pdevs
3209 * and vdevs have been allocated */
3211 bond_ethdev_configure(struct rte_eth_dev *dev)
3213 const char *name = dev->device->name;
3214 struct bond_dev_private *internals = dev->data->dev_private;
3215 struct rte_kvargs *kvlist = internals->kvlist;
3217 uint16_t port_id = dev - rte_eth_devices;
3220 static const uint8_t default_rss_key[40] = {
3221 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3222 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3223 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3224 0xBE, 0xAC, 0x01, 0xFA
3230 * If RSS is enabled, fill the table with default values and
3231 * set the key to the value specified in the port RSS configuration.
3232 * Fall back to the default RSS key if the key is not specified.
3234 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3235 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3236 internals->rss_key_len =
3237 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3238 memcpy(internals->rss_key,
3239 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3240 internals->rss_key_len);
3242 internals->rss_key_len = sizeof(default_rss_key);
3243 memcpy(internals->rss_key, default_rss_key,
3244 internals->rss_key_len);
3247 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3248 internals->reta_conf[i].mask = ~0LL;
3249 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3250 internals->reta_conf[i].reta[j] =
3251 (i * RTE_RETA_GROUP_SIZE + j) %
3252 dev->data->nb_rx_queues;
3256 /* set the max_rx_pktlen */
3257 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3260 * if there is no kvlist, it means that this bonded device has been
3261 * created through the bonding API.
3266 /* Parse MAC address for bonded device */
3267 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3268 if (arg_count == 1) {
3269 struct rte_ether_addr bond_mac;
3271 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3272 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3273 RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3278 /* Set MAC address */
3279 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3281 "Failed to set mac address on bonded device %s",
3285 } else if (arg_count > 1) {
3287 "MAC address can be specified only once for bonded device %s",
3292 /* Parse/set balance mode transmit policy */
3293 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3294 if (arg_count == 1) {
3295 uint8_t xmit_policy;
3297 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3298 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3301 "Invalid xmit policy specified for bonded device %s",
3306 /* Set balance mode transmit policy*/
3307 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3309 "Failed to set balance xmit policy on bonded device %s",
3313 } else if (arg_count > 1) {
3315 "Transmit policy can be specified only once for bonded device %s",
3320 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3321 if (rte_kvargs_process(kvlist,
3322 PMD_BOND_AGG_MODE_KVARG,
3323 &bond_ethdev_parse_slave_agg_mode_kvarg,
3326 "Failed to parse agg selection mode for bonded device %s",
3329 if (internals->mode == BONDING_MODE_8023AD) {
3330 int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3334 "Invalid args for agg selection set for bonded device %s",
3341 /* Parse/add slave ports to bonded device */
3342 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3343 struct bond_ethdev_slave_ports slave_ports;
3346 memset(&slave_ports, 0, sizeof(slave_ports));
3348 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3349 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3351 "Failed to parse slave ports for bonded device %s",
3356 for (i = 0; i < slave_ports.slave_count; i++) {
3357 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3359 "Failed to add port %d as slave to bonded device %s",
3360 slave_ports.slaves[i], name);
3365 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3369 /* Parse/set primary slave port id*/
3370 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3371 if (arg_count == 1) {
3372 uint16_t primary_slave_port_id;
3374 if (rte_kvargs_process(kvlist,
3375 PMD_BOND_PRIMARY_SLAVE_KVARG,
3376 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3377 &primary_slave_port_id) < 0) {
3379 "Invalid primary slave port id specified for bonded device %s",
3384 /* Set the primary slave port id */
3385 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3388 "Failed to set primary slave port %d on bonded device %s",
3389 primary_slave_port_id, name);
3392 } else if (arg_count > 1) {
3394 "Primary slave can be specified only once for bonded device %s",
3399 /* Parse link status monitor polling interval */
3400 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3401 if (arg_count == 1) {
3402 uint32_t lsc_poll_interval_ms;
3404 if (rte_kvargs_process(kvlist,
3405 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3406 &bond_ethdev_parse_time_ms_kvarg,
3407 &lsc_poll_interval_ms) < 0) {
3409 "Invalid lsc polling interval value specified for bonded"
3410 " device %s", name);
3414 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3417 "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3418 lsc_poll_interval_ms, name);
3421 } else if (arg_count > 1) {
3423 "LSC polling interval can be specified only once for bonded"
3424 " device %s", name);
3428 /* Parse link up interrupt propagation delay */
3429 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3430 if (arg_count == 1) {
3431 uint32_t link_up_delay_ms;
3433 if (rte_kvargs_process(kvlist,
3434 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3435 &bond_ethdev_parse_time_ms_kvarg,
3436 &link_up_delay_ms) < 0) {
3438 "Invalid link up propagation delay value specified for"
3439 " bonded device %s", name);
3443 /* Set the link up propagation delay */
3444 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3447 "Failed to set link up propagation delay (%u ms) on bonded"
3448 " device %s", link_up_delay_ms, name);
3451 } else if (arg_count > 1) {
3453 "Link up propagation delay can be specified only once for"
3454 " bonded device %s", name);
3458 /* Parse link down interrupt propagation delay */
3459 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3460 if (arg_count == 1) {
3461 uint32_t link_down_delay_ms;
3463 if (rte_kvargs_process(kvlist,
3464 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3465 &bond_ethdev_parse_time_ms_kvarg,
3466 &link_down_delay_ms) < 0) {
3468 "Invalid link down propagation delay value specified for"
3469 " bonded device %s", name);
3473 /* Set the link down propagation delay */
3474 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3477 "Failed to set link down propagation delay (%u ms) on bonded device %s",
3478 link_down_delay_ms, name);
3481 } else if (arg_count > 1) {
3483 "Link down propagation delay can be specified only once for bonded device %s",
3491 struct rte_vdev_driver pmd_bond_drv = {
3492 .probe = bond_probe,
3493 .remove = bond_remove,
3496 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3497 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3499 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3500 "slave=<ifc> "
3501 "primary=<ifc> "
3502 "mode=[0-6] "
3503 "xmit_policy=[l2 | l23 | l34] "
3504 "agg_mode=[count | stable | bandwidth] "
3505 "socket_id=<int> "
3506 "mac=<mac addr> "
3507 "lsc_poll_period_ms=<int> "
3508 "up_delay=<int> "
3509 "down_delay=<int>");
3513 RTE_INIT(bond_init_log)
3515 bond_logtype = rte_log_register("pmd.net.bond");
3516 if (bond_logtype >= 0)
3517 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);