1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
6 #include <netinet/in.h>
9 #include <rte_malloc.h>
10 #include <rte_ethdev_driver.h>
11 #include <rte_ethdev_vdev.h>
15 #include <rte_ip_frag.h>
16 #include <rte_devargs.h>
17 #include <rte_kvargs.h>
18 #include <rte_bus_vdev.h>
19 #include <rte_alarm.h>
20 #include <rte_cycles.h>
21 #include <rte_string_fns.h>
23 #include "rte_eth_bond.h"
24 #include "rte_eth_bond_private.h"
25 #include "rte_eth_bond_8023ad_private.h"
27 #define REORDER_PERIOD_MS 10
28 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
29 #define BOND_MAX_MAC_ADDRS 16
31 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
33 /* Table for statistics in mode 5 TLB */
34 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
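/*
 * Parse past any VLAN/QinQ tags: returns the cumulative size of the VLAN
 * headers that follow the Ethernet header and advances *proto to the
 * encapsulated EtherType, so callers can locate the L3 header.
 */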
37 get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
39 size_t vlan_offset = 0;
41 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
42 rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
43 struct rte_vlan_hdr *vlan_hdr =
44 (struct rte_vlan_hdr *)(eth_hdr + 1);
46 vlan_offset = sizeof(struct rte_vlan_hdr);
47 *proto = vlan_hdr->eth_proto;
49 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
50 vlan_hdr = vlan_hdr + 1;
51 *proto = vlan_hdr->eth_proto;
52 vlan_offset += sizeof(struct rte_vlan_hdr);
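/*
 * Round-robin RX burst: poll each active slave in turn, resuming from the
 * slave after the one used by the previous call, until the requested burst
 * is filled or every slave has been polled once.
 */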
59 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
61 struct bond_dev_private *internals;
63 uint16_t num_rx_total = 0;
65 uint16_t active_slave;
68 /* Cast to structure containing the bonded device's port id and queue id */
69 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
70 internals = bd_rx_q->dev_private;
71 slave_count = internals->active_slave_count;
72 active_slave = internals->active_slave;
74 for (i = 0; i < slave_count && nb_pkts; i++) {
75 uint16_t num_rx_slave;
77 /* Offset of pointer to *bufs increases as packets are received
78 * from other slaves */
79 num_rx_slave =
80 rte_eth_rx_burst(internals->active_slaves[active_slave],
81 bd_rx_q->queue_id,
82 bufs + num_rx_total, nb_pkts);
83 num_rx_total += num_rx_slave;
84 nb_pkts -= num_rx_slave;
85 if (++active_slave == slave_count)
89 if (++internals->active_slave >= slave_count)
90 internals->active_slave = 0;
95 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
98 struct bond_dev_private *internals;
100 /* Cast to structure containing the bonded device's port id and queue id */
101 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
103 internals = bd_rx_q->dev_private;
105 return rte_eth_rx_burst(internals->current_primary_port,
106 bd_rx_q->queue_id, bufs, nb_pkts);
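/*
 * A frame is treated as LACP/marker control traffic only if it is untagged
 * and carries the slow-protocols EtherType with the LACP or marker subtype.
 */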
109 static inline uint8_t
110 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
112 const uint16_t ether_type_slow_be =
113 rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
115 return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
116 (ethertype == ether_type_slow_be &&
117 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
120 /*****************************************************************************
121 * Flow director's setup for mode 4 optimization
124 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
125 .dst.addr_bytes = { 0 },
126 .src.addr_bytes = { 0 },
127 .type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
130 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
131 .dst.addr_bytes = { 0 },
132 .src.addr_bytes = { 0 },
136 static struct rte_flow_item flow_item_8023ad[] = {
138 .type = RTE_FLOW_ITEM_TYPE_ETH,
139 .spec = &flow_item_eth_type_8023ad,
141 .mask = &flow_item_eth_mask_type_8023ad,
144 .type = RTE_FLOW_ITEM_TYPE_END,
151 const struct rte_flow_attr flow_attr_8023ad = {
160 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
161 uint16_t slave_port) {
162 struct rte_eth_dev_info slave_info;
163 struct rte_flow_error error;
164 struct bond_dev_private *internals = bond_dev->data->dev_private;
166 const struct rte_flow_action_queue lacp_queue_conf = {
170 const struct rte_flow_action actions[] = {
172 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
173 .conf = &lacp_queue_conf
176 .type = RTE_FLOW_ACTION_TYPE_END,
180 int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
181 flow_item_8023ad, actions, &error);
183 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
184 __func__, error.message, slave_port,
185 internals->mode4.dedicated_queues.rx_qid);
189 rte_eth_dev_info_get(slave_port, &slave_info);
190 if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
191 slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
193 "%s: Slave %d capabilities doesn't allow to allocate additional queues",
194 __func__, slave_port);
202 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
203 struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
204 struct bond_dev_private *internals = bond_dev->data->dev_private;
205 struct rte_eth_dev_info bond_info;
208 /* Verify that all slaves in the bond support the dedicated queue flow rules */
209 if (internals->slave_count > 0) {
210 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
212 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
213 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
215 for (idx = 0; idx < internals->slave_count; idx++) {
216 if (bond_ethdev_8023ad_flow_verify(bond_dev,
217 internals->slaves[idx].port_id) != 0)
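/*
 * Install the flow rule that steers slow-protocol (802.3ad) frames on a
 * slave port to the dedicated LACP RX queue, keeping them off the data path.
 */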
226 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
228 struct rte_flow_error error;
229 struct bond_dev_private *internals = bond_dev->data->dev_private;
230 struct rte_flow_action_queue lacp_queue_conf = {
231 .index = internals->mode4.dedicated_queues.rx_qid,
234 const struct rte_flow_action actions[] = {
236 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
237 .conf = &lacp_queue_conf
240 .type = RTE_FLOW_ACTION_TYPE_END,
244 internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
245 &flow_attr_8023ad, flow_item_8023ad, actions, &error);
246 if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
247 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
248 "(slave_port=%d queue_id=%d)",
249 error.message, slave_port,
250 internals->mode4.dedicated_queues.rx_qid);
258 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
261 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
262 struct bond_dev_private *internals = bd_rx_q->dev_private;
263 uint16_t num_rx_total = 0; /* Total number of received packets */
264 uint16_t slaves[RTE_MAX_ETHPORTS];
265 uint16_t slave_count;
266 uint16_t active_slave;
269 /* Copy slave list to protect against slave up/down changes during rx bursting */
271 slave_count = internals->active_slave_count;
272 active_slave = internals->active_slave;
273 memcpy(slaves, internals->active_slaves,
274 sizeof(internals->active_slaves[0]) * slave_count);
276 for (i = 0; i < slave_count && nb_pkts; i++) {
277 uint16_t num_rx_slave;
279 /* Read packets from this slave */
280 num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
281 bd_rx_q->queue_id,
282 bufs + num_rx_total, nb_pkts);
283 num_rx_total += num_rx_slave;
284 nb_pkts -= num_rx_slave;
286 if (++active_slave == slave_count)
290 if (++internals->active_slave >= slave_count)
291 internals->active_slave = 0;
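/*
 * Full 802.3ad RX path (no dedicated queues): slow-protocol frames are
 * diverted to the mode 4 state machine and removed from the burst, and
 * unicast frames matching neither the bond MAC nor a multicast address
 * are dropped unless the bond is in promiscuous mode.
 */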
297 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
300 /* Cast to structure containing the bonded device's port id and queue id */
301 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
302 struct bond_dev_private *internals = bd_rx_q->dev_private;
303 struct rte_eth_dev *bonded_eth_dev =
304 &rte_eth_devices[internals->port_id];
305 struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
306 struct rte_ether_hdr *hdr;
308 const uint16_t ether_type_slow_be =
309 rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
310 uint16_t num_rx_total = 0; /* Total number of received packets */
311 uint16_t slaves[RTE_MAX_ETHPORTS];
312 uint16_t slave_count, idx;
314 uint8_t collecting; /* current slave collecting status */
315 const uint8_t promisc = internals->promiscuous_en;
321 /* Copy slave list to protect against slave up/down changes during rx bursting */
323 slave_count = internals->active_slave_count;
324 memcpy(slaves, internals->active_slaves,
325 sizeof(internals->active_slaves[0]) * slave_count);
327 idx = internals->active_slave;
328 if (idx >= slave_count) {
329 internals->active_slave = 0;
332 for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
334 collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
337 /* Read packets from this slave */
338 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
339 &bufs[num_rx_total], nb_pkts - num_rx_total);
341 for (k = j; k < 2 && k < num_rx_total; k++)
342 rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
344 /* Handle slow protocol packets. */
345 while (j < num_rx_total) {
347 /* If the packet type is known and not pure L2, it cannot be a slow frame; skip it */
348 if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
353 if (j + 3 < num_rx_total)
354 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
356 hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
357 subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
359 /* Remove the packet from the array if it is a slow packet, if the slave
360 * is not in collecting state, or if the bonding interface is not in
361 * promiscuous mode and the destination address does not match. */
362 if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
363 !collecting ||
364 (!promisc &&
365 !rte_is_multicast_ether_addr(&hdr->d_addr) &&
366 !rte_is_same_ether_addr(bond_mac,
367 &hdr->d_addr)))) {
369 if (hdr->ether_type == ether_type_slow_be) {
370 bond_mode_8023ad_handle_slow_pkt(
371 internals, slaves[idx], bufs[j]);
373 rte_pktmbuf_free(bufs[j]);
375 /* Packet is managed by mode 4 or dropped, shift the array */
377 if (j < num_rx_total) {
378 memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
384 if (unlikely(++idx == slave_count))
388 if (++internals->active_slave >= slave_count)
389 internals->active_slave = 0;
394 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
395 uint32_t burstnumberRX;
396 uint32_t burstnumberTX;
398 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
401 arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
404 case RTE_ARP_OP_REQUEST:
405 strlcpy(buf, "ARP Request", buf_len);
407 case RTE_ARP_OP_REPLY:
408 strlcpy(buf, "ARP Reply", buf_len);
410 case RTE_ARP_OP_REVREQUEST:
411 strlcpy(buf, "Reverse ARP Request", buf_len);
413 case RTE_ARP_OP_REVREPLY:
414 strlcpy(buf, "Reverse ARP Reply", buf_len);
416 case RTE_ARP_OP_INVREQUEST:
417 strlcpy(buf, "Peer Identify Request", buf_len);
419 case RTE_ARP_OP_INVREPLY:
420 strlcpy(buf, "Peer Identify Reply", buf_len);
425 strlcpy(buf, "Unknown", buf_len);
429 #define MaxIPv4String 16
431 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
435 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
436 snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
437 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
441 #define MAX_CLIENTS_NUMBER 128
442 uint8_t active_clients;
443 struct client_stats_t {
446 uint32_t ipv4_rx_packets;
447 uint32_t ipv4_tx_packets;
449 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
452 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
456 for (; i < MAX_CLIENTS_NUMBER; i++) {
457 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
458 /* Just update RX packets number for this client */
459 if (TXorRXindicator == &burstnumberRX)
460 client_stats[i].ipv4_rx_packets++;
462 client_stats[i].ipv4_tx_packets++;
466 /* We have a new client. Insert it into the table and update its stats */
467 if (TXorRXindicator == &burstnumberRX)
468 client_stats[active_clients].ipv4_rx_packets++;
470 client_stats[active_clients].ipv4_tx_packets++;
471 client_stats[active_clients].ipv4_addr = addr;
472 client_stats[active_clients].port = port;
477 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
478 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
479 rte_log(RTE_LOG_DEBUG, bond_logtype, \
480 "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
481 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
484 eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
485 eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
486 eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
488 eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
489 eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
490 eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
492 arp_op, ++burstnumber)
496 mode6_debug(const char __attribute__((unused)) *info,
497 struct rte_ether_hdr *eth_h, uint16_t port,
498 uint32_t __attribute__((unused)) *burstnumber)
500 struct rte_ipv4_hdr *ipv4_h;
501 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
502 struct rte_arp_hdr *arp_h;
509 uint16_t ether_type = eth_h->ether_type;
510 uint16_t offset = get_vlan_offset(eth_h, &ether_type);
512 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
513 strlcpy(buf, info, 16);
516 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
517 ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
518 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
519 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
520 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
521 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
523 update_client_stats(ipv4_h->src_addr, port, burstnumber);
525 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
526 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
527 arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
528 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
529 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
530 arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
531 ArpOp, sizeof(ArpOp));
532 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
539 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
541 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
542 struct bond_dev_private *internals = bd_tx_q->dev_private;
543 struct rte_ether_hdr *eth_h;
544 uint16_t ether_type, offset;
545 uint16_t nb_recv_pkts;
548 nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
550 for (i = 0; i < nb_recv_pkts; i++) {
551 eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
552 ether_type = eth_h->ether_type;
553 offset = get_vlan_offset(eth_h, &ether_type);
555 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
556 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
557 mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
559 bond_mode_alb_arp_recv(eth_h, offset, internals);
561 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
562 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
563 mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
571 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
574 struct bond_dev_private *internals;
575 struct bond_tx_queue *bd_tx_q;
577 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
578 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
580 uint16_t num_of_slaves;
581 uint16_t slaves[RTE_MAX_ETHPORTS];
583 uint16_t num_tx_total = 0, num_tx_slave;
585 static int slave_idx = 0;
586 int i, cslave_idx = 0, tx_fail_total = 0;
588 bd_tx_q = (struct bond_tx_queue *)queue;
589 internals = bd_tx_q->dev_private;
591 /* Copy slave list to protect against slave up/down changes during tx
593 num_of_slaves = internals->active_slave_count;
594 memcpy(slaves, internals->active_slaves,
595 sizeof(internals->active_slaves[0]) * num_of_slaves);
597 if (num_of_slaves < 1)
600 /* Populate each slave's mbuf array with the packets to be sent on it */
601 for (i = 0; i < nb_pkts; i++) {
602 cslave_idx = (slave_idx + i) % num_of_slaves;
603 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
606 /* Increment current slave index so the next call to tx burst starts on the next slave */
608 slave_idx = ++cslave_idx;
610 /* Send packet burst on each slave device */
611 for (i = 0; i < num_of_slaves; i++) {
612 if (slave_nb_pkts[i] > 0) {
613 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
614 slave_bufs[i], slave_nb_pkts[i]);
616 /* if tx burst fails move packets to end of bufs */
617 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
618 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
620 tx_fail_total += tx_fail_slave;
622 memcpy(&bufs[nb_pkts - tx_fail_total],
623 &slave_bufs[i][num_tx_slave],
624 tx_fail_slave * sizeof(bufs[0]));
626 num_tx_total += num_tx_slave;
634 bond_ethdev_tx_burst_active_backup(void *queue,
635 struct rte_mbuf **bufs, uint16_t nb_pkts)
637 struct bond_dev_private *internals;
638 struct bond_tx_queue *bd_tx_q;
640 bd_tx_q = (struct bond_tx_queue *)queue;
641 internals = bd_tx_q->dev_private;
643 if (internals->active_slave_count < 1)
646 return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
647 bufs, nb_pkts);
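/*
 * Transmit-policy hash helpers: fold the Ethernet MACs (L2), the IPv4/IPv6
 * addresses (L3) and the TCP/UDP ports (L4) into a per-packet value used
 * to select an output slave.
 */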
650 static inline uint16_t
651 ether_hash(struct rte_ether_hdr *eth_hdr)
653 unaligned_uint16_t *word_src_addr =
654 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
655 unaligned_uint16_t *word_dst_addr =
656 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
658 return (word_src_addr[0] ^ word_dst_addr[0]) ^
659 (word_src_addr[1] ^ word_dst_addr[1]) ^
660 (word_src_addr[2] ^ word_dst_addr[2]);
663 static inline uint32_t
664 ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
666 return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
669 static inline uint32_t
670 ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
672 unaligned_uint32_t *word_src_addr =
673 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
674 unaligned_uint32_t *word_dst_addr =
675 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
677 return (word_src_addr[0] ^ word_dst_addr[0]) ^
678 (word_src_addr[1] ^ word_dst_addr[1]) ^
679 (word_src_addr[2] ^ word_dst_addr[2]) ^
680 (word_src_addr[3] ^ word_dst_addr[3]);
685 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
686 uint16_t slave_count, uint16_t *slaves)
688 struct rte_ether_hdr *eth_hdr;
692 for (i = 0; i < nb_pkts; i++) {
693 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
695 hash = ether_hash(eth_hdr);
697 slaves[i] = (hash ^= hash >> 8) % slave_count;
702 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
703 uint16_t slave_count, uint16_t *slaves)
706 struct rte_ether_hdr *eth_hdr;
709 uint32_t hash, l3hash;
711 for (i = 0; i < nb_pkts; i++) {
712 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
715 proto = eth_hdr->ether_type;
716 hash = ether_hash(eth_hdr);
718 vlan_offset = get_vlan_offset(eth_hdr, &proto);
720 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
721 struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
722 ((char *)(eth_hdr + 1) + vlan_offset);
723 l3hash = ipv4_hash(ipv4_hdr);
725 } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
726 struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
727 ((char *)(eth_hdr + 1) + vlan_offset);
728 l3hash = ipv6_hash(ipv6_hdr);
731 hash = hash ^ l3hash;
735 slaves[i] = hash % slave_count;
740 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
741 uint16_t slave_count, uint16_t *slaves)
743 struct rte_ether_hdr *eth_hdr;
748 struct rte_udp_hdr *udp_hdr;
749 struct rte_tcp_hdr *tcp_hdr;
750 uint32_t hash, l3hash, l4hash;
752 for (i = 0; i < nb_pkts; i++) {
753 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
754 size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
755 proto = eth_hdr->ether_type;
756 vlan_offset = get_vlan_offset(eth_hdr, &proto);
760 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
761 struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
762 ((char *)(eth_hdr + 1) + vlan_offset);
763 size_t ip_hdr_offset;
765 l3hash = ipv4_hash(ipv4_hdr);
767 /* there is no L4 header in fragmented packet */
768 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
770 ip_hdr_offset = (ipv4_hdr->version_ihl
771 & RTE_IPV4_HDR_IHL_MASK) *
772 RTE_IPV4_IHL_MULTIPLIER;
774 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
775 tcp_hdr = (struct rte_tcp_hdr *)
778 if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
780 l4hash = HASH_L4_PORTS(tcp_hdr);
781 } else if (ipv4_hdr->next_proto_id ==
783 udp_hdr = (struct rte_udp_hdr *)
786 if ((size_t)udp_hdr + sizeof(*udp_hdr)
788 l4hash = HASH_L4_PORTS(udp_hdr);
791 } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
792 struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
793 ((char *)(eth_hdr + 1) + vlan_offset);
794 l3hash = ipv6_hash(ipv6_hdr);
796 if (ipv6_hdr->proto == IPPROTO_TCP) {
797 tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
798 l4hash = HASH_L4_PORTS(tcp_hdr);
799 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
800 udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
801 l4hash = HASH_L4_PORTS(udp_hdr);
805 hash = l3hash ^ l4hash;
809 slaves[i] = hash % slave_count;
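/*
 * TLB (mode 5) bookkeeping: each slave's remaining bandwidth is tracked as
 * an integer part plus a remainder so slaves can be ordered by spare
 * capacity without floating point arithmetic.
 */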
814 uint64_t bwg_left_int;
815 uint64_t bwg_left_remainder;
820 bond_tlb_activate_slave(struct bond_dev_private *internals) {
823 for (i = 0; i < internals->active_slave_count; i++) {
824 tlb_last_obytets[internals->active_slaves[i]] = 0;
829 bandwidth_cmp(const void *a, const void *b)
831 const struct bwg_slave *bwg_a = a;
832 const struct bwg_slave *bwg_b = b;
833 int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
834 int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
835 (int64_t)bwg_a->bwg_left_remainder;
849 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
850 struct bwg_slave *bwg_slave)
852 struct rte_eth_link link_status;
854 rte_eth_link_get_nowait(port_id, &link_status);
855 uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
858 link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
859 bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
860 bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
864 bond_ethdev_update_tlb_slave_cb(void *arg)
866 struct bond_dev_private *internals = arg;
867 struct rte_eth_stats slave_stats;
868 struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
869 uint16_t slave_count;
872 uint8_t update_stats = 0;
876 internals->slave_update_idx++;
879 if (internals->slave_update_idx >= REORDER_PERIOD_MS)
882 for (i = 0; i < internals->active_slave_count; i++) {
883 slave_id = internals->active_slaves[i];
884 rte_eth_stats_get(slave_id, &slave_stats);
885 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
886 bandwidth_left(slave_id, tx_bytes,
887 internals->slave_update_idx, &bwg_array[i]);
888 bwg_array[i].slave = slave_id;
891 tlb_last_obytets[slave_id] = slave_stats.obytes;
895 if (update_stats == 1)
896 internals->slave_update_idx = 0;
899 qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
900 for (i = 0; i < slave_count; i++)
901 internals->tlb_slaves_order[i] = bwg_array[i].slave;
903 rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
904 (struct bond_dev_private *)internals);
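/*
 * TLB TX: transmit through the slaves in the order computed by the
 * bandwidth callback above, rewriting the source MAC of packets that carry
 * the primary's address to that of the transmitting slave.
 */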
908 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
910 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
911 struct bond_dev_private *internals = bd_tx_q->dev_private;
913 struct rte_eth_dev *primary_port =
914 &rte_eth_devices[internals->primary_port];
915 uint16_t num_tx_total = 0;
918 uint16_t num_of_slaves = internals->active_slave_count;
919 uint16_t slaves[RTE_MAX_ETHPORTS];
921 struct rte_ether_hdr *ether_hdr;
922 struct rte_ether_addr primary_slave_addr;
923 struct rte_ether_addr active_slave_addr;
925 if (num_of_slaves < 1)
928 memcpy(slaves, internals->tlb_slaves_order,
929 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
932 rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
935 for (i = 0; i < 3; i++)
936 rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
939 for (i = 0; i < num_of_slaves; i++) {
940 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
941 for (j = num_tx_total; j < nb_pkts; j++) {
943 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
945 ether_hdr = rte_pktmbuf_mtod(bufs[j],
946 struct rte_ether_hdr *);
947 if (rte_is_same_ether_addr(&ether_hdr->s_addr,
948 &primary_slave_addr))
949 rte_ether_addr_copy(&active_slave_addr,
951 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
952 mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
956 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
957 bufs + num_tx_total, nb_pkts - num_tx_total);
959 if (num_tx_total == nb_pkts)
967 bond_tlb_disable(struct bond_dev_private *internals)
969 rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
973 bond_tlb_enable(struct bond_dev_private *internals)
975 bond_ethdev_update_tlb_slave_cb(internals);
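/*
 * ALB (mode 6) TX: ARP packets are assigned to slaves via the ALB client
 * table (with their source MAC rewritten), pending ARP update packets are
 * generated for known clients, and all other traffic falls back to the
 * TLB transmit policy.
 */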
979 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
981 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
982 struct bond_dev_private *internals = bd_tx_q->dev_private;
984 struct rte_ether_hdr *eth_h;
985 uint16_t ether_type, offset;
987 struct client_data *client_info;
990 * We create transmit buffers for every slave and one additional buffer for
991 * TLB traffic. In the worst case every packet will be sent on one port.
993 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
994 uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
997 * We create separate transmit buffers for update packets as they won't
998 * be counted in num_tx_total.
1000 struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1001 uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1003 struct rte_mbuf *upd_pkt;
1006 uint16_t num_send, num_not_send = 0;
1007 uint16_t num_tx_total = 0;
1012 /* Search tx buffer for ARP packets and forward them to alb */
1013 for (i = 0; i < nb_pkts; i++) {
1014 eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
1015 ether_type = eth_h->ether_type;
1016 offset = get_vlan_offset(eth_h, &ether_type);
1018 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
1019 slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1021 /* Change src mac in eth header */
1022 rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1024 /* Add packet to slave tx buffer */
1025 slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1026 slave_bufs_pkts[slave_idx]++;
1028 /* If packet is not ARP, send it with TLB policy */
1029 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1031 slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1035 /* Update connected client ARP tables */
1036 if (internals->mode6.ntt) {
1037 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1038 client_info = &internals->mode6.client_table[i];
1040 if (client_info->in_use) {
1041 /* Allocate new packet to send ARP update on current slave */
1042 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1043 if (upd_pkt == NULL) {
1045 "Failed to allocate ARP packet from pool");
1048 pkt_size = sizeof(struct rte_ether_hdr) +
1049 sizeof(struct rte_arp_hdr) +
1050 client_info->vlan_count *
1051 sizeof(struct rte_vlan_hdr);
1052 upd_pkt->data_len = pkt_size;
1053 upd_pkt->pkt_len = pkt_size;
1055 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1058 /* Add packet to update tx buffer */
1059 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1060 update_bufs_pkts[slave_idx]++;
1063 internals->mode6.ntt = 0;
1066 /* Send ARP packets on proper slaves */
1067 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1068 if (slave_bufs_pkts[i] > 0) {
1069 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1070 slave_bufs[i], slave_bufs_pkts[i]);
1071 for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1072 bufs[nb_pkts - 1 - num_not_send - j] =
1073 slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
1076 num_tx_total += num_send;
1077 num_not_send += slave_bufs_pkts[i] - num_send;
1079 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1080 /* Print TX stats including update packets */
1081 for (j = 0; j < slave_bufs_pkts[i]; j++) {
1082 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
1083 struct rte_ether_hdr *);
1084 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1090 /* Send update packets on proper slaves */
1091 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1092 if (update_bufs_pkts[i] > 0) {
1093 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1094 update_bufs_pkts[i]);
1095 for (j = num_send; j < update_bufs_pkts[i]; j++) {
1096 rte_pktmbuf_free(update_bufs[i][j]);
1098 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1099 for (j = 0; j < update_bufs_pkts[i]; j++) {
1100 eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
1101 struct rte_ether_hdr *);
1102 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1108 /* Send non-ARP packets using tlb policy */
1109 if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1110 num_send = bond_ethdev_tx_burst_tlb(queue,
1111 slave_bufs[RTE_MAX_ETHPORTS],
1112 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1114 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
1115 bufs[nb_pkts - 1 - num_not_send - j] =
1116 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
1119 num_tx_total += num_send;
1122 return num_tx_total;
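/*
 * Common TX helper for balance and 802.3ad modes: hash each packet to a
 * slave using the configured xmit policy, burst each slave's batch, and
 * move any unsent packets to the tail of bufs for the caller to retry.
 */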
1125 static inline uint16_t
1126 tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1127 uint16_t *slave_port_ids, uint16_t slave_count)
1129 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1130 struct bond_dev_private *internals = bd_tx_q->dev_private;
1132 /* Array into which mbufs are sorted for transmission on each slave */
1133 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1134 /* Number of mbufs for transmission on each slave */
1135 uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1136 /* Mapping array generated by hash function to map mbufs to slaves */
1137 uint16_t bufs_slave_port_idxs[nb_bufs];
1139 uint16_t slave_tx_count;
1140 uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1145 * Populate each slave's mbuf array with the packets to be sent on it,
1146 * selecting the output slave using a hash based on the xmit policy
1148 internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1149 bufs_slave_port_idxs);
1151 for (i = 0; i < nb_bufs; i++) {
1152 /* Populate slave mbuf arrays with mbufs for that slave. */
1153 uint16_t slave_idx = bufs_slave_port_idxs[i];
1155 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1158 /* Send packet burst on each slave device */
1159 for (i = 0; i < slave_count; i++) {
1160 if (slave_nb_bufs[i] == 0)
1163 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1164 bd_tx_q->queue_id, slave_bufs[i],
1165 slave_nb_bufs[i]);
1167 total_tx_count += slave_tx_count;
1169 /* If tx burst fails move packets to end of bufs */
1170 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1171 int slave_tx_fail_count = slave_nb_bufs[i] -
1173 total_tx_fail_count += slave_tx_fail_count;
1174 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1175 &slave_bufs[i][slave_tx_count],
1176 slave_tx_fail_count * sizeof(bufs[0]));
1180 return total_tx_count;
1184 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1187 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1188 struct bond_dev_private *internals = bd_tx_q->dev_private;
1190 uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1191 uint16_t slave_count;
1193 if (unlikely(nb_bufs == 0))
1196 /* Copy slave list to protect against slave up/down changes during tx
1199 slave_count = internals->active_slave_count;
1200 if (unlikely(slave_count < 1))
1203 memcpy(slave_port_ids, internals->active_slaves,
1204 sizeof(slave_port_ids[0]) * slave_count);
1205 return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
1206 slave_count);
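/*
 * 802.3ad TX: drain any pending LACP control frames from each slave's
 * tx_ring (unless a dedicated control queue carries them), then balance
 * the data packets across the slaves currently in DISTRIBUTING state.
 */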
1209 static inline uint16_t
1210 tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1213 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1214 struct bond_dev_private *internals = bd_tx_q->dev_private;
1216 uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1217 uint16_t slave_count;
1219 uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1220 uint16_t dist_slave_count;
1222 uint16_t slave_tx_count;
1226 /* Copy slave list to protect against slave up/down changes during tx
1228 slave_count = internals->active_slave_count;
1229 if (unlikely(slave_count < 1))
1232 memcpy(slave_port_ids, internals->active_slaves,
1233 sizeof(slave_port_ids[0]) * slave_count);
1238 /* Check for LACP control packets and send if available */
1239 for (i = 0; i < slave_count; i++) {
1240 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1241 struct rte_mbuf *ctrl_pkt = NULL;
1243 if (likely(rte_ring_empty(port->tx_ring)))
1246 if (rte_ring_dequeue(port->tx_ring,
1247 (void **)&ctrl_pkt) != -ENOENT) {
1248 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1249 bd_tx_q->queue_id, &ctrl_pkt, 1);
1251 * re-enqueue LAG control plane packets to buffering
1252 * ring if transmission fails so the packet isn't lost.
1254 if (slave_tx_count != 1)
1255 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1260 if (unlikely(nb_bufs == 0))
1263 dist_slave_count = 0;
1264 for (i = 0; i < slave_count; i++) {
1265 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1267 if (ACTOR_STATE(port, DISTRIBUTING))
1268 dist_slave_port_ids[dist_slave_count++] =
1272 if (unlikely(dist_slave_count < 1))
1275 return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
1276 dist_slave_count);
1280 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1283 return tx_burst_8023ad(queue, bufs, nb_bufs, false);
1287 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
1290 return tx_burst_8023ad(queue, bufs, nb_bufs, true);
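/*
 * Broadcast TX: bump each mbuf's reference count so the same packet can be
 * handed to every slave; on partial failure, free the copies that did not
 * make it out on any slave other than the most successful one.
 */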
1294 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1297 struct bond_dev_private *internals;
1298 struct bond_tx_queue *bd_tx_q;
1300 uint16_t slaves[RTE_MAX_ETHPORTS];
1301 uint8_t tx_failed_flag = 0;
1302 uint16_t num_of_slaves;
1304 uint16_t max_nb_of_tx_pkts = 0;
1306 int slave_tx_total[RTE_MAX_ETHPORTS];
1307 int i, most_successful_tx_slave = -1;
1309 bd_tx_q = (struct bond_tx_queue *)queue;
1310 internals = bd_tx_q->dev_private;
1312 /* Copy slave list to protect against slave up/down changes during tx
1314 num_of_slaves = internals->active_slave_count;
1315 memcpy(slaves, internals->active_slaves,
1316 sizeof(internals->active_slaves[0]) * num_of_slaves);
1318 if (num_of_slaves < 1)
1321 /* Increment reference count on mbufs */
1322 for (i = 0; i < nb_pkts; i++)
1323 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1325 /* Transmit burst on each active slave */
1326 for (i = 0; i < num_of_slaves; i++) {
1327 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1328 bufs, nb_pkts);
1330 if (unlikely(slave_tx_total[i] < nb_pkts))
1333 /* record the value and slave index for the slave which transmits the
1334 * maximum number of packets */
1335 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1336 max_nb_of_tx_pkts = slave_tx_total[i];
1337 most_successful_tx_slave = i;
1341 /* if slaves fail to transmit packets from burst, the calling application
1342 * is not expected to know about multiple references to packets so we must
1343 * handle failures of all packets except those of the most successful slave
1345 if (unlikely(tx_failed_flag))
1346 for (i = 0; i < num_of_slaves; i++)
1347 if (i != most_successful_tx_slave)
1348 while (slave_tx_total[i] < nb_pkts)
1349 rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1351 return max_nb_of_tx_pkts;
1355 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1357 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1359 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1361 * If in mode 4, save the link properties of the first slave;
1362 * all subsequent slaves must match these properties
1364 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1366 bond_link->link_autoneg = slave_link->link_autoneg;
1367 bond_link->link_duplex = slave_link->link_duplex;
1368 bond_link->link_speed = slave_link->link_speed;
1371 * In any other mode the link properties are set to default
1372 * values of AUTONEG/DUPLEX
1374 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1375 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1380 link_properties_valid(struct rte_eth_dev *ethdev,
1381 struct rte_eth_link *slave_link)
1383 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1385 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1386 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1388 if (bond_link->link_duplex != slave_link->link_duplex ||
1389 bond_link->link_autoneg != slave_link->link_autoneg ||
1390 bond_link->link_speed != slave_link->link_speed)
1398 mac_address_get(struct rte_eth_dev *eth_dev,
1399 struct rte_ether_addr *dst_mac_addr)
1401 struct rte_ether_addr *mac_addr;
1403 if (eth_dev == NULL) {
1404 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1408 if (dst_mac_addr == NULL) {
1409 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1413 mac_addr = eth_dev->data->mac_addrs;
1415 rte_ether_addr_copy(mac_addr, dst_mac_addr);
1420 mac_address_set(struct rte_eth_dev *eth_dev,
1421 struct rte_ether_addr *new_mac_addr)
1423 struct rte_ether_addr *mac_addr;
1425 if (eth_dev == NULL) {
1426 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1430 if (new_mac_addr == NULL) {
1431 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1435 mac_addr = eth_dev->data->mac_addrs;
1437 /* If the new MAC differs from the current MAC then update it */
1438 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1439 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1444 static const struct rte_ether_addr null_mac_addr;
1447 * Add additional MAC addresses to the slave
1450 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1451 uint16_t slave_port_id)
1454 struct rte_ether_addr *mac_addr;
1456 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1457 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1458 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1461 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1464 for (i--; i > 0; i--)
1465 rte_eth_dev_mac_addr_remove(slave_port_id,
1466 &bonded_eth_dev->data->mac_addrs[i]);
1475 * Remove additional MAC addresses from the slave
1478 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1479 uint16_t slave_port_id)
1482 struct rte_ether_addr *mac_addr;
1485 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1486 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1487 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1490 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1491 /* save only the first error */
1492 if (ret < 0 && rc == 0)
1500 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1502 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1505 /* Update slave devices' MAC addresses */
1506 if (internals->slave_count < 1)
1509 switch (internals->mode) {
1510 case BONDING_MODE_ROUND_ROBIN:
1511 case BONDING_MODE_BALANCE:
1512 case BONDING_MODE_BROADCAST:
1513 for (i = 0; i < internals->slave_count; i++) {
1514 if (rte_eth_dev_default_mac_addr_set(
1515 internals->slaves[i].port_id,
1516 bonded_eth_dev->data->mac_addrs)) {
1517 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1518 internals->slaves[i].port_id);
1523 case BONDING_MODE_8023AD:
1524 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1526 case BONDING_MODE_ACTIVE_BACKUP:
1527 case BONDING_MODE_TLB:
1528 case BONDING_MODE_ALB:
1530 for (i = 0; i < internals->slave_count; i++) {
1531 if (internals->slaves[i].port_id ==
1532 internals->current_primary_port) {
1533 if (rte_eth_dev_default_mac_addr_set(
1534 internals->primary_port,
1535 bonded_eth_dev->data->mac_addrs)) {
1536 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1537 internals->current_primary_port);
1541 if (rte_eth_dev_default_mac_addr_set(
1542 internals->slaves[i].port_id,
1543 &internals->slaves[i].persisted_mac_addr)) {
1544 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1545 internals->slaves[i].port_id);
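/*
 * Bind the RX/TX burst handlers implementing the requested bonding mode;
 * modes 4 (802.3ad) and 6 (ALB) additionally require their state machines
 * to be enabled first.
 */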
1556 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1558 struct bond_dev_private *internals;
1560 internals = eth_dev->data->dev_private;
1563 case BONDING_MODE_ROUND_ROBIN:
1564 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1565 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1567 case BONDING_MODE_ACTIVE_BACKUP:
1568 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1569 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1571 case BONDING_MODE_BALANCE:
1572 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1573 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1575 case BONDING_MODE_BROADCAST:
1576 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1577 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1579 case BONDING_MODE_8023AD:
1580 if (bond_mode_8023ad_enable(eth_dev) != 0)
1583 if (internals->mode4.dedicated_queues.enabled == 0) {
1584 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1585 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1586 RTE_BOND_LOG(WARNING,
1587 "Using mode 4, it is necessary to do TX burst "
1588 "and RX burst at least every 100ms.");
1590 /* Use flow director's optimization */
1591 eth_dev->rx_pkt_burst =
1592 bond_ethdev_rx_burst_8023ad_fast_queue;
1593 eth_dev->tx_pkt_burst =
1594 bond_ethdev_tx_burst_8023ad_fast_queue;
1597 case BONDING_MODE_TLB:
1598 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1599 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1601 case BONDING_MODE_ALB:
1602 if (bond_mode_alb_enable(eth_dev) != 0)
1605 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1606 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1612 internals->mode = mode;
1619 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1620 struct rte_eth_dev *slave_eth_dev)
1623 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1624 struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1626 if (port->slow_pool == NULL) {
1628 int slave_id = slave_eth_dev->data->port_id;
1630 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1632 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1633 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1634 slave_eth_dev->data->numa_node);
1636 /* Any memory allocation failure in initialization is critical because
1637 * resources can't be freed, so reinitialization is impossible. */
1638 if (port->slow_pool == NULL) {
1639 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1640 slave_id, mem_name, rte_strerror(rte_errno));
1644 if (internals->mode4.dedicated_queues.enabled == 1) {
1645 /* Configure slow Rx queue */
1647 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1648 internals->mode4.dedicated_queues.rx_qid, 128,
1649 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1650 NULL, port->slow_pool);
1653 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1654 slave_eth_dev->data->port_id,
1655 internals->mode4.dedicated_queues.rx_qid,
1660 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1661 internals->mode4.dedicated_queues.tx_qid, 512,
1662 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1666 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1667 slave_eth_dev->data->port_id,
1668 internals->mode4.dedicated_queues.tx_qid,
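/*
 * (Re)configure a slave to mirror the bonded device: stop it, propagate
 * the RSS/offload/MTU configuration, set up its RX/TX queues (plus the
 * mode 4 dedicated slow queues when enabled), restart it and resync RETA.
 */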
1677 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1678 struct rte_eth_dev *slave_eth_dev)
1680 struct bond_rx_queue *bd_rx_q;
1681 struct bond_tx_queue *bd_tx_q;
1682 uint16_t nb_rx_queues;
1683 uint16_t nb_tx_queues;
1687 struct rte_flow_error flow_error;
1689 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1692 rte_eth_dev_stop(slave_eth_dev->data->port_id);
1694 /* Enable interrupts on slave device if supported */
1695 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1696 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1698 /* If RSS is enabled for bonding, try to enable it for slaves */
1699 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1700 if (internals->rss_key_len != 0) {
1701 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1702 internals->rss_key_len;
1703 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1706 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1709 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1710 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1711 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1712 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1715 if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1716 DEV_RX_OFFLOAD_VLAN_FILTER)
1717 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1718 DEV_RX_OFFLOAD_VLAN_FILTER;
1720 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1721 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1723 nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1724 nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1726 if (internals->mode == BONDING_MODE_8023AD) {
1727 if (internals->mode4.dedicated_queues.enabled == 1) {
1733 errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1734 bonded_eth_dev->data->mtu);
1735 if (errval != 0 && errval != -ENOTSUP) {
1736 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1737 slave_eth_dev->data->port_id, errval);
1741 /* Configure device */
1742 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1743 nb_rx_queues, nb_tx_queues,
1744 &(slave_eth_dev->data->dev_conf));
1746 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1747 slave_eth_dev->data->port_id, errval);
1751 /* Setup Rx Queues */
1752 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1753 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1755 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1756 bd_rx_q->nb_rx_desc,
1757 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1758 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1761 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1762 slave_eth_dev->data->port_id, q_id, errval);
1767 /* Setup Tx Queues */
1768 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1769 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1771 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1772 bd_tx_q->nb_tx_desc,
1773 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1777 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1778 slave_eth_dev->data->port_id, q_id, errval);
1783 if (internals->mode == BONDING_MODE_8023AD &&
1784 internals->mode4.dedicated_queues.enabled == 1) {
1785 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
1789 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1790 slave_eth_dev->data->port_id) != 0) {
1792 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1793 slave_eth_dev->data->port_id, q_id, errval);
1797 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1798 rte_flow_destroy(slave_eth_dev->data->port_id,
1799 internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1802 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1803 slave_eth_dev->data->port_id);
1807 errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1809 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1810 slave_eth_dev->data->port_id, errval);
1814 /* If RSS is enabled for bonding, synchronize RETA */
1815 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1817 struct bond_dev_private *internals;
1819 internals = bonded_eth_dev->data->dev_private;
1821 for (i = 0; i < internals->slave_count; i++) {
1822 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1823 errval = rte_eth_dev_rss_reta_update(
1824 slave_eth_dev->data->port_id,
1825 &internals->reta_conf[0],
1826 internals->slaves[i].reta_size);
1828 RTE_BOND_LOG(WARNING,
1829 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1830 " RSS Configuration for bonding may be inconsistent.",
1831 slave_eth_dev->data->port_id, errval);
1838 /* If lsc interrupt is set, check initial slave's link status */
1839 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1840 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1841 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1842 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1850 slave_remove(struct bond_dev_private *internals,
1851 struct rte_eth_dev *slave_eth_dev)
1855 for (i = 0; i < internals->slave_count; i++)
1856 if (internals->slaves[i].port_id ==
1857 slave_eth_dev->data->port_id)
1860 if (i < (internals->slave_count - 1)) {
1861 struct rte_flow *flow;
1863 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1864 sizeof(internals->slaves[0]) *
1865 (internals->slave_count - i - 1));
1866 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1867 memmove(&flow->flows[i], &flow->flows[i + 1],
1868 sizeof(flow->flows[0]) *
1869 (internals->slave_count - i - 1));
1870 flow->flows[internals->slave_count - 1] = NULL;
1874 internals->slave_count--;
1876 /* force reconfiguration of slave interfaces */
1877 _rte_eth_dev_reset(slave_eth_dev);
1881 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1884 slave_add(struct bond_dev_private *internals,
1885 struct rte_eth_dev *slave_eth_dev)
1887 struct bond_slave_details *slave_details =
1888 &internals->slaves[internals->slave_count];
1890 slave_details->port_id = slave_eth_dev->data->port_id;
1891 slave_details->last_link_status = 0;
1893 /* Mark slave devices that don't support interrupts so we can
1894 * compensate when we start the bond
1896 if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1897 slave_details->link_status_poll_enabled = 1;
1900 slave_details->link_status_wait_to_complete = 0;
1901 /* Save the slave's current MAC so it can be restored when it leaves the bond */
1902 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1903 sizeof(struct rte_ether_addr));
1907 bond_ethdev_primary_set(struct bond_dev_private *internals,
1908 uint16_t slave_port_id)
1912 if (internals->active_slave_count < 1)
1913 internals->current_primary_port = slave_port_id;
1915 /* Search bonded device slave ports for new proposed primary port */
1916 for (i = 0; i < internals->active_slave_count; i++) {
1917 if (internals->active_slaves[i] == slave_port_id)
1918 internals->current_primary_port = slave_port_id;
1923 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1926 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1928 struct bond_dev_private *internals;
1931 /* slave eth dev will be started by bonded device */
1932 if (check_for_bonded_ethdev(eth_dev)) {
1933 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1934 eth_dev->data->port_id);
1938 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1939 eth_dev->data->dev_started = 1;
1941 internals = eth_dev->data->dev_private;
1943 if (internals->slave_count == 0) {
1944 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1948 if (internals->user_defined_mac == 0) {
1949 struct rte_ether_addr *new_mac_addr = NULL;
1951 for (i = 0; i < internals->slave_count; i++)
1952 if (internals->slaves[i].port_id == internals->primary_port)
1953 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1955 if (new_mac_addr == NULL)
1958 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1959 RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1960 eth_dev->data->port_id);
1965 /* If bonded device is configured in promiscuous mode then re-apply the config */
1966 if (internals->promiscuous_en)
1967 bond_ethdev_promiscuous_enable(eth_dev);
1969 if (internals->mode == BONDING_MODE_8023AD) {
1970 if (internals->mode4.dedicated_queues.enabled == 1) {
1971 internals->mode4.dedicated_queues.rx_qid =
1972 eth_dev->data->nb_rx_queues;
1973 internals->mode4.dedicated_queues.tx_qid =
1974 eth_dev->data->nb_tx_queues;
1979 /* Reconfigure each slave device if starting bonded device */
1980 for (i = 0; i < internals->slave_count; i++) {
1981 struct rte_eth_dev *slave_ethdev =
1982 &(rte_eth_devices[internals->slaves[i].port_id]);
1983 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1985 "bonded port (%d) failed to reconfigure slave device (%d)",
1986 eth_dev->data->port_id,
1987 internals->slaves[i].port_id);
1990 /* We will need to poll for link status if any slave doesn't
1991 * support interrupts
1993 if (internals->slaves[i].link_status_poll_enabled)
1994 internals->link_status_polling_enabled = 1;
1997 /* start polling if needed */
1998 if (internals->link_status_polling_enabled) {
2000 internals->link_status_polling_interval_ms * 1000,
2001 bond_ethdev_slave_link_status_change_monitor,
2002 (void *)&rte_eth_devices[internals->port_id]);
2005 /* Update all slave devices' MACs */
2006 if (mac_address_slaves_update(eth_dev) != 0)
2009 if (internals->user_defined_primary_port)
2010 bond_ethdev_primary_set(internals, internals->primary_port);
2012 if (internals->mode == BONDING_MODE_8023AD)
2013 bond_mode_8023ad_start(eth_dev);
2015 if (internals->mode == BONDING_MODE_TLB ||
2016 internals->mode == BONDING_MODE_ALB)
2017 bond_tlb_enable(internals);
2022 eth_dev->data->dev_started = 0;
2027 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2031 if (dev->data->rx_queues != NULL) {
2032 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2033 rte_free(dev->data->rx_queues[i]);
2034 dev->data->rx_queues[i] = NULL;
2036 dev->data->nb_rx_queues = 0;
2039 if (dev->data->tx_queues != NULL) {
2040 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2041 rte_free(dev->data->tx_queues[i]);
2042 dev->data->tx_queues[i] = NULL;
2044 dev->data->nb_tx_queues = 0;
2049 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2051 struct bond_dev_private *internals = eth_dev->data->dev_private;
2054 if (internals->mode == BONDING_MODE_8023AD) {
2058 bond_mode_8023ad_stop(eth_dev);
2060 /* Discard all messages to/from mode 4 state machines */
2061 for (i = 0; i < internals->active_slave_count; i++) {
2062 port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2064 RTE_ASSERT(port->rx_ring != NULL);
2065 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2066 rte_pktmbuf_free(pkt);
2068 RTE_ASSERT(port->tx_ring != NULL);
2069 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2070 rte_pktmbuf_free(pkt);
2074 if (internals->mode == BONDING_MODE_TLB ||
2075 internals->mode == BONDING_MODE_ALB) {
2076 bond_tlb_disable(internals);
2077 for (i = 0; i < internals->active_slave_count; i++)
2078 tlb_last_obytets[internals->active_slaves[i]] = 0;
2081 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2082 eth_dev->data->dev_started = 0;
2084 internals->link_status_polling_enabled = 0;
2085 for (i = 0; i < internals->slave_count; i++) {
2086 uint16_t slave_id = internals->slaves[i].port_id;
2087 if (find_slave_by_id(internals->active_slaves,
2088 internals->active_slave_count, slave_id) !=
2089 internals->active_slave_count) {
2090 internals->slaves[i].last_link_status = 0;
2091 rte_eth_dev_stop(slave_id);
2092 deactivate_slave(eth_dev, slave_id);
2098 bond_ethdev_close(struct rte_eth_dev *dev)
2100 struct bond_dev_private *internals = dev->data->dev_private;
2101 uint16_t bond_port_id = internals->port_id;
2103 struct rte_flow_error ferror;
2105 RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2106 while (internals->slave_count != skipped) {
2107 uint16_t port_id = internals->slaves[skipped].port_id;
2109 rte_eth_dev_stop(port_id);
2111 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2113 "Failed to remove port %d from bonded device %s",
2114 port_id, dev->device->name);
2118 bond_flow_ops.flush(dev, &ferror);
2119 bond_ethdev_free_queues(dev);
2120 rte_bitmap_reset(internals->vlan_filter_bmp);
2123 /* forward declaration */
2124 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2127 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2129 struct bond_dev_private *internals = dev->data->dev_private;
2131 uint16_t max_nb_rx_queues = UINT16_MAX;
2132 uint16_t max_nb_tx_queues = UINT16_MAX;
2133 uint16_t max_rx_desc_lim = UINT16_MAX;
2134 uint16_t max_tx_desc_lim = UINT16_MAX;
2136 dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2138 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2139 internals->candidate_max_rx_pktlen :
2140 RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2142 /* Max number of tx/rx queues that the bonded device can support is the
2143 * minimum of the values of the bonded slaves, as all slaves must be capable
2144 * of supporting the same number of tx/rx queues.
2146 if (internals->slave_count > 0) {
2147 struct rte_eth_dev_info slave_info;
2150 for (idx = 0; idx < internals->slave_count; idx++) {
2151 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2154 if (slave_info.max_rx_queues < max_nb_rx_queues)
2155 max_nb_rx_queues = slave_info.max_rx_queues;
2157 if (slave_info.max_tx_queues < max_nb_tx_queues)
2158 max_nb_tx_queues = slave_info.max_tx_queues;
2160 if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2161 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2163 if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2164 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2168 dev_info->max_rx_queues = max_nb_rx_queues;
2169 dev_info->max_tx_queues = max_nb_tx_queues;
2171 memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2172 sizeof(dev_info->default_rxconf));
2173 memcpy(&dev_info->default_txconf, &internals->default_txconf,
2174 sizeof(dev_info->default_txconf));
2176 dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2177 dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2180 * If dedicated HW queues are enabled for the link bonding device in
2181 * LACP mode, then reduce the maximum number of data path queues by 1.
2183 if (internals->mode == BONDING_MODE_8023AD &&
2184 internals->mode4.dedicated_queues.enabled == 1) {
2185 dev_info->max_rx_queues--;
2186 dev_info->max_tx_queues--;
2189 dev_info->min_rx_bufsize = 0;
2191 dev_info->rx_offload_capa = internals->rx_offload_capa;
2192 dev_info->tx_offload_capa = internals->tx_offload_capa;
2193 dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2194 dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2195 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2197 dev_info->reta_size = internals->reta_size;
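/*
 * Example (sketch) of how an application might consume the limits
 * computed above; bonded_port_id and the nb_rxq/nb_rxd requested values
 * are assumed application variables. Because the reported maxima are
 * the minima over all slaves, any queue or descriptor count chosen from
 * them is valid on every slave.
 *
 *	struct rte_eth_dev_info info;
 *
 *	rte_eth_dev_info_get(bonded_port_id, &info);
 *	nb_rxq = RTE_MIN(nb_rxq, info.max_rx_queues);
 *	nb_rxd = RTE_MIN(nb_rxd, info.rx_desc_lim.nb_max);
 */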
2201 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2205 struct bond_dev_private *internals = dev->data->dev_private;
2206 int i, res;
2207 /* don't do this while a slave is being added */
2208 rte_spinlock_lock(&internals->lock);
2210 if (on)
2211 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2212 else
2213 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2215 for (i = 0; i < internals->slave_count; i++) {
2216 uint16_t port_id = internals->slaves[i].port_id;
2218 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2220 RTE_BOND_LOG(WARNING,
2221 "Setting VLAN filter on slave port %u not supported.",
2225 rte_spinlock_unlock(&internals->lock);
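/*
 * Example (sketch): enabling a VLAN filter on the bonded port from an
 * application; the handler above fans the request out to every slave.
 * The port id and VLAN id below are assumed values.
 *
 *	if (rte_eth_dev_vlan_filter(bonded_port_id, 100, 1) != 0)
 *		rte_exit(EXIT_FAILURE, "VLAN filter setup failed\n");
 */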
2230 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2231 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2232 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2234 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2235 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2236 0, dev->data->numa_node);
2237 if (bd_rx_q == NULL)
2240 bd_rx_q->queue_id = rx_queue_id;
2241 bd_rx_q->dev_private = dev->data->dev_private;
2243 bd_rx_q->nb_rx_desc = nb_rx_desc;
2245 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2246 bd_rx_q->mb_pool = mb_pool;
2248 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2254 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2255 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2256 const struct rte_eth_txconf *tx_conf)
2258 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2259 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2260 0, dev->data->numa_node);
2262 if (bd_tx_q == NULL)
2265 bd_tx_q->queue_id = tx_queue_id;
2266 bd_tx_q->dev_private = dev->data->dev_private;
2268 bd_tx_q->nb_tx_desc = nb_tx_desc;
2269 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2271 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
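/*
 * Example (sketch): setting up one Rx and one Tx queue on the bonded
 * device; the handlers above only record the configuration, which the
 * bonding PMD applies to each slave. The mempool, descriptor counts and
 * port id are assumptions; NULL conf pointers select the defaults
 * advertised in dev_info.
 *
 *	rte_eth_rx_queue_setup(bonded_port_id, 0, 512, rte_socket_id(),
 *			NULL, mb_pool);
 *	rte_eth_tx_queue_setup(bonded_port_id, 0, 512, rte_socket_id(),
 *			NULL);
 */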
2277 bond_ethdev_rx_queue_release(void *queue)
2286 bond_ethdev_tx_queue_release(void *queue)
2295 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2297 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2298 struct bond_dev_private *internals;
2300 /* Default "polling slave found" to true, as we don't want to disable
2301 * the polling thread if we cannot get the lock */
2302 int i, polling_slave_found = 1;
2307 bonded_ethdev = cb_arg;
2308 internals = bonded_ethdev->data->dev_private;
2310 if (!bonded_ethdev->data->dev_started ||
2311 !internals->link_status_polling_enabled)
2314 /* If the device is currently being configured then don't check the
2315 * slaves' link status; wait until the next period */
2316 if (rte_spinlock_trylock(&internals->lock)) {
2317 if (internals->slave_count > 0)
2318 polling_slave_found = 0;
2320 for (i = 0; i < internals->slave_count; i++) {
2321 if (!internals->slaves[i].link_status_poll_enabled)
2324 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2325 polling_slave_found = 1;
2327 /* Update slave link status */
2328 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2329 internals->slaves[i].link_status_wait_to_complete);
2331 /* If the link status has changed since last checked then call the
2332 * LSC event callback */
2333 if (slave_ethdev->data->dev_link.link_status !=
2334 internals->slaves[i].last_link_status) {
2335 internals->slaves[i].last_link_status =
2336 slave_ethdev->data->dev_link.link_status;
2338 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2339 RTE_ETH_EVENT_INTR_LSC,
2340 &bonded_ethdev->data->port_id,
2344 rte_spinlock_unlock(&internals->lock);
2347 if (polling_slave_found)
2348 /* Set alarm to continue monitoring link status of slave ethdev's */
2349 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2350 bond_ethdev_slave_link_status_change_monitor, cb_arg);
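/*
 * The monitor above re-arms itself through rte_eal_alarm_set(), which
 * yields a periodic callback without a dedicated thread. A minimal
 * sketch of the same pattern (callback name and work are hypothetical;
 * the interval argument is in microseconds):
 *
 *	static void
 *	periodic_cb(void *arg)
 *	{
 *		do_work(arg);
 *		rte_eal_alarm_set(10 * 1000, periodic_cb, arg);
 *	}
 *
 *	rte_eal_alarm_set(10 * 1000, periodic_cb, ctx);
 */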
2354 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2356 void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2358 struct bond_dev_private *bond_ctx;
2359 struct rte_eth_link slave_link;
2361 uint16_t idx;
2363 bond_ctx = ethdev->data->dev_private;
2365 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2367 if (ethdev->data->dev_started == 0 ||
2368 bond_ctx->active_slave_count == 0) {
2369 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2373 ethdev->data->dev_link.link_status = ETH_LINK_UP;
2375 if (wait_to_complete)
2376 link_update = rte_eth_link_get;
2378 link_update = rte_eth_link_get_nowait;
2380 switch (bond_ctx->mode) {
2381 case BONDING_MODE_BROADCAST:
2383 * Initialize link speed to UINT32_MAX so that the minimum
2384 * computation below starts from the first active slave's value
2386 ethdev->data->dev_link.link_speed = UINT32_MAX;
2389 * Link speed is the minimum of all the slaves' link speeds, as
2390 * packet loss will occur on a slave if transmission at rates
2391 * greater than this is attempted
2393 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2394 link_update(bond_ctx->active_slaves[idx], &slave_link);
2396 if (slave_link.link_speed <
2397 ethdev->data->dev_link.link_speed)
2398 ethdev->data->dev_link.link_speed =
2399 slave_link.link_speed;
2402 case BONDING_MODE_ACTIVE_BACKUP:
2403 /* Current primary slave */
2404 link_update(bond_ctx->current_primary_port, &slave_link);
2406 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2408 case BONDING_MODE_8023AD:
2409 ethdev->data->dev_link.link_autoneg =
2410 bond_ctx->mode4.slave_link.link_autoneg;
2411 ethdev->data->dev_link.link_duplex =
2412 bond_ctx->mode4.slave_link.link_duplex;
2413 /* fall through to update link speed */
2414 case BONDING_MODE_ROUND_ROBIN:
2415 case BONDING_MODE_BALANCE:
2416 case BONDING_MODE_TLB:
2417 case BONDING_MODE_ALB:
2420 * In these modes the maximum theoretical link speed is the sum
2421 * of all the slaves' link speeds
2423 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2425 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2426 link_update(bond_ctx->active_slaves[idx], &slave_link);
2428 ethdev->data->dev_link.link_speed +=
2429 slave_link.link_speed;
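/*
 * Example (sketch): reading the aggregate link computed above from an
 * application; the port id is an assumed value. In the round-robin,
 * balance, TLB, ALB and 802.3AD modes link_speed is the sum of the
 * active slaves' speeds; in broadcast mode it is the minimum.
 *
 *	struct rte_eth_link link;
 *
 *	rte_eth_link_get_nowait(bonded_port_id, &link);
 *	printf("link %s, %u Mbps\n",
 *		link.link_status ? "up" : "down", link.link_speed);
 */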
2439 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2441 struct bond_dev_private *internals = dev->data->dev_private;
2442 struct rte_eth_stats slave_stats;
2445 for (i = 0; i < internals->slave_count; i++) {
2446 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2448 stats->ipackets += slave_stats.ipackets;
2449 stats->opackets += slave_stats.opackets;
2450 stats->ibytes += slave_stats.ibytes;
2451 stats->obytes += slave_stats.obytes;
2452 stats->imissed += slave_stats.imissed;
2453 stats->ierrors += slave_stats.ierrors;
2454 stats->oerrors += slave_stats.oerrors;
2455 stats->rx_nombuf += slave_stats.rx_nombuf;
2457 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2458 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2459 stats->q_opackets[j] += slave_stats.q_opackets[j];
2460 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2461 stats->q_obytes[j] += slave_stats.q_obytes[j];
2462 stats->q_errors[j] += slave_stats.q_errors[j];
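/*
 * Example (sketch): since the handler above sums the counters of every
 * slave, a single query on the bonded port (assumed id below) reports
 * traffic for the whole bond.
 *
 *	struct rte_eth_stats stats;
 *
 *	if (rte_eth_stats_get(bonded_port_id, &stats) == 0)
 *		printf("rx %" PRIu64 " tx %" PRIu64 "\n",
 *			stats.ipackets, stats.opackets);
 */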
2471 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2473 struct bond_dev_private *internals = dev->data->dev_private;
2476 for (i = 0; i < internals->slave_count; i++)
2477 rte_eth_stats_reset(internals->slaves[i].port_id);
2481 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2483 struct bond_dev_private *internals = eth_dev->data->dev_private;
2486 internals->promiscuous_en = 1;
2488 switch (internals->mode) {
2489 /* Promiscuous mode is propagated to all slaves */
2490 case BONDING_MODE_ROUND_ROBIN:
2491 case BONDING_MODE_BALANCE:
2492 case BONDING_MODE_BROADCAST:
2493 for (i = 0; i < internals->slave_count; i++)
2494 rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2496 /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2497 case BONDING_MODE_8023AD:
2499 /* Promiscuous mode is propagated only to primary slave */
2500 case BONDING_MODE_ACTIVE_BACKUP:
2501 case BONDING_MODE_TLB:
2502 case BONDING_MODE_ALB:
2504 /* Do not touch promiscuous mode when there is no primary port */
2505 if (internals->slave_count == 0)
2507 rte_eth_promiscuous_enable(internals->current_primary_port);
2512 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2514 struct bond_dev_private *internals = dev->data->dev_private;
2517 internals->promiscuous_en = 0;
2519 switch (internals->mode) {
2520 /* Promiscuous mode is propagated to all slaves */
2521 case BONDING_MODE_ROUND_ROBIN:
2522 case BONDING_MODE_BALANCE:
2523 case BONDING_MODE_BROADCAST:
2524 for (i = 0; i < internals->slave_count; i++)
2525 rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2527 /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2528 case BONDING_MODE_8023AD:
2530 /* Promiscuous mode is propagated only to primary slave */
2531 case BONDING_MODE_ACTIVE_BACKUP:
2532 case BONDING_MODE_TLB:
2533 case BONDING_MODE_ALB:
2535 /* Do not touch promiscuous mode when there is no primary port */
2536 if (internals->slave_count == 0)
2538 rte_eth_promiscuous_disable(internals->current_primary_port);
2543 bond_ethdev_delayed_lsc_propagation(void *arg)
2548 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2549 RTE_ETH_EVENT_INTR_LSC, NULL);
2553 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2554 void *param, void *ret_param __rte_unused)
2556 struct rte_eth_dev *bonded_eth_dev;
2557 struct bond_dev_private *internals;
2558 struct rte_eth_link link;
2561 uint8_t lsc_flag = 0;
2562 int valid_slave = 0;
2563 uint16_t active_pos;
2566 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2569 bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2571 if (check_for_bonded_ethdev(bonded_eth_dev))
2574 internals = bonded_eth_dev->data->dev_private;
2576 /* If the device isn't started don't handle interrupts */
2577 if (!bonded_eth_dev->data->dev_started)
2580 /* verify that port_id is a valid slave of bonded port */
2581 for (i = 0; i < internals->slave_count; i++) {
2582 if (internals->slaves[i].port_id == port_id) {
2591 /* Serialize parallel LSC callback invocations, whether triggered by a
2592 * real link event from the slave PMDs or by the bonding PMD itself.
2594 rte_spinlock_lock(&internals->lsc_lock);
2596 /* Search for port in active port list */
2597 active_pos = find_slave_by_id(internals->active_slaves,
2598 internals->active_slave_count, port_id);
2600 rte_eth_link_get_nowait(port_id, &link);
2601 if (link.link_status) {
2602 if (active_pos < internals->active_slave_count)
2605 /* check link state properties if bonded link is up */
2606 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2607 if (link_properties_valid(bonded_eth_dev, &link) != 0)
2608 RTE_BOND_LOG(ERR, "Invalid link properties "
2609 "for slave %d in bonding mode %d",
2610 port_id, internals->mode);
2612 /* inherit slave link properties */
2613 link_properties_set(bonded_eth_dev, &link);
2616 /* If there are no active slave ports then set this port to be
2617 * the primary port */
2619 if (internals->active_slave_count < 1) {
2620 /* If first active slave, then change link status */
2621 bonded_eth_dev->data->dev_link.link_status =
2623 internals->current_primary_port = port_id;
2626 mac_address_slaves_update(bonded_eth_dev);
2629 activate_slave(bonded_eth_dev, port_id);
2631 /* If the user has defined the primary port then default to
2632 * using it */
2634 if (internals->user_defined_primary_port &&
2635 internals->primary_port == port_id)
2636 bond_ethdev_primary_set(internals, port_id);
2638 if (active_pos == internals->active_slave_count)
2641 /* Remove from active slave list */
2642 deactivate_slave(bonded_eth_dev, port_id);
2644 if (internals->active_slave_count < 1)
2647 /* Update primary port id: take the first active slave from the list,
2648 * or fall back to the configured primary port if none is available */
2649 if (port_id == internals->current_primary_port) {
2650 if (internals->active_slave_count > 0)
2651 bond_ethdev_primary_set(internals,
2652 internals->active_slaves[0]);
2654 internals->current_primary_port = internals->primary_port;
2660 * Update bonded device link properties after any change to the set
2661 * of active slaves
2663 bond_ethdev_link_update(bonded_eth_dev, 0);
2666 /* Cancel any possible outstanding interrupts if delays are enabled */
2667 if (internals->link_up_delay_ms > 0 ||
2668 internals->link_down_delay_ms > 0)
2669 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2672 if (bonded_eth_dev->data->dev_link.link_status) {
2673 if (internals->link_up_delay_ms > 0)
2674 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2675 bond_ethdev_delayed_lsc_propagation,
2676 (void *)bonded_eth_dev);
2678 _rte_eth_dev_callback_process(bonded_eth_dev,
2679 RTE_ETH_EVENT_INTR_LSC,
2683 if (internals->link_down_delay_ms > 0)
2684 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2685 bond_ethdev_delayed_lsc_propagation,
2686 (void *)bonded_eth_dev);
2688 _rte_eth_dev_callback_process(bonded_eth_dev,
2689 RTE_ETH_EVENT_INTR_LSC,
2694 rte_spinlock_unlock(&internals->lsc_lock);
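/*
 * Example (sketch): an application can register its own handler on the
 * bonded port to be notified after the propagation logic above runs;
 * the callback name and body are hypothetical.
 *
 *	static int
 *	app_lsc_cb(uint16_t port_id, enum rte_eth_event_type type,
 *			void *cb_arg, void *ret_param)
 *	{
 *		... react to the bonded link change ...
 *		return 0;
 *	}
 *
 *	rte_eth_dev_callback_register(bonded_port_id,
 *			RTE_ETH_EVENT_INTR_LSC, app_lsc_cb, NULL);
 */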
2700 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2701 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2705 int slave_reta_size;
2706 unsigned reta_count;
2707 struct bond_dev_private *internals = dev->data->dev_private;
2709 if (reta_size != internals->reta_size)
2712 /* Copy RETA table */
2713 reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2715 for (i = 0; i < reta_count; i++) {
2716 internals->reta_conf[i].mask = reta_conf[i].mask;
2717 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2718 if ((reta_conf[i].mask >> j) & 0x01)
2719 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2722 /* Fill rest of array */
2723 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2724 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2725 sizeof(internals->reta_conf[0]) * reta_count);
2727 /* Propagate RETA over slaves */
2728 for (i = 0; i < internals->slave_count; i++) {
2729 slave_reta_size = internals->slaves[i].reta_size;
2730 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2731 &internals->reta_conf[0], slave_reta_size);
2740 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2741 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2744 struct bond_dev_private *internals = dev->data->dev_private;
2746 if (reta_size != internals->reta_size)
2749 /* Copy RETA table */
2750 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2751 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2752 if ((reta_conf[i].mask >> j) & 0x01)
2753 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
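/*
 * Example (sketch) of the rte_eth_rss_reta_entry64 layout handled above:
 * the table is packed in groups of RTE_RETA_GROUP_SIZE (64) entries, and
 * bit j of conf[i].mask selects entry i * RTE_RETA_GROUP_SIZE + j.
 * Redirecting table entry 5 to queue 1 on an assumed bonded port, with
 * reta_size as reported in dev_info:
 *
 *	struct rte_eth_rss_reta_entry64 conf[reta_size / RTE_RETA_GROUP_SIZE];
 *
 *	memset(conf, 0, sizeof(conf));
 *	conf[0].mask = 1ULL << 5;
 *	conf[0].reta[5] = 1;
 *	rte_eth_dev_rss_reta_update(bonded_port_id, conf, reta_size);
 */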
2759 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2760 struct rte_eth_rss_conf *rss_conf)
2763 struct bond_dev_private *internals = dev->data->dev_private;
2764 struct rte_eth_rss_conf bond_rss_conf;
2766 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2768 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2770 if (bond_rss_conf.rss_hf != 0)
2771 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2773 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2774 sizeof(internals->rss_key)) {
2775 if (bond_rss_conf.rss_key_len == 0)
2776 bond_rss_conf.rss_key_len = 40;
2777 internals->rss_key_len = bond_rss_conf.rss_key_len;
2778 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2779 internals->rss_key_len);
2782 for (i = 0; i < internals->slave_count; i++) {
2783 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2793 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2794 struct rte_eth_rss_conf *rss_conf)
2796 struct bond_dev_private *internals = dev->data->dev_private;
2798 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2799 rss_conf->rss_key_len = internals->rss_key_len;
2800 if (rss_conf->rss_key)
2801 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
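/*
 * Example (sketch): updating the RSS hash functions on the bonded port;
 * the handler above masks the request against the slaves' common
 * capabilities before propagating it. A NULL key keeps the current one;
 * the port id is an assumed value.
 *
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = NULL,
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *	};
 *
 *	rte_eth_dev_rss_hash_update(bonded_port_id, &conf);
 */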
2807 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2809 struct rte_eth_dev *slave_eth_dev;
2810 struct bond_dev_private *internals = dev->data->dev_private;
2813 rte_spinlock_lock(&internals->lock);
2815 for (i = 0; i < internals->slave_count; i++) {
2816 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2817 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2818 rte_spinlock_unlock(&internals->lock);
2822 for (i = 0; i < internals->slave_count; i++) {
2823 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2825 rte_spinlock_unlock(&internals->lock);
2830 rte_spinlock_unlock(&internals->lock);
2835 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
2836 struct rte_ether_addr *addr)
2838 if (mac_address_set(dev, addr)) {
2839 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2847 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2848 enum rte_filter_type type, enum rte_filter_op op, void *arg)
2850 if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2851 *(const void **)arg = &bond_flow_ops;
2858 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
2859 struct rte_ether_addr *mac_addr,
2860 __rte_unused uint32_t index, uint32_t vmdq)
2862 struct rte_eth_dev *slave_eth_dev;
2863 struct bond_dev_private *internals = dev->data->dev_private;
2866 rte_spinlock_lock(&internals->lock);
2868 for (i = 0; i < internals->slave_count; i++) {
2869 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2870 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2871 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2877 for (i = 0; i < internals->slave_count; i++) {
2878 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2882 for (i--; i >= 0; i--)
2883 rte_eth_dev_mac_addr_remove(
2884 internals->slaves[i].port_id, mac_addr);
2891 rte_spinlock_unlock(&internals->lock);
2896 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2898 struct rte_eth_dev *slave_eth_dev;
2899 struct bond_dev_private *internals = dev->data->dev_private;
2902 rte_spinlock_lock(&internals->lock);
2904 for (i = 0; i < internals->slave_count; i++) {
2905 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2906 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2910 struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
2912 for (i = 0; i < internals->slave_count; i++)
2913 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2917 rte_spinlock_unlock(&internals->lock);
2920 const struct eth_dev_ops default_dev_ops = {
2921 .dev_start = bond_ethdev_start,
2922 .dev_stop = bond_ethdev_stop,
2923 .dev_close = bond_ethdev_close,
2924 .dev_configure = bond_ethdev_configure,
2925 .dev_infos_get = bond_ethdev_info,
2926 .vlan_filter_set = bond_ethdev_vlan_filter_set,
2927 .rx_queue_setup = bond_ethdev_rx_queue_setup,
2928 .tx_queue_setup = bond_ethdev_tx_queue_setup,
2929 .rx_queue_release = bond_ethdev_rx_queue_release,
2930 .tx_queue_release = bond_ethdev_tx_queue_release,
2931 .link_update = bond_ethdev_link_update,
2932 .stats_get = bond_ethdev_stats_get,
2933 .stats_reset = bond_ethdev_stats_reset,
2934 .promiscuous_enable = bond_ethdev_promiscuous_enable,
2935 .promiscuous_disable = bond_ethdev_promiscuous_disable,
2936 .reta_update = bond_ethdev_rss_reta_update,
2937 .reta_query = bond_ethdev_rss_reta_query,
2938 .rss_hash_update = bond_ethdev_rss_hash_update,
2939 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get,
2940 .mtu_set = bond_ethdev_mtu_set,
2941 .mac_addr_set = bond_ethdev_mac_address_set,
2942 .mac_addr_add = bond_ethdev_mac_addr_add,
2943 .mac_addr_remove = bond_ethdev_mac_addr_remove,
2944 .filter_ctrl = bond_filter_ctrl
2948 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2950 const char *name = rte_vdev_device_name(dev);
2951 uint8_t socket_id = dev->device.numa_node;
2952 struct bond_dev_private *internals = NULL;
2953 struct rte_eth_dev *eth_dev = NULL;
2954 uint32_t vlan_filter_bmp_size;
2956 /* Now do all data allocation: the eth_dev structure, a dummy PCI
2957 * driver, and the internal (private) data
2960 /* reserve an ethdev entry */
2961 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2962 if (eth_dev == NULL) {
2963 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2967 internals = eth_dev->data->dev_private;
2968 eth_dev->data->nb_rx_queues = (uint16_t)1;
2969 eth_dev->data->nb_tx_queues = (uint16_t)1;
2971 /* Allocate memory for storing MAC addresses */
2972 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
2973 BOND_MAX_MAC_ADDRS, 0, socket_id);
2974 if (eth_dev->data->mac_addrs == NULL) {
2976 "Failed to allocate %u bytes needed to store MAC addresses",
2977 RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
2981 eth_dev->dev_ops = &default_dev_ops;
2982 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2984 rte_spinlock_init(&internals->lock);
2985 rte_spinlock_init(&internals->lsc_lock);
2987 internals->port_id = eth_dev->data->port_id;
2988 internals->mode = BONDING_MODE_INVALID;
2989 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2990 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2991 internals->burst_xmit_hash = burst_xmit_l2_hash;
2992 internals->user_defined_mac = 0;
2994 internals->link_status_polling_enabled = 0;
2996 internals->link_status_polling_interval_ms =
2997 DEFAULT_POLLING_INTERVAL_10_MS;
2998 internals->link_down_delay_ms = 0;
2999 internals->link_up_delay_ms = 0;
3001 internals->slave_count = 0;
3002 internals->active_slave_count = 0;
3003 internals->rx_offload_capa = 0;
3004 internals->tx_offload_capa = 0;
3005 internals->rx_queue_offload_capa = 0;
3006 internals->tx_queue_offload_capa = 0;
3007 internals->candidate_max_rx_pktlen = 0;
3008 internals->max_rx_pktlen = 0;
3010 /* Initially allow any RSS offload type to be chosen */
3011 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3013 memset(&internals->default_rxconf, 0,
3014 sizeof(internals->default_rxconf));
3015 memset(&internals->default_txconf, 0,
3016 sizeof(internals->default_txconf));
3018 memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3019 memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3021 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3022 memset(internals->slaves, 0, sizeof(internals->slaves));
3024 TAILQ_INIT(&internals->flow_list);
3025 internals->flow_isolated_valid = 0;
3027 /* Set mode 4 default configuration */
3028 bond_mode_8023ad_setup(eth_dev, NULL);
3029 if (bond_ethdev_mode_set(eth_dev, mode)) {
3030 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3031 eth_dev->data->port_id, mode);
3035 vlan_filter_bmp_size =
3036 rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3037 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3038 RTE_CACHE_LINE_SIZE);
3039 if (internals->vlan_filter_bmpmem == NULL) {
3041 "Failed to allocate vlan bitmap for bonded device %u",
3042 eth_dev->data->port_id);
3046 internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3047 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3048 if (internals->vlan_filter_bmp == NULL) {
3050 "Failed to init vlan bitmap for bonded device %u",
3051 eth_dev->data->port_id);
3052 rte_free(internals->vlan_filter_bmpmem);
3056 return eth_dev->data->port_id;
3059 rte_free(internals);
3060 if (eth_dev != NULL)
3061 eth_dev->data->dev_private = NULL;
3062 rte_eth_dev_release_port(eth_dev);
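/*
 * Example (sketch): the run-time API path into this allocator; an
 * application can create a bonded device without --vdev arguments. The
 * device name and slave port id below are assumed values.
 *
 *	int port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *	if (port >= 0)
 *		rte_eth_bond_slave_add(port, slave_port_id);
 */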
3067 bond_probe(struct rte_vdev_device *dev)
3070 struct bond_dev_private *internals;
3071 struct rte_kvargs *kvlist;
3072 uint8_t bonding_mode, socket_id;
3073 int arg_count, port_id;
3074 uint8_t agg_mode;
3075 struct rte_eth_dev *eth_dev;
3080 name = rte_vdev_device_name(dev);
3081 RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3083 if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3084 eth_dev = rte_eth_dev_attach_secondary(name);
3086 RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3089 /* TODO: request info from primary to set up Rx and Tx */
3090 eth_dev->dev_ops = &default_dev_ops;
3091 eth_dev->device = &dev->device;
3092 rte_eth_dev_probing_finish(eth_dev);
3096 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3097 pmd_bond_init_valid_arguments);
3101 /* Parse link bonding mode */
3102 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3103 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3104 &bond_ethdev_parse_slave_mode_kvarg,
3105 &bonding_mode) != 0) {
3106 RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3111 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3116 /* Parse socket id to create bonding device on */
3117 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3118 if (arg_count == 1) {
3119 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3120 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3122 RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3123 "bonded device %s", name);
3126 } else if (arg_count > 1) {
3127 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3128 "bonded device %s", name);
3131 socket_id = rte_socket_id();
3134 dev->device.numa_node = socket_id;
3136 /* Create link bonding eth device */
3137 port_id = bond_alloc(dev, bonding_mode);
3139 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3140 "socket %u.", name, bonding_mode, socket_id);
3143 internals = rte_eth_devices[port_id].data->dev_private;
3144 internals->kvlist = kvlist;
3146 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3147 if (rte_kvargs_process(kvlist,
3148 PMD_BOND_AGG_MODE_KVARG,
3149 &bond_ethdev_parse_slave_agg_mode_kvarg,
3152 "Failed to parse agg selection mode for bonded device %s",
3157 if (internals->mode == BONDING_MODE_8023AD)
3158 internals->mode4.agg_selection = agg_mode;
3160 internals->mode4.agg_selection = AGG_STABLE;
3163 rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3164 RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3165 "socket %u.", name, port_id, bonding_mode, socket_id);
3169 rte_kvargs_free(kvlist);
3175 bond_remove(struct rte_vdev_device *dev)
3177 struct rte_eth_dev *eth_dev;
3178 struct bond_dev_private *internals;
3184 name = rte_vdev_device_name(dev);
3185 RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3187 /* Now free all allocated data: the eth_dev structure,
3188 * the dummy PCI driver and the internal (private) data
3191 /* find an ethdev entry */
3192 eth_dev = rte_eth_dev_allocated(name);
3193 if (eth_dev == NULL)
3196 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3197 return rte_eth_dev_release_port(eth_dev);
3199 RTE_ASSERT(eth_dev->device == &dev->device);
3201 internals = eth_dev->data->dev_private;
3202 if (internals->slave_count != 0)
3205 if (eth_dev->data->dev_started == 1) {
3206 bond_ethdev_stop(eth_dev);
3207 bond_ethdev_close(eth_dev);
3210 eth_dev->dev_ops = NULL;
3211 eth_dev->rx_pkt_burst = NULL;
3212 eth_dev->tx_pkt_burst = NULL;
3214 internals = eth_dev->data->dev_private;
3215 /* Try to release the mempool used in mode 6. If the bonded
3216 * device is not in mode 6, freeing NULL is not a problem.
3218 rte_mempool_free(internals->mode6.mempool);
3219 rte_bitmap_free(internals->vlan_filter_bmp);
3220 rte_free(internals->vlan_filter_bmpmem);
3222 rte_eth_dev_release_port(eth_dev);
3227 /* This resolves the slave port ids after all the other pdevs and
3228 * vdevs have been allocated */
3230 bond_ethdev_configure(struct rte_eth_dev *dev)
3232 const char *name = dev->device->name;
3233 struct bond_dev_private *internals = dev->data->dev_private;
3234 struct rte_kvargs *kvlist = internals->kvlist;
3236 uint16_t port_id = dev - rte_eth_devices;
3239 static const uint8_t default_rss_key[40] = {
3240 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3241 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3242 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3243 0xBE, 0xAC, 0x01, 0xFA
3249 * If RSS is enabled, fill the table with default values and
3250 * set the key to the value specified in the port RSS configuration.
3251 * Fall back to the default RSS key if no key is specified
3253 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3254 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3255 internals->rss_key_len =
3256 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3257 memcpy(internals->rss_key,
3258 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3259 internals->rss_key_len);
3261 internals->rss_key_len = sizeof(default_rss_key);
3262 memcpy(internals->rss_key, default_rss_key,
3263 internals->rss_key_len);
3266 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3267 internals->reta_conf[i].mask = ~0LL;
3268 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3269 internals->reta_conf[i].reta[j] =
3270 (i * RTE_RETA_GROUP_SIZE + j) %
3271 dev->data->nb_rx_queues;
3275 /* set the max_rx_pktlen */
3276 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3279 * if no kvlist, it means that this bonded device has been created
3280 * through the bonding api.
3285 /* Parse MAC address for bonded device */
3286 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3287 if (arg_count == 1) {
3288 struct rte_ether_addr bond_mac;
3290 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3291 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3292 RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3297 /* Set MAC address */
3298 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3300 "Failed to set mac address on bonded device %s",
3304 } else if (arg_count > 1) {
3306 "MAC address can be specified only once for bonded device %s",
3311 /* Parse/set balance mode transmit policy */
3312 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3313 if (arg_count == 1) {
3314 uint8_t xmit_policy;
3316 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3317 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3320 "Invalid xmit policy specified for bonded device %s",
3325 /* Set balance mode transmit policy */
3326 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3328 "Failed to set balance xmit policy on bonded device %s",
3332 } else if (arg_count > 1) {
3334 "Transmit policy can be specified only once for bonded device %s",
3339 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3340 if (rte_kvargs_process(kvlist,
3341 PMD_BOND_AGG_MODE_KVARG,
3342 &bond_ethdev_parse_slave_agg_mode_kvarg,
3345 "Failed to parse agg selection mode for bonded device %s",
3348 if (internals->mode == BONDING_MODE_8023AD) {
3349 int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3353 "Invalid args for agg selection set for bonded device %s",
3360 /* Parse/add slave ports to bonded device */
3361 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3362 struct bond_ethdev_slave_ports slave_ports;
3365 memset(&slave_ports, 0, sizeof(slave_ports));
3367 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3368 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3370 "Failed to parse slave ports for bonded device %s",
3375 for (i = 0; i < slave_ports.slave_count; i++) {
3376 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3378 "Failed to add port %d as slave to bonded device %s",
3379 slave_ports.slaves[i], name);
3384 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3388 /* Parse/set primary slave port id*/
3389 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3390 if (arg_count == 1) {
3391 uint16_t primary_slave_port_id;
3393 if (rte_kvargs_process(kvlist,
3394 PMD_BOND_PRIMARY_SLAVE_KVARG,
3395 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3396 &primary_slave_port_id) < 0) {
3398 "Invalid primary slave port id specified for bonded device %s",
3403 /* Set primary slave port id */
3404 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3407 "Failed to set primary slave port %d on bonded device %s",
3408 primary_slave_port_id, name);
3411 } else if (arg_count > 1) {
3413 "Primary slave can be specified only once for bonded device %s",
3418 /* Parse link status monitor polling interval */
3419 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3420 if (arg_count == 1) {
3421 uint32_t lsc_poll_interval_ms;
3423 if (rte_kvargs_process(kvlist,
3424 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3425 &bond_ethdev_parse_time_ms_kvarg,
3426 &lsc_poll_interval_ms) < 0) {
3428 "Invalid lsc polling interval value specified for bonded"
3429 " device %s", name);
3433 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3436 "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3437 lsc_poll_interval_ms, name);
3440 } else if (arg_count > 1) {
3442 "LSC polling interval can be specified only once for bonded"
3443 " device %s", name);
3447 /* Parse link up interrupt propagation delay */
3448 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3449 if (arg_count == 1) {
3450 uint32_t link_up_delay_ms;
3452 if (rte_kvargs_process(kvlist,
3453 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3454 &bond_ethdev_parse_time_ms_kvarg,
3455 &link_up_delay_ms) < 0) {
3457 "Invalid link up propagation delay value specified for"
3458 " bonded device %s", name);
3462 /* Set link up propagation delay */
3463 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3466 "Failed to set link up propagation delay (%u ms) on bonded"
3467 " device %s", link_up_delay_ms, name);
3470 } else if (arg_count > 1) {
3472 "Link up propagation delay can be specified only once for"
3473 " bonded device %s", name);
3477 /* Parse link down interrupt propagation delay */
3478 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3479 if (arg_count == 1) {
3480 uint32_t link_down_delay_ms;
3482 if (rte_kvargs_process(kvlist,
3483 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3484 &bond_ethdev_parse_time_ms_kvarg,
3485 &link_down_delay_ms) < 0) {
3487 "Invalid link down propagation delay value specified for"
3488 " bonded device %s", name);
3492 /* Set link down propagation delay */
3493 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3496 "Failed to set link down propagation delay (%u ms) on bonded device %s",
3497 link_down_delay_ms, name);
3500 } else if (arg_count > 1) {
3502 "Link down propagation delay can be specified only once for bonded device %s",
3510 struct rte_vdev_driver pmd_bond_drv = {
3511 .probe = bond_probe,
3512 .remove = bond_remove,
3515 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3516 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3518 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3519 "slave=<ifc> "
3520 "primary=<ifc> "
3521 "mode=[0-6] "
3522 "xmit_policy=[l2 | l23 | l34] "
3523 "agg_mode=[count | stable | bandwidth] "
3524 "socket_id=<int> "
3525 "mac=<mac addr> "
3526 "lsc_poll_period_ms=<int> "
3527 "up_delay=<int> "
3528 "down_delay=<int>");
3532 RTE_INIT(bond_init_log)
3534 bond_logtype = rte_log_register("pmd.net.bond");
3535 if (bond_logtype >= 0)
3536 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);