/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <netinet/in.h>

#include <rte_malloc.h>
#include <ethdev_driver.h>
#include <ethdev_vdev.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "eth_bond_private.h"
#include "eth_bond_8023ad_private.h"
#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
static inline size_t
get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
			rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
		struct rte_vlan_hdr *vlan_hdr =
				(struct rte_vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct rte_vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		/* A second tag may follow the outer one (QinQ). */
		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct rte_vlan_hdr);
		}
	}
	return vlan_offset;
}
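
/*
 * Illustrative sketch only (not part of the upstream driver): the burst
 * handlers below combine the frame's ether_type with get_vlan_offset()
 * to locate the L3 header behind up to two VLAN tags. The helper name
 * is hypothetical.
 */
static inline struct rte_ipv4_hdr *
example_locate_ipv4_hdr(struct rte_ether_hdr *eth_hdr)
{
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

	if (proto != rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
		return NULL; /* not IPv4 once VLAN/QinQ tags are unwrapped */

	return (struct rte_ipv4_hdr *)((char *)(eth_hdr + 1) + vlan_offset);
}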
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_total = 0;
	uint16_t slave_count;
	uint16_t active_slave;
	int i;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	internals = bd_rx_q->dev_private;
	slave_count = internals->active_slave_count;
	active_slave = bd_rx_q->active_slave;

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave =
			rte_eth_rx_burst(internals->active_slaves[active_slave],
					bd_rx_q->queue_id,
					bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++bd_rx_q->active_slave >= slave_count)
		bd_rx_q->active_slave = 0;

	return num_rx_total;
}
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	/* Active/backup only ever receives on the current primary slave. */
	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be =
		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & RTE_MBUF_F_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}
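
/*
 * For reference: RTE_ETHER_TYPE_SLOW is 0x8809, the IEEE 802.3 "slow
 * protocols" ethertype; LACPDUs use subtype 1 and marker PDUs subtype 2.
 * A VLAN-tagged frame is never treated as a slow-protocol packet here.
 */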
/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = bond_dev->data->dev_private;

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret != 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	ret = rte_eth_dev_info_get(slave_port, &slave_info);
	if (ret != 0) {
		RTE_BOND_LOG(ERR,
			"%s: Error during getting device (port %u) info: %s\n",
			__func__, slave_port, strerror(-ret));
		return ret;
	}

	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities don't allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}
int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = bond_dev->data->dev_private;
	struct rte_eth_dev_info bond_info;
	uint16_t idx;
	int ret;

	/* Verify that every slave in the bond can accept the slow-packet flow rule */
	if (internals->slave_count > 0) {
		ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
		if (ret != 0) {
			RTE_BOND_LOG(ERR,
				"%s: Error during getting device (port %u) info: %s\n",
				__func__, bond_dev->data->port_id,
				strerror(-ret));
			return ret;
		}

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = bond_dev->data->dev_private;
	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}
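
/*
 * For illustration only: the rule installed above should be equivalent to
 * this testpmd flow command (queue index taken from the dedicated Rx
 * queue id):
 *
 *	flow create <slave_port> ingress pattern eth type is 0x8809 / end
 *		actions queue index <rx_qid> / end
 */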
static inline uint16_t
rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
		bool dedicated_rxq)
{
	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct rte_eth_dev *bonded_eth_dev =
					&rte_eth_devices[internals->port_id];
	struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
	struct rte_ether_hdr *hdr;

	const uint16_t ether_type_slow_be =
		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;  /* current slave collecting status */
	const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id);
	const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id);
	uint8_t subtype;
	uint16_t i;
	uint16_t j;
	uint16_t k;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = bd_rx_q->active_slave;
	if (idx >= slave_count) {
		bd_rx_q->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
				COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {
			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove packet from array if:
			 * - it is a slow packet but no dedicated rxq is present,
			 * - the slave is not in collecting state,
			 * - the bonding interface is not in promiscuous mode and:
			 *   - the packet is unicast and the address does not match, or
			 *   - the packet is multicast and the bonding interface
			 *     is not in allmulti.
			 */
			if (unlikely(
				(!dedicated_rxq &&
				 is_lacp_packets(hdr->ether_type, subtype,
						bufs[j])) ||
				!collecting ||
				(!promisc &&
				 ((rte_is_unicast_ether_addr(&hdr->dst_addr) &&
				   !rte_is_same_ether_addr(bond_mac,
						&hdr->dst_addr)) ||
				  (!allmulti &&
				   rte_is_multicast_ether_addr(&hdr->dst_addr)))))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
						internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	if (++bd_rx_q->active_slave >= slave_count)
		bd_rx_q->active_slave = 0;

	return num_rx_total;
}
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	return rx_burst_8023ad(queue, bufs, nb_pkts, false);
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	return rx_burst_8023ad(queue, bufs, nb_pkts, true);
}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
	switch (arp_op) {
	case RTE_ARP_OP_REQUEST:
		strlcpy(buf, "ARP Request", buf_len);
		return;
	case RTE_ARP_OP_REPLY:
		strlcpy(buf, "ARP Reply", buf_len);
		return;
	case RTE_ARP_OP_REVREQUEST:
		strlcpy(buf, "Reverse ARP Request", buf_len);
		return;
	case RTE_ARP_OP_REVREPLY:
		strlcpy(buf, "Reverse ARP Reply", buf_len);
		return;
	case RTE_ARP_OP_INVREQUEST:
		strlcpy(buf, "Peer Identify Request", buf_len);
		return;
	case RTE_ARP_OP_INVREPLY:
		strlcpy(buf, "Peer Identify Reply", buf_len);
		return;
	default:
		break;
	}
	strlcpy(buf, "Unknown", buf_len);
}
#endif
#define MaxIPv4String 16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}
#define MAX_CLIENTS_NUMBER 128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update the RX/TX packet count for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update the stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
		"%s port:%d SrcMAC:" RTE_ETHER_ADDR_PRT_FMT " SrcIP:%s " \
		"DstMAC:" RTE_ETHER_ADDR_PRT_FMT " DstIP:%s %s %d\n",	\
		info,							\
		port,							\
		RTE_ETHER_ADDR_BYTES(&eth_h->src_addr),			\
		src_ip,							\
		RTE_ETHER_ADDR_BYTES(&eth_h->dst_addr),			\
		dst_ip,							\
		arp_op, ++burstnumber)
#endif
static void
mode6_debug(const char __rte_unused *info,
	struct rte_ether_hdr *eth_h, uint16_t port,
	uint32_t __rte_unused *burstnumber)
{
	struct rte_ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct rte_arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	strlcpy(buf, info, 16);
#endif

	if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
		ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
		arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
				ArpOp, sizeof(ArpOp));
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct rte_ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate each slave's mbuf array with the packets to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}
static inline uint16_t
ether_hash(struct rte_ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->src_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->dst_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct rte_ether_hdr *eth_hdr;
	uint32_t hash;
	int i;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);

		hash = ether_hash(eth_hdr);

		slaves[i] = (hash ^= hash >> 8) % slave_count;
	}
}
void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	uint16_t i;
	struct rte_ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	uint32_t hash, l3hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
		l3hash = 0;

		proto = eth_hdr->ether_type;
		hash = ether_hash(eth_hdr);

		vlan_offset = get_vlan_offset(eth_hdr, &proto);

		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv4_hash(ipv4_hdr);

		} else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);
		}

		hash = hash ^ l3hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}
void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct rte_ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	int i;

	struct rte_udp_hdr *udp_hdr;
	struct rte_tcp_hdr *tcp_hdr;
	uint32_t hash, l3hash, l4hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
		size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
		proto = eth_hdr->ether_type;
		vlan_offset = get_vlan_offset(eth_hdr, &proto);
		l3hash = 0;
		l4hash = 0;

		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			size_t ip_hdr_offset;

			l3hash = ipv4_hash(ipv4_hdr);

			/* there is no L4 header in fragmented packet */
			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
								== 0)) {
				ip_hdr_offset = (ipv4_hdr->version_ihl
					& RTE_IPV4_HDR_IHL_MASK) *
					RTE_IPV4_IHL_MULTIPLIER;

				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
					tcp_hdr = (struct rte_tcp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
							<= pkt_end)
						l4hash = HASH_L4_PORTS(tcp_hdr);
				} else if (ipv4_hdr->next_proto_id ==
								IPPROTO_UDP) {
					udp_hdr = (struct rte_udp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					if ((size_t)udp_hdr + sizeof(*udp_hdr)
							<= pkt_end)
						l4hash = HASH_L4_PORTS(udp_hdr);
				}
			}
		} else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);

			if (ipv6_hdr->proto == IPPROTO_TCP) {
				tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
				udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}

		hash = l3hash ^ l4hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}
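
/*
 * For illustration (not part of the driver's data path): applications
 * select which of the three hash functions above the balance/802.3AD
 * transmit paths use through the public API, e.g.
 *
 *	rte_eth_bond_xmit_policy_set(bond_port_id, BALANCE_XMIT_POLICY_LAYER34);
 *
 * BALANCE_XMIT_POLICY_LAYER2, _LAYER23 and _LAYER34 map to
 * burst_xmit_l2_hash(), burst_xmit_l23_hash() and burst_xmit_l34_hash()
 * respectively (the mapping lives elsewhere in this driver).
 */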
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++)
		tlb_last_obytets[internals->active_slaves[i]] = 0;
}
static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;
	int ret;

	ret = rte_eth_link_get_nowait(port_id, &link_status);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
			     port_id, rte_strerror(-ret));
		return;
	}
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
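
/*
 * Worked example (for illustration): link_speed is reported in Mbit/s, so
 * a 10G slave gives link_bwg = 10000 * 1000000 / 8 = 1.25e9 bytes/s before
 * scaling by (update_idx + 1) * REORDER_PERIOD_MS. The quotient/remainder
 * pair is then used by bandwidth_cmp() above to order slaves by remaining
 * headroom.
 */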
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint16_t slave_id;
	uint16_t i;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats)
			tlb_last_obytets[slave_id] = slave_stats.obytes;
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct rte_ether_hdr *ether_hdr;
	struct rte_ether_addr primary_slave_addr;
	struct rte_ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
			sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j],
					struct rte_ether_hdr *);
			if (rte_is_same_ether_addr(&ether_hdr->src_addr,
					&primary_slave_addr))
				rte_ether_addr_copy(&active_slave_addr,
						&ether_hdr->src_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}
void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}
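
/*
 * Note: bond_ethdev_update_tlb_slave_cb() re-arms itself through
 * rte_eal_alarm_set(), so enabling TLB starts a periodic rebalance every
 * REORDER_PERIOD_MS milliseconds until bond_tlb_disable() cancels it.
 */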
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->src_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_BOND_LOG(ERR,
						     "Failed to allocate ARP packet from pool");
					continue;
				}
				pkt_size = sizeof(struct rte_ether_hdr) +
					sizeof(struct rte_arp_hdr) +
					client_info->vlan_count *
					sizeof(struct rte_vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
						struct rte_ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
						struct rte_ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}
static inline uint16_t
tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
		uint16_t *slave_port_ids, uint16_t slave_count)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	/* Array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	/*
	 * Populate each slave's mbuf array with the packets to be sent on it,
	 * selecting the output slave using the hash based on the xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
					&slave_bufs[i][slave_tx_count],
					slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting
	 */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);
	return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
			slave_count);
}
static inline uint16_t
tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
		bool dedicated_txq)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	uint16_t slave_tx_count;

	uint16_t i;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	if (dedicated_txq)
		goto skip_tx_ring;

	/* Check for LACP control packets and send if available */
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
		struct rte_mbuf *ctrl_pkt = NULL;

		if (likely(rte_ring_empty(port->tx_ring)))
			continue;

		if (rte_ring_dequeue(port->tx_ring,
				(void **)&ctrl_pkt) != -ENOENT) {
			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
					bd_tx_q->queue_id, &ctrl_pkt, 1);
			/*
			 * re-enqueue LAG control plane packets to buffering
			 * ring if transmission fails so the packet isn't lost.
			 */
			if (slave_tx_count != 1)
				rte_ring_enqueue(port->tx_ring, ctrl_pkt);
		}
	}

skip_tx_ring:
	if (unlikely(nb_bufs == 0))
		return 0;

	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (unlikely(dist_slave_count < 1))
		return 0;

	return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
			dist_slave_count);
}
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	return tx_burst_8023ad(queue, bufs, nb_bufs, false);
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	return tx_burst_8023ad(queue, bufs, nb_bufs, true);
}
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint8_t tx_failed_flag = 0;
	uint16_t num_of_slaves;

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_pktmbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}
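
/*
 * Note on the broadcast path above: each mbuf's reference count is bumped
 * once per extra slave before transmission, so a failing slave's unsent
 * packets can be freed without invalidating the copies still queued on
 * the other slaves.
 */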
static void
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		/**
		 * If in mode 4 then save the link properties of the first
		 * slave, all subsequent slaves must match these properties
		 */
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		bond_link->link_autoneg = slave_link->link_autoneg;
		bond_link->link_duplex = slave_link->link_duplex;
		bond_link->link_speed = slave_link->link_speed;
	} else {
		/**
		 * In any other mode the link properties are set to default
		 * values of AUTONEG/DUPLEX
		 */
		ethdev->data->dev_link.link_autoneg = RTE_ETH_LINK_AUTONEG;
		ethdev->data->dev_link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
	}
}
static int
link_properties_valid(struct rte_eth_dev *ethdev,
		struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		if (bond_link->link_duplex != slave_link->link_duplex ||
			bond_link->link_autoneg != slave_link->link_autoneg ||
			bond_link->link_speed != slave_link->link_speed)
			return -1;
	}

	return 0;
}
int
mac_address_get(struct rte_eth_dev *eth_dev,
		struct rte_ether_addr *dst_mac_addr)
{
	struct rte_ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (dst_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	rte_ether_addr_copy(mac_addr, dst_mac_addr);
	return 0;
}
int
mac_address_set(struct rte_eth_dev *eth_dev,
		struct rte_ether_addr *new_mac_addr)
{
	struct rte_ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (new_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	/* If the new MAC is different from the current MAC then update */
	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

	return 0;
}

static const struct rte_ether_addr null_mac_addr;
/*
 * Add additional MAC addresses to the slave
 */
int
slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
		uint16_t slave_port_id)
{
	int i, ret;
	struct rte_ether_addr *mac_addr;

	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
			break;

		ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
		if (ret < 0) {
			/* roll back the addresses added so far */
			for (i--; i > 0; i--)
				rte_eth_dev_mac_addr_remove(slave_port_id,
					&bonded_eth_dev->data->mac_addrs[i]);
			return ret;
		}
	}

	return 0;
}
/*
 * Remove additional MAC addresses from the slave
 */
int
slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
		uint16_t slave_port_id)
{
	int i, rc, ret;
	struct rte_ether_addr *mac_addr;

	rc = 0;
	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
			break;

		ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
		/* save only the first error */
		if (ret < 0 && rc == 0)
			rc = ret;
	}

	return rc;
}
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	bool set;
	int i;

	/* Update slave devices MAC addresses */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++) {
			if (rte_eth_dev_default_mac_addr_set(
					internals->slaves[i].port_id,
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		set = true;
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (rte_eth_dev_default_mac_addr_set(
						internals->current_primary_port,
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					set = false;
				}
			} else {
				if (rte_eth_dev_default_mac_addr_set(
						internals->slaves[i].port_id,
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
				}
			}
		}
		if (!set)
			return -1;
	}

	return 0;
}
static int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, uint8_t mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_8023AD:
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		if (internals->mode4.dedicated_queues.enabled == 0) {
			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
			RTE_BOND_LOG(WARNING,
				"Using mode 4, it is necessary to do TX burst "
				"and RX burst at least every 100ms.");
		} else {
			/* Use flow director's optimization */
			eth_dev->rx_pkt_burst =
					bond_ethdev_rx_burst_8023ad_fast_queue;
			eth_dev->tx_pkt_burst =
					bond_ethdev_tx_burst_8023ad_fast_queue;
		}
		break;
	case BONDING_MODE_TLB:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_ALB:
		if (bond_mode_alb_enable(eth_dev) != 0)
			return -1;

		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}
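
/*
 * Illustrative usage sketch (not part of the driver): an application
 * typically reaches the mode dispatch above through the public bonding
 * API from rte_eth_bond.h. The device name and port ids below are
 * hypothetical; slaves are (re)configured against the bond's settings
 * when the bonded port is started (see slave_configure()/slave_start()
 * below).
 */
static inline int
example_create_8023ad_bond(uint16_t slave_a, uint16_t slave_b)
{
	/* Create the bonded vdev on socket 0 in 802.3AD (mode 4) */
	int bond_port = rte_eth_bond_create("net_bonding0",
			BONDING_MODE_8023AD, 0);

	if (bond_port < 0)
		return bond_port;

	/* Attach the two slave ports to the new bonded device */
	if (rte_eth_bond_slave_add(bond_port, slave_a) != 0 ||
			rte_eth_bond_slave_add(bond_port, slave_b) != 0)
		return -1;

	return bond_port;
}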
static int
slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	int errval = 0;
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];

	if (port->slow_pool == NULL) {
		char mem_name[256];
		int slave_id = slave_eth_dev->data->port_id;

		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
				slave_id);
		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			slave_eth_dev->data->numa_node);

		/* Any memory allocation failure in initialization is critical because
		 * resources can't be freed, so reinitialization is impossible. */
		if (port->slow_pool == NULL) {
			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
				slave_id, mem_name, rte_strerror(rte_errno));
		}
	}

	if (internals->mode4.dedicated_queues.enabled == 1) {
		/* Configure slow Rx queue */

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.rx_qid, 128,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL, port->slow_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.rx_qid,
					errval);
			return errval;
		}

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid, 512,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid,
				errval);
			return errval;
		}
	}
	return 0;
}
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	uint16_t nb_rx_queues;
	uint16_t nb_tx_queues;

	int errval;

	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	/* Stop slave */
	errval = rte_eth_dev_stop(slave_eth_dev->data->port_id);
	if (errval != 0)
		RTE_BOND_LOG(ERR, "rte_eth_dev_stop: port %u, err (%d)",
			     slave_eth_dev->data->port_id, errval);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* If RSS is enabled for bonding, try to enable it for slaves */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) {
		/* rss_key won't be empty if RSS is configured in bonded dev */
		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
					internals->rss_key_len;
		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
					internals->rss_key;

		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
	}

	slave_eth_dev->data->dev_conf.rxmode.mtu =
			bonded_eth_dev->data->dev_conf.rxmode.mtu;

	slave_eth_dev->data->dev_conf.txmode.offloads |=
			bonded_eth_dev->data->dev_conf.txmode.offloads;

	slave_eth_dev->data->dev_conf.txmode.offloads &=
			(bonded_eth_dev->data->dev_conf.txmode.offloads |
			~internals->tx_offload_capa);

	slave_eth_dev->data->dev_conf.rxmode.offloads |=
			bonded_eth_dev->data->dev_conf.rxmode.offloads;

	slave_eth_dev->data->dev_conf.rxmode.offloads &=
			(bonded_eth_dev->data->dev_conf.rxmode.offloads |
			~internals->rx_offload_capa);

	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			nb_rx_queues++;
			nb_tx_queues++;
		}
	}

	/* Configure device */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			nb_rx_queues, nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
				     bonded_eth_dev->data->mtu);
	if (errval != 0 && errval != -ENOTSUP) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}
	return 0;
}
int
slave_start(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	int errval = 0;
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;
	uint16_t q_id;
	struct rte_flow_error flow_error;
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	/* Setup Rx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
				!= 0)
			return errval;

		errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
				slave_eth_dev->data->port_id);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
				slave_eth_dev->data->port_id, errval);
			return errval;
		}

		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL) {
			errval = rte_flow_destroy(slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
					&flow_error);
			RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_destroy: port=%d, err (%d)",
				slave_eth_dev->data->port_id, errval);
		}

		errval = bond_ethdev_8023ad_flow_set(bonded_eth_dev,
				slave_eth_dev->data->port_id);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"bond_ethdev_8023ad_flow_set: port=%d, err (%d)",
				slave_eth_dev->data->port_id, errval);
			return errval;
		}
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	/* If RSS is enabled for bonding, synchronize RETA */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS) {
		int i;
		struct bond_dev_private *internals;

		internals = bonded_eth_dev->data->dev_private;

		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
				errval = rte_eth_dev_rss_reta_update(
						slave_eth_dev->data->port_id,
						&internals->reta_conf[0],
						internals->slaves[i].reta_size);
				if (errval != 0)
					RTE_BOND_LOG(WARNING,
						"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
						" RSS Configuration for bonding may be inconsistent.",
						slave_eth_dev->data->port_id, errval);
				break;
			}
		}
	}

	/* If lsc interrupt is set, check initial slave's link status */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
			NULL);
	}

	return 0;
}
static void
slave_remove(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	uint16_t i;

	for (i = 0; i < internals->slave_count; i++)
		if (internals->slaves[i].port_id ==
				slave_eth_dev->data->port_id)
			break;

	if (i < (internals->slave_count - 1)) {
		struct rte_flow *flow;

		memmove(&internals->slaves[i], &internals->slaves[i + 1],
				sizeof(internals->slaves[0]) *
				(internals->slave_count - i - 1));
		TAILQ_FOREACH(flow, &internals->flow_list, next) {
			memmove(&flow->flows[i], &flow->flows[i + 1],
				sizeof(flow->flows[0]) *
				(internals->slave_count - i - 1));
			flow->flows[internals->slave_count - 1] = NULL;
		}
	}

	internals->slave_count--;

	/* force reconfiguration of slave interfaces */
	rte_eth_dev_internal_reset(slave_eth_dev);
}

static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* Mark slave devices that don't support interrupts so we can
	 * compensate when we start the bond
	 */
	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))
		slave_details->link_status_poll_enabled = 1;

	slave_details->link_status_wait_to_complete = 0;
	/* clean tlb_last_obytes when adding port for bonding device */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct rte_ether_addr));
}
void
bond_ethdev_primary_set(struct bond_dev_private *internals,
		uint16_t slave_port_id)
{
	int i;

	if (internals->active_slave_count < 1)
		internals->current_primary_port = slave_port_id;
	else
		/* Search bonded device slave ports for new proposed primary port */
		for (i = 0; i < internals->active_slave_count; i++) {
			if (internals->active_slaves[i] == slave_port_id)
				internals->current_primary_port = slave_port_id;
		}
}

static int
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (check_for_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		goto out_err;
	}

	if (internals->user_defined_mac == 0) {
		struct rte_ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			goto out_err;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			goto out_err;
		}
	}

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			internals->mode4.dedicated_queues.rx_qid =
					eth_dev->data->nb_rx_queues;
			internals->mode4.dedicated_queues.tx_qid =
					eth_dev->data->nb_tx_queues;
		}
	}

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		struct rte_eth_dev *slave_ethdev =
				&(rte_eth_devices[internals->slaves[i].port_id]);
		if (slave_configure(eth_dev, slave_ethdev) != 0) {
			RTE_BOND_LOG(ERR,
				"bonded port (%d) failed to reconfigure slave device (%d)",
				eth_dev->data->port_id,
				internals->slaves[i].port_id);
			goto out_err;
		}
		if (slave_start(eth_dev, slave_ethdev) != 0) {
			RTE_BOND_LOG(ERR,
				"bonded port (%d) failed to start slave device (%d)",
				eth_dev->data->port_id,
				internals->slaves[i].port_id);
			goto out_err;
		}
		/* We will need to poll for link status if any slave doesn't
		 * support interrupts
		 */
		if (internals->slaves[i].link_status_poll_enabled)
			internals->link_status_polling_enabled = 1;
	}

	/* start polling if needed */
	if (internals->link_status_polling_enabled) {
		rte_eal_alarm_set(
			internals->link_status_polling_interval_ms * 1000,
			bond_ethdev_slave_link_status_change_monitor,
			(void *)&rte_eth_devices[internals->port_id]);
	}

	/* Update all slave devices MACs */
	if (mac_address_slaves_update(eth_dev) != 0)
		goto out_err;

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB)
		bond_tlb_enable(internals);

	return 0;

out_err:
	eth_dev->data->dev_started = 0;
	return -1;
}
static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
	uint16_t i;

	if (dev->data->rx_queues != NULL) {
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			rte_free(dev->data->rx_queues[i]);
			dev->data->rx_queues[i] = NULL;
		}
		dev->data->nb_rx_queues = 0;
	}

	if (dev->data->tx_queues != NULL) {
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			rte_free(dev->data->tx_queues[i]);
			dev->data->tx_queues[i] = NULL;
		}
		dev->data->nb_tx_queues = 0;
	}
}
int
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	uint16_t i;
	int ret;

	if (internals->mode == BONDING_MODE_8023AD) {
		struct port *port;
		void *pkt = NULL;

		bond_mode_8023ad_stop(eth_dev);

		/* Discard all messages to/from mode 4 state machines */
		for (i = 0; i < internals->active_slave_count; i++) {
			port = &bond_mode_8023ad_ports[internals->active_slaves[i]];

			RTE_ASSERT(port->rx_ring != NULL);
			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);

			RTE_ASSERT(port->tx_ring != NULL);
			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);
		}
	}

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB) {
		bond_tlb_disable(internals);
		for (i = 0; i < internals->active_slave_count; i++)
			tlb_last_obytets[internals->active_slaves[i]] = 0;
	}

	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
	eth_dev->data->dev_started = 0;

	internals->link_status_polling_enabled = 0;
	for (i = 0; i < internals->slave_count; i++) {
		uint16_t slave_id = internals->slaves[i].port_id;

		internals->slaves[i].last_link_status = 0;
		ret = rte_eth_dev_stop(slave_id);
		if (ret != 0) {
			RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
				     slave_id);
			return ret;
		}

		/* active slaves need to be deactivated. */
		if (find_slave_by_id(internals->active_slaves,
				internals->active_slave_count, slave_id) !=
					internals->active_slave_count)
			deactivate_slave(eth_dev, slave_id);
	}

	return 0;
}
int
bond_ethdev_close(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	uint16_t bond_port_id = internals->port_id;
	int skipped = 0;
	struct rte_flow_error ferror;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
	while (internals->slave_count != skipped) {
		uint16_t port_id = internals->slaves[skipped].port_id;

		if (rte_eth_dev_stop(port_id) != 0) {
			RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
				     port_id);
			skipped++;
		}

		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
			RTE_BOND_LOG(ERR,
				     "Failed to remove port %d from bonded device %s",
				     port_id, dev->device->name);
			skipped++;
		}
	}
	bond_flow_ops.flush(dev, &ferror);
	bond_ethdev_free_queues(dev);
	rte_bitmap_reset(internals->vlan_filter_bmp);
	rte_bitmap_free(internals->vlan_filter_bmp);
	rte_free(internals->vlan_filter_bmpmem);

	/* Try to release the mempool used in mode 6. If the bond device is
	 * not in mode 6, freeing the NULL pointer is harmless.
	 */
	rte_mempool_free(internals->mode6.mempool);

	rte_kvargs_free(internals->kvlist);

	return 0;
}
/* forward declaration */
static int bond_ethdev_configure(struct rte_eth_dev *dev);
static int
bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	struct bond_slave_details slave;
	int ret;

	uint16_t max_nb_rx_queues = UINT16_MAX;
	uint16_t max_nb_tx_queues = UINT16_MAX;
	uint16_t max_rx_desc_lim = UINT16_MAX;
	uint16_t max_tx_desc_lim = UINT16_MAX;

	dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;

	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
			internals->candidate_max_rx_pktlen :
			RTE_ETHER_MAX_JUMBO_FRAME_LEN;

	/* Max number of tx/rx queues that the bonded device can support is the
	 * minimum values of the bonded slaves, as all slaves must be capable
	 * of supporting the same number of tx/rx queues.
	 */
	if (internals->slave_count > 0) {
		struct rte_eth_dev_info slave_info;
		uint16_t idx;

		for (idx = 0; idx < internals->slave_count; idx++) {
			slave = internals->slaves[idx];
			ret = rte_eth_dev_info_get(slave.port_id, &slave_info);
			if (ret != 0) {
				RTE_BOND_LOG(ERR,
					"%s: Error during getting device (port %u) info: %s\n",
					__func__, slave.port_id,
					strerror(-ret));
				return ret;
			}

			if (slave_info.max_rx_queues < max_nb_rx_queues)
				max_nb_rx_queues = slave_info.max_rx_queues;

			if (slave_info.max_tx_queues < max_nb_tx_queues)
				max_nb_tx_queues = slave_info.max_tx_queues;

			if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
				max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;

			if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
				max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
		}
	}

	dev_info->max_rx_queues = max_nb_rx_queues;
	dev_info->max_tx_queues = max_nb_tx_queues;

	memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
	       sizeof(dev_info->default_rxconf));
	memcpy(&dev_info->default_txconf, &internals->default_txconf,
	       sizeof(dev_info->default_txconf));

	dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
	dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;

	/*
	 * If dedicated hw queues are enabled for the link bonding device in
	 * LACP mode, then the maximum number of data path queues must be
	 * reduced by 1.
	 */
	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		dev_info->max_rx_queues--;
		dev_info->max_tx_queues--;
	}

	dev_info->min_rx_bufsize = 0;

	dev_info->rx_offload_capa = internals->rx_offload_capa;
	dev_info->tx_offload_capa = internals->tx_offload_capa;
	dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
	dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;

	dev_info->reta_size = internals->reta_size;
	dev_info->hash_key_size = internals->rss_key_len;

	return 0;
}
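
/*
 * Illustrative sketch (not part of the driver): an application would
 * typically use the aggregated limits reported above to size its queues.
 * bond_port_id and the queue count of 4 are assumptions for the example:
 *
 *	struct rte_eth_dev_info info;
 *	if (rte_eth_dev_info_get(bond_port_id, &info) == 0) {
 *		uint16_t nb_rxq = RTE_MIN(4, info.max_rx_queues);
 *		uint16_t nb_txq = RTE_MIN(4, info.max_tx_queues);
 *		rte_eth_dev_configure(bond_port_id, nb_rxq, nb_txq, &port_conf);
 *	}
 */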
static int
bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
	int res;
	uint16_t i;
	struct bond_dev_private *internals = dev->data->dev_private;

	/* don't do this while a slave is being added */
	rte_spinlock_lock(&internals->lock);

	if (on)
		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
	else
		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);

	for (i = 0; i < internals->slave_count; i++) {
		uint16_t port_id = internals->slaves[i].port_id;

		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
		if (res == ENOTSUP)
			RTE_BOND_LOG(WARNING,
				     "Setting VLAN filter on slave port %u not supported.",
				     port_id);
	}

	rte_spinlock_unlock(&internals->lock);
	return 0;
}
static int
bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
					0, dev->data->numa_node);
	if (bd_rx_q == NULL)
		return -1;

	bd_rx_q->queue_id = rx_queue_id;
	bd_rx_q->dev_private = dev->data->dev_private;

	bd_rx_q->nb_rx_desc = nb_rx_desc;

	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
	bd_rx_q->mb_pool = mb_pool;

	dev->data->rx_queues[rx_queue_id] = bd_rx_q;

	return 0;
}
static int
bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_txconf *tx_conf)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
					0, dev->data->numa_node);

	if (bd_tx_q == NULL)
		return -1;

	bd_tx_q->queue_id = tx_queue_id;
	bd_tx_q->dev_private = dev->data->dev_private;

	bd_tx_q->nb_tx_desc = nb_tx_desc;
	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));

	dev->data->tx_queues[tx_queue_id] = bd_tx_q;

	return 0;
}
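
/*
 * Illustrative sketch (assumption, not driver code): from the application's
 * point of view the bonded port is configured like any other ethdev; the
 * two callbacks above only record the queue parameters so they can be
 * replayed on each slave later. bond_port_id, port_conf, mb_pool and the
 * descriptor counts are placeholders:
 *
 *	rte_eth_dev_configure(bond_port_id, 1, 1, &port_conf);
 *	rte_eth_rx_queue_setup(bond_port_id, 0, 128, rte_socket_id(),
 *			NULL, mb_pool);
 *	rte_eth_tx_queue_setup(bond_port_id, 0, 512, rte_socket_id(), NULL);
 */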
static void
bond_ethdev_rx_queue_release(struct rte_eth_dev *dev, uint16_t queue_id)
{
	void *queue = dev->data->rx_queues[queue_id];

	if (queue == NULL)
		return;

	rte_free(queue);
}

static void
bond_ethdev_tx_queue_release(struct rte_eth_dev *dev, uint16_t queue_id)
{
	void *queue = dev->data->tx_queues[queue_id];

	if (queue == NULL)
		return;

	rte_free(queue);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
	struct bond_dev_private *internals;

	/* Default value for polling slave found is true as we don't want to
	 * disable the polling thread if we cannot get the lock */
	int i, polling_slave_found = 1;

	if (cb_arg == NULL)
		return;

	bonded_ethdev = cb_arg;
	internals = bonded_ethdev->data->dev_private;

	if (!bonded_ethdev->data->dev_started ||
			!internals->link_status_polling_enabled)
		return;

	/* If device is currently being configured then don't check slaves link
	 * status, wait until next period */
	if (rte_spinlock_trylock(&internals->lock)) {
		if (internals->slave_count > 0)
			polling_slave_found = 0;

		for (i = 0; i < internals->slave_count; i++) {
			if (!internals->slaves[i].link_status_poll_enabled)
				continue;

			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
			polling_slave_found = 1;

			/* Update slave link status */
			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
					internals->slaves[i].link_status_wait_to_complete);

			/* if link status has changed since last checked then call lsc
			 * event callback */
			if (slave_ethdev->data->dev_link.link_status !=
					internals->slaves[i].last_link_status) {
				internals->slaves[i].last_link_status =
						slave_ethdev->data->dev_link.link_status;

				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
						RTE_ETH_EVENT_INTR_LSC,
						&bonded_ethdev->data->port_id,
						NULL);
			}
		}
		rte_spinlock_unlock(&internals->lock);
	}

	if (polling_slave_found)
		/* Set alarm to continue monitoring link status of slave ethdev's */
		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
				bond_ethdev_slave_link_status_change_monitor, cb_arg);
}
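
/*
 * Note on the polling scheme above: rte_eal_alarm_set() arms a one-shot
 * alarm, so the callback re-arms itself to obtain periodic polling. A
 * minimal sketch of the same pattern (do_work and interval_ms are assumed
 * placeholders, not driver symbols):
 *
 *	static void poll_cb(void *arg)
 *	{
 *		do_work(arg);
 *		rte_eal_alarm_set(interval_ms * 1000, poll_cb, arg);
 *	}
 *
 *	rte_eal_alarm_set(interval_ms * 1000, poll_cb, arg);
 */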
static int
bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
{
	int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);

	struct bond_dev_private *bond_ctx;
	struct rte_eth_link slave_link;

	bool one_link_update_succeeded;
	uint32_t idx;
	int ret;

	bond_ctx = ethdev->data->dev_private;

	ethdev->data->dev_link.link_speed = RTE_ETH_SPEED_NUM_NONE;

	if (ethdev->data->dev_started == 0 ||
			bond_ctx->active_slave_count == 0) {
		ethdev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
		return 0;
	}

	ethdev->data->dev_link.link_status = RTE_ETH_LINK_UP;

	if (wait_to_complete)
		link_update = rte_eth_link_get;
	else
		link_update = rte_eth_link_get_nowait;

	switch (bond_ctx->mode) {
	case BONDING_MODE_BROADCAST:
		/*
		 * Setting link speed to UINT32_MAX to ensure we pick up the
		 * value of the first active slave
		 */
		ethdev->data->dev_link.link_speed = UINT32_MAX;

		/*
		 * link speed is minimum value of all the slaves link speed as
		 * packet loss will occur on this slave if transmission at rates
		 * greater than this are attempted
		 */
		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
			ret = link_update(bond_ctx->active_slaves[idx],
					&slave_link);
			if (ret < 0) {
				ethdev->data->dev_link.link_speed =
						RTE_ETH_SPEED_NUM_NONE;
				RTE_BOND_LOG(ERR,
					"Slave (port %u) link get failed: %s",
					bond_ctx->active_slaves[idx],
					rte_strerror(-ret));
				return 0;
			}

			if (slave_link.link_speed <
					ethdev->data->dev_link.link_speed)
				ethdev->data->dev_link.link_speed =
						slave_link.link_speed;
		}
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		/* Current primary slave */
		ret = link_update(bond_ctx->current_primary_port, &slave_link);
		if (ret < 0) {
			RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
				bond_ctx->current_primary_port,
				rte_strerror(-ret));
			return 0;
		}
		ethdev->data->dev_link.link_speed = slave_link.link_speed;
		break;
	case BONDING_MODE_8023AD:
		ethdev->data->dev_link.link_autoneg =
				bond_ctx->mode4.slave_link.link_autoneg;
		ethdev->data->dev_link.link_duplex =
				bond_ctx->mode4.slave_link.link_duplex;
		/* fall through */
		/* to update link speed */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		/*
		 * In these modes the maximum theoretical link speed is the
		 * sum of all the slaves
		 */
		ethdev->data->dev_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
		one_link_update_succeeded = false;

		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
			ret = link_update(bond_ctx->active_slaves[idx],
					&slave_link);
			if (ret < 0) {
				RTE_BOND_LOG(ERR,
					"Slave (port %u) link get failed: %s",
					bond_ctx->active_slaves[idx],
					rte_strerror(-ret));
				continue;
			}

			one_link_update_succeeded = true;
			ethdev->data->dev_link.link_speed +=
					slave_link.link_speed;
		}

		if (!one_link_update_succeeded) {
			RTE_BOND_LOG(ERR, "All slaves link get failed");
			return 0;
		}
	}

	return 0;
}
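
/*
 * Example (illustrative only): an application reads the aggregate link
 * computed above through the usual ethdev call; in broadcast mode the
 * reported speed is the minimum over the active slaves, in the
 * load-sharing modes it is the sum. bond_port_id is an assumed variable:
 *
 *	struct rte_eth_link link;
 *	if (rte_eth_link_get_nowait(bond_port_id, &link) == 0 &&
 *			link.link_status == RTE_ETH_LINK_UP)
 *		printf("bond up at %u Mbps\n", link.link_speed);
 */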
static int
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_stats slave_stats;
	int i, j;

	for (i = 0; i < internals->slave_count; i++) {
		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);

		stats->ipackets += slave_stats.ipackets;
		stats->opackets += slave_stats.opackets;
		stats->ibytes += slave_stats.ibytes;
		stats->obytes += slave_stats.obytes;
		stats->imissed += slave_stats.imissed;
		stats->ierrors += slave_stats.ierrors;
		stats->oerrors += slave_stats.oerrors;
		stats->rx_nombuf += slave_stats.rx_nombuf;

		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
			stats->q_opackets[j] += slave_stats.q_opackets[j];
			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
			stats->q_obytes[j] += slave_stats.q_obytes[j];
			stats->q_errors[j] += slave_stats.q_errors[j];
		}
	}

	return 0;
}
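
/*
 * Usage sketch (assumption): because the bonded device keeps no counters of
 * its own, a caller sees the sum of the slave counters. bond_port_id is a
 * placeholder and <inttypes.h> provides PRIu64:
 *
 *	struct rte_eth_stats stats;
 *	if (rte_eth_stats_get(bond_port_id, &stats) == 0)
 *		printf("rx=%" PRIu64 " tx=%" PRIu64 "\n",
 *		       stats.ipackets, stats.opackets);
 */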
static int
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;
	int err;
	int ret;

	for (i = 0, err = 0; i < internals->slave_count; i++) {
		ret = rte_eth_stats_reset(internals->slaves[i].port_id);
		if (ret != 0)
			err = ret;
	}

	return err;
}
static int
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	int i;
	int ret = 0;
	uint16_t port_id;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
	case BONDING_MODE_8023AD: {
		unsigned int slave_ok = 0;

		for (i = 0; i < internals->slave_count; i++) {
			port_id = internals->slaves[i].port_id;

			ret = rte_eth_promiscuous_enable(port_id);
			if (ret != 0)
				RTE_BOND_LOG(ERR,
					"Failed to enable promiscuous mode for port %u: %s",
					port_id, rte_strerror(-ret));
			else
				slave_ok++;
		}
		/*
		 * Report success if the operation succeeded on at least
		 * one slave. Otherwise return the last error code.
		 */
		if (slave_ok > 0)
			ret = 0;
		break;
	}
	/* Promiscuous mode is propagated only to primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		/* Do not touch promisc when there cannot be primary ports */
		if (internals->slave_count == 0)
			break;
		port_id = internals->current_primary_port;
		ret = rte_eth_promiscuous_enable(port_id);
		if (ret != 0)
			RTE_BOND_LOG(ERR,
				"Failed to enable promiscuous mode for port %u: %s",
				port_id, rte_strerror(-ret));
	}

	return ret;
}
static int
bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;
	int ret = 0;
	uint16_t port_id;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
	case BONDING_MODE_8023AD: {
		unsigned int slave_ok = 0;

		for (i = 0; i < internals->slave_count; i++) {
			port_id = internals->slaves[i].port_id;

			if (internals->mode == BONDING_MODE_8023AD &&
			    bond_mode_8023ad_ports[port_id].forced_rx_flags ==
					BOND_8023AD_FORCED_PROMISC) {
				slave_ok++;
				continue;
			}
			ret = rte_eth_promiscuous_disable(port_id);
			if (ret != 0)
				RTE_BOND_LOG(ERR,
					"Failed to disable promiscuous mode for port %u: %s",
					port_id, rte_strerror(-ret));
			else
				slave_ok++;
		}
		/*
		 * Report success if the operation succeeded on at least
		 * one slave. Otherwise return the last error code.
		 */
		if (slave_ok > 0)
			ret = 0;
		break;
	}
	/* Promiscuous mode is propagated only to primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		/* Do not touch promisc when there cannot be primary ports */
		if (internals->slave_count == 0)
			break;
		port_id = internals->current_primary_port;
		ret = rte_eth_promiscuous_disable(port_id);
		if (ret != 0)
			RTE_BOND_LOG(ERR,
				"Failed to disable promiscuous mode for port %u: %s",
				port_id, rte_strerror(-ret));
	}

	return ret;
}
static void
bond_ethdev_promiscuous_update(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	uint16_t port_id = internals->current_primary_port;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
	case BONDING_MODE_8023AD:
		/* As promiscuous mode is propagated to all slaves for these
		 * modes, there is nothing to update on the bonding device.
		 */
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		/* Promiscuous mode is propagated only to the primary slave
		 * for these modes, so on an active/standby switchover it must
		 * be applied to the new primary slave according to the
		 * bonding device's own setting.
		 */
		if (rte_eth_promiscuous_get(internals->port_id) == 1)
			rte_eth_promiscuous_enable(port_id);
		else
			rte_eth_promiscuous_disable(port_id);
	}
}
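
/*
 * Illustrative example (not driver code): enabling promiscuous mode on the
 * bonded port propagates to all slaves or only to the primary slave,
 * depending on the mode handled above. bond_port_id is an assumed variable:
 *
 *	if (rte_eth_promiscuous_enable(bond_port_id) != 0)
 *		;	// handle the error reported by the handler above
 */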
static int
bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	int i;
	int ret = 0;
	uint16_t port_id;

	switch (internals->mode) {
	/* allmulti mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
	case BONDING_MODE_8023AD: {
		unsigned int slave_ok = 0;

		for (i = 0; i < internals->slave_count; i++) {
			port_id = internals->slaves[i].port_id;

			ret = rte_eth_allmulticast_enable(port_id);
			if (ret != 0)
				RTE_BOND_LOG(ERR,
					"Failed to enable allmulti mode for port %u: %s",
					port_id, rte_strerror(-ret));
			else
				slave_ok++;
		}
		/*
		 * Report success if the operation succeeded on at least
		 * one slave. Otherwise return the last error code.
		 */
		if (slave_ok > 0)
			ret = 0;
		break;
	}
	/* allmulti mode is propagated only to primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		/* Do not touch allmulti when there cannot be primary ports */
		if (internals->slave_count == 0)
			break;
		port_id = internals->current_primary_port;
		ret = rte_eth_allmulticast_enable(port_id);
		if (ret != 0)
			RTE_BOND_LOG(ERR,
				"Failed to enable allmulti mode for port %u: %s",
				port_id, rte_strerror(-ret));
	}

	return ret;
}
static int
bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	int i;
	int ret = 0;
	uint16_t port_id;

	switch (internals->mode) {
	/* allmulti mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
	case BONDING_MODE_8023AD: {
		unsigned int slave_ok = 0;

		for (i = 0; i < internals->slave_count; i++) {
			uint16_t port_id = internals->slaves[i].port_id;

			if (internals->mode == BONDING_MODE_8023AD &&
			    bond_mode_8023ad_ports[port_id].forced_rx_flags ==
					BOND_8023AD_FORCED_ALLMULTI)
				continue;

			ret = rte_eth_allmulticast_disable(port_id);
			if (ret != 0)
				RTE_BOND_LOG(ERR,
					"Failed to disable allmulti mode for port %u: %s",
					port_id, rte_strerror(-ret));
			else
				slave_ok++;
		}
		/*
		 * Report success if the operation succeeded on at least
		 * one slave. Otherwise return the last error code.
		 */
		if (slave_ok > 0)
			ret = 0;
		break;
	}
	/* allmulti mode is propagated only to primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		/* Do not touch allmulti when there cannot be primary ports */
		if (internals->slave_count == 0)
			break;
		port_id = internals->current_primary_port;
		ret = rte_eth_allmulticast_disable(port_id);
		if (ret != 0)
			RTE_BOND_LOG(ERR,
				"Failed to disable allmulti mode for port %u: %s",
				port_id, rte_strerror(-ret));
	}

	return ret;
}
static void
bond_ethdev_allmulticast_update(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	uint16_t port_id = internals->current_primary_port;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
	case BONDING_MODE_8023AD:
		/* As allmulticast mode is propagated to all slaves for these
		 * modes, there is nothing to update on the bonding device.
		 */
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		/* Allmulticast mode is propagated only to the primary slave
		 * for these modes, so on an active/standby switchover it must
		 * be applied to the new primary slave according to the
		 * bonding device's own setting.
		 */
		if (rte_eth_allmulticast_get(internals->port_id) == 1)
			rte_eth_allmulticast_enable(port_id);
		else
			rte_eth_allmulticast_disable(port_id);
	}
}
static void
bond_ethdev_delayed_lsc_propagation(void *arg)
{
	if (arg == NULL)
		return;

	rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
			RTE_ETH_EVENT_INTR_LSC, NULL);
}
int
bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
		void *param, void *ret_param __rte_unused)
{
	struct rte_eth_dev *bonded_eth_dev;
	struct bond_dev_private *internals;
	struct rte_eth_link link;
	int rc = -1;
	int ret;

	uint8_t lsc_flag = 0;
	int valid_slave = 0;
	uint16_t active_pos;
	uint16_t i;

	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
		return rc;

	bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];

	if (check_for_bonded_ethdev(bonded_eth_dev))
		return rc;

	internals = bonded_eth_dev->data->dev_private;

	/* If the device isn't started don't handle interrupts */
	if (!bonded_eth_dev->data->dev_started)
		return rc;

	/* verify that port_id is a valid slave of bonded port */
	for (i = 0; i < internals->slave_count; i++) {
		if (internals->slaves[i].port_id == port_id) {
			valid_slave = 1;
			break;
		}
	}

	if (!valid_slave)
		return rc;

	/* Synchronize lsc callback parallel calls either by real link event
	 * from the slaves PMDs or by the bonding PMD itself.
	 */
	rte_spinlock_lock(&internals->lsc_lock);

	/* Search for port in active port list */
	active_pos = find_slave_by_id(internals->active_slaves,
			internals->active_slave_count, port_id);

	ret = rte_eth_link_get_nowait(port_id, &link);
	if (ret < 0)
		RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id);

	if (ret == 0 && link.link_status) {
		if (active_pos < internals->active_slave_count)
			goto link_update;

		/* check link state properties if bonded link is up*/
		if (bonded_eth_dev->data->dev_link.link_status == RTE_ETH_LINK_UP) {
			if (link_properties_valid(bonded_eth_dev, &link) != 0)
				RTE_BOND_LOG(ERR, "Invalid link properties "
					     "for slave %d in bonding mode %d",
					     port_id, internals->mode);
		} else {
			/* inherit slave link properties */
			link_properties_set(bonded_eth_dev, &link);
		}

		/* If no active slave ports then set this port to be
		 * the primary port.
		 */
		if (internals->active_slave_count < 1) {
			/* If first active slave, then change link status */
			bonded_eth_dev->data->dev_link.link_status =
								RTE_ETH_LINK_UP;
			internals->current_primary_port = port_id;
			lsc_flag = 1;

			mac_address_slaves_update(bonded_eth_dev);
			bond_ethdev_promiscuous_update(bonded_eth_dev);
			bond_ethdev_allmulticast_update(bonded_eth_dev);
		}

		activate_slave(bonded_eth_dev, port_id);

		/* If the user has defined the primary port then default to
		 * using it.
		 */
		if (internals->user_defined_primary_port &&
				internals->primary_port == port_id)
			bond_ethdev_primary_set(internals, port_id);
	} else {
		if (active_pos == internals->active_slave_count)
			goto link_update;

		/* Remove from active slave list */
		deactivate_slave(bonded_eth_dev, port_id);

		if (internals->active_slave_count < 1)
			lsc_flag = 1;

		/* Update primary id, take first active slave from list or if none
		 * available set to -1 */
		if (port_id == internals->current_primary_port) {
			if (internals->active_slave_count > 0)
				bond_ethdev_primary_set(internals,
						internals->active_slaves[0]);
			else
				internals->current_primary_port = internals->primary_port;
			mac_address_slaves_update(bonded_eth_dev);
			bond_ethdev_promiscuous_update(bonded_eth_dev);
			bond_ethdev_allmulticast_update(bonded_eth_dev);
		}
	}

link_update:
	/*
	 * Update bonded device link properties after any change to active
	 * slaves
	 */
	bond_ethdev_link_update(bonded_eth_dev, 0);

	if (lsc_flag) {
		/* Cancel any possible outstanding interrupts if delays are enabled */
		if (internals->link_up_delay_ms > 0 ||
				internals->link_down_delay_ms > 0)
			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
					bonded_eth_dev);

		if (bonded_eth_dev->data->dev_link.link_status) {
			if (internals->link_up_delay_ms > 0)
				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL);
		} else {
			if (internals->link_down_delay_ms > 0)
				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL);
		}
	}

	rte_spinlock_unlock(&internals->lsc_lock);

	return rc;
}
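
/*
 * Sketch of the application side (assumption, not driver code): the bonding
 * PMD re-emits RTE_ETH_EVENT_INTR_LSC on the bonded port id, so a user
 * registers a callback on the bonded port rather than on the slaves:
 *
 *	static int
 *	app_lsc_cb(uint16_t port_id, enum rte_eth_event_type type,
 *			void *param, void *ret_param)
 *	{
 *		RTE_SET_USED(type);
 *		RTE_SET_USED(param);
 *		RTE_SET_USED(ret_param);
 *		printf("link change on bonded port %u\n", port_id);
 *		return 0;
 *	}
 *
 *	rte_eth_dev_callback_register(bond_port_id, RTE_ETH_EVENT_INTR_LSC,
 *			app_lsc_cb, NULL);
 *
 * bond_port_id is a placeholder variable.
 */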
static int
bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	unsigned int i, j;
	int result = 0;
	int slave_reta_size;
	unsigned int reta_count;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	reta_count = (reta_size + RTE_ETH_RETA_GROUP_SIZE - 1) /
			RTE_ETH_RETA_GROUP_SIZE;

	for (i = 0; i < reta_count; i++) {
		internals->reta_conf[i].mask = reta_conf[i].mask;
		for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
	}

	/* Fill rest of array */
	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
				sizeof(internals->reta_conf[0]) * reta_count);

	/* Propagate RETA over slaves */
	for (i = 0; i < internals->slave_count; i++) {
		slave_reta_size = internals->slaves[i].reta_size;
		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
				&internals->reta_conf[0], slave_reta_size);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	int i, j;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	for (i = 0; i < reta_size / RTE_ETH_RETA_GROUP_SIZE; i++)
		for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];

	return 0;
}
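
/*
 * Usage sketch (bond_port_id, nb_rxq and the table size of 128 are
 * placeholders): RETA updates on the bonded port are mirrored to every
 * slave by the handler above, so all members hash identically. The size
 * must match the reta_size the bonded device reports:
 *
 *	struct rte_eth_rss_reta_entry64 reta[128 / RTE_ETH_RETA_GROUP_SIZE];
 *	unsigned int k, j;
 *
 *	for (k = 0; k < RTE_DIM(reta); k++) {
 *		reta[k].mask = UINT64_MAX;
 *		for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
 *			reta[k].reta[j] =
 *				(k * RTE_ETH_RETA_GROUP_SIZE + j) % nb_rxq;
 *	}
 *	rte_eth_dev_rss_reta_update(bond_port_id, reta, 128);
 */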
static int
bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	int i, result = 0;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_rss_conf bond_rss_conf;

	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));

	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;

	if (bond_rss_conf.rss_hf != 0)
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;

	if (bond_rss_conf.rss_key) {
		if (bond_rss_conf.rss_key_len < internals->rss_key_len)
			return -EINVAL;
		else if (bond_rss_conf.rss_key_len > internals->rss_key_len)
			RTE_BOND_LOG(WARNING, "rss_key will be truncated");

		memcpy(internals->rss_key, bond_rss_conf.rss_key,
				internals->rss_key_len);
		bond_rss_conf.rss_key_len = internals->rss_key_len;
	}

	for (i = 0; i < internals->slave_count; i++) {
		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
				&bond_rss_conf);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	rss_conf->rss_key_len = internals->rss_key_len;
	if (rss_conf->rss_key)
		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);

	return 0;
}
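
/*
 * Example (illustrative; bond_port_id and the all-zero key are
 * placeholders): updating the RSS hash configuration on the bonded port
 * pushes the same key and hash-function mask to every slave:
 *
 *	uint8_t key[40] = { 0 };	// would normally hold a real key
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = key,
 *		.rss_key_len = sizeof(key),
 *		.rss_hf = RTE_ETH_RSS_IP | RTE_ETH_RSS_TCP,
 *	};
 *	rte_eth_dev_rss_hash_update(bond_port_id, &conf);
 */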
static int
bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct rte_eth_dev *slave_eth_dev;
	struct bond_dev_private *internals = dev->data->dev_private;
	int ret, i;

	rte_spinlock_lock(&internals->lock);

	for (i = 0; i < internals->slave_count; i++) {
		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
		if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
			rte_spinlock_unlock(&internals->lock);
			return -ENOTSUP;
		}
	}
	for (i = 0; i < internals->slave_count; i++) {
		ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
		if (ret < 0) {
			rte_spinlock_unlock(&internals->lock);
			return ret;
		}
	}

	rte_spinlock_unlock(&internals->lock);
	return 0;
}
static int
bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
		struct rte_ether_addr *addr)
{
	if (mac_address_set(dev, addr)) {
		RTE_BOND_LOG(ERR, "Failed to update MAC address");
		return -EINVAL;
	}

	return 0;
}

static int
bond_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
		const struct rte_flow_ops **ops)
{
	*ops = &bond_flow_ops;
	return 0;
}

static int
bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
		struct rte_ether_addr *mac_addr,
		__rte_unused uint32_t index, uint32_t vmdq)
{
	struct rte_eth_dev *slave_eth_dev;
	struct bond_dev_private *internals = dev->data->dev_private;
	int ret, i;

	rte_spinlock_lock(&internals->lock);

	for (i = 0; i < internals->slave_count; i++) {
		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
		if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
				*slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
			ret = -ENOTSUP;
			goto end;
		}
	}

	for (i = 0; i < internals->slave_count; i++) {
		ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
				mac_addr, vmdq);
		if (ret < 0) {
			/* roll back the address added on earlier slaves */
			for (i--; i >= 0; i--)
				rte_eth_dev_mac_addr_remove(
					internals->slaves[i].port_id, mac_addr);
			goto end;
		}
	}

	ret = 0;
end:
	rte_spinlock_unlock(&internals->lock);
	return ret;
}

static int
bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
{
	struct rte_eth_dev *slave_eth_dev;
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	rte_spinlock_lock(&internals->lock);

	for (i = 0; i < internals->slave_count; i++) {
		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
		if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
			goto end;
	}

	struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];

	for (i = 0; i < internals->slave_count; i++)
		rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
				mac_addr);

end:
	rte_spinlock_unlock(&internals->lock);
	return 0;
}
const struct eth_dev_ops default_dev_ops = {
	.dev_start            = bond_ethdev_start,
	.dev_stop             = bond_ethdev_stop,
	.dev_close            = bond_ethdev_close,
	.dev_configure        = bond_ethdev_configure,
	.dev_infos_get        = bond_ethdev_info,
	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
	.rx_queue_release     = bond_ethdev_rx_queue_release,
	.tx_queue_release     = bond_ethdev_tx_queue_release,
	.link_update          = bond_ethdev_link_update,
	.stats_get            = bond_ethdev_stats_get,
	.stats_reset          = bond_ethdev_stats_reset,
	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
	.allmulticast_enable  = bond_ethdev_allmulticast_enable,
	.allmulticast_disable = bond_ethdev_allmulticast_disable,
	.reta_update          = bond_ethdev_rss_reta_update,
	.reta_query           = bond_ethdev_rss_reta_query,
	.rss_hash_update      = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
	.mtu_set              = bond_ethdev_mtu_set,
	.mac_addr_set         = bond_ethdev_mac_address_set,
	.mac_addr_add         = bond_ethdev_mac_addr_add,
	.mac_addr_remove      = bond_ethdev_mac_addr_remove,
	.flow_ops_get         = bond_flow_ops_get
};
static int
bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
{
	const char *name = rte_vdev_device_name(dev);
	uint8_t socket_id = dev->device.numa_node;
	struct bond_dev_private *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	uint32_t vlan_filter_bmp_size;

	/* now do all data allocation - for eth_dev structure, dummy pci driver
	 * and internal (private) data
	 */

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
		goto err;
	}

	internals = eth_dev->data->dev_private;
	eth_dev->data->nb_rx_queues = (uint16_t)1;
	eth_dev->data->nb_tx_queues = (uint16_t)1;

	/* Allocate memory for storing MAC addresses */
	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
			BOND_MAX_MAC_ADDRS, 0, socket_id);
	if (eth_dev->data->mac_addrs == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to allocate %u bytes needed to store MAC addresses",
			     RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
		goto err;
	}

	eth_dev->dev_ops = &default_dev_ops;
	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
					RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;

	rte_spinlock_init(&internals->lock);
	rte_spinlock_init(&internals->lsc_lock);

	internals->port_id = eth_dev->data->port_id;
	internals->mode = BONDING_MODE_INVALID;
	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
	internals->burst_xmit_hash = burst_xmit_l2_hash;
	internals->user_defined_mac = 0;

	internals->link_status_polling_enabled = 0;

	internals->link_status_polling_interval_ms =
			DEFAULT_POLLING_INTERVAL_10_MS;
	internals->link_down_delay_ms = 0;
	internals->link_up_delay_ms = 0;

	internals->slave_count = 0;
	internals->active_slave_count = 0;
	internals->rx_offload_capa = 0;
	internals->tx_offload_capa = 0;
	internals->rx_queue_offload_capa = 0;
	internals->tx_queue_offload_capa = 0;
	internals->candidate_max_rx_pktlen = 0;
	internals->max_rx_pktlen = 0;

	/* Initially allow to choose any offload type */
	internals->flow_type_rss_offloads = RTE_ETH_RSS_PROTO_MASK;

	memset(&internals->default_rxconf, 0,
	       sizeof(internals->default_rxconf));
	memset(&internals->default_txconf, 0,
	       sizeof(internals->default_txconf));

	memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
	memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));

	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
	memset(internals->slaves, 0, sizeof(internals->slaves));

	TAILQ_INIT(&internals->flow_list);
	internals->flow_isolated_valid = 0;

	/* Set mode 4 default configuration */
	bond_mode_8023ad_setup(eth_dev, NULL);
	if (bond_ethdev_mode_set(eth_dev, mode)) {
		RTE_BOND_LOG(ERR, "Failed to set bonded device %u mode to %u",
			     eth_dev->data->port_id, mode);
		goto err;
	}

	vlan_filter_bmp_size =
		rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
						   RTE_CACHE_LINE_SIZE);
	if (internals->vlan_filter_bmpmem == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to allocate vlan bitmap for bonded device %u",
			     eth_dev->data->port_id);
		goto err;
	}

	internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
	if (internals->vlan_filter_bmp == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to init vlan bitmap for bonded device %u",
			     eth_dev->data->port_id);
		rte_free(internals->vlan_filter_bmpmem);
		goto err;
	}

	return eth_dev->data->port_id;

err:
	rte_free(internals);
	if (eth_dev != NULL)
		eth_dev->data->dev_private = NULL;
	rte_eth_dev_release_port(eth_dev);
	return -1;
}
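
/*
 * Sketch (assumption, not part of this file): the same allocation path is
 * reached programmatically through the public API instead of devargs.
 * slave_port_a and slave_port_b are placeholder port ids:
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *	if (bond_port >= 0) {
 *		rte_eth_bond_slave_add(bond_port, slave_port_a);
 *		rte_eth_bond_slave_add(bond_port, slave_port_b);
 *	}
 */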
static int
bond_probe(struct rte_vdev_device *dev)
{
	const char *name;
	struct bond_dev_private *internals;
	struct rte_kvargs *kvlist;
	uint8_t bonding_mode;
	int arg_count, port_id;
	int socket_id;
	uint8_t agg_mode;
	struct rte_eth_dev *eth_dev;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			RTE_BOND_LOG(ERR, "Failed to probe %s", name);
			return -1;
		}
		/* TODO: request info from primary to set up Rx and Tx */
		eth_dev->dev_ops = &default_dev_ops;
		eth_dev->device = &dev->device;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
			pmd_bond_init_valid_arguments);
	if (kvlist == NULL) {
		RTE_BOND_LOG(ERR, "Invalid args in %s", rte_vdev_device_args(dev));
		return -1;
	}

	/* Parse link bonding mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
				&bond_ethdev_parse_slave_mode_kvarg,
				&bonding_mode) != 0) {
			RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
				     name);
			goto parse_error;
		}
	} else {
		RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
				"device %s", name);
		goto parse_error;
	}

	/* Parse socket id to create bonding device on */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
	if (arg_count == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
				!= 0) {
			RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
					"bonded device %s", name);
			goto parse_error;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
				"bonded device %s", name);
		goto parse_error;
	} else {
		socket_id = rte_socket_id();
	}

	dev->device.numa_node = socket_id;

	/* Create link bonding eth device */
	port_id = bond_alloc(dev, bonding_mode);
	if (port_id < 0) {
		RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
				"socket %u.", name, bonding_mode, socket_id);
		goto parse_error;
	}
	internals = rte_eth_devices[port_id].data->dev_private;
	internals->kvlist = kvlist;

	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_BOND_LOG(ERR,
				"Failed to parse agg selection mode for bonded device %s",
				name);
			goto parse_error;
		}

		if (internals->mode == BONDING_MODE_8023AD)
			internals->mode4.agg_selection = agg_mode;
	} else {
		internals->mode4.agg_selection = AGG_STABLE;
	}

	rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
	RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
			"socket %u.", name, port_id, bonding_mode, socket_id);
	return 0;

parse_error:
	rte_kvargs_free(kvlist);
	return -1;
}
static int
bond_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;
	struct bond_dev_private *internals;
	const char *name;
	int ret = 0;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return 0; /* port already released */

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return rte_eth_dev_release_port(eth_dev);

	RTE_ASSERT(eth_dev->device == &dev->device);

	internals = eth_dev->data->dev_private;
	if (internals->slave_count != 0)
		return -EBUSY;

	if (eth_dev->data->dev_started == 1) {
		ret = bond_ethdev_stop(eth_dev);
		bond_ethdev_close(eth_dev);
	}
	rte_eth_dev_release_port(eth_dev);

	return ret;
}
/* this part will resolve the slave portids after all the other pdev and vdev
 * have been allocated */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	const char *name = dev->device->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	int arg_count;
	uint16_t port_id = dev - rte_eth_devices;
	uint8_t agg_mode;

	static const uint8_t default_rss_key[40] = {
		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
		0xBE, 0xAC, 0x01, 0xFA
	};

	unsigned int i, j;

	/*
	 * If RSS is enabled, fill table with default values and
	 * set key to the value specified in port RSS configuration.
	 * Fall back to default RSS key if the key is not specified
	 */
	if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS) {
		struct rte_eth_rss_conf *rss_conf =
			&dev->data->dev_conf.rx_adv_conf.rss_conf;

		if (internals->rss_key_len == 0)
			internals->rss_key_len = sizeof(default_rss_key);

		if (rss_conf->rss_key != NULL) {
			if (internals->rss_key_len > rss_conf->rss_key_len) {
				RTE_BOND_LOG(ERR, "Invalid rss key length(%u)",
					     rss_conf->rss_key_len);
				return -EINVAL;
			}

			memcpy(internals->rss_key, rss_conf->rss_key,
			       internals->rss_key_len);
		} else {
			if (internals->rss_key_len > sizeof(default_rss_key)) {
				/*
				 * If the rss_key includes standard_rss_key and
				 * extended_hash_key, the rss key length will be
				 * larger than the default rss key length, so the
				 * hash key should be re-calculated.
				 */
				for (i = 0; i < internals->rss_key_len; i++)
					internals->rss_key[i] = (uint8_t)rte_rand();
			} else
				memcpy(internals->rss_key, default_rss_key,
				       internals->rss_key_len);
		}

		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] =
					(i * RTE_ETH_RETA_GROUP_SIZE + j) %
					dev->data->nb_rx_queues;
		}
	}

	/* set the max_rx_pktlen */
	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

	/*
	 * if no kvlist, it means that this bonded device has been created
	 * through the bonding api.
	 */
	if (!kvlist)
		return 0;
	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct rte_ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg,
				&bond_mac) < 0) {
			RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
				     name);
			return -1;
		}

		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
			RTE_BOND_LOG(ERR,
				     "Failed to set mac address on bonded device %s",
				     name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(ERR,
			     "MAC address can be specified only once for bonded device %s",
			     name);
		return -1;
	}

	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg,
				&xmit_policy) != 0) {
			RTE_BOND_LOG(INFO,
				     "Invalid xmit policy specified for bonded device %s",
				     name);
			return -1;
		}

		/* Set balance mode transmit policy */
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
			RTE_BOND_LOG(ERR,
				     "Failed to set balance xmit policy on bonded device %s",
				     name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(ERR,
			     "Transmit policy can be specified only once for bonded device %s",
			     name);
		return -1;
	}
	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_BOND_LOG(ERR,
				     "Failed to parse agg selection mode for bonded device %s",
				     name);
			return -1;
		}
		if (internals->mode == BONDING_MODE_8023AD) {
			int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
			if (ret < 0) {
				RTE_BOND_LOG(ERR,
					"Invalid args for agg selection set for bonded device %s",
					name);
				return -1;
			}
		}
	}
	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg,
				&slave_ports) != 0) {
			RTE_BOND_LOG(ERR,
				     "Failed to parse slave ports for bonded device %s",
				     name);
			return -1;
		}

		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
				RTE_BOND_LOG(ERR,
					     "Failed to add port %d as slave to bonded device %s",
					     slave_ports.slaves[i], name);
			}
		}
	} else {
		RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
		return -1;
	}
	/* Parse/set primary slave port id */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint16_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
			RTE_BOND_LOG(INFO,
				     "Invalid primary slave port id specified for bonded device %s",
				     name);
			return -1;
		}

		/* Set primary slave port id */
		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
				!= 0) {
			RTE_BOND_LOG(ERR,
				     "Failed to set primary slave port %d on bonded device %s",
				     primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
			     "Primary slave can be specified only once for bonded device %s",
			     name);
		return -1;
	}
	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_BOND_LOG(INFO,
				     "Invalid lsc polling interval value specified for bonded"
				     " device %s", name);
			return -1;
		}

		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
				     "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
				     lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
			     "LSC polling interval can be specified only once for bonded"
			     " device %s", name);
		return -1;
	}
	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_BOND_LOG(INFO,
				     "Invalid link up propagation delay value specified for"
				     " bonded device %s", name);
			return -1;
		}

		/* Set link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
				     "Failed to set link up propagation delay (%u ms) on bonded"
				     " device %s", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
			     "Link up propagation delay can be specified only once for"
			     " bonded device %s", name);
		return -1;
	}
	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_BOND_LOG(INFO,
				     "Invalid link down propagation delay value specified for"
				     " bonded device %s", name);
			return -1;
		}

		/* Set link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
				     "Failed to set link down propagation delay (%u ms) on bonded device %s",
				     link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
			     "Link down propagation delay can be specified only once for bonded device %s",
			     name);
		return -1;
	}
	/* configure slaves so we can pass mtu setting */
	for (i = 0; i < internals->slave_count; i++) {
		struct rte_eth_dev *slave_ethdev =
				&(rte_eth_devices[internals->slaves[i].port_id]);
		if (slave_configure(dev, slave_ethdev) != 0) {
			RTE_BOND_LOG(ERR,
				     "bonded port (%d) failed to configure slave device (%d)",
				     dev->data->port_id,
				     internals->slaves[i].port_id);
			return -1;
		}
	}
	return 0;
}
struct rte_vdev_driver pmd_bond_drv = {
	.probe = bond_probe,
	.remove = bond_remove,
};

RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
	"slave=<ifc> "
	"primary=<ifc> "
	"mode=[0-6] "
	"xmit_policy=[l2 | l23 | l34] "
	"agg_mode=[count | stable | bandwidth] "
	"socket_id=<int> "
	"mac=<mac addr> "
	"lsc_poll_period_ms=<int> "
	"up_delay=<int> "
	"down_delay=<int>");
/* We can't use RTE_LOG_REGISTER_DEFAULT because of the forced name for
 * this library, see meson.build.
 */
RTE_LOG_REGISTER(bond_logtype, pmd.net.bonding, NOTICE);