1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
6 #include <netinet/in.h>
9 #include <rte_malloc.h>
10 #include <ethdev_driver.h>
11 #include <ethdev_vdev.h>
15 #include <rte_ip_frag.h>
16 #include <rte_devargs.h>
17 #include <rte_kvargs.h>
18 #include <rte_bus_vdev.h>
19 #include <rte_alarm.h>
20 #include <rte_cycles.h>
21 #include <rte_string_fns.h>
23 #include "rte_eth_bond.h"
24 #include "eth_bond_private.h"
25 #include "eth_bond_8023ad_private.h"
27 #define REORDER_PERIOD_MS 10
28 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
29 #define BOND_MAX_MAC_ADDRS 16
31 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
33 /* Table for statistics in mode 5 TLB */
34 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
37 get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
39 size_t vlan_offset = 0;
41 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
42 rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
43 struct rte_vlan_hdr *vlan_hdr =
44 (struct rte_vlan_hdr *)(eth_hdr + 1);
46 vlan_offset = sizeof(struct rte_vlan_hdr);
47 *proto = vlan_hdr->eth_proto;
49 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
50 vlan_hdr = vlan_hdr + 1;
51 *proto = vlan_hdr->eth_proto;
52 vlan_offset += sizeof(struct rte_vlan_hdr);
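/*
 * Burst RX used by the modes that poll every active slave (e.g. round
 * robin): packets are read from each active slave in turn, starting at the
 * queue's saved active_slave index, until nb_pkts mbufs have been gathered
 * or all slaves have been polled. The start index is then advanced so the
 * next burst begins on the following slave.
 */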
59 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
61 struct bond_dev_private *internals;
63 uint16_t num_rx_total = 0;
65 uint16_t active_slave;
69 /* Cast to structure containing the bonded device's port id and queue id */
69 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
70 internals = bd_rx_q->dev_private;
71 slave_count = internals->active_slave_count;
72 active_slave = bd_rx_q->active_slave;
74 for (i = 0; i < slave_count && nb_pkts; i++) {
75 uint16_t num_rx_slave;
77 /* Offset of pointer to *bufs increases as packets are received
78 * from other slaves */
80 rte_eth_rx_burst(internals->active_slaves[active_slave],
82 bufs + num_rx_total, nb_pkts);
83 num_rx_total += num_rx_slave;
84 nb_pkts -= num_rx_slave;
85 if (++active_slave == slave_count)
89 if (++bd_rx_q->active_slave >= slave_count)
90 bd_rx_q->active_slave = 0;
95 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
98 struct bond_dev_private *internals;
101 /* Cast to structure containing the bonded device's port id and queue id */
101 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
103 internals = bd_rx_q->dev_private;
105 return rte_eth_rx_burst(internals->current_primary_port,
106 bd_rx_q->queue_id, bufs, nb_pkts);
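/*
 * Return true for untagged slow-protocol (LACP or marker) frames, which
 * must be handed to the mode 4 state machine instead of being delivered
 * to the application.
 */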
109 static inline uint8_t
110 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
112 const uint16_t ether_type_slow_be =
113 rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
115 return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
116 (ethertype == ether_type_slow_be &&
117 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
120 /*****************************************************************************
121 * Flow director's setup for mode 4 optimization
124 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
125 .dst.addr_bytes = { 0 },
126 .src.addr_bytes = { 0 },
127 .type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
130 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
131 .dst.addr_bytes = { 0 },
132 .src.addr_bytes = { 0 },
136 static struct rte_flow_item flow_item_8023ad[] = {
138 .type = RTE_FLOW_ITEM_TYPE_ETH,
139 .spec = &flow_item_eth_type_8023ad,
141 .mask = &flow_item_eth_mask_type_8023ad,
144 .type = RTE_FLOW_ITEM_TYPE_END,
151 const struct rte_flow_attr flow_attr_8023ad = {
160 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
161 uint16_t slave_port) {
162 struct rte_eth_dev_info slave_info;
163 struct rte_flow_error error;
164 struct bond_dev_private *internals = bond_dev->data->dev_private;
166 const struct rte_flow_action_queue lacp_queue_conf = {
170 const struct rte_flow_action actions[] = {
172 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
173 .conf = &lacp_queue_conf
176 .type = RTE_FLOW_ACTION_TYPE_END,
180 int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
181 flow_item_8023ad, actions, &error);
183 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
184 __func__, error.message, slave_port,
185 internals->mode4.dedicated_queues.rx_qid);
189 ret = rte_eth_dev_info_get(slave_port, &slave_info);
192 "%s: Error during getting device (port %u) info: %s\n",
193 __func__, slave_port, strerror(-ret));
198 if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
199 slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
201 "%s: Slave %d capabilities doesn't allow to allocate additional queues",
202 __func__, slave_port);
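/*
 * Check whether every slave can support the dedicated control-plane queues
 * used by mode 4: the bonding device's current rx/tx queue counts are taken
 * as the dedicated queue ids and each slave is asked to validate the
 * corresponding flow rule.
 */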
210 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
211 struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
212 struct bond_dev_private *internals = bond_dev->data->dev_private;
213 struct rte_eth_dev_info bond_info;
217 /* Verify that all slaves in the bonding device support flow director */
218 if (internals->slave_count > 0) {
219 ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
222 "%s: Error during getting device (port %u) info: %s\n",
223 __func__, bond_dev->data->port_id,
229 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
230 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
232 for (idx = 0; idx < internals->slave_count; idx++) {
233 if (bond_ethdev_8023ad_flow_verify(bond_dev,
234 internals->slaves[idx].port_id) != 0)
243 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
245 struct rte_flow_error error;
246 struct bond_dev_private *internals = bond_dev->data->dev_private;
247 struct rte_flow_action_queue lacp_queue_conf = {
248 .index = internals->mode4.dedicated_queues.rx_qid,
251 const struct rte_flow_action actions[] = {
253 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
254 .conf = &lacp_queue_conf
257 .type = RTE_FLOW_ACTION_TYPE_END,
261 internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
262 &flow_attr_8023ad, flow_item_8023ad, actions, &error);
263 if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
264 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
265 "(slave_port=%d queue_id=%d)",
266 error.message, slave_port,
267 internals->mode4.dedicated_queues.rx_qid);
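/*
 * RX burst helper for mode 4 (802.3AD). Besides gathering packets from the
 * active slaves it removes from the burst: slow-protocol (LACP/marker)
 * frames when no dedicated rx queue is in use, frames received on a slave
 * that is not collecting, and, unless promiscuous/allmulti is set on the
 * bonding port, frames whose destination MAC does not match the bonding
 * address.
 */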
274 static inline uint16_t
275 rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
278 /* Cast to structure containing the bonded device's port id and queue id */
279 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
280 struct bond_dev_private *internals = bd_rx_q->dev_private;
281 struct rte_eth_dev *bonded_eth_dev =
282 &rte_eth_devices[internals->port_id];
283 struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
284 struct rte_ether_hdr *hdr;
286 const uint16_t ether_type_slow_be =
287 rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
288 uint16_t num_rx_total = 0; /* Total number of received packets */
289 uint16_t slaves[RTE_MAX_ETHPORTS];
290 uint16_t slave_count, idx;
292 uint8_t collecting; /* current slave collecting status */
293 const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id);
294 const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id);
300 /* Copy slave list to protect against slave up/down changes during tx burst */
302 slave_count = internals->active_slave_count;
303 memcpy(slaves, internals->active_slaves,
304 sizeof(internals->active_slaves[0]) * slave_count);
306 idx = bd_rx_q->active_slave;
307 if (idx >= slave_count) {
308 bd_rx_q->active_slave = 0;
311 for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
313 collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
316 /* Read packets from this slave */
317 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
318 &bufs[num_rx_total], nb_pkts - num_rx_total);
320 for (k = j; k < 2 && k < num_rx_total; k++)
321 rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
323 /* Handle slow protocol packets. */
324 while (j < num_rx_total) {
325 if (j + 3 < num_rx_total)
326 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
328 hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
329 subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
331 /* Remove packet from array if:
332 * - it is slow packet but no dedicated rxq is present,
333 * - slave is not in collecting state,
334 * - bonding interface is not in promiscuous mode:
335 * - packet is unicast and address does not match,
336 * - packet is multicast and bonding interface
337 * is not in allmulti,
341 is_lacp_packets(hdr->ether_type, subtype,
345 ((rte_is_unicast_ether_addr(&hdr->d_addr) &&
346 !rte_is_same_ether_addr(bond_mac,
349 rte_is_multicast_ether_addr(&hdr->d_addr)))))) {
351 if (hdr->ether_type == ether_type_slow_be) {
352 bond_mode_8023ad_handle_slow_pkt(
353 internals, slaves[idx], bufs[j]);
355 rte_pktmbuf_free(bufs[j]);
357 /* Packet is managed by mode 4 or dropped, shift the array */
359 if (j < num_rx_total) {
360 memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
366 if (unlikely(++idx == slave_count))
370 if (++bd_rx_q->active_slave >= slave_count)
371 bd_rx_q->active_slave = 0;
377 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
380 return rx_burst_8023ad(queue, bufs, nb_pkts, false);
384 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
387 return rx_burst_8023ad(queue, bufs, nb_pkts, true);
390 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
391 uint32_t burstnumberRX;
392 uint32_t burstnumberTX;
394 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
397 arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
400 case RTE_ARP_OP_REQUEST:
401 strlcpy(buf, "ARP Request", buf_len);
403 case RTE_ARP_OP_REPLY:
404 strlcpy(buf, "ARP Reply", buf_len);
406 case RTE_ARP_OP_REVREQUEST:
407 strlcpy(buf, "Reverse ARP Request", buf_len);
409 case RTE_ARP_OP_REVREPLY:
410 strlcpy(buf, "Reverse ARP Reply", buf_len);
412 case RTE_ARP_OP_INVREQUEST:
413 strlcpy(buf, "Peer Identify Request", buf_len);
415 case RTE_ARP_OP_INVREPLY:
416 strlcpy(buf, "Peer Identify Reply", buf_len);
421 strlcpy(buf, "Unknown", buf_len);
425 #define MaxIPv4String 16
427 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
431 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
432 snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
433 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
437 #define MAX_CLIENTS_NUMBER 128
438 uint8_t active_clients;
439 struct client_stats_t {
442 uint32_t ipv4_rx_packets;
443 uint32_t ipv4_tx_packets;
445 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
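/*
 * Debug-only (mode 6) per-client statistics: look up the client by IPv4
 * address and slave port and bump its RX or TX counter, depending on which
 * burst counter was passed in; unknown clients are appended to the table.
 */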
448 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
452 for (; i < MAX_CLIENTS_NUMBER; i++) {
453 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
454 /* Existing client: just update its RX or TX packet count */
455 if (TXorRXindicator == &burstnumberRX)
456 client_stats[i].ipv4_rx_packets++;
458 client_stats[i].ipv4_tx_packets++;
462 /* We have a new client. Insert it into the table and update its stats */
463 if (TXorRXindicator == &burstnumberRX)
464 client_stats[active_clients].ipv4_rx_packets++;
466 client_stats[active_clients].ipv4_tx_packets++;
467 client_stats[active_clients].ipv4_addr = addr;
468 client_stats[active_clients].port = port;
473 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
474 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
475 rte_log(RTE_LOG_DEBUG, bond_logtype, \
476 "%s port:%d SrcMAC:" RTE_ETHER_ADDR_PRT_FMT " SrcIP:%s " \
477 "DstMAC:" RTE_ETHER_ADDR_PRT_FMT " DstIP:%s %s %d\n", \
480 eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
481 eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
482 eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
484 eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
485 eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
486 eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
488 arp_op, ++burstnumber)
492 mode6_debug(const char __rte_unused *info,
493 struct rte_ether_hdr *eth_h, uint16_t port,
494 uint32_t __rte_unused *burstnumber)
496 struct rte_ipv4_hdr *ipv4_h;
497 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
498 struct rte_arp_hdr *arp_h;
505 uint16_t ether_type = eth_h->ether_type;
506 uint16_t offset = get_vlan_offset(eth_h, &ether_type);
508 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
509 strlcpy(buf, info, 16);
512 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
513 ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
514 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
515 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
516 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
517 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
519 update_client_stats(ipv4_h->src_addr, port, burstnumber);
521 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
522 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
523 arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
524 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
525 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
526 arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
527 ArpOp, sizeof(ArpOp));
528 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
535 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
537 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
538 struct bond_dev_private *internals = bd_rx_q->dev_private;
539 struct rte_ether_hdr *eth_h;
540 uint16_t ether_type, offset;
541 uint16_t nb_recv_pkts;
544 nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
546 for (i = 0; i < nb_recv_pkts; i++) {
547 eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
548 ether_type = eth_h->ether_type;
549 offset = get_vlan_offset(eth_h, &ether_type);
551 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
552 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
553 mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
555 bond_mode_alb_arp_recv(eth_h, offset, internals);
557 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
558 else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
559 mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
567 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
570 struct bond_dev_private *internals;
571 struct bond_tx_queue *bd_tx_q;
573 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
574 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
576 uint16_t num_of_slaves;
577 uint16_t slaves[RTE_MAX_ETHPORTS];
579 uint16_t num_tx_total = 0, num_tx_slave;
581 static int slave_idx = 0;
582 int i, cslave_idx = 0, tx_fail_total = 0;
584 bd_tx_q = (struct bond_tx_queue *)queue;
585 internals = bd_tx_q->dev_private;
587 /* Copy slave list to protect against slave up/down changes during tx burst */
589 num_of_slaves = internals->active_slave_count;
590 memcpy(slaves, internals->active_slaves,
591 sizeof(internals->active_slaves[0]) * num_of_slaves);
593 if (num_of_slaves < 1)
596 /* Populate each slave's mbuf array with the packets to be sent on it */
597 for (i = 0; i < nb_pkts; i++) {
598 cslave_idx = (slave_idx + i) % num_of_slaves;
599 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
602 /* increment current slave index so the next call to tx burst starts on the next slave */
604 slave_idx = ++cslave_idx;
606 /* Send packet burst on each slave device */
607 for (i = 0; i < num_of_slaves; i++) {
608 if (slave_nb_pkts[i] > 0) {
609 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
610 slave_bufs[i], slave_nb_pkts[i]);
612 /* if tx burst fails move packets to end of bufs */
613 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
614 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
616 tx_fail_total += tx_fail_slave;
618 memcpy(&bufs[nb_pkts - tx_fail_total],
619 &slave_bufs[i][num_tx_slave],
620 tx_fail_slave * sizeof(bufs[0]));
622 num_tx_total += num_tx_slave;
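/* In active-backup mode all traffic is transmitted on the current primary
 * slave only; if there is no active slave, nothing is sent. */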
630 bond_ethdev_tx_burst_active_backup(void *queue,
631 struct rte_mbuf **bufs, uint16_t nb_pkts)
633 struct bond_dev_private *internals;
634 struct bond_tx_queue *bd_tx_q;
636 bd_tx_q = (struct bond_tx_queue *)queue;
637 internals = bd_tx_q->dev_private;
639 if (internals->active_slave_count < 1)
642 return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
646 static inline uint16_t
647 ether_hash(struct rte_ether_hdr *eth_hdr)
649 unaligned_uint16_t *word_src_addr =
650 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
651 unaligned_uint16_t *word_dst_addr =
652 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
654 return (word_src_addr[0] ^ word_dst_addr[0]) ^
655 (word_src_addr[1] ^ word_dst_addr[1]) ^
656 (word_src_addr[2] ^ word_dst_addr[2]);
659 static inline uint32_t
660 ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
662 return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
665 static inline uint32_t
666 ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
668 unaligned_uint32_t *word_src_addr =
669 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
670 unaligned_uint32_t *word_dst_addr =
671 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
673 return (word_src_addr[0] ^ word_dst_addr[0]) ^
674 (word_src_addr[1] ^ word_dst_addr[1]) ^
675 (word_src_addr[2] ^ word_dst_addr[2]) ^
676 (word_src_addr[3] ^ word_dst_addr[3]);
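/*
 * Transmit hash helpers for the balance and 802.3AD modes:
 * burst_xmit_l2_hash() hashes the Ethernet source/destination addresses,
 * burst_xmit_l23_hash() additionally folds in the IPv4/IPv6 addresses, and
 * burst_xmit_l34_hash() also includes the TCP/UDP ports. The resulting
 * hash, modulo the slave count, selects the output slave for each packet.
 */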
681 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
682 uint16_t slave_count, uint16_t *slaves)
684 struct rte_ether_hdr *eth_hdr;
688 for (i = 0; i < nb_pkts; i++) {
689 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
691 hash = ether_hash(eth_hdr);
693 slaves[i] = (hash ^= hash >> 8) % slave_count;
698 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
699 uint16_t slave_count, uint16_t *slaves)
702 struct rte_ether_hdr *eth_hdr;
705 uint32_t hash, l3hash;
707 for (i = 0; i < nb_pkts; i++) {
708 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
711 proto = eth_hdr->ether_type;
712 hash = ether_hash(eth_hdr);
714 vlan_offset = get_vlan_offset(eth_hdr, &proto);
716 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
717 struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
718 ((char *)(eth_hdr + 1) + vlan_offset);
719 l3hash = ipv4_hash(ipv4_hdr);
721 } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
722 struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
723 ((char *)(eth_hdr + 1) + vlan_offset);
724 l3hash = ipv6_hash(ipv6_hdr);
727 hash = hash ^ l3hash;
731 slaves[i] = hash % slave_count;
736 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
737 uint16_t slave_count, uint16_t *slaves)
739 struct rte_ether_hdr *eth_hdr;
744 struct rte_udp_hdr *udp_hdr;
745 struct rte_tcp_hdr *tcp_hdr;
746 uint32_t hash, l3hash, l4hash;
748 for (i = 0; i < nb_pkts; i++) {
749 eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
750 size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
751 proto = eth_hdr->ether_type;
752 vlan_offset = get_vlan_offset(eth_hdr, &proto);
756 if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
757 struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
758 ((char *)(eth_hdr + 1) + vlan_offset);
759 size_t ip_hdr_offset;
761 l3hash = ipv4_hash(ipv4_hdr);
763 /* there is no L4 header in a fragmented packet */
764 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
766 ip_hdr_offset = (ipv4_hdr->version_ihl
767 & RTE_IPV4_HDR_IHL_MASK) *
768 RTE_IPV4_IHL_MULTIPLIER;
770 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
771 tcp_hdr = (struct rte_tcp_hdr *)
774 if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
776 l4hash = HASH_L4_PORTS(tcp_hdr);
777 } else if (ipv4_hdr->next_proto_id ==
779 udp_hdr = (struct rte_udp_hdr *)
782 if ((size_t)udp_hdr + sizeof(*udp_hdr)
784 l4hash = HASH_L4_PORTS(udp_hdr);
787 } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
788 struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
789 ((char *)(eth_hdr + 1) + vlan_offset);
790 l3hash = ipv6_hash(ipv6_hdr);
792 if (ipv6_hdr->proto == IPPROTO_TCP) {
793 tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
794 l4hash = HASH_L4_PORTS(tcp_hdr);
795 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
796 udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
797 l4hash = HASH_L4_PORTS(udp_hdr);
801 hash = l3hash ^ l4hash;
805 slaves[i] = hash % slave_count;
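/* Bandwidth-gauge bookkeeping for TLB mode: the remaining capacity is kept
 * as an integer part and a remainder so slaves can be sorted by spare
 * bandwidth (see bandwidth_cmp() and bandwidth_left() below). */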
810 uint64_t bwg_left_int;
811 uint64_t bwg_left_remainder;
816 bond_tlb_activate_slave(struct bond_dev_private *internals) {
819 for (i = 0; i < internals->active_slave_count; i++) {
820 tlb_last_obytets[internals->active_slaves[i]] = 0;
825 bandwidth_cmp(const void *a, const void *b)
827 const struct bwg_slave *bwg_a = a;
828 const struct bwg_slave *bwg_b = b;
829 int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
830 int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
831 (int64_t)bwg_a->bwg_left_remainder;
845 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
846 struct bwg_slave *bwg_slave)
848 struct rte_eth_link link_status;
851 ret = rte_eth_link_get_nowait(port_id, &link_status);
853 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
854 port_id, rte_strerror(-ret));
857 uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
860 link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
861 bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
862 bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
866 bond_ethdev_update_tlb_slave_cb(void *arg)
868 struct bond_dev_private *internals = arg;
869 struct rte_eth_stats slave_stats;
870 struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
871 uint16_t slave_count;
874 uint8_t update_stats = 0;
878 internals->slave_update_idx++;
881 if (internals->slave_update_idx >= REORDER_PERIOD_MS)
884 for (i = 0; i < internals->active_slave_count; i++) {
885 slave_id = internals->active_slaves[i];
886 rte_eth_stats_get(slave_id, &slave_stats);
887 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
888 bandwidth_left(slave_id, tx_bytes,
889 internals->slave_update_idx, &bwg_array[i]);
890 bwg_array[i].slave = slave_id;
893 tlb_last_obytets[slave_id] = slave_stats.obytes;
897 if (update_stats == 1)
898 internals->slave_update_idx = 0;
901 qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
902 for (i = 0; i < slave_count; i++)
903 internals->tlb_slaves_order[i] = bwg_array[i].slave;
905 rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
906 (struct bond_dev_private *)internals);
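/*
 * TLB (adaptive transmit load balancing) TX burst: slaves are tried in the
 * order computed by the periodic callback above (most spare bandwidth
 * first), and the source MAC of packets carrying the primary's address is
 * rewritten to the transmitting slave's address before sending.
 */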
910 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
912 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
913 struct bond_dev_private *internals = bd_tx_q->dev_private;
915 struct rte_eth_dev *primary_port =
916 &rte_eth_devices[internals->primary_port];
917 uint16_t num_tx_total = 0;
920 uint16_t num_of_slaves = internals->active_slave_count;
921 uint16_t slaves[RTE_MAX_ETHPORTS];
923 struct rte_ether_hdr *ether_hdr;
924 struct rte_ether_addr primary_slave_addr;
925 struct rte_ether_addr active_slave_addr;
927 if (num_of_slaves < 1)
930 memcpy(slaves, internals->tlb_slaves_order,
931 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
934 rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
937 for (i = 0; i < 3; i++)
938 rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
941 for (i = 0; i < num_of_slaves; i++) {
942 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
943 for (j = num_tx_total; j < nb_pkts; j++) {
945 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
947 ether_hdr = rte_pktmbuf_mtod(bufs[j],
948 struct rte_ether_hdr *);
949 if (rte_is_same_ether_addr(&ether_hdr->s_addr,
950 &primary_slave_addr))
951 rte_ether_addr_copy(&active_slave_addr,
953 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
954 mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
958 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
959 bufs + num_tx_total, nb_pkts - num_tx_total);
961 if (num_tx_total == nb_pkts)
969 bond_tlb_disable(struct bond_dev_private *internals)
971 rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
975 bond_tlb_enable(struct bond_dev_private *internals)
977 bond_ethdev_update_tlb_slave_cb(internals);
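/*
 * ALB (mode 6) TX burst: ARP packets are steered by the ALB client table
 * (with the source MAC rewritten to the chosen slave), ARP update packets
 * are generated for known clients when needed, and all remaining traffic
 * falls back to the TLB transmit policy.
 */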
981 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
983 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
984 struct bond_dev_private *internals = bd_tx_q->dev_private;
986 struct rte_ether_hdr *eth_h;
987 uint16_t ether_type, offset;
989 struct client_data *client_info;
992 * We create transmit buffers for every slave and one additional to send
993 * through tlb. In the worst case every packet will be sent on one port.
995 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
996 uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
999 * We create separate transmit buffers for update packets as they won't
1000 * be counted in num_tx_total.
1002 struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1003 uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1005 struct rte_mbuf *upd_pkt;
1008 uint16_t num_send, num_not_send = 0;
1009 uint16_t num_tx_total = 0;
1014 /* Search tx buffer for ARP packets and forward them to alb */
1015 for (i = 0; i < nb_pkts; i++) {
1016 eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
1017 ether_type = eth_h->ether_type;
1018 offset = get_vlan_offset(eth_h, &ether_type);
1020 if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
1021 slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1023 /* Change src mac in eth header */
1024 rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1026 /* Add packet to slave tx buffer */
1027 slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1028 slave_bufs_pkts[slave_idx]++;
1030 /* If packet is not ARP, send it with TLB policy */
1031 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1033 slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1037 /* Update connected client ARP tables */
1038 if (internals->mode6.ntt) {
1039 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1040 client_info = &internals->mode6.client_table[i];
1042 if (client_info->in_use) {
1043 /* Allocate new packet to send ARP update on current slave */
1044 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1045 if (upd_pkt == NULL) {
1047 "Failed to allocate ARP packet from pool");
1050 pkt_size = sizeof(struct rte_ether_hdr) +
1051 sizeof(struct rte_arp_hdr) +
1052 client_info->vlan_count *
1053 sizeof(struct rte_vlan_hdr);
1054 upd_pkt->data_len = pkt_size;
1055 upd_pkt->pkt_len = pkt_size;
1057 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1060 /* Add packet to update tx buffer */
1061 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1062 update_bufs_pkts[slave_idx]++;
1065 internals->mode6.ntt = 0;
1068 /* Send ARP packets on proper slaves */
1069 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1070 if (slave_bufs_pkts[i] > 0) {
1071 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1072 slave_bufs[i], slave_bufs_pkts[i]);
1073 for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1074 bufs[nb_pkts - 1 - num_not_send - j] =
1075 slave_bufs[i][nb_pkts - 1 - j];
1078 num_tx_total += num_send;
1079 num_not_send += slave_bufs_pkts[i] - num_send;
1081 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1082 /* Print TX stats including update packets */
1083 for (j = 0; j < slave_bufs_pkts[i]; j++) {
1084 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
1085 struct rte_ether_hdr *);
1086 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1092 /* Send update packets on proper slaves */
1093 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1094 if (update_bufs_pkts[i] > 0) {
1095 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1096 update_bufs_pkts[i]);
1097 for (j = num_send; j < update_bufs_pkts[i]; j++) {
1098 rte_pktmbuf_free(update_bufs[i][j]);
1100 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1101 for (j = 0; j < update_bufs_pkts[i]; j++) {
1102 eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
1103 struct rte_ether_hdr *);
1104 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1110 /* Send non-ARP packets using tlb policy */
1111 if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1112 num_send = bond_ethdev_tx_burst_tlb(queue,
1113 slave_bufs[RTE_MAX_ETHPORTS],
1114 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1116 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1117 bufs[nb_pkts - 1 - num_not_send - j] =
1118 slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1121 num_tx_total += num_send;
1124 return num_tx_total;
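/*
 * Common TX helper for the balance and 802.3AD modes: the configured
 * transmit hash distributes the burst across the supplied slave list, one
 * rte_eth_tx_burst() per slave, and any packets a slave could not accept
 * are moved to the tail of the caller's array.
 */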
1127 static inline uint16_t
1128 tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1129 uint16_t *slave_port_ids, uint16_t slave_count)
1131 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1132 struct bond_dev_private *internals = bd_tx_q->dev_private;
1134 /* Array to sort mbufs for transmission on each slave into */
1135 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1136 /* Number of mbufs for transmission on each slave */
1137 uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1138 /* Mapping array generated by hash function to map mbufs to slaves */
1139 uint16_t bufs_slave_port_idxs[nb_bufs];
1141 uint16_t slave_tx_count;
1142 uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1147 * Populate each slave's mbuf array with the packets to be sent on it,
1148 * selecting the output slave using a hash based on the xmit policy
1150 internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1151 bufs_slave_port_idxs);
1153 for (i = 0; i < nb_bufs; i++) {
1154 /* Populate slave mbuf arrays with mbufs for that slave. */
1155 uint16_t slave_idx = bufs_slave_port_idxs[i];
1157 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1160 /* Send packet burst on each slave device */
1161 for (i = 0; i < slave_count; i++) {
1162 if (slave_nb_bufs[i] == 0)
1165 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1166 bd_tx_q->queue_id, slave_bufs[i],
1169 total_tx_count += slave_tx_count;
1171 /* If tx burst fails move packets to end of bufs */
1172 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1173 int slave_tx_fail_count = slave_nb_bufs[i] -
1175 total_tx_fail_count += slave_tx_fail_count;
1176 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1177 &slave_bufs[i][slave_tx_count],
1178 slave_tx_fail_count * sizeof(bufs[0]));
1182 return total_tx_count;
1186 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1189 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1190 struct bond_dev_private *internals = bd_tx_q->dev_private;
1192 uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1193 uint16_t slave_count;
1195 if (unlikely(nb_bufs == 0))
1198 /* Copy slave list to protect against slave up/down changes during tx burst */
1201 slave_count = internals->active_slave_count;
1202 if (unlikely(slave_count < 1))
1205 memcpy(slave_port_ids, internals->active_slaves,
1206 sizeof(slave_port_ids[0]) * slave_count);
1207 return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
1211 static inline uint16_t
1212 tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1215 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1216 struct bond_dev_private *internals = bd_tx_q->dev_private;
1218 uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1219 uint16_t slave_count;
1221 uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1222 uint16_t dist_slave_count;
1224 uint16_t slave_tx_count;
1228 /* Copy slave list to protect against slave up/down changes during tx burst */
1230 slave_count = internals->active_slave_count;
1231 if (unlikely(slave_count < 1))
1234 memcpy(slave_port_ids, internals->active_slaves,
1235 sizeof(slave_port_ids[0]) * slave_count);
1240 /* Check for LACP control packets and send if available */
1241 for (i = 0; i < slave_count; i++) {
1242 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1243 struct rte_mbuf *ctrl_pkt = NULL;
1245 if (likely(rte_ring_empty(port->tx_ring)))
1248 if (rte_ring_dequeue(port->tx_ring,
1249 (void **)&ctrl_pkt) != -ENOENT) {
1250 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1251 bd_tx_q->queue_id, &ctrl_pkt, 1);
1253 * re-enqueue LAG control plane packets to buffering
1254 * ring if transmission fails so the packet isn't lost.
1256 if (slave_tx_count != 1)
1257 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1262 if (unlikely(nb_bufs == 0))
1265 dist_slave_count = 0;
1266 for (i = 0; i < slave_count; i++) {
1267 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1269 if (ACTOR_STATE(port, DISTRIBUTING))
1270 dist_slave_port_ids[dist_slave_count++] =
1274 if (unlikely(dist_slave_count < 1))
1277 return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
1282 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1285 return tx_burst_8023ad(queue, bufs, nb_bufs, false);
1289 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
1292 return tx_burst_8023ad(queue, bufs, nb_bufs, true);
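/*
 * Broadcast mode TX burst: every packet's reference count is raised and the
 * whole burst is transmitted on every active slave; on partial failure only
 * the packets not sent by the most successful slave are freed, and that
 * slave's transmit count is reported back to the caller.
 */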
1296 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1299 struct bond_dev_private *internals;
1300 struct bond_tx_queue *bd_tx_q;
1302 uint16_t slaves[RTE_MAX_ETHPORTS];
1303 uint8_t tx_failed_flag = 0;
1304 uint16_t num_of_slaves;
1306 uint16_t max_nb_of_tx_pkts = 0;
1308 int slave_tx_total[RTE_MAX_ETHPORTS];
1309 int i, most_successful_tx_slave = -1;
1311 bd_tx_q = (struct bond_tx_queue *)queue;
1312 internals = bd_tx_q->dev_private;
1314 /* Copy slave list to protect against slave up/down changes during tx burst */
1316 num_of_slaves = internals->active_slave_count;
1317 memcpy(slaves, internals->active_slaves,
1318 sizeof(internals->active_slaves[0]) * num_of_slaves);
1320 if (num_of_slaves < 1)
1323 /* Increment reference count on mbufs */
1324 for (i = 0; i < nb_pkts; i++)
1325 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1327 /* Transmit burst on each active slave */
1328 for (i = 0; i < num_of_slaves; i++) {
1329 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1332 if (unlikely(slave_tx_total[i] < nb_pkts))
1335 /* record the value and slave index for the slave which transmits the
1336 * maximum number of packets */
1337 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1338 max_nb_of_tx_pkts = slave_tx_total[i];
1339 most_successful_tx_slave = i;
1343 /* if slaves fail to transmit packets from burst, the calling application
1344 * is not expected to know about multiple references to packets so we must
1345 * handle failures of all packets except those of the most successful slave
1347 if (unlikely(tx_failed_flag))
1348 for (i = 0; i < num_of_slaves; i++)
1349 if (i != most_successful_tx_slave)
1350 while (slave_tx_total[i] < nb_pkts)
1351 rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1353 return max_nb_of_tx_pkts;
1357 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1359 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1361 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1363 * If in mode 4 then save the link properties of the first
1364 * slave; all subsequent slaves must match these properties
1366 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1368 bond_link->link_autoneg = slave_link->link_autoneg;
1369 bond_link->link_duplex = slave_link->link_duplex;
1370 bond_link->link_speed = slave_link->link_speed;
1373 * In any other mode the link properties are set to default
1374 * values of AUTONEG/DUPLEX
1376 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1377 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1382 link_properties_valid(struct rte_eth_dev *ethdev,
1383 struct rte_eth_link *slave_link)
1385 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1387 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1388 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1390 if (bond_link->link_duplex != slave_link->link_duplex ||
1391 bond_link->link_autoneg != slave_link->link_autoneg ||
1392 bond_link->link_speed != slave_link->link_speed)
1400 mac_address_get(struct rte_eth_dev *eth_dev,
1401 struct rte_ether_addr *dst_mac_addr)
1403 struct rte_ether_addr *mac_addr;
1405 if (eth_dev == NULL) {
1406 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1410 if (dst_mac_addr == NULL) {
1411 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1415 mac_addr = eth_dev->data->mac_addrs;
1417 rte_ether_addr_copy(mac_addr, dst_mac_addr);
1422 mac_address_set(struct rte_eth_dev *eth_dev,
1423 struct rte_ether_addr *new_mac_addr)
1425 struct rte_ether_addr *mac_addr;
1427 if (eth_dev == NULL) {
1428 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1432 if (new_mac_addr == NULL) {
1433 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1437 mac_addr = eth_dev->data->mac_addrs;
1439 /* If new MAC is different to current MAC then update */
1440 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1441 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1446 static const struct rte_ether_addr null_mac_addr;
1449 * Add additional MAC addresses to the slave
1452 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1453 uint16_t slave_port_id)
1456 struct rte_ether_addr *mac_addr;
1458 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1459 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1460 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1463 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1466 for (i--; i > 0; i--)
1467 rte_eth_dev_mac_addr_remove(slave_port_id,
1468 &bonded_eth_dev->data->mac_addrs[i]);
1477 * Remove additional MAC addresses from the slave
1480 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1481 uint16_t slave_port_id)
1484 struct rte_ether_addr *mac_addr;
1487 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1488 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1489 if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1492 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1493 /* save only the first error */
1494 if (ret < 0 && rc == 0)
1502 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1504 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1508 /* Update slave devices MAC addresses */
1509 if (internals->slave_count < 1)
1512 switch (internals->mode) {
1513 case BONDING_MODE_ROUND_ROBIN:
1514 case BONDING_MODE_BALANCE:
1515 case BONDING_MODE_BROADCAST:
1516 for (i = 0; i < internals->slave_count; i++) {
1517 if (rte_eth_dev_default_mac_addr_set(
1518 internals->slaves[i].port_id,
1519 bonded_eth_dev->data->mac_addrs)) {
1520 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1521 internals->slaves[i].port_id);
1526 case BONDING_MODE_8023AD:
1527 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1529 case BONDING_MODE_ACTIVE_BACKUP:
1530 case BONDING_MODE_TLB:
1531 case BONDING_MODE_ALB:
1534 for (i = 0; i < internals->slave_count; i++) {
1535 if (internals->slaves[i].port_id ==
1536 internals->current_primary_port) {
1537 if (rte_eth_dev_default_mac_addr_set(
1538 internals->current_primary_port,
1539 bonded_eth_dev->data->mac_addrs)) {
1540 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1541 internals->current_primary_port);
1545 if (rte_eth_dev_default_mac_addr_set(
1546 internals->slaves[i].port_id,
1547 &internals->slaves[i].persisted_mac_addr)) {
1548 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1549 internals->slaves[i].port_id);
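/*
 * Install the RX/TX burst handlers that implement the requested bonding
 * mode on the ethdev, performing any mode specific setup (e.g. enabling the
 * 802.3AD state machine or the ALB client table) before recording the new
 * mode in the private data.
 */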
1561 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1563 struct bond_dev_private *internals;
1565 internals = eth_dev->data->dev_private;
1568 case BONDING_MODE_ROUND_ROBIN:
1569 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1570 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1572 case BONDING_MODE_ACTIVE_BACKUP:
1573 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1574 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1576 case BONDING_MODE_BALANCE:
1577 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1578 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1580 case BONDING_MODE_BROADCAST:
1581 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1582 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1584 case BONDING_MODE_8023AD:
1585 if (bond_mode_8023ad_enable(eth_dev) != 0)
1588 if (internals->mode4.dedicated_queues.enabled == 0) {
1589 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1590 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1591 RTE_BOND_LOG(WARNING,
1592 "Using mode 4, it is necessary to do TX burst "
1593 "and RX burst at least every 100ms.");
1595 /* Use flow director's optimization */
1596 eth_dev->rx_pkt_burst =
1597 bond_ethdev_rx_burst_8023ad_fast_queue;
1598 eth_dev->tx_pkt_burst =
1599 bond_ethdev_tx_burst_8023ad_fast_queue;
1602 case BONDING_MODE_TLB:
1603 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1604 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1606 case BONDING_MODE_ALB:
1607 if (bond_mode_alb_enable(eth_dev) != 0)
1610 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1611 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1617 internals->mode = mode;
1624 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1625 struct rte_eth_dev *slave_eth_dev)
1628 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1629 struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1631 if (port->slow_pool == NULL) {
1633 int slave_id = slave_eth_dev->data->port_id;
1635 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1637 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1638 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1639 slave_eth_dev->data->numa_node);
1641 /* Any memory allocation failure in initialization is critical because
1642 * resources can't be freed, so reinitialization is impossible. */
1643 if (port->slow_pool == NULL) {
1644 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1645 slave_id, mem_name, rte_strerror(rte_errno));
1649 if (internals->mode4.dedicated_queues.enabled == 1) {
1650 /* Configure slow Rx queue */
1652 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1653 internals->mode4.dedicated_queues.rx_qid, 128,
1654 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1655 NULL, port->slow_pool);
1658 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1659 slave_eth_dev->data->port_id,
1660 internals->mode4.dedicated_queues.rx_qid,
1665 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1666 internals->mode4.dedicated_queues.tx_qid, 512,
1667 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1671 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1672 slave_eth_dev->data->port_id,
1673 internals->mode4.dedicated_queues.tx_qid,
1682 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1683 struct rte_eth_dev *slave_eth_dev)
1685 struct bond_rx_queue *bd_rx_q;
1686 struct bond_tx_queue *bd_tx_q;
1687 uint16_t nb_rx_queues;
1688 uint16_t nb_tx_queues;
1692 struct rte_flow_error flow_error;
1694 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1697 errval = rte_eth_dev_stop(slave_eth_dev->data->port_id);
1699 RTE_BOND_LOG(ERR, "rte_eth_dev_stop: port %u, err (%d)",
1700 slave_eth_dev->data->port_id, errval);
1702 /* Enable interrupts on slave device if supported */
1703 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1704 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1706 /* If RSS is enabled for bonding, try to enable it for slaves */
1707 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1708 if (internals->rss_key_len != 0) {
1709 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1710 internals->rss_key_len;
1711 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1714 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1717 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1718 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1719 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1720 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1723 if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1724 DEV_RX_OFFLOAD_VLAN_FILTER)
1725 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1726 DEV_RX_OFFLOAD_VLAN_FILTER;
1728 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1729 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1731 slave_eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1732 bonded_eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1734 if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1735 DEV_RX_OFFLOAD_JUMBO_FRAME)
1736 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1737 DEV_RX_OFFLOAD_JUMBO_FRAME;
1739 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1740 ~DEV_RX_OFFLOAD_JUMBO_FRAME;
1742 nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1743 nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1745 if (internals->mode == BONDING_MODE_8023AD) {
1746 if (internals->mode4.dedicated_queues.enabled == 1) {
1752 errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1753 bonded_eth_dev->data->mtu);
1754 if (errval != 0 && errval != -ENOTSUP) {
1755 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1756 slave_eth_dev->data->port_id, errval);
1760 /* Configure device */
1761 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1762 nb_rx_queues, nb_tx_queues,
1763 &(slave_eth_dev->data->dev_conf));
1765 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1766 slave_eth_dev->data->port_id, errval);
1770 /* Setup Rx Queues */
1771 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1772 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1774 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1775 bd_rx_q->nb_rx_desc,
1776 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1777 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1780 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1781 slave_eth_dev->data->port_id, q_id, errval);
1786 /* Setup Tx Queues */
1787 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1788 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1790 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1791 bd_tx_q->nb_tx_desc,
1792 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1796 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1797 slave_eth_dev->data->port_id, q_id, errval);
1802 if (internals->mode == BONDING_MODE_8023AD &&
1803 internals->mode4.dedicated_queues.enabled == 1) {
1804 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
1808 errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1809 slave_eth_dev->data->port_id);
1812 "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
1813 slave_eth_dev->data->port_id, errval);
1817 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1818 rte_flow_destroy(slave_eth_dev->data->port_id,
1819 internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1822 errval = bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1823 slave_eth_dev->data->port_id);
1826 "bond_ethdev_8023ad_flow_set: port=%d, err (%d)",
1827 slave_eth_dev->data->port_id, errval);
1833 errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1835 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1836 slave_eth_dev->data->port_id, errval);
1840 /* If RSS is enabled for bonding, synchronize RETA */
1841 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1843 struct bond_dev_private *internals;
1845 internals = bonded_eth_dev->data->dev_private;
1847 for (i = 0; i < internals->slave_count; i++) {
1848 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1849 errval = rte_eth_dev_rss_reta_update(
1850 slave_eth_dev->data->port_id,
1851 &internals->reta_conf[0],
1852 internals->slaves[i].reta_size);
1854 RTE_BOND_LOG(WARNING,
1855 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1856 " RSS Configuration for bonding may be inconsistent.",
1857 slave_eth_dev->data->port_id, errval);
1864 /* If lsc interrupt is set, check initial slave's link status */
1865 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1866 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1867 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1868 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1876 slave_remove(struct bond_dev_private *internals,
1877 struct rte_eth_dev *slave_eth_dev)
1881 for (i = 0; i < internals->slave_count; i++)
1882 if (internals->slaves[i].port_id ==
1883 slave_eth_dev->data->port_id)
1886 if (i < (internals->slave_count - 1)) {
1887 struct rte_flow *flow;
1889 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1890 sizeof(internals->slaves[0]) *
1891 (internals->slave_count - i - 1));
1892 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1893 memmove(&flow->flows[i], &flow->flows[i + 1],
1894 sizeof(flow->flows[0]) *
1895 (internals->slave_count - i - 1));
1896 flow->flows[internals->slave_count - 1] = NULL;
1900 internals->slave_count--;
1902 /* force reconfiguration of slave interfaces */
1903 rte_eth_dev_internal_reset(slave_eth_dev);
1907 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1910 slave_add(struct bond_dev_private *internals,
1911 struct rte_eth_dev *slave_eth_dev)
1913 struct bond_slave_details *slave_details =
1914 &internals->slaves[internals->slave_count];
1916 slave_details->port_id = slave_eth_dev->data->port_id;
1917 slave_details->last_link_status = 0;
1919 /* Mark slave devices that don't support interrupts so we can
1920 * compensate when we start the bond
1922 if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1923 slave_details->link_status_poll_enabled = 1;
1926 slave_details->link_status_wait_to_complete = 0;
1927 /* clean tlb_last_obytes when adding port for bonding device */
1928 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1929 sizeof(struct rte_ether_addr));
1933 bond_ethdev_primary_set(struct bond_dev_private *internals,
1934 uint16_t slave_port_id)
1938 if (internals->active_slave_count < 1)
1939 internals->current_primary_port = slave_port_id;
1941 /* Search bonded device slave ports for new proposed primary port */
1942 for (i = 0; i < internals->active_slave_count; i++) {
1943 if (internals->active_slaves[i] == slave_port_id)
1944 internals->current_primary_port = slave_port_id;
1949 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1952 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1954 struct bond_dev_private *internals;
1957 /* slave eth dev will be started by bonded device */
1958 if (check_for_bonded_ethdev(eth_dev)) {
1959 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1960 eth_dev->data->port_id);
1964 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1965 eth_dev->data->dev_started = 1;
1967 internals = eth_dev->data->dev_private;
1969 if (internals->slave_count == 0) {
1970 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1974 if (internals->user_defined_mac == 0) {
1975 struct rte_ether_addr *new_mac_addr = NULL;
1977 for (i = 0; i < internals->slave_count; i++)
1978 if (internals->slaves[i].port_id == internals->primary_port)
1979 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1981 if (new_mac_addr == NULL)
1984 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1985 RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1986 eth_dev->data->port_id);
1991 if (internals->mode == BONDING_MODE_8023AD) {
1992 if (internals->mode4.dedicated_queues.enabled == 1) {
1993 internals->mode4.dedicated_queues.rx_qid =
1994 eth_dev->data->nb_rx_queues;
1995 internals->mode4.dedicated_queues.tx_qid =
1996 eth_dev->data->nb_tx_queues;
2001 /* Reconfigure each slave device if starting bonded device */
2002 for (i = 0; i < internals->slave_count; i++) {
2003 struct rte_eth_dev *slave_ethdev =
2004 &(rte_eth_devices[internals->slaves[i].port_id]);
2005 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2007 "bonded port (%d) failed to reconfigure slave device (%d)",
2008 eth_dev->data->port_id,
2009 internals->slaves[i].port_id);
2012 /* We will need to poll for link status if any slave doesn't
2013 * support interrupts
2015 if (internals->slaves[i].link_status_poll_enabled)
2016 internals->link_status_polling_enabled = 1;
2019 /* start polling if needed */
2020 if (internals->link_status_polling_enabled) {
2022 internals->link_status_polling_interval_ms * 1000,
2023 bond_ethdev_slave_link_status_change_monitor,
2024 (void *)&rte_eth_devices[internals->port_id]);
2027 /* Update all slave devices' MACs */
2028 if (mac_address_slaves_update(eth_dev) != 0)
2031 if (internals->user_defined_primary_port)
2032 bond_ethdev_primary_set(internals, internals->primary_port);
2034 if (internals->mode == BONDING_MODE_8023AD)
2035 bond_mode_8023ad_start(eth_dev);
2037 if (internals->mode == BONDING_MODE_TLB ||
2038 internals->mode == BONDING_MODE_ALB)
2039 bond_tlb_enable(internals);
2044 eth_dev->data->dev_started = 0;
2049 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2053 if (dev->data->rx_queues != NULL) {
2054 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2055 rte_free(dev->data->rx_queues[i]);
2056 dev->data->rx_queues[i] = NULL;
2058 dev->data->nb_rx_queues = 0;
2061 if (dev->data->tx_queues != NULL) {
2062 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2063 rte_free(dev->data->tx_queues[i]);
2064 dev->data->tx_queues[i] = NULL;
2066 dev->data->nb_tx_queues = 0;
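/*
 * Stop the bonded device: shut down the mode specific machinery (802.3AD
 * state machines and their rings, or the TLB/ALB callback), mark the link
 * down, and stop and deactivate every slave that is still active.
 */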
2071 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2073 struct bond_dev_private *internals = eth_dev->data->dev_private;
2077 if (internals->mode == BONDING_MODE_8023AD) {
2081 bond_mode_8023ad_stop(eth_dev);
2083 /* Discard all messages to/from mode 4 state machines */
2084 for (i = 0; i < internals->active_slave_count; i++) {
2085 port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2087 RTE_ASSERT(port->rx_ring != NULL);
2088 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2089 rte_pktmbuf_free(pkt);
2091 RTE_ASSERT(port->tx_ring != NULL);
2092 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2093 rte_pktmbuf_free(pkt);
2097 if (internals->mode == BONDING_MODE_TLB ||
2098 internals->mode == BONDING_MODE_ALB) {
2099 bond_tlb_disable(internals);
2100 for (i = 0; i < internals->active_slave_count; i++)
2101 tlb_last_obytets[internals->active_slaves[i]] = 0;
2104 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2105 eth_dev->data->dev_started = 0;
2107 internals->link_status_polling_enabled = 0;
2108 for (i = 0; i < internals->slave_count; i++) {
2109 uint16_t slave_id = internals->slaves[i].port_id;
2110 if (find_slave_by_id(internals->active_slaves,
2111 internals->active_slave_count, slave_id) !=
2112 internals->active_slave_count) {
2113 internals->slaves[i].last_link_status = 0;
2114 ret = rte_eth_dev_stop(slave_id);
2116 RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2120 deactivate_slave(eth_dev, slave_id);
2128 bond_ethdev_close(struct rte_eth_dev *dev)
2130 struct bond_dev_private *internals = dev->data->dev_private;
2131 uint16_t bond_port_id = internals->port_id;
2133 struct rte_flow_error ferror;
2135 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2138 RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2139 while (internals->slave_count != skipped) {
2140 uint16_t port_id = internals->slaves[skipped].port_id;
2142 if (rte_eth_dev_stop(port_id) != 0) {
2143 RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2148 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2150 "Failed to remove port %d from bonded device %s",
2151 port_id, dev->device->name);
2155 bond_flow_ops.flush(dev, &ferror);
2156 bond_ethdev_free_queues(dev);
2157 rte_bitmap_reset(internals->vlan_filter_bmp);
2158 rte_bitmap_free(internals->vlan_filter_bmp);
2159 rte_free(internals->vlan_filter_bmpmem);
2161 /* Try to release the mempool used in mode 6. If the bond
2162 * device is not in mode 6, freeing a NULL pointer is not a problem.
2164 rte_mempool_free(internals->mode6.mempool);
2169 /* forward declaration */
2170 static int bond_ethdev_configure(struct rte_eth_dev *dev);
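/*
 * Report device capabilities for the bonded port. Queue counts and
 * descriptor limits are the minimum across all current slaves, and one
 * rx/tx queue is subtracted when mode 4 dedicated control queues are
 * enabled.
 */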
2173 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2175 struct bond_dev_private *internals = dev->data->dev_private;
2176 struct bond_slave_details slave;
2179 uint16_t max_nb_rx_queues = UINT16_MAX;
2180 uint16_t max_nb_tx_queues = UINT16_MAX;
2181 uint16_t max_rx_desc_lim = UINT16_MAX;
2182 uint16_t max_tx_desc_lim = UINT16_MAX;
2184 dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2186 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2187 internals->candidate_max_rx_pktlen :
2188 RTE_ETHER_MAX_JUMBO_FRAME_LEN;
	/* The max number of tx/rx queues that the bonded device can support is
	 * the minimum of the values reported by the bonded slaves, as all
	 * slaves must be capable of supporting the same number of tx/rx queues.
	 */
2194 if (internals->slave_count > 0) {
2195 struct rte_eth_dev_info slave_info;
2198 for (idx = 0; idx < internals->slave_count; idx++) {
2199 slave = internals->slaves[idx];
2200 ret = rte_eth_dev_info_get(slave.port_id, &slave_info);
2203 "%s: Error during getting device (port %u) info: %s\n",
2211 if (slave_info.max_rx_queues < max_nb_rx_queues)
2212 max_nb_rx_queues = slave_info.max_rx_queues;
2214 if (slave_info.max_tx_queues < max_nb_tx_queues)
2215 max_nb_tx_queues = slave_info.max_tx_queues;
2217 if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2218 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2220 if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2221 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2225 dev_info->max_rx_queues = max_nb_rx_queues;
2226 dev_info->max_tx_queues = max_nb_tx_queues;
2228 memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2229 sizeof(dev_info->default_rxconf));
2230 memcpy(&dev_info->default_txconf, &internals->default_txconf,
2231 sizeof(dev_info->default_txconf));
2233 dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2234 dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
	/*
	 * If dedicated HW queues are enabled for the link bonding device in
	 * LACP mode, then the maximum number of data path queues must be
	 * reduced by 1.
	 */
2240 if (internals->mode == BONDING_MODE_8023AD &&
2241 internals->mode4.dedicated_queues.enabled == 1) {
2242 dev_info->max_rx_queues--;
2243 dev_info->max_tx_queues--;
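	/*
	 * Illustrative example (figures are hypothetical): if two slaves
	 * report 8 and 16 max Rx/Tx queues, the bonded device advertises 8 of
	 * each; with dedicated 802.3ad control queues enabled it advertises 7,
	 * since one queue pair is reserved for LACP control traffic.
	 */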
2246 dev_info->min_rx_bufsize = 0;
2248 dev_info->rx_offload_capa = internals->rx_offload_capa;
2249 dev_info->tx_offload_capa = internals->tx_offload_capa;
2250 dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2251 dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2252 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2254 dev_info->reta_size = internals->reta_size;
2260 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2264 struct bond_dev_private *internals = dev->data->dev_private;
2266 /* don't do this while a slave is being added */
2267 rte_spinlock_lock(&internals->lock);
2270 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2272 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2274 for (i = 0; i < internals->slave_count; i++) {
2275 uint16_t port_id = internals->slaves[i].port_id;
2277 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2279 RTE_BOND_LOG(WARNING,
2280 "Setting VLAN filter on slave port %u not supported.",
2284 rte_spinlock_unlock(&internals->lock);
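	/*
	 * The VLAN bitmap above records the configured VLAN ids; the intent
	 * (handled outside this function) is that they can be replayed on
	 * slaves added after the filter was set.
	 */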
2289 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2290 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2291 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2293 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2294 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2295 0, dev->data->numa_node);
2296 if (bd_rx_q == NULL)
2299 bd_rx_q->queue_id = rx_queue_id;
2300 bd_rx_q->dev_private = dev->data->dev_private;
2302 bd_rx_q->nb_rx_desc = nb_rx_desc;
2304 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2305 bd_rx_q->mb_pool = mb_pool;
2307 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2313 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2314 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2315 const struct rte_eth_txconf *tx_conf)
2317 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2318 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2319 0, dev->data->numa_node);
2321 if (bd_tx_q == NULL)
2324 bd_tx_q->queue_id = tx_queue_id;
2325 bd_tx_q->dev_private = dev->data->dev_private;
2327 bd_tx_q->nb_tx_desc = nb_tx_desc;
2328 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2330 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
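	/*
	 * Queue setup on the bonded device only records the requested
	 * configuration (descriptor counts, rxconf/txconf, mempool); it is
	 * presumed to be applied to each slave's queues when the slaves are
	 * configured, rather than allocating any HW resources here.
	 */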
2336 bond_ethdev_rx_queue_release(void *queue)
2345 bond_ethdev_tx_queue_release(void *queue)
2354 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2356 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2357 struct bond_dev_private *internals;
2359 /* Default value for polling slave found is true as we don't want to
2360 * disable the polling thread if we cannot get the lock */
2361 int i, polling_slave_found = 1;
2366 bonded_ethdev = cb_arg;
2367 internals = bonded_ethdev->data->dev_private;
2369 if (!bonded_ethdev->data->dev_started ||
2370 !internals->link_status_polling_enabled)
	/* If the device is currently being configured then don't check the
	 * slaves' link status; wait until the next period.
	 */
2375 if (rte_spinlock_trylock(&internals->lock)) {
2376 if (internals->slave_count > 0)
2377 polling_slave_found = 0;
2379 for (i = 0; i < internals->slave_count; i++) {
2380 if (!internals->slaves[i].link_status_poll_enabled)
2383 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2384 polling_slave_found = 1;
2386 /* Update slave link status */
2387 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2388 internals->slaves[i].link_status_wait_to_complete);
			/* if the link status has changed since last checked
			 * then call the lsc callback
			 */
2392 if (slave_ethdev->data->dev_link.link_status !=
2393 internals->slaves[i].last_link_status) {
2394 internals->slaves[i].last_link_status =
2395 slave_ethdev->data->dev_link.link_status;
2397 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2398 RTE_ETH_EVENT_INTR_LSC,
2399 &bonded_ethdev->data->port_id,
2403 rte_spinlock_unlock(&internals->lock);
2406 if (polling_slave_found)
		/* Set alarm to continue monitoring link status of slave ethdevs */
2408 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2409 bond_ethdev_slave_link_status_change_monitor, cb_arg);
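	/*
	 * The alarm re-arms itself for as long as at least one slave still has
	 * link status polling enabled, so slave links are sampled every
	 * link_status_polling_interval_ms (10 ms by default, see
	 * DEFAULT_POLLING_INTERVAL_10_MS).
	 */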
2413 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2415 int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2417 struct bond_dev_private *bond_ctx;
2418 struct rte_eth_link slave_link;
2420 bool one_link_update_succeeded;
2424 bond_ctx = ethdev->data->dev_private;
2426 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2428 if (ethdev->data->dev_started == 0 ||
2429 bond_ctx->active_slave_count == 0) {
2430 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2434 ethdev->data->dev_link.link_status = ETH_LINK_UP;
	if (wait_to_complete)
		link_update = rte_eth_link_get;
	else
		link_update = rte_eth_link_get_nowait;
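	/*
	 * Per-mode link speed aggregation, with illustrative figures: given
	 * two active slaves at 10G and 25G, broadcast mode reports the minimum
	 * (10G), active-backup reports the current primary's speed, and the
	 * round-robin/balance/TLB/ALB/802.3ad modes report the sum (35G).
	 */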
2441 switch (bond_ctx->mode) {
2442 case BONDING_MODE_BROADCAST:
		/*
		 * Setting link speed to UINT32_MAX to ensure we pick up the
		 * value of the first active slave.
		 */
2447 ethdev->data->dev_link.link_speed = UINT32_MAX;
		/*
		 * The bonded link speed is the minimum of all the slaves' link
		 * speeds, as packet loss will occur on a slave if transmission
		 * at rates greater than its link speed is attempted.
		 */
2454 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2455 ret = link_update(bond_ctx->active_slaves[idx],
2458 ethdev->data->dev_link.link_speed =
2461 "Slave (port %u) link get failed: %s",
2462 bond_ctx->active_slaves[idx],
2463 rte_strerror(-ret));
2467 if (slave_link.link_speed <
2468 ethdev->data->dev_link.link_speed)
2469 ethdev->data->dev_link.link_speed =
2470 slave_link.link_speed;
2473 case BONDING_MODE_ACTIVE_BACKUP:
2474 /* Current primary slave */
2475 ret = link_update(bond_ctx->current_primary_port, &slave_link);
2477 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
2478 bond_ctx->current_primary_port,
2479 rte_strerror(-ret));
2483 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2485 case BONDING_MODE_8023AD:
2486 ethdev->data->dev_link.link_autoneg =
2487 bond_ctx->mode4.slave_link.link_autoneg;
2488 ethdev->data->dev_link.link_duplex =
2489 bond_ctx->mode4.slave_link.link_duplex;
		/* Fall through - to update link speed */
2492 case BONDING_MODE_ROUND_ROBIN:
2493 case BONDING_MODE_BALANCE:
2494 case BONDING_MODE_TLB:
2495 case BONDING_MODE_ALB:
		/*
		 * In these modes the maximum theoretical link speed is the sum
		 * of all the slaves' link speeds.
		 */
2501 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2502 one_link_update_succeeded = false;
2504 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2505 ret = link_update(bond_ctx->active_slaves[idx],
2509 "Slave (port %u) link get failed: %s",
2510 bond_ctx->active_slaves[idx],
2511 rte_strerror(-ret));
2515 one_link_update_succeeded = true;
2516 ethdev->data->dev_link.link_speed +=
2517 slave_link.link_speed;
2520 if (!one_link_update_succeeded) {
2521 RTE_BOND_LOG(ERR, "All slaves link get failed");
2532 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2534 struct bond_dev_private *internals = dev->data->dev_private;
2535 struct rte_eth_stats slave_stats;
2538 for (i = 0; i < internals->slave_count; i++) {
2539 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2541 stats->ipackets += slave_stats.ipackets;
2542 stats->opackets += slave_stats.opackets;
2543 stats->ibytes += slave_stats.ibytes;
2544 stats->obytes += slave_stats.obytes;
2545 stats->imissed += slave_stats.imissed;
2546 stats->ierrors += slave_stats.ierrors;
2547 stats->oerrors += slave_stats.oerrors;
2548 stats->rx_nombuf += slave_stats.rx_nombuf;
2550 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2551 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2552 stats->q_opackets[j] += slave_stats.q_opackets[j];
2553 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2554 stats->q_obytes[j] += slave_stats.q_obytes[j];
2555 stats->q_errors[j] += slave_stats.q_errors[j];
2564 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2566 struct bond_dev_private *internals = dev->data->dev_private;
2571 for (i = 0, err = 0; i < internals->slave_count; i++) {
2572 ret = rte_eth_stats_reset(internals->slaves[i].port_id);
2581 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2583 struct bond_dev_private *internals = eth_dev->data->dev_private;
2588 switch (internals->mode) {
2589 /* Promiscuous mode is propagated to all slaves */
2590 case BONDING_MODE_ROUND_ROBIN:
2591 case BONDING_MODE_BALANCE:
2592 case BONDING_MODE_BROADCAST:
2593 case BONDING_MODE_8023AD: {
2594 unsigned int slave_ok = 0;
2596 for (i = 0; i < internals->slave_count; i++) {
2597 port_id = internals->slaves[i].port_id;
2599 ret = rte_eth_promiscuous_enable(port_id);
2602 "Failed to enable promiscuous mode for port %u: %s",
2603 port_id, rte_strerror(-ret));
		/*
		 * Report success if the operation succeeded on at least one
		 * slave. Otherwise return the last error code.
		 */
2615 /* Promiscuous mode is propagated only to primary slave */
2616 case BONDING_MODE_ACTIVE_BACKUP:
2617 case BONDING_MODE_TLB:
2618 case BONDING_MODE_ALB:
2620 /* Do not touch promisc when there cannot be primary ports */
2621 if (internals->slave_count == 0)
2623 port_id = internals->current_primary_port;
2624 ret = rte_eth_promiscuous_enable(port_id);
2627 "Failed to enable promiscuous mode for port %u: %s",
2628 port_id, rte_strerror(-ret));
2635 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2637 struct bond_dev_private *internals = dev->data->dev_private;
2642 switch (internals->mode) {
2643 /* Promiscuous mode is propagated to all slaves */
2644 case BONDING_MODE_ROUND_ROBIN:
2645 case BONDING_MODE_BALANCE:
2646 case BONDING_MODE_BROADCAST:
2647 case BONDING_MODE_8023AD: {
2648 unsigned int slave_ok = 0;
2650 for (i = 0; i < internals->slave_count; i++) {
2651 port_id = internals->slaves[i].port_id;
2653 if (internals->mode == BONDING_MODE_8023AD &&
2654 bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2655 BOND_8023AD_FORCED_PROMISC) {
2659 ret = rte_eth_promiscuous_disable(port_id);
2662 "Failed to disable promiscuous mode for port %u: %s",
2663 port_id, rte_strerror(-ret));
		/*
		 * Report success if the operation succeeded on at least one
		 * slave. Otherwise return the last error code.
		 */
2675 /* Promiscuous mode is propagated only to primary slave */
2676 case BONDING_MODE_ACTIVE_BACKUP:
2677 case BONDING_MODE_TLB:
2678 case BONDING_MODE_ALB:
2680 /* Do not touch promisc when there cannot be primary ports */
2681 if (internals->slave_count == 0)
2683 port_id = internals->current_primary_port;
2684 ret = rte_eth_promiscuous_disable(port_id);
2687 "Failed to disable promiscuous mode for port %u: %s",
2688 port_id, rte_strerror(-ret));
2695 bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev)
2697 struct bond_dev_private *internals = eth_dev->data->dev_private;
2702 switch (internals->mode) {
2703 /* allmulti mode is propagated to all slaves */
2704 case BONDING_MODE_ROUND_ROBIN:
2705 case BONDING_MODE_BALANCE:
2706 case BONDING_MODE_BROADCAST:
2707 case BONDING_MODE_8023AD: {
2708 unsigned int slave_ok = 0;
2710 for (i = 0; i < internals->slave_count; i++) {
2711 port_id = internals->slaves[i].port_id;
2713 ret = rte_eth_allmulticast_enable(port_id);
2716 "Failed to enable allmulti mode for port %u: %s",
2717 port_id, rte_strerror(-ret));
		/*
		 * Report success if the operation succeeded on at least one
		 * slave. Otherwise return the last error code.
		 */
2729 /* allmulti mode is propagated only to primary slave */
2730 case BONDING_MODE_ACTIVE_BACKUP:
2731 case BONDING_MODE_TLB:
2732 case BONDING_MODE_ALB:
2734 /* Do not touch allmulti when there cannot be primary ports */
2735 if (internals->slave_count == 0)
2737 port_id = internals->current_primary_port;
2738 ret = rte_eth_allmulticast_enable(port_id);
2741 "Failed to enable allmulti mode for port %u: %s",
2742 port_id, rte_strerror(-ret));
2749 bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev)
2751 struct bond_dev_private *internals = eth_dev->data->dev_private;
2756 switch (internals->mode) {
2757 /* allmulti mode is propagated to all slaves */
2758 case BONDING_MODE_ROUND_ROBIN:
2759 case BONDING_MODE_BALANCE:
2760 case BONDING_MODE_BROADCAST:
2761 case BONDING_MODE_8023AD: {
2762 unsigned int slave_ok = 0;
2764 for (i = 0; i < internals->slave_count; i++) {
2765 uint16_t port_id = internals->slaves[i].port_id;
2767 if (internals->mode == BONDING_MODE_8023AD &&
2768 bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2769 BOND_8023AD_FORCED_ALLMULTI)
2772 ret = rte_eth_allmulticast_disable(port_id);
2775 "Failed to disable allmulti mode for port %u: %s",
2776 port_id, rte_strerror(-ret));
		/*
		 * Report success if the operation succeeded on at least one
		 * slave. Otherwise return the last error code.
		 */
2788 /* allmulti mode is propagated only to primary slave */
2789 case BONDING_MODE_ACTIVE_BACKUP:
2790 case BONDING_MODE_TLB:
2791 case BONDING_MODE_ALB:
2793 /* Do not touch allmulti when there cannot be primary ports */
2794 if (internals->slave_count == 0)
2796 port_id = internals->current_primary_port;
2797 ret = rte_eth_allmulticast_disable(port_id);
2800 "Failed to disable allmulti mode for port %u: %s",
2801 port_id, rte_strerror(-ret));
2808 bond_ethdev_delayed_lsc_propagation(void *arg)
2813 rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2814 RTE_ETH_EVENT_INTR_LSC, NULL);
2818 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2819 void *param, void *ret_param __rte_unused)
2821 struct rte_eth_dev *bonded_eth_dev;
2822 struct bond_dev_private *internals;
2823 struct rte_eth_link link;
2827 uint8_t lsc_flag = 0;
2828 int valid_slave = 0;
2829 uint16_t active_pos;
2832 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2835 bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2837 if (check_for_bonded_ethdev(bonded_eth_dev))
2840 internals = bonded_eth_dev->data->dev_private;
2842 /* If the device isn't started don't handle interrupts */
2843 if (!bonded_eth_dev->data->dev_started)
2846 /* verify that port_id is a valid slave of bonded port */
2847 for (i = 0; i < internals->slave_count; i++) {
2848 if (internals->slaves[i].port_id == port_id) {
	/* Serialize parallel lsc callback invocations, whether caused by a real
	 * link event from the slave PMDs or by the bonding PMD itself.
	 */
2860 rte_spinlock_lock(&internals->lsc_lock);
2862 /* Search for port in active port list */
2863 active_pos = find_slave_by_id(internals->active_slaves,
2864 internals->active_slave_count, port_id);
2866 ret = rte_eth_link_get_nowait(port_id, &link);
2868 RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id);
2870 if (ret == 0 && link.link_status) {
2871 if (active_pos < internals->active_slave_count)
		/* check link state properties if bonded link is up */
2875 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2876 if (link_properties_valid(bonded_eth_dev, &link) != 0)
2877 RTE_BOND_LOG(ERR, "Invalid link properties "
2878 "for slave %d in bonding mode %d",
2879 port_id, internals->mode);
2881 /* inherit slave link properties */
2882 link_properties_set(bonded_eth_dev, &link);
		/* If no active slave ports exist then set this port to be
		 * the primary port.
		 */
2888 if (internals->active_slave_count < 1) {
2889 /* If first active slave, then change link status */
2890 bonded_eth_dev->data->dev_link.link_status =
2892 internals->current_primary_port = port_id;
2895 mac_address_slaves_update(bonded_eth_dev);
2898 activate_slave(bonded_eth_dev, port_id);
		/* If the user has defined the primary port then default to
		 * using it.
		 */
2903 if (internals->user_defined_primary_port &&
2904 internals->primary_port == port_id)
2905 bond_ethdev_primary_set(internals, port_id);
2907 if (active_pos == internals->active_slave_count)
2910 /* Remove from active slave list */
2911 deactivate_slave(bonded_eth_dev, port_id);
2913 if (internals->active_slave_count < 1)
		/* Update primary id: take the first active slave from the list
		 * or, if none is available, fall back to the configured
		 * primary port.
		 */
2918 if (port_id == internals->current_primary_port) {
2919 if (internals->active_slave_count > 0)
2920 bond_ethdev_primary_set(internals,
2921 internals->active_slaves[0]);
2923 internals->current_primary_port = internals->primary_port;
2924 mac_address_slaves_update(bonded_eth_dev);
	/*
	 * Update bonded device link properties after any change to active
	 * slaves.
	 */
2933 bond_ethdev_link_update(bonded_eth_dev, 0);
2936 /* Cancel any possible outstanding interrupts if delays are enabled */
2937 if (internals->link_up_delay_ms > 0 ||
2938 internals->link_down_delay_ms > 0)
2939 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2942 if (bonded_eth_dev->data->dev_link.link_status) {
2943 if (internals->link_up_delay_ms > 0)
2944 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2945 bond_ethdev_delayed_lsc_propagation,
2946 (void *)bonded_eth_dev);
2948 rte_eth_dev_callback_process(bonded_eth_dev,
2949 RTE_ETH_EVENT_INTR_LSC,
2953 if (internals->link_down_delay_ms > 0)
2954 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2955 bond_ethdev_delayed_lsc_propagation,
2956 (void *)bonded_eth_dev);
2958 rte_eth_dev_callback_process(bonded_eth_dev,
2959 RTE_ETH_EVENT_INTR_LSC,
2964 rte_spinlock_unlock(&internals->lsc_lock);
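	/*
	 * Illustrative timing (values hypothetical): with link_up_delay_ms set
	 * to 500, the LSC callback to the application is deferred by 500 ms
	 * via rte_eal_alarm_set(); a link flap shorter than that delay is
	 * absorbed by the rte_eal_alarm_cancel() call above.
	 */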
2970 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2971 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2975 int slave_reta_size;
2976 unsigned reta_count;
2977 struct bond_dev_private *internals = dev->data->dev_private;
2979 if (reta_size != internals->reta_size)
2982 /* Copy RETA table */
2983 reta_count = (reta_size + RTE_RETA_GROUP_SIZE - 1) /
2984 RTE_RETA_GROUP_SIZE;
2986 for (i = 0; i < reta_count; i++) {
2987 internals->reta_conf[i].mask = reta_conf[i].mask;
2988 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2989 if ((reta_conf[i].mask >> j) & 0x01)
2990 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2993 /* Fill rest of array */
2994 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2995 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2996 sizeof(internals->reta_conf[0]) * reta_count);
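	/*
	 * Worked example (sizes hypothetical): with reta_size = 128 and
	 * RTE_RETA_GROUP_SIZE = 64, reta_count = 2; the two groups copied from
	 * the caller are then replicated across the rest of
	 * internals->reta_conf so that a slave with a larger RETA still
	 * receives a consistent table.
	 */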
2998 /* Propagate RETA over slaves */
2999 for (i = 0; i < internals->slave_count; i++) {
3000 slave_reta_size = internals->slaves[i].reta_size;
3001 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
3002 &internals->reta_conf[0], slave_reta_size);
3011 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
3012 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
3015 struct bond_dev_private *internals = dev->data->dev_private;
3017 if (reta_size != internals->reta_size)
3020 /* Copy RETA table */
3021 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
3022 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3023 if ((reta_conf[i].mask >> j) & 0x01)
3024 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
3030 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
3031 struct rte_eth_rss_conf *rss_conf)
3034 struct bond_dev_private *internals = dev->data->dev_private;
3035 struct rte_eth_rss_conf bond_rss_conf;
3037 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
3039 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
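	/*
	 * flow_type_rss_offloads is assumed to hold the hash types supported
	 * by every slave, so the requested rss_hf is reduced to that common
	 * subset here (e.g. a request for IP and TCP hashing falls back to
	 * IP-only if one slave lacks TCP hashing).
	 */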
3041 if (bond_rss_conf.rss_hf != 0)
3042 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
3044 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
3045 sizeof(internals->rss_key)) {
3046 if (bond_rss_conf.rss_key_len == 0)
3047 bond_rss_conf.rss_key_len = 40;
3048 internals->rss_key_len = bond_rss_conf.rss_key_len;
3049 memcpy(internals->rss_key, bond_rss_conf.rss_key,
3050 internals->rss_key_len);
3053 for (i = 0; i < internals->slave_count; i++) {
3054 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
3064 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
3065 struct rte_eth_rss_conf *rss_conf)
3067 struct bond_dev_private *internals = dev->data->dev_private;
3069 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
3070 rss_conf->rss_key_len = internals->rss_key_len;
3071 if (rss_conf->rss_key)
3072 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
3078 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
3080 struct rte_eth_dev *slave_eth_dev;
3081 struct bond_dev_private *internals = dev->data->dev_private;
3084 rte_spinlock_lock(&internals->lock);
3086 for (i = 0; i < internals->slave_count; i++) {
3087 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3088 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
3089 rte_spinlock_unlock(&internals->lock);
3093 for (i = 0; i < internals->slave_count; i++) {
3094 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
3096 rte_spinlock_unlock(&internals->lock);
3101 rte_spinlock_unlock(&internals->lock);
3106 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
3107 struct rte_ether_addr *addr)
3109 if (mac_address_set(dev, addr)) {
3110 RTE_BOND_LOG(ERR, "Failed to update MAC address");
3118 bond_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
3119 const struct rte_flow_ops **ops)
3121 *ops = &bond_flow_ops;
3126 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
3127 struct rte_ether_addr *mac_addr,
3128 __rte_unused uint32_t index, uint32_t vmdq)
3130 struct rte_eth_dev *slave_eth_dev;
3131 struct bond_dev_private *internals = dev->data->dev_private;
3134 rte_spinlock_lock(&internals->lock);
3136 for (i = 0; i < internals->slave_count; i++) {
3137 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3138 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
3139 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
3145 for (i = 0; i < internals->slave_count; i++) {
3146 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
3150 for (i--; i >= 0; i--)
3151 rte_eth_dev_mac_addr_remove(
3152 internals->slaves[i].port_id, mac_addr);
3159 rte_spinlock_unlock(&internals->lock);
3164 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3166 struct rte_eth_dev *slave_eth_dev;
3167 struct bond_dev_private *internals = dev->data->dev_private;
3170 rte_spinlock_lock(&internals->lock);
3172 for (i = 0; i < internals->slave_count; i++) {
3173 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3174 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3178 struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3180 for (i = 0; i < internals->slave_count; i++)
3181 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3185 rte_spinlock_unlock(&internals->lock);
3188 const struct eth_dev_ops default_dev_ops = {
3189 .dev_start = bond_ethdev_start,
3190 .dev_stop = bond_ethdev_stop,
3191 .dev_close = bond_ethdev_close,
3192 .dev_configure = bond_ethdev_configure,
3193 .dev_infos_get = bond_ethdev_info,
3194 .vlan_filter_set = bond_ethdev_vlan_filter_set,
3195 .rx_queue_setup = bond_ethdev_rx_queue_setup,
3196 .tx_queue_setup = bond_ethdev_tx_queue_setup,
3197 .rx_queue_release = bond_ethdev_rx_queue_release,
3198 .tx_queue_release = bond_ethdev_tx_queue_release,
3199 .link_update = bond_ethdev_link_update,
3200 .stats_get = bond_ethdev_stats_get,
3201 .stats_reset = bond_ethdev_stats_reset,
3202 .promiscuous_enable = bond_ethdev_promiscuous_enable,
3203 .promiscuous_disable = bond_ethdev_promiscuous_disable,
3204 .allmulticast_enable = bond_ethdev_allmulticast_enable,
3205 .allmulticast_disable = bond_ethdev_allmulticast_disable,
3206 .reta_update = bond_ethdev_rss_reta_update,
3207 .reta_query = bond_ethdev_rss_reta_query,
3208 .rss_hash_update = bond_ethdev_rss_hash_update,
3209 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get,
3210 .mtu_set = bond_ethdev_mtu_set,
3211 .mac_addr_set = bond_ethdev_mac_address_set,
3212 .mac_addr_add = bond_ethdev_mac_addr_add,
3213 .mac_addr_remove = bond_ethdev_mac_addr_remove,
3214 .flow_ops_get = bond_flow_ops_get
3218 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3220 const char *name = rte_vdev_device_name(dev);
3221 uint8_t socket_id = dev->device.numa_node;
3222 struct bond_dev_private *internals = NULL;
3223 struct rte_eth_dev *eth_dev = NULL;
3224 uint32_t vlan_filter_bmp_size;
	/* Now do all data allocation - for the eth_dev structure, dummy PCI
	 * driver and internal (private) data.
	 */
3230 /* reserve an ethdev entry */
3231 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3232 if (eth_dev == NULL) {
3233 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3237 internals = eth_dev->data->dev_private;
3238 eth_dev->data->nb_rx_queues = (uint16_t)1;
3239 eth_dev->data->nb_tx_queues = (uint16_t)1;
3241 /* Allocate memory for storing MAC addresses */
3242 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
3243 BOND_MAX_MAC_ADDRS, 0, socket_id);
3244 if (eth_dev->data->mac_addrs == NULL) {
3246 "Failed to allocate %u bytes needed to store MAC addresses",
3247 RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3251 eth_dev->dev_ops = &default_dev_ops;
3252 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
3253 RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
3255 rte_spinlock_init(&internals->lock);
3256 rte_spinlock_init(&internals->lsc_lock);
3258 internals->port_id = eth_dev->data->port_id;
3259 internals->mode = BONDING_MODE_INVALID;
3260 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3261 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3262 internals->burst_xmit_hash = burst_xmit_l2_hash;
3263 internals->user_defined_mac = 0;
3265 internals->link_status_polling_enabled = 0;
3267 internals->link_status_polling_interval_ms =
3268 DEFAULT_POLLING_INTERVAL_10_MS;
3269 internals->link_down_delay_ms = 0;
3270 internals->link_up_delay_ms = 0;
3272 internals->slave_count = 0;
3273 internals->active_slave_count = 0;
3274 internals->rx_offload_capa = 0;
3275 internals->tx_offload_capa = 0;
3276 internals->rx_queue_offload_capa = 0;
3277 internals->tx_queue_offload_capa = 0;
3278 internals->candidate_max_rx_pktlen = 0;
3279 internals->max_rx_pktlen = 0;
	/* Initially allow any RSS hash type to be chosen */
3282 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3284 memset(&internals->default_rxconf, 0,
3285 sizeof(internals->default_rxconf));
3286 memset(&internals->default_txconf, 0,
3287 sizeof(internals->default_txconf));
3289 memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3290 memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3292 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3293 memset(internals->slaves, 0, sizeof(internals->slaves));
3295 TAILQ_INIT(&internals->flow_list);
3296 internals->flow_isolated_valid = 0;
3298 /* Set mode 4 default configuration */
3299 bond_mode_8023ad_setup(eth_dev, NULL);
3300 if (bond_ethdev_mode_set(eth_dev, mode)) {
3301 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3302 eth_dev->data->port_id, mode);
3306 vlan_filter_bmp_size =
3307 rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3308 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3309 RTE_CACHE_LINE_SIZE);
3310 if (internals->vlan_filter_bmpmem == NULL) {
3312 "Failed to allocate vlan bitmap for bonded device %u",
3313 eth_dev->data->port_id);
3317 internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3318 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3319 if (internals->vlan_filter_bmp == NULL) {
3321 "Failed to init vlan bitmap for bonded device %u",
3322 eth_dev->data->port_id);
3323 rte_free(internals->vlan_filter_bmpmem);
3327 return eth_dev->data->port_id;
3330 rte_free(internals);
3331 if (eth_dev != NULL)
3332 eth_dev->data->dev_private = NULL;
3333 rte_eth_dev_release_port(eth_dev);
3338 bond_probe(struct rte_vdev_device *dev)
3341 struct bond_dev_private *internals;
3342 struct rte_kvargs *kvlist;
3343 uint8_t bonding_mode;
3344 int arg_count, port_id;
3347 struct rte_eth_dev *eth_dev;
3352 name = rte_vdev_device_name(dev);
3353 RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3355 if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3356 eth_dev = rte_eth_dev_attach_secondary(name);
3358 RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3361 /* TODO: request info from primary to set up Rx and Tx */
3362 eth_dev->dev_ops = &default_dev_ops;
3363 eth_dev->device = &dev->device;
3364 rte_eth_dev_probing_finish(eth_dev);
3368 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3369 pmd_bond_init_valid_arguments);
3373 /* Parse link bonding mode */
3374 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3375 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3376 &bond_ethdev_parse_slave_mode_kvarg,
3377 &bonding_mode) != 0) {
3378 RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3383 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3388 /* Parse socket id to create bonding device on */
3389 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3390 if (arg_count == 1) {
3391 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3392 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3394 RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3395 "bonded device %s", name);
3398 } else if (arg_count > 1) {
3399 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3400 "bonded device %s", name);
3403 socket_id = rte_socket_id();
3406 dev->device.numa_node = socket_id;
3408 /* Create link bonding eth device */
3409 port_id = bond_alloc(dev, bonding_mode);
		RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
				"socket %u.", name, bonding_mode, socket_id);
3415 internals = rte_eth_devices[port_id].data->dev_private;
3416 internals->kvlist = kvlist;
3418 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3419 if (rte_kvargs_process(kvlist,
3420 PMD_BOND_AGG_MODE_KVARG,
3421 &bond_ethdev_parse_slave_agg_mode_kvarg,
3424 "Failed to parse agg selection mode for bonded device %s",
3429 if (internals->mode == BONDING_MODE_8023AD)
3430 internals->mode4.agg_selection = agg_mode;
3432 internals->mode4.agg_selection = AGG_STABLE;
3435 rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
	RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
			"socket %u.", name, port_id, bonding_mode, socket_id);
3441 rte_kvargs_free(kvlist);
3447 bond_remove(struct rte_vdev_device *dev)
3449 struct rte_eth_dev *eth_dev;
3450 struct bond_dev_private *internals;
3457 name = rte_vdev_device_name(dev);
3458 RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3460 /* find an ethdev entry */
3461 eth_dev = rte_eth_dev_allocated(name);
3462 if (eth_dev == NULL)
3463 return 0; /* port already released */
3465 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3466 return rte_eth_dev_release_port(eth_dev);
3468 RTE_ASSERT(eth_dev->device == &dev->device);
3470 internals = eth_dev->data->dev_private;
3471 if (internals->slave_count != 0)
3474 if (eth_dev->data->dev_started == 1) {
3475 ret = bond_ethdev_stop(eth_dev);
3476 bond_ethdev_close(eth_dev);
3478 if (internals->kvlist != NULL)
3479 rte_kvargs_free(internals->kvlist);
3480 rte_eth_dev_release_port(eth_dev);
/* This function resolves the slave port ids after all the other pdevs and
 * vdevs have been allocated.
 */
3488 bond_ethdev_configure(struct rte_eth_dev *dev)
3490 const char *name = dev->device->name;
3491 struct bond_dev_private *internals = dev->data->dev_private;
3492 struct rte_kvargs *kvlist = internals->kvlist;
3494 uint16_t port_id = dev - rte_eth_devices;
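	/*
	 * 40-byte default Toeplitz hash key: this appears to be the widely
	 * used sample RSS key shipped with many NIC PMDs, applied when the
	 * application enables RSS without supplying a key of its own.
	 */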
3497 static const uint8_t default_rss_key[40] = {
3498 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3499 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3500 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3501 0xBE, 0xAC, 0x01, 0xFA
	/*
	 * If RSS is enabled, fill the RETA table with default values and set
	 * the key to the value specified in the port RSS configuration.
	 * Fall back to the default RSS key if no key is specified.
	 */
3511 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3512 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3513 internals->rss_key_len =
3514 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3515 memcpy(internals->rss_key,
3516 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3517 internals->rss_key_len);
3519 internals->rss_key_len = sizeof(default_rss_key);
3520 memcpy(internals->rss_key, default_rss_key,
3521 internals->rss_key_len);
3524 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3525 internals->reta_conf[i].mask = ~0LL;
3526 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3527 internals->reta_conf[i].reta[j] =
3528 (i * RTE_RETA_GROUP_SIZE + j) %
3529 dev->data->nb_rx_queues;
3533 /* set the max_rx_pktlen */
3534 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
	/*
	 * If there is no kvlist, this bonded device has been created through
	 * the bonding API rather than from EAL devargs.
	 */
3543 /* Parse MAC address for bonded device */
3544 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3545 if (arg_count == 1) {
3546 struct rte_ether_addr bond_mac;
3548 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3549 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3550 RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3555 /* Set MAC address */
3556 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3558 "Failed to set mac address on bonded device %s",
3562 } else if (arg_count > 1) {
3564 "MAC address can be specified only once for bonded device %s",
3569 /* Parse/set balance mode transmit policy */
3570 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3571 if (arg_count == 1) {
3572 uint8_t xmit_policy;
3574 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3575 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3578 "Invalid xmit policy specified for bonded device %s",
		/* Set balance mode transmit policy */
3584 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3586 "Failed to set balance xmit policy on bonded device %s",
3590 } else if (arg_count > 1) {
3592 "Transmit policy can be specified only once for bonded device %s",
3597 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3598 if (rte_kvargs_process(kvlist,
3599 PMD_BOND_AGG_MODE_KVARG,
3600 &bond_ethdev_parse_slave_agg_mode_kvarg,
3603 "Failed to parse agg selection mode for bonded device %s",
3606 if (internals->mode == BONDING_MODE_8023AD) {
3607 int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3611 "Invalid args for agg selection set for bonded device %s",
3618 /* Parse/add slave ports to bonded device */
3619 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3620 struct bond_ethdev_slave_ports slave_ports;
3623 memset(&slave_ports, 0, sizeof(slave_ports));
3625 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3626 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3628 "Failed to parse slave ports for bonded device %s",
3633 for (i = 0; i < slave_ports.slave_count; i++) {
3634 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3636 "Failed to add port %d as slave to bonded device %s",
3637 slave_ports.slaves[i], name);
3642 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
	/* Parse/set primary slave port id */
3647 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3648 if (arg_count == 1) {
3649 uint16_t primary_slave_port_id;
3651 if (rte_kvargs_process(kvlist,
3652 PMD_BOND_PRIMARY_SLAVE_KVARG,
3653 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3654 &primary_slave_port_id) < 0) {
3656 "Invalid primary slave port id specified for bonded device %s",
		/* Set primary slave port id */
3662 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3665 "Failed to set primary slave port %d on bonded device %s",
3666 primary_slave_port_id, name);
3669 } else if (arg_count > 1) {
3671 "Primary slave can be specified only once for bonded device %s",
3676 /* Parse link status monitor polling interval */
3677 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3678 if (arg_count == 1) {
3679 uint32_t lsc_poll_interval_ms;
3681 if (rte_kvargs_process(kvlist,
3682 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3683 &bond_ethdev_parse_time_ms_kvarg,
3684 &lsc_poll_interval_ms) < 0) {
3686 "Invalid lsc polling interval value specified for bonded"
3687 " device %s", name);
3691 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3694 "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3695 lsc_poll_interval_ms, name);
3698 } else if (arg_count > 1) {
3700 "LSC polling interval can be specified only once for bonded"
3701 " device %s", name);
3705 /* Parse link up interrupt propagation delay */
3706 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3707 if (arg_count == 1) {
3708 uint32_t link_up_delay_ms;
3710 if (rte_kvargs_process(kvlist,
3711 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3712 &bond_ethdev_parse_time_ms_kvarg,
3713 &link_up_delay_ms) < 0) {
3715 "Invalid link up propagation delay value specified for"
3716 " bonded device %s", name);
		/* Set link up propagation delay */
3721 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3724 "Failed to set link up propagation delay (%u ms) on bonded"
3725 " device %s", link_up_delay_ms, name);
3728 } else if (arg_count > 1) {
3730 "Link up propagation delay can be specified only once for"
3731 " bonded device %s", name);
3735 /* Parse link down interrupt propagation delay */
3736 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3737 if (arg_count == 1) {
3738 uint32_t link_down_delay_ms;
3740 if (rte_kvargs_process(kvlist,
3741 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3742 &bond_ethdev_parse_time_ms_kvarg,
3743 &link_down_delay_ms) < 0) {
3745 "Invalid link down propagation delay value specified for"
3746 " bonded device %s", name);
		/* Set link down propagation delay */
3751 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3754 "Failed to set link down propagation delay (%u ms) on bonded device %s",
3755 link_down_delay_ms, name);
3758 } else if (arg_count > 1) {
3760 "Link down propagation delay can be specified only once for bonded device %s",
3768 struct rte_vdev_driver pmd_bond_drv = {
3769 .probe = bond_probe,
3770 .remove = bond_remove,
3773 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3774 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3776 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3780 "xmit_policy=[l2 | l23 | l34] "
3781 "agg_mode=[count | stable | bandwidth] "
3784 "lsc_poll_period_ms=<int> "
3786 "down_delay=<int>");
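/*
 * Illustrative usage (the exact device arguments below are an example, with
 * kvarg names taken from the bonding PMD documentation): a bonded vdev is
 * typically created from the EAL command line, e.g.
 *   --vdev 'net_bonding0,mode=4,slave=0000:02:00.0,slave=0000:03:00.0,
 *           xmit_policy=l23,lsc_poll_period_ms=100'
 * using the same keys parsed by bond_probe() and bond_ethdev_configure().
 */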
/* We can't use RTE_LOG_REGISTER_DEFAULT because of the forced name for
 * this library, see meson.build.
 */
3791 RTE_LOG_REGISTER(bond_logtype, pmd.net.bonding, NOTICE);