/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
		rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}
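
/*
 * Worked example (illustrative, not from the original source): for a QinQ
 * frame with EtherTypes 0x88A8/0x8100/0x0800, *proto starts as the outer
 * tag, the first VLAN header advances vlan_offset by 4 bytes, the nested
 * 0x8100 tag adds 4 more, and *proto finishes as the inner EtherType
 * (IPv4), so the caller can locate the L3 header at (eth_hdr + 1) + 8 bytes.
 */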

static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_total = 0;
	uint16_t slave_count;
	uint16_t active_slave;
	uint16_t i;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave =
			rte_eth_rx_burst(internals->active_slaves[active_slave],
					 bd_rx_q->queue_id,
					 bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}
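
/*
 * Note: polling starts at internals->active_slave and the index advances
 * (with wraparound) after every burst, so no slave is permanently polled
 * first and the nb_pkts budget is shared fairly across slaves over time.
 */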

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}

static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}
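
/*
 * The predicate above only classifies a frame as a slow (LACP/marker)
 * packet when it carries no VLAN tag and its EtherType is ETHER_TYPE_SLOW
 * with a marker or LACP subtype; VLAN-tagged frames are never treated as
 * LACPDUs.
 */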

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};

int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: slave %d capabilities don't allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify that all slaves in the bond support flow director */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}
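
/*
 * Usage sketch (application side; a hedged illustration, not part of this
 * driver): with the public API from rte_eth_bond_8023ad.h, an application
 * opts into this hardware slow-path filtering before configuring and
 * starting the bonded port, roughly:
 *
 *	if (rte_eth_bond_8023ad_dedicated_queues_enable(bond_port_id) != 0)
 *		... fall back to the software slow-path (mode 4 default) ...
 *
 * The flow rule created above is then installed per slave when the bonded
 * device is started and its slaves are (re)configured.
 */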

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint16_t active_slave;
	uint16_t i;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Read packets from this slave */
		num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
				bd_rx_q->queue_id,
				bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;

		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	/* 2-D array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves;
	 * sized by nb_bufs (one entry per packet), not by port count */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (unlikely(dist_slave_count < 1))
		return 0;

	/*
	 * Map each packet to an output slave using the hash chosen by the
	 * configured transmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < dist_slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
			       &slave_bufs[i][slave_tx_count],
			       slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}
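
/*
 * Convention shared by the tx burst handlers in this file: packets that a
 * slave fails to send are copied back to the tail of bufs, so on return
 * the first total_tx_count entries have been consumed and the caller still
 * owns the trailing nb_bufs - total_tx_count mbufs.
 */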

static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct rte_eth_dev *bonded_eth_dev =
					&rte_eth_devices[internals->port_id];
	struct ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;	/* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t subtype;
	uint16_t i;
	uint16_t j;
	uint16_t k;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
					 COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {

			/* If packet is not pure L2 and is known, skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove the packet from the array if it is a slow packet, or if
			 * the slave is not in collecting state, or if the bonding
			 * interface is not in promiscuous mode and the destination
			 * address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
				!collecting ||
				(!promisc &&
					!is_multicast_ether_addr(&hdr->d_addr) &&
					!is_same_ether_addr(bond_mac,
							    &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
						internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}
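
/*
 * Note on the loop above: slow-protocol frames, and frames filtered out by
 * the collecting/promiscuous/MAC checks, are removed in place via
 * memmove(), which keeps bufs[] densely packed at the cost of O(n) per
 * removal; control frames are rare relative to data, so this is cheap in
 * practice.
 */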

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;
#endif

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		strlcpy(buf, "ARP Request", buf_len);
		return;
	case ARP_OP_REPLY:
		strlcpy(buf, "ARP Reply", buf_len);
		return;
	case ARP_OP_REVREQUEST:
		strlcpy(buf, "Reverse ARP Request", buf_len);
		return;
	case ARP_OP_REVREPLY:
		strlcpy(buf, "Reverse ARP Reply", buf_len);
		return;
	case ARP_OP_INVREQUEST:
		strlcpy(buf, "Peer Identify Request", buf_len);
		return;
	case ARP_OP_INVREPLY:
		strlcpy(buf, "Peer Identify Reply", buf_len);
		return;
	default:
		break;
	}
	strlcpy(buf, "Unknown", buf_len);
	return;
}
#endif

#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX packets number for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update the stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
		info,							\
		port,							\
		eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
		src_ip,							\
		eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
		dst_ip,							\
		arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	strlcpy(buf, info, 16);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op),
				ArpOp, sizeof(ArpOp));
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Distribute the packets round-robin across the slave tx buffers */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
				       &slave_bufs[i][num_tx_slave],
				       tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}

void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint32_t hash;
	int i;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

		hash = ether_hash(eth_hdr);

		slaves[i] = (hash ^= hash >> 8) % slave_count;
	}
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	uint16_t i;
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	uint32_t hash, l3hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		l3hash = 0;

		proto = eth_hdr->ether_type;
		hash = ether_hash(eth_hdr);

		vlan_offset = get_vlan_offset(eth_hdr, &proto);

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv4_hash(ipv4_hdr);

		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);
		}

		hash = hash ^ l3hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}
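
/*
 * Worked example (illustrative): for an untagged IPv4 packet the slave
 * index is ether_hash(eth) ^ ipv4_hash(ip), folded twice (hash >> 16,
 * then hash >> 8) so that all hash bytes influence the low bits before
 * the final modulo over slave_count.
 */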

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	int i;

	struct udp_hdr *udp_hdr;
	struct tcp_hdr *tcp_hdr;
	uint32_t hash, l3hash, l4hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		proto = eth_hdr->ether_type;
		vlan_offset = get_vlan_offset(eth_hdr, &proto);
		l3hash = 0;
		l4hash = 0;

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			size_t ip_hdr_offset;

			l3hash = ipv4_hash(ipv4_hdr);

			/* there is no L4 header in fragmented packet */
			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
								== 0)) {
				ip_hdr_offset = (ipv4_hdr->version_ihl
					& IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
					tcp_hdr = (struct tcp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					l4hash = HASH_L4_PORTS(tcp_hdr);
				} else if (ipv4_hdr->next_proto_id ==
								IPPROTO_UDP) {
					udp_hdr = (struct udp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					l4hash = HASH_L4_PORTS(udp_hdr);
				}
			}
		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);

			if (ipv6_hdr->proto == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}

		hash = l3hash ^ l4hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}
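
/*
 * Note: for fragmented IPv4 packets only the L3 hash contributes (l4hash
 * stays 0), so every fragment of a flow maps to the same slave even
 * though non-first fragments carry no L4 header.
 */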

struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}

static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;

	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}

static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
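
/*
 * Worked example (illustrative, assuming link_speed is reported in Mbps):
 * a 10G slave gives link_bwg = 10000 * 1000000 / 8 bytes/s before the
 * period scaling; the quotient/remainder pair stored above forms a
 * two-part key that bandwidth_cmp() uses to order slaves by remaining
 * bandwidth, most idle first.
 */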

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint16_t slave_id;
	uint16_t i;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}

static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
			sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}
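
/*
 * bond_tlb_enable() runs the callback once immediately; the callback then
 * re-arms itself with rte_eal_alarm_set() every REORDER_PERIOD_MS until
 * bond_tlb_disable() cancels the pending alarm.
 */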

static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_BOND_LOG(ERR,
						     "Failed to allocate ARP packet from pool");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
	/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	/* Array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	/*
	 * Map each packet to an output slave using the hash chosen by the
	 * configured transmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
			       &slave_bufs[i][slave_tx_count],
			       slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}

static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	/* 2-D array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves;
	 * sized by nb_bufs (one entry per packet), not by port count */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	/* Check for LACP control packets and send if available */
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
		struct rte_mbuf *ctrl_pkt = NULL;

		if (likely(rte_ring_empty(port->tx_ring)))
			continue;

		if (rte_ring_dequeue(port->tx_ring,
				(void **)&ctrl_pkt) != -ENOENT) {
			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
					bd_tx_q->queue_id, &ctrl_pkt, 1);
			/*
			 * re-enqueue LAG control plane packets to buffering
			 * ring if transmission fails so the packet isn't lost.
			 */
			if (slave_tx_count != 1)
				rte_ring_enqueue(port->tx_ring, ctrl_pkt);
		}
	}

	if (unlikely(nb_bufs == 0))
		return 0;

	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (likely(dist_slave_count > 0)) {
		/*
		 * Map each packet to an output slave using the hash chosen
		 * by the configured transmit policy
		 */
		internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
				bufs_slave_port_idxs);

		for (i = 0; i < nb_bufs; i++) {
			/*
			 * Populate slave mbuf arrays with mbufs for that
			 * slave
			 */
			uint16_t slave_idx = bufs_slave_port_idxs[i];

			slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
					bufs[i];
		}

		/* Send packet burst on each slave device */
		for (i = 0; i < dist_slave_count; i++) {
			if (slave_nb_bufs[i] == 0)
				continue;

			slave_tx_count = rte_eth_tx_burst(
					dist_slave_port_ids[i],
					bd_tx_q->queue_id, slave_bufs[i],
					slave_nb_bufs[i]);

			total_tx_count += slave_tx_count;

			/* If tx burst fails move packets to end of bufs */
			if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
				int slave_tx_fail_count = slave_nb_bufs[i] -
						slave_tx_count;
				total_tx_fail_count += slave_tx_fail_count;

				memcpy(&bufs[nb_bufs - total_tx_fail_count],
				       &slave_bufs[i][slave_tx_count],
				       slave_tx_fail_count * sizeof(bufs[0]));
			}
		}
	}

	return total_tx_count;
}

static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint8_t tx_failed_flag = 0;
	uint16_t num_of_slaves;

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}
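
/*
 * Note: broadcast mode bumps each mbuf's reference count by
 * num_of_slaves - 1 up front so that every slave's tx path owns one
 * reference; the cleanup loop above then frees the references held for
 * slaves that transmitted fewer packets than the most successful one.
 */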

static void
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		/**
		 * If in mode 4 then save the link properties of the first
		 * slave; all subsequent slaves must match these properties
		 */
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		bond_link->link_autoneg = slave_link->link_autoneg;
		bond_link->link_duplex = slave_link->link_duplex;
		bond_link->link_speed = slave_link->link_speed;
	} else {
		/**
		 * In any other mode the link properties are set to default
		 * values of AUTONEG/DUPLEX
		 */
		ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
		ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
	}
}

static int
link_properties_valid(struct rte_eth_dev *ethdev,
		struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		if (bond_link->link_duplex != slave_link->link_duplex ||
			bond_link->link_autoneg != slave_link->link_autoneg ||
			bond_link->link_speed != slave_link->link_speed)
			return -1;
	}

	return 0;
}

int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (dst_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	ether_addr_copy(mac_addr, dst_mac_addr);
	return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (new_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	/* If the new MAC is different from the current MAC then update */
	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

	return 0;
}

static const struct ether_addr null_mac_addr;

/*
 * Add additional MAC addresses to the slave
 */
int
slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
		uint16_t slave_port_id)
{
	int i, ret;
	struct ether_addr *mac_addr;

	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
		if (is_same_ether_addr(mac_addr, &null_mac_addr))
			break;

		ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
		if (ret < 0) {
			/* roll back the addresses added so far */
			for (i--; i > 0; i--)
				rte_eth_dev_mac_addr_remove(slave_port_id,
					&bonded_eth_dev->data->mac_addrs[i]);
			return ret;
		}
	}

	return 0;
}

/*
 * Remove additional MAC addresses from the slave
 */
int
slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
		uint16_t slave_port_id)
{
	int i, rc, ret;
	struct ether_addr *mac_addr;

	rc = 0;
	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
		if (is_same_ether_addr(mac_addr, &null_mac_addr))
			break;

		ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
		/* save only the first error */
		if (ret < 0 && rc == 0)
			rc = ret;
	}

	return rc;
}

int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	int i;

	/* Update slave devices MAC addresses */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++) {
			if (rte_eth_dev_default_mac_addr_set(
					internals->slaves[i].port_id,
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (rte_eth_dev_default_mac_addr_set(
						internals->primary_port,
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					return -1;
				}
			} else {
				if (rte_eth_dev_default_mac_addr_set(
						internals->slaves[i].port_id,
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
					return -1;
				}
			}
		}
	}

	return 0;
}

int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_8023AD:
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		if (internals->mode4.dedicated_queues.enabled == 0) {
			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
			RTE_BOND_LOG(WARNING,
				"Using mode 4, it is necessary to do TX burst "
				"and RX burst at least every 100ms.");
		} else {
			/* Use flow director's optimization */
			eth_dev->rx_pkt_burst =
					bond_ethdev_rx_burst_8023ad_fast_queue;
			eth_dev->tx_pkt_burst =
					bond_ethdev_tx_burst_8023ad_fast_queue;
		}
		break;
	case BONDING_MODE_TLB:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_ALB:
		if (bond_mode_alb_enable(eth_dev) != 0)
			return -1;

		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}
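
/*
 * Usage sketch (application side; a hedged illustration, not part of this
 * driver): a bonded port is normally created in one of the modes handled
 * above through the public API from rte_eth_bond.h, roughly:
 *
 *	int port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_BALANCE, rte_socket_id());
 *	if (port >= 0)
 *		rte_eth_bond_slave_add(port, slave_port_id);
 *
 * bond_ethdev_mode_set() then runs internally to bind the rx/tx burst
 * handlers that match the requested mode.
 */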

static int
slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	int errval = 0;
	struct bond_dev_private *internals = (struct bond_dev_private *)
		bonded_eth_dev->data->dev_private;
	struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];

	if (port->slow_pool == NULL) {
		char mem_name[256];
		int slave_id = slave_eth_dev->data->port_id;

		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
				slave_id);
		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			slave_eth_dev->data->numa_node);

		/* Any memory allocation failure in initialization is critical because
		 * resources can't be freed, so reinitialization is impossible. */
		if (port->slow_pool == NULL) {
			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
				slave_id, mem_name, rte_strerror(rte_errno));
		}
	}

	if (internals->mode4.dedicated_queues.enabled == 1) {
		/* Configure slow Rx queue */

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.rx_qid, 128,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL, port->slow_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.rx_qid,
					errval);
			return errval;
		}

		/* Configure slow Tx queue */
		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid, 512,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid,
				errval);
			return errval;
		}
	}
	return 0;
}

int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;
	uint16_t nb_rx_queues;
	uint16_t nb_tx_queues;

	int errval;
	uint16_t q_id;
	struct rte_flow_error flow_error;

	struct bond_dev_private *internals = (struct bond_dev_private *)
		bonded_eth_dev->data->dev_private;

	/* Stop slave */
	rte_eth_dev_stop(slave_eth_dev->data->port_id);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* If RSS is enabled for bonding, try to enable it for slaves */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
		if (internals->rss_key_len != 0) {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
					internals->rss_key_len;
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
					internals->rss_key;
		} else {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
		}

		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
	}

	if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
			DEV_RX_OFFLOAD_VLAN_FILTER)
		slave_eth_dev->data->dev_conf.rxmode.offloads |=
				DEV_RX_OFFLOAD_VLAN_FILTER;
	else
		slave_eth_dev->data->dev_conf.rxmode.offloads &=
				~DEV_RX_OFFLOAD_VLAN_FILTER;

	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			nb_rx_queues++;
			nb_tx_queues++;
		}
	}

	errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
				     bonded_eth_dev->data->mtu);
	if (errval != 0 && errval != -ENOTSUP) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Configure device */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			nb_rx_queues, nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Setup Rx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
				!= 0)
			return errval;

		if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
				slave_eth_dev->data->port_id) != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id, q_id, errval);
			return -1;
		}

		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
			rte_flow_destroy(slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
					&flow_error);

		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
				slave_eth_dev->data->port_id);
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	/* If RSS is enabled for bonding, synchronize RETA */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		int i;
		struct bond_dev_private *internals;

		internals = bonded_eth_dev->data->dev_private;

		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
				errval = rte_eth_dev_rss_reta_update(
						slave_eth_dev->data->port_id,
						&internals->reta_conf[0],
						internals->slaves[i].reta_size);
				if (errval != 0) {
					RTE_BOND_LOG(WARNING,
						     "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
						     " RSS Configuration for bonding may be inconsistent.",
						     slave_eth_dev->data->port_id, errval);
				}
				break;
			}
		}
	}

	/* If lsc interrupt is set, check initial slave's link status */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
			NULL);
	}

	return 0;
}
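
/*
 * Note: slave (re)configuration deliberately mirrors the bonded device:
 * stop the slave, inherit RSS/VLAN-filter/MTU settings, configure the
 * same number of queues (plus the dedicated slow queue pair in mode 4),
 * start it, then resync the RETA table and the initial link status.
 */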

static void
slave_remove(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	uint16_t i;

	for (i = 0; i < internals->slave_count; i++)
		if (internals->slaves[i].port_id ==
				slave_eth_dev->data->port_id)
			break;

	if (i < (internals->slave_count - 1)) {
		struct rte_flow *flow;

		memmove(&internals->slaves[i], &internals->slaves[i + 1],
				sizeof(internals->slaves[0]) *
				(internals->slave_count - i - 1));
		TAILQ_FOREACH(flow, &internals->flow_list, next) {
			memmove(&flow->flows[i], &flow->flows[i + 1],
					sizeof(flow->flows[0]) *
					(internals->slave_count - i - 1));
			flow->flows[internals->slave_count - 1] = NULL;
		}
	}

	internals->slave_count--;

	/* force reconfiguration of slave interfaces */
	_rte_eth_dev_reset(slave_eth_dev);
}

static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* Mark slave devices that don't support interrupts so we can
	 * compensate when we start the bond
	 */
	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
		slave_details->link_status_poll_enabled = 1;
	}

	slave_details->link_status_wait_to_complete = 0;
	/* clean tlb_last_obytets when adding port for bonding device */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct ether_addr));
}

void
bond_ethdev_primary_set(struct bond_dev_private *internals,
		uint16_t slave_port_id)
{
	int i;

	if (internals->active_slave_count < 1)
		internals->current_primary_port = slave_port_id;
	else
		/* Search bonded device slave ports for new proposed primary port */
		for (i = 0; i < internals->active_slave_count; i++) {
			if (internals->active_slaves[i] == slave_port_id)
				internals->current_primary_port = slave_port_id;
		}
}

static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);

static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (check_for_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		goto out_err;
	}

	if (internals->user_defined_mac == 0) {
		struct ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			goto out_err;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			goto out_err;
		}
	}

	/* If bonded device is configured in promiscuous mode then re-apply config */
	if (internals->promiscuous_en)
		bond_ethdev_promiscuous_enable(eth_dev);

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			internals->mode4.dedicated_queues.rx_qid =
					eth_dev->data->nb_rx_queues;
			internals->mode4.dedicated_queues.tx_qid =
					eth_dev->data->nb_tx_queues;
		}
	}

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		struct rte_eth_dev *slave_ethdev =
				&(rte_eth_devices[internals->slaves[i].port_id]);
		if (slave_configure(eth_dev, slave_ethdev) != 0) {
			RTE_BOND_LOG(ERR,
				"bonded port (%d) failed to reconfigure slave device (%d)",
				eth_dev->data->port_id,
				internals->slaves[i].port_id);
			goto out_err;
		}
		/* We will need to poll for link status if any slave doesn't
		 * support interrupts
		 */
		if (internals->slaves[i].link_status_poll_enabled)
			internals->link_status_polling_enabled = 1;
	}

	/* start polling if needed */
	if (internals->link_status_polling_enabled) {
		rte_eal_alarm_set(
			internals->link_status_polling_interval_ms * 1000,
			bond_ethdev_slave_link_status_change_monitor,
			(void *)&rte_eth_devices[internals->port_id]);
	}

	/* Update all slave devices MACs */
	if (mac_address_slaves_update(eth_dev) != 0)
		goto out_err;

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB)
		bond_tlb_enable(internals);

	return 0;

out_err:
	eth_dev->data->dev_started = 0;
	return -1;
}

static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
	int i;

	if (dev->data->rx_queues != NULL) {
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			rte_free(dev->data->rx_queues[i]);
			dev->data->rx_queues[i] = NULL;
		}
		dev->data->nb_rx_queues = 0;
	}

	if (dev->data->tx_queues != NULL) {
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			rte_free(dev->data->tx_queues[i]);
			dev->data->tx_queues[i] = NULL;
		}
		dev->data->nb_tx_queues = 0;
	}
}
2148 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2150 struct bond_dev_private *internals = eth_dev->data->dev_private;
2153 if (internals->mode == BONDING_MODE_8023AD) {
2157 bond_mode_8023ad_stop(eth_dev);
2159 /* Discard all messages to/from mode 4 state machines */
2160 for (i = 0; i < internals->active_slave_count; i++) {
2161 port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2163 RTE_ASSERT(port->rx_ring != NULL);
2164 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2165 rte_pktmbuf_free(pkt);
2167 RTE_ASSERT(port->tx_ring != NULL);
2168 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2169 rte_pktmbuf_free(pkt);
2173 if (internals->mode == BONDING_MODE_TLB ||
2174 internals->mode == BONDING_MODE_ALB) {
2175 bond_tlb_disable(internals);
2176 for (i = 0; i < internals->active_slave_count; i++)
2177 tlb_last_obytets[internals->active_slaves[i]] = 0;
2180 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2181 eth_dev->data->dev_started = 0;
2183 internals->link_status_polling_enabled = 0;
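/* Stop and deactivate every slave still present in the active list */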
2184 for (i = 0; i < internals->slave_count; i++) {
2185 uint16_t slave_id = internals->slaves[i].port_id;
2186 if (find_slave_by_id(internals->active_slaves,
2187 internals->active_slave_count, slave_id) !=
2188 internals->active_slave_count) {
2189 internals->slaves[i].last_link_status = 0;
2190 rte_eth_dev_stop(slave_id);
2191 deactivate_slave(eth_dev, slave_id);
2197 bond_ethdev_close(struct rte_eth_dev *dev)
2199 struct bond_dev_private *internals = dev->data->dev_private;
2200 uint16_t bond_port_id = internals->port_id;
2202 struct rte_flow_error ferror;
2204 RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2205 while (internals->slave_count != skipped) {
2206 uint16_t port_id = internals->slaves[skipped].port_id;
2208 rte_eth_dev_stop(port_id);
2210 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2212 "Failed to remove port %d from bonded device %s",
2213 port_id, dev->device->name);
2217 bond_flow_ops.flush(dev, &ferror);
2218 bond_ethdev_free_queues(dev);
2219 rte_bitmap_reset(internals->vlan_filter_bmp);
2222 /* forward declaration */
2223 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2226 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2228 struct bond_dev_private *internals = dev->data->dev_private;
2230 uint16_t max_nb_rx_queues = UINT16_MAX;
2231 uint16_t max_nb_tx_queues = UINT16_MAX;
2232 uint16_t max_rx_desc_lim = UINT16_MAX;
2233 uint16_t max_tx_desc_lim = UINT16_MAX;
2235 dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2237 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2238 internals->candidate_max_rx_pktlen :
2239 ETHER_MAX_JUMBO_FRAME_LEN;
2241 /* Max number of tx/rx queues that the bonded device can support is the
2242  * minimum of the values reported by the bonded slaves, as all slaves
2243  * must be capable of supporting the same number of tx/rx queues.
2245 if (internals->slave_count > 0) {
2246 struct rte_eth_dev_info slave_info;
2249 for (idx = 0; idx < internals->slave_count; idx++) {
2250 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2253 if (slave_info.max_rx_queues < max_nb_rx_queues)
2254 max_nb_rx_queues = slave_info.max_rx_queues;
2256 if (slave_info.max_tx_queues < max_nb_tx_queues)
2257 max_nb_tx_queues = slave_info.max_tx_queues;
2259 if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2260 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2262 if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2263 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2267 dev_info->max_rx_queues = max_nb_rx_queues;
2268 dev_info->max_tx_queues = max_nb_tx_queues;
2270 memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2271 sizeof(dev_info->default_rxconf));
2272 memcpy(&dev_info->default_txconf, &internals->default_txconf,
2273 sizeof(dev_info->default_txconf));
2275 dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2276 dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2279 * If dedicated hw queues are enabled for the link bonding device in LACP mode
2280 * then we need to reduce the maximum number of data path queues by 1.
2282 if (internals->mode == BONDING_MODE_8023AD &&
2283 internals->mode4.dedicated_queues.enabled == 1) {
2284 dev_info->max_rx_queues--;
2285 dev_info->max_tx_queues--;
2288 dev_info->min_rx_bufsize = 0;
2290 dev_info->rx_offload_capa = internals->rx_offload_capa;
2291 dev_info->tx_offload_capa = internals->tx_offload_capa;
2292 dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2293 dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2294 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2296 dev_info->reta_size = internals->reta_size;
2300 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2304 struct bond_dev_private *internals = dev->data->dev_private;
2306 /* don't do this while a slave is being added */
2307 rte_spinlock_lock(&internals->lock);
2310 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2312 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2314 for (i = 0; i < internals->slave_count; i++) {
2315 uint16_t port_id = internals->slaves[i].port_id;
2317 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2319 RTE_BOND_LOG(WARNING,
2320 "Setting VLAN filter on slave port %u not supported.",
2324 rte_spinlock_unlock(&internals->lock);
2329 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2330 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2331 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2333 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2334 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2335 0, dev->data->numa_node);
2336 if (bd_rx_q == NULL)
2339 bd_rx_q->queue_id = rx_queue_id;
2340 bd_rx_q->dev_private = dev->data->dev_private;
2342 bd_rx_q->nb_rx_desc = nb_rx_desc;
2344 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2345 bd_rx_q->mb_pool = mb_pool;
2347 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
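/* The queue object is a template; slave Rx queues are created from it
 * when the bonded device is started and the slaves are reconfigured. */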
2353 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2354 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2355 const struct rte_eth_txconf *tx_conf)
2357 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2358 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2359 0, dev->data->numa_node);
2361 if (bd_tx_q == NULL)
2364 bd_tx_q->queue_id = tx_queue_id;
2365 bd_tx_q->dev_private = dev->data->dev_private;
2367 bd_tx_q->nb_tx_desc = nb_tx_desc;
2368 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2370 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2376 bond_ethdev_rx_queue_release(void *queue)
2385 bond_ethdev_tx_queue_release(void *queue)
2394 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2396 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2397 struct bond_dev_private *internals;
2399 /* Default polling_slave_found to true, as we don't want to disable
2400 * the polling thread if we cannot get the lock */
2401 int i, polling_slave_found = 1;
2406 bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2407 internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2409 if (!bonded_ethdev->data->dev_started ||
2410 !internals->link_status_polling_enabled)
2413 /* If device is currently being configured then don't check slaves'
2414 * link status; wait until the next period */
2415 if (rte_spinlock_trylock(&internals->lock)) {
2416 if (internals->slave_count > 0)
2417 polling_slave_found = 0;
2419 for (i = 0; i < internals->slave_count; i++) {
2420 if (!internals->slaves[i].link_status_poll_enabled)
2423 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2424 polling_slave_found = 1;
2426 /* Update slave link status */
2427 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2428 internals->slaves[i].link_status_wait_to_complete);
2430 /* if link status has changed since last checked then call lsc event callback */
2432 if (slave_ethdev->data->dev_link.link_status !=
2433 internals->slaves[i].last_link_status) {
2434 internals->slaves[i].last_link_status =
2435 slave_ethdev->data->dev_link.link_status;
2437 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2438 RTE_ETH_EVENT_INTR_LSC,
2439 &bonded_ethdev->data->port_id,
2443 rte_spinlock_unlock(&internals->lock);
2446 if (polling_slave_found)
2447 /* Set alarm to continue monitoring link status of slave ethdevs */
2448 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2449 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2453 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2455 void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2457 struct bond_dev_private *bond_ctx;
2458 struct rte_eth_link slave_link;
2462 bond_ctx = ethdev->data->dev_private;
2464 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2466 if (ethdev->data->dev_started == 0 ||
2467 bond_ctx->active_slave_count == 0) {
2468 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2472 ethdev->data->dev_link.link_status = ETH_LINK_UP;
2474 if (wait_to_complete)
2475 link_update = rte_eth_link_get;
2477 link_update = rte_eth_link_get_nowait;
2479 switch (bond_ctx->mode) {
2480 case BONDING_MODE_BROADCAST:
2482 * Setting link speed to UINT32_MAX to ensure we pick up the
2483 * value of the first active slave
2485 ethdev->data->dev_link.link_speed = UINT32_MAX;
2488 * link speed is the minimum of all the slaves' link speeds, as
2489 * packet loss will occur on a slave if transmission at rates
2490 * greater than its link speed is attempted */
2492 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2493 link_update(bond_ctx->active_slaves[idx], &slave_link);
2495 if (slave_link.link_speed <
2496 ethdev->data->dev_link.link_speed)
2497 ethdev->data->dev_link.link_speed =
2498 slave_link.link_speed;
2501 case BONDING_MODE_ACTIVE_BACKUP:
2502 /* Current primary slave */
2503 link_update(bond_ctx->current_primary_port, &slave_link);
2505 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2507 case BONDING_MODE_8023AD:
2508 ethdev->data->dev_link.link_autoneg =
2509 bond_ctx->mode4.slave_link.link_autoneg;
2510 ethdev->data->dev_link.link_duplex =
2511 bond_ctx->mode4.slave_link.link_duplex;
2512 /* fall through to update link speed */
2513 case BONDING_MODE_ROUND_ROBIN:
2514 case BONDING_MODE_BALANCE:
2515 case BONDING_MODE_TLB:
2516 case BONDING_MODE_ALB:
2519 * In these modes the maximum theoretical link speed is the sum of all the active slaves' link speeds */
2522 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2524 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2525 link_update(bond_ctx->active_slaves[idx], &slave_link);
2527 ethdev->data->dev_link.link_speed +=
2528 slave_link.link_speed;
2538 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2540 struct bond_dev_private *internals = dev->data->dev_private;
2541 struct rte_eth_stats slave_stats;
2544 for (i = 0; i < internals->slave_count; i++) {
2545 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2547 stats->ipackets += slave_stats.ipackets;
2548 stats->opackets += slave_stats.opackets;
2549 stats->ibytes += slave_stats.ibytes;
2550 stats->obytes += slave_stats.obytes;
2551 stats->imissed += slave_stats.imissed;
2552 stats->ierrors += slave_stats.ierrors;
2553 stats->oerrors += slave_stats.oerrors;
2554 stats->rx_nombuf += slave_stats.rx_nombuf;
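/* Per-queue counters are aggregated index by index; only the first
 * RTE_ETHDEV_QUEUE_STAT_CNTRS queues per slave are accounted for. */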
2556 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2557 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2558 stats->q_opackets[j] += slave_stats.q_opackets[j];
2559 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2560 stats->q_obytes[j] += slave_stats.q_obytes[j];
2561 stats->q_errors[j] += slave_stats.q_errors[j];
2570 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2572 struct bond_dev_private *internals = dev->data->dev_private;
2575 for (i = 0; i < internals->slave_count; i++)
2576 rte_eth_stats_reset(internals->slaves[i].port_id);
2580 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2582 struct bond_dev_private *internals = eth_dev->data->dev_private;
2585 internals->promiscuous_en = 1;
2587 switch (internals->mode) {
2588 /* Promiscuous mode is propagated to all slaves */
2589 case BONDING_MODE_ROUND_ROBIN:
2590 case BONDING_MODE_BALANCE:
2591 case BONDING_MODE_BROADCAST:
2592 for (i = 0; i < internals->slave_count; i++)
2593 rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2595 /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2596 case BONDING_MODE_8023AD:
2598 /* Promiscuous mode is propagated only to primary slave */
2599 case BONDING_MODE_ACTIVE_BACKUP:
2600 case BONDING_MODE_TLB:
2601 case BONDING_MODE_ALB:
2603 /* Do not touch promisc mode when there is no primary port (no slaves) */
2604 if (internals->slave_count == 0)
2606 rte_eth_promiscuous_enable(internals->current_primary_port);
2611 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2613 struct bond_dev_private *internals = dev->data->dev_private;
2616 internals->promiscuous_en = 0;
2618 switch (internals->mode) {
2619 /* Promiscuous mode is propagated to all slaves */
2620 case BONDING_MODE_ROUND_ROBIN:
2621 case BONDING_MODE_BALANCE:
2622 case BONDING_MODE_BROADCAST:
2623 for (i = 0; i < internals->slave_count; i++)
2624 rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2626 /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2627 case BONDING_MODE_8023AD:
2629 /* Promiscuous mode is propagated only to primary slave */
2630 case BONDING_MODE_ACTIVE_BACKUP:
2631 case BONDING_MODE_TLB:
2632 case BONDING_MODE_ALB:
2634 /* Do not touch promisc mode when there is no primary port (no slaves) */
2635 if (internals->slave_count == 0)
2637 rte_eth_promiscuous_disable(internals->current_primary_port);
2642 bond_ethdev_delayed_lsc_propagation(void *arg)
2647 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2648 RTE_ETH_EVENT_INTR_LSC, NULL);
2652 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2653 void *param, void *ret_param __rte_unused)
2655 struct rte_eth_dev *bonded_eth_dev;
2656 struct bond_dev_private *internals;
2657 struct rte_eth_link link;
2660 uint8_t lsc_flag = 0;
2661 int valid_slave = 0;
2662 uint16_t active_pos;
2665 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2668 bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2670 if (check_for_bonded_ethdev(bonded_eth_dev))
2673 internals = bonded_eth_dev->data->dev_private;
2675 /* If the device isn't started don't handle interrupts */
2676 if (!bonded_eth_dev->data->dev_started)
2679 /* verify that port_id is a valid slave of bonded port */
2680 for (i = 0; i < internals->slave_count; i++) {
2681 if (internals->slaves[i].port_id == port_id) {
2690 /* Synchronize parallel lsc callback calls, triggered either by a real
2691 * link event from the slave PMDs or by the bonding PMD itself.
2693 rte_spinlock_lock(&internals->lsc_lock);
2695 /* Search for port in active port list */
2696 active_pos = find_slave_by_id(internals->active_slaves,
2697 internals->active_slave_count, port_id);
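/* find_slave_by_id() returns active_slave_count when the port is not
 * present in the active list. */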
2699 rte_eth_link_get_nowait(port_id, &link);
2700 if (link.link_status) {
2701 if (active_pos < internals->active_slave_count)
2704 /* check link state properties if bonded link is up */
2705 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2706 if (link_properties_valid(bonded_eth_dev, &link) != 0)
2707 RTE_BOND_LOG(ERR, "Invalid link properties "
2708 "for slave %d in bonding mode %d",
2709 port_id, internals->mode);
2711 /* inherit slave link properties */
2712 link_properties_set(bonded_eth_dev, &link);
2715 /* If no active slave ports then set this port to be the primary port */
2718 if (internals->active_slave_count < 1) {
2719 /* If first active slave, then change link status */
2720 bonded_eth_dev->data->dev_link.link_status =
2722 internals->current_primary_port = port_id;
2725 mac_address_slaves_update(bonded_eth_dev);
2728 activate_slave(bonded_eth_dev, port_id);
2730 /* If the user has defined the primary port then default to using it */
2733 if (internals->user_defined_primary_port &&
2734 internals->primary_port == port_id)
2735 bond_ethdev_primary_set(internals, port_id);
2737 if (active_pos == internals->active_slave_count)
2740 /* Remove from active slave list */
2741 deactivate_slave(bonded_eth_dev, port_id);
2743 if (internals->active_slave_count < 1)
2746 /* Update primary id: take the first active slave from the list, or fall
2747 * back to the configured primary port if none is available */
2748 if (port_id == internals->current_primary_port) {
2749 if (internals->active_slave_count > 0)
2750 bond_ethdev_primary_set(internals,
2751 internals->active_slaves[0]);
2753 internals->current_primary_port = internals->primary_port;
2759 * Update bonded device link properties after any change to the active slave set */
2762 bond_ethdev_link_update(bonded_eth_dev, 0);
2765 /* Cancel any possible outstanding interrupts if delays are enabled */
2766 if (internals->link_up_delay_ms > 0 ||
2767 internals->link_down_delay_ms > 0)
2768 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2771 if (bonded_eth_dev->data->dev_link.link_status) {
2772 if (internals->link_up_delay_ms > 0)
2773 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2774 bond_ethdev_delayed_lsc_propagation,
2775 (void *)bonded_eth_dev);
2777 _rte_eth_dev_callback_process(bonded_eth_dev,
2778 RTE_ETH_EVENT_INTR_LSC,
2782 if (internals->link_down_delay_ms > 0)
2783 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2784 bond_ethdev_delayed_lsc_propagation,
2785 (void *)bonded_eth_dev);
2787 _rte_eth_dev_callback_process(bonded_eth_dev,
2788 RTE_ETH_EVENT_INTR_LSC,
2793 rte_spinlock_unlock(&internals->lsc_lock);
2799 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2800 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2804 int slave_reta_size;
2805 unsigned reta_count;
2806 struct bond_dev_private *internals = dev->data->dev_private;
2808 if (reta_size != internals->reta_size)
2811 /* Copy RETA table */
2812 reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2814 for (i = 0; i < reta_count; i++) {
2815 internals->reta_conf[i].mask = reta_conf[i].mask;
2816 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2817 if ((reta_conf[i].mask >> j) & 0x01)
2818 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2821 /* Fill the rest of the array by repeating the configured groups */
2822 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2823 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2824 sizeof(internals->reta_conf[0]) * reta_count);
2826 /* Propagate RETA over slaves */
2827 for (i = 0; i < internals->slave_count; i++) {
2828 slave_reta_size = internals->slaves[i].reta_size;
2829 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2830 &internals->reta_conf[0], slave_reta_size);
2839 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2840 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2843 struct bond_dev_private *internals = dev->data->dev_private;
2845 if (reta_size != internals->reta_size)
2848 /* Copy RETA table */
2849 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2850 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2851 if ((reta_conf[i].mask >> j) & 0x01)
2852 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2858 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2859 struct rte_eth_rss_conf *rss_conf)
2862 struct bond_dev_private *internals = dev->data->dev_private;
2863 struct rte_eth_rss_conf bond_rss_conf;
2865 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2867 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2869 if (bond_rss_conf.rss_hf != 0)
2870 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
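/* If masking cleared every hash function, keep the previously stored
 * rss_hf; the masked request is still forwarded to the slaves below. */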
2872 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2873 sizeof(internals->rss_key)) {
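/* A key length of zero selects the default; 40 bytes matches the
 * length of the driver's default RSS key. */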
2874 if (bond_rss_conf.rss_key_len == 0)
2875 bond_rss_conf.rss_key_len = 40;
2876 internals->rss_key_len = bond_rss_conf.rss_key_len;
2877 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2878 internals->rss_key_len);
2881 for (i = 0; i < internals->slave_count; i++) {
2882 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2892 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2893 struct rte_eth_rss_conf *rss_conf)
2895 struct bond_dev_private *internals = dev->data->dev_private;
2897 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2898 rss_conf->rss_key_len = internals->rss_key_len;
2899 if (rss_conf->rss_key)
2900 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2906 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2908 struct rte_eth_dev *slave_eth_dev;
2909 struct bond_dev_private *internals = dev->data->dev_private;
2912 rte_spinlock_lock(&internals->lock);
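/* First verify that every slave implements mtu_set, so that the new
 * MTU is never applied to only a subset of the slaves. */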
2914 for (i = 0; i < internals->slave_count; i++) {
2915 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2916 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2917 rte_spinlock_unlock(&internals->lock);
2921 for (i = 0; i < internals->slave_count; i++) {
2922 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2924 rte_spinlock_unlock(&internals->lock);
2929 rte_spinlock_unlock(&internals->lock);
2934 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2936 if (mac_address_set(dev, addr)) {
2937 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2945 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2946 enum rte_filter_type type, enum rte_filter_op op, void *arg)
2948 if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2949 *(const void **)arg = &bond_flow_ops;
2956 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2957 __rte_unused uint32_t index, uint32_t vmdq)
2959 struct rte_eth_dev *slave_eth_dev;
2960 struct bond_dev_private *internals = dev->data->dev_private;
2963 rte_spinlock_lock(&internals->lock);
2965 for (i = 0; i < internals->slave_count; i++) {
2966 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2967 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2968 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2974 for (i = 0; i < internals->slave_count; i++) {
2975 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2979 for (i--; i >= 0; i--)
2980 rte_eth_dev_mac_addr_remove(
2981 internals->slaves[i].port_id, mac_addr);
2988 rte_spinlock_unlock(&internals->lock);
2993 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2995 struct rte_eth_dev *slave_eth_dev;
2996 struct bond_dev_private *internals = dev->data->dev_private;
2999 rte_spinlock_lock(&internals->lock);
3001 for (i = 0; i < internals->slave_count; i++) {
3002 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3003 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3007 struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
3009 for (i = 0; i < internals->slave_count; i++)
3010 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3014 rte_spinlock_unlock(&internals->lock);
3017 const struct eth_dev_ops default_dev_ops = {
3018 .dev_start = bond_ethdev_start,
3019 .dev_stop = bond_ethdev_stop,
3020 .dev_close = bond_ethdev_close,
3021 .dev_configure = bond_ethdev_configure,
3022 .dev_infos_get = bond_ethdev_info,
3023 .vlan_filter_set = bond_ethdev_vlan_filter_set,
3024 .rx_queue_setup = bond_ethdev_rx_queue_setup,
3025 .tx_queue_setup = bond_ethdev_tx_queue_setup,
3026 .rx_queue_release = bond_ethdev_rx_queue_release,
3027 .tx_queue_release = bond_ethdev_tx_queue_release,
3028 .link_update = bond_ethdev_link_update,
3029 .stats_get = bond_ethdev_stats_get,
3030 .stats_reset = bond_ethdev_stats_reset,
3031 .promiscuous_enable = bond_ethdev_promiscuous_enable,
3032 .promiscuous_disable = bond_ethdev_promiscuous_disable,
3033 .reta_update = bond_ethdev_rss_reta_update,
3034 .reta_query = bond_ethdev_rss_reta_query,
3035 .rss_hash_update = bond_ethdev_rss_hash_update,
3036 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get,
3037 .mtu_set = bond_ethdev_mtu_set,
3038 .mac_addr_set = bond_ethdev_mac_address_set,
3039 .mac_addr_add = bond_ethdev_mac_addr_add,
3040 .mac_addr_remove = bond_ethdev_mac_addr_remove,
3041 .filter_ctrl = bond_filter_ctrl
3045 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3047 const char *name = rte_vdev_device_name(dev);
3048 uint8_t socket_id = dev->device.numa_node;
3049 struct bond_dev_private *internals = NULL;
3050 struct rte_eth_dev *eth_dev = NULL;
3051 uint32_t vlan_filter_bmp_size;
3053 /* now do all data allocation - for eth_dev structure, dummy pci driver
3054 * and internal (private) data
3057 /* reserve an ethdev entry */
3058 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3059 if (eth_dev == NULL) {
3060 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3064 internals = eth_dev->data->dev_private;
3065 eth_dev->data->nb_rx_queues = (uint16_t)1;
3066 eth_dev->data->nb_tx_queues = (uint16_t)1;
3068 /* Allocate memory for storing MAC addresses */
3069 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3070 BOND_MAX_MAC_ADDRS, 0, socket_id);
3071 if (eth_dev->data->mac_addrs == NULL) {
3073 "Failed to allocate %u bytes needed to store MAC addresses",
3074 ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3078 eth_dev->dev_ops = &default_dev_ops;
3079 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3081 rte_spinlock_init(&internals->lock);
3082 rte_spinlock_init(&internals->lsc_lock);
3084 internals->port_id = eth_dev->data->port_id;
3085 internals->mode = BONDING_MODE_INVALID;
3086 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
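/* Deliberately out of range: no primary port has been chosen yet */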
3087 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3088 internals->burst_xmit_hash = burst_xmit_l2_hash;
3089 internals->user_defined_mac = 0;
3091 internals->link_status_polling_enabled = 0;
3093 internals->link_status_polling_interval_ms =
3094 DEFAULT_POLLING_INTERVAL_10_MS;
3095 internals->link_down_delay_ms = 0;
3096 internals->link_up_delay_ms = 0;
3098 internals->slave_count = 0;
3099 internals->active_slave_count = 0;
3100 internals->rx_offload_capa = 0;
3101 internals->tx_offload_capa = 0;
3102 internals->rx_queue_offload_capa = 0;
3103 internals->tx_queue_offload_capa = 0;
3104 internals->candidate_max_rx_pktlen = 0;
3105 internals->max_rx_pktlen = 0;
3107 /* Initially allow any RSS offload type to be chosen */
3108 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3110 memset(&internals->default_rxconf, 0,
3111 sizeof(internals->default_rxconf));
3112 memset(&internals->default_txconf, 0,
3113 sizeof(internals->default_txconf));
3115 memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3116 memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3118 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3119 memset(internals->slaves, 0, sizeof(internals->slaves));
3121 TAILQ_INIT(&internals->flow_list);
3122 internals->flow_isolated_valid = 0;
3124 /* Set mode 4 default configuration */
3125 bond_mode_8023ad_setup(eth_dev, NULL);
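/* A NULL conf selects the default mode 4 configuration */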
3126 if (bond_ethdev_mode_set(eth_dev, mode)) {
3127 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3128 eth_dev->data->port_id, mode);
3132 vlan_filter_bmp_size =
3133 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3134 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3135 RTE_CACHE_LINE_SIZE);
3136 if (internals->vlan_filter_bmpmem == NULL) {
3138 "Failed to allocate vlan bitmap for bonded device %u",
3139 eth_dev->data->port_id);
3143 internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3144 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3145 if (internals->vlan_filter_bmp == NULL) {
3147 "Failed to init vlan bitmap for bonded device %u",
3148 eth_dev->data->port_id);
3149 rte_free(internals->vlan_filter_bmpmem);
3153 return eth_dev->data->port_id;
3156 rte_free(internals);
3157 if (eth_dev != NULL)
3158 eth_dev->data->dev_private = NULL;
3159 rte_eth_dev_release_port(eth_dev);
3164 bond_probe(struct rte_vdev_device *dev)
3167 struct bond_dev_private *internals;
3168 struct rte_kvargs *kvlist;
3169 uint8_t bonding_mode, socket_id;
3170 int arg_count, port_id;
3172 struct rte_eth_dev *eth_dev;
3177 name = rte_vdev_device_name(dev);
3178 RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3180 if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3181 eth_dev = rte_eth_dev_attach_secondary(name);
3183 RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3186 /* TODO: request info from primary to set up Rx and Tx */
3187 eth_dev->dev_ops = &default_dev_ops;
3188 eth_dev->device = &dev->device;
3189 rte_eth_dev_probing_finish(eth_dev);
3193 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3194 pmd_bond_init_valid_arguments);
3198 /* Parse link bonding mode */
3199 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3200 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3201 &bond_ethdev_parse_slave_mode_kvarg,
3202 &bonding_mode) != 0) {
3203 RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3208 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3213 /* Parse socket id to create bonding device on */
3214 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3215 if (arg_count == 1) {
3216 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3217 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3219 RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3220 "bonded device %s", name);
3223 } else if (arg_count > 1) {
3224 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3225 "bonded device %s", name);
3228 socket_id = rte_socket_id();
3231 dev->device.numa_node = socket_id;
3233 /* Create link bonding eth device */
3234 port_id = bond_alloc(dev, bonding_mode);
3236 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3237 "socket %u.", name, bonding_mode, socket_id);
3240 internals = rte_eth_devices[port_id].data->dev_private;
3241 internals->kvlist = kvlist;
3243 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3244 if (rte_kvargs_process(kvlist,
3245 PMD_BOND_AGG_MODE_KVARG,
3246 &bond_ethdev_parse_slave_agg_mode_kvarg,
3249 "Failed to parse agg selection mode for bonded device %s",
3254 if (internals->mode == BONDING_MODE_8023AD)
3255 internals->mode4.agg_selection = agg_mode;
3257 internals->mode4.agg_selection = AGG_STABLE;
3260 rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3261 RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3262 "socket %u.", name, port_id, bonding_mode, socket_id);
3266 rte_kvargs_free(kvlist);
3272 bond_remove(struct rte_vdev_device *dev)
3274 struct rte_eth_dev *eth_dev;
3275 struct bond_dev_private *internals;
3281 name = rte_vdev_device_name(dev);
3282 RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3284 /* now free all allocated data - the eth_dev structure,
3285 * dummy pci driver and internal (private) data
3288 /* find an ethdev entry */
3289 eth_dev = rte_eth_dev_allocated(name);
3290 if (eth_dev == NULL)
3293 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3294 return rte_eth_dev_release_port(eth_dev);
3296 RTE_ASSERT(eth_dev->device == &dev->device);
3298 internals = eth_dev->data->dev_private;
3299 if (internals->slave_count != 0)
3302 if (eth_dev->data->dev_started == 1) {
3303 bond_ethdev_stop(eth_dev);
3304 bond_ethdev_close(eth_dev);
3307 eth_dev->dev_ops = NULL;
3308 eth_dev->rx_pkt_burst = NULL;
3309 eth_dev->tx_pkt_burst = NULL;
3311 internals = eth_dev->data->dev_private;
3312 /* Try to release the mempool used in mode 6. If the bonded
3313 * device is not in mode 6, freeing a NULL pointer is harmless.
3315 rte_mempool_free(internals->mode6.mempool);
3316 rte_bitmap_free(internals->vlan_filter_bmp);
3317 rte_free(internals->vlan_filter_bmpmem);
3319 rte_eth_dev_release_port(eth_dev);
3324 /* this part resolves the slave port ids after all the other pdevs and
3325 * vdevs have been allocated */
3327 bond_ethdev_configure(struct rte_eth_dev *dev)
3329 const char *name = dev->device->name;
3330 struct bond_dev_private *internals = dev->data->dev_private;
3331 struct rte_kvargs *kvlist = internals->kvlist;
3333 uint16_t port_id = dev - rte_eth_devices;
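/* Recover the port id from this device's offset in the global
 * rte_eth_devices array. */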
3336 static const uint8_t default_rss_key[40] = {
3337 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3338 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3339 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3340 0xBE, 0xAC, 0x01, 0xFA
3346 * If RSS is enabled, fill table with default values and
3347 * set the key to the value specified in the port RSS configuration.
3348 * Fall back to default RSS key if the key is not specified
3350 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3351 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3352 internals->rss_key_len =
3353 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3354 memcpy(internals->rss_key,
3355 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3356 internals->rss_key_len);
3358 internals->rss_key_len = sizeof(default_rss_key);
3359 memcpy(internals->rss_key, default_rss_key,
3360 internals->rss_key_len);
3363 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3364 internals->reta_conf[i].mask = ~0LL;
3365 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3366 internals->reta_conf[i].reta[j] =
3367 (i * RTE_RETA_GROUP_SIZE + j) %
3368 dev->data->nb_rx_queues;
3372 /* set the max_rx_pktlen */
3373 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3376 * if no kvlist, it means that this bonded device has been created
3377 * through the bonding API.
3382 /* Parse MAC address for bonded device */
3383 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3384 if (arg_count == 1) {
3385 struct ether_addr bond_mac;
3387 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3388 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3389 RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3394 /* Set MAC address */
3395 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3397 "Failed to set mac address on bonded device %s",
3401 } else if (arg_count > 1) {
3403 "MAC address can be specified only once for bonded device %s",
3408 /* Parse/set balance mode transmit policy */
3409 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3410 if (arg_count == 1) {
3411 uint8_t xmit_policy;
3413 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3414 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3417 "Invalid xmit policy specified for bonded device %s",
3422 /* Set balance mode transmit policy */
3423 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3425 "Failed to set balance xmit policy on bonded device %s",
3429 } else if (arg_count > 1) {
3431 "Transmit policy can be specified only once for bonded device %s",
3436 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3437 if (rte_kvargs_process(kvlist,
3438 PMD_BOND_AGG_MODE_KVARG,
3439 &bond_ethdev_parse_slave_agg_mode_kvarg,
3442 "Failed to parse agg selection mode for bonded device %s",
3445 if (internals->mode == BONDING_MODE_8023AD) {
3446 int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3450 "Invalid args for agg selection set for bonded device %s",
3457 /* Parse/add slave ports to bonded device */
3458 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3459 struct bond_ethdev_slave_ports slave_ports;
3462 memset(&slave_ports, 0, sizeof(slave_ports));
3464 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3465 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3467 "Failed to parse slave ports for bonded device %s",
3472 for (i = 0; i < slave_ports.slave_count; i++) {
3473 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3475 "Failed to add port %d as slave to bonded device %s",
3476 slave_ports.slaves[i], name);
3481 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3485 /* Parse/set primary slave port id*/
3486 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3487 if (arg_count == 1) {
3488 uint16_t primary_slave_port_id;
3490 if (rte_kvargs_process(kvlist,
3491 PMD_BOND_PRIMARY_SLAVE_KVARG,
3492 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3493 &primary_slave_port_id) < 0) {
3495 "Invalid primary slave port id specified for bonded device %s",
3500 /* Set primary slave port id */
3501 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3504 "Failed to set primary slave port %d on bonded device %s",
3505 primary_slave_port_id, name);
3508 } else if (arg_count > 1) {
3510 "Primary slave can be specified only once for bonded device %s",
3515 /* Parse link status monitor polling interval */
3516 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3517 if (arg_count == 1) {
3518 uint32_t lsc_poll_interval_ms;
3520 if (rte_kvargs_process(kvlist,
3521 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3522 &bond_ethdev_parse_time_ms_kvarg,
3523 &lsc_poll_interval_ms) < 0) {
3525 "Invalid lsc polling interval value specified for bonded"
3526 " device %s", name);
3530 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3533 "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3534 lsc_poll_interval_ms, name);
3537 } else if (arg_count > 1) {
3539 "LSC polling interval can be specified only once for bonded"
3540 " device %s", name);
3544 /* Parse link up interrupt propagation delay */
3545 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3546 if (arg_count == 1) {
3547 uint32_t link_up_delay_ms;
3549 if (rte_kvargs_process(kvlist,
3550 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3551 &bond_ethdev_parse_time_ms_kvarg,
3552 &link_up_delay_ms) < 0) {
3554 "Invalid link up propagation delay value specified for"
3555 " bonded device %s", name);
3559 /* Set link up propagation delay */
3560 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3563 "Failed to set link up propagation delay (%u ms) on bonded"
3564 " device %s", link_up_delay_ms, name);
3567 } else if (arg_count > 1) {
3569 "Link up propagation delay can be specified only once for"
3570 " bonded device %s", name);
3574 /* Parse link down interrupt propagation delay */
3575 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3576 if (arg_count == 1) {
3577 uint32_t link_down_delay_ms;
3579 if (rte_kvargs_process(kvlist,
3580 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3581 &bond_ethdev_parse_time_ms_kvarg,
3582 &link_down_delay_ms) < 0) {
3584 "Invalid link down propagation delay value specified for"
3585 " bonded device %s", name);
3589 /* Set link down propagation delay */
3590 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3593 "Failed to set link down propagation delay (%u ms) on bonded device %s",
3594 link_down_delay_ms, name);
3597 } else if (arg_count > 1) {
3599 "Link down propagation delay can be specified only once for bonded device %s",
3607 struct rte_vdev_driver pmd_bond_drv = {
3608 .probe = bond_probe,
3609 .remove = bond_remove,
3612 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3613 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3615 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3619 "xmit_policy=[l2 | l23 | l34] "
3620 "agg_mode=[count | stable | bandwidth] "
3623 "lsc_poll_period_ms=<int> "
3625 "down_delay=<int>");
3629 RTE_INIT(bond_init_log)
3631 bond_logtype = rte_log_register("pmd.net.bond");
3632 if (bond_logtype >= 0)
3633 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);