/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"
#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
		rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		/* A QinQ frame carries a second, inner VLAN tag */
		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}
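/*
 * Illustrative sketch (not part of the upstream driver): how a caller uses
 * get_vlan_offset() to reach the L3 header of a possibly VLAN-tagged frame.
 * The mbuf "m" and the resulting "ipv4_hdr" are hypothetical names; the
 * pattern mirrors the hash and debug helpers later in this file.
 */
#if 0
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;	/* big-endian EtherType */
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

	/* proto now holds the inner EtherType; the L3 header starts after
	 * the Ethernet header plus zero, one or two VLAN tags. */
	if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		/* ... */
	}
#endif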
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_slave = 0;
	uint16_t num_rx_total = 0;
	int i;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
	}

	return num_rx_total;
}
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	/* In active-backup mode only the current primary port carries traffic */
	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}
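/*
 * Illustrative sketch (not part of the upstream driver): the primary port
 * used above can be chosen by the application through the public bonding
 * API. "bond_port" and "slave_port" are hypothetical identifiers.
 */
#if 0
	if (rte_eth_bond_primary_set(bond_port, slave_port) != 0)
		RTE_LOG(ERR, PMD, "Failed to set primary slave\n");
#endif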
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	/* Slow protocol (LACP/marker) frames must not carry a VLAN tag */
	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}
/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities don't allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}
int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify that every slave in the bonding supports the flow rule */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}
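/*
 * Illustrative sketch (not part of the upstream driver): the dedicated
 * Rx/Tx queue path above is only taken when the application enables it
 * before starting the bonded device. "bond_port" is a hypothetical id.
 */
#if 0
	/* Must be called while the bonded device is stopped. */
	if (rte_eth_bond_8023ad_dedicated_queues_enable(bond_port) != 0)
		RTE_LOG(ERR, PMD, "Cannot enable dedicated 802.3ad queues\n");
#endif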
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t i, idx;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	/* Round-robin over the active slaves, resuming where the previous
	 * burst left off */
	for (i = 0, idx = internals->active_slave;
			i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
		idx = idx % slave_count;

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);
	}

	internals->active_slave = idx;

	return num_rx_total;
}
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	/* 2-D array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves;
	 * must hold one entry per packet, not per port */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	/* Only slaves in the DISTRIBUTING state may carry data traffic */
	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (unlikely(dist_slave_count < 1))
		return 0;

	/*
	 * Populate slaves mbuf with the packets which are to be sent on it
	 * selecting output slave using hash based on xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint8_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < dist_slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
					&slave_bufs[i][slave_tx_count],
					slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_addr bond_mac;
	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;	/* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t subtype;
	uint16_t i, j, k;

	rte_eth_macaddr_get(internals->port_id, &bond_mac);
	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
				COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {
			/* If packet is not pure L2 and is known, skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove packet from array if it is slow packet or slave is not
			 * in collecting state or bonding interface is not in promiscuous
			 * mode and packet address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
				!collecting || (!promisc &&
					!is_multicast_ether_addr(&hdr->d_addr) &&
					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
						internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	internals->active_slave = idx;
	return num_rx_total;
}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}
	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}
#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX packets number for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update the stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
		info,							\
		port,							\
		eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
		src_ip,							\
		eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
		dst_ip,							\
		arp_op, ++burstnumber)
#endif
static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	strlcpy(buf, info, 16);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate the slave mbuf arrays with the packets to be sent on each */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}
static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}
static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}
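/*
 * Illustrative sketch (not part of the upstream driver): the helpers above
 * only XOR header words together; the burst_xmit_*_hash() callers below
 * fold the result and reduce it modulo the slave count to pick an output
 * slave, e.g.:
 */
#if 0
	uint32_t hash = ipv4_hash(ipv4_hdr);	/* src_addr ^ dst_addr */
	hash ^= hash >> 16;
	hash ^= hash >> 8;
	slaves[i] = hash % slave_count;		/* output slave index */
#endif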
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint8_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint32_t hash;
	int i;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

		hash = ether_hash(eth_hdr);

		slaves[i] = (hash ^= hash >> 8) % slave_count;
	}
}
void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint8_t slave_count, uint16_t *slaves)
{
	uint16_t i;
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	uint32_t hash, l3hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		l3hash = 0;

		proto = eth_hdr->ether_type;
		hash = ether_hash(eth_hdr);

		vlan_offset = get_vlan_offset(eth_hdr, &proto);

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv4_hash(ipv4_hdr);

		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);
		}

		hash = hash ^ l3hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}
void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint8_t slave_count, uint16_t *slaves)
{
	uint16_t i;
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	struct udp_hdr *udp_hdr;
	struct tcp_hdr *tcp_hdr;
	uint32_t hash, l3hash, l4hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		proto = eth_hdr->ether_type;
		vlan_offset = get_vlan_offset(eth_hdr, &proto);
		l3hash = 0;
		l4hash = 0;

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			size_t ip_hdr_offset;

			l3hash = ipv4_hash(ipv4_hdr);

			/* there is no L4 header in fragmented packet */
			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
								== 0)) {
				ip_hdr_offset = (ipv4_hdr->version_ihl
					& IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
					tcp_hdr = (struct tcp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					l4hash = HASH_L4_PORTS(tcp_hdr);
				} else if (ipv4_hdr->next_proto_id ==
								IPPROTO_UDP) {
					udp_hdr = (struct udp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					l4hash = HASH_L4_PORTS(udp_hdr);
				}
			}
		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);

			if (ipv6_hdr->proto == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}

		hash = l3hash ^ l4hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}
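/*
 * Illustrative sketch (not part of the upstream driver): which of the three
 * hash callbacks above is used follows the transmit policy selected by the
 * application through the public API. "bond_port" is a hypothetical id.
 */
#if 0
	/* Hash on IP addresses and TCP/UDP ports (burst_xmit_l34_hash). */
	if (rte_eth_bond_xmit_policy_set(bond_port,
			BALANCE_XMIT_POLICY_LAYER34) != 0)
		RTE_LOG(ERR, PMD, "Failed to set xmit policy\n");
#endif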
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}
static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;

	/* Sort slaves by spare bandwidth, largest first */
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
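/*
 * Worked example (added for clarity, not in the upstream source): for a
 * 10GbE slave, rte_eth_link_get_nowait() reports link_speed = 10000 (Mbps),
 * so link_bwg starts as 10000 * 1000000 / 8 = 1.25e9 bytes/s before being
 * scaled by the update index and REORDER_PERIOD_MS. The spare capacity is
 * then kept as a quotient/remainder pair so that bandwidth_cmp() can order
 * slaves without using floating point.
 */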
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint16_t i, slave_id;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
			sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}
void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_BOND_LOG(ERR,
						"Failed to allocate ARP packet from pool");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	/* Array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	/*
	 * Populate slaves mbuf with the packets which are to be sent on it
	 * selecting output slave using hash based on xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint8_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
					&slave_bufs[i][slave_tx_count],
					slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	/* 2-D array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves;
	 * must hold one entry per packet, not per port */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	/* Only slaves in the DISTRIBUTING state may carry data traffic */
	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (likely(dist_slave_count > 0)) {
		/*
		 * Populate slaves mbuf with the packets which are to be sent
		 * on it, selecting output slave using hash based on xmit policy
		 */
		internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
				bufs_slave_port_idxs);

		for (i = 0; i < nb_bufs; i++) {
			/*
			 * Populate slave mbuf arrays with mbufs for that
			 * slave
			 */
			uint8_t slave_idx = bufs_slave_port_idxs[i];

			slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
					bufs[i];
		}

		/* Send packet burst on each slave device */
		for (i = 0; i < dist_slave_count; i++) {
			if (slave_nb_bufs[i] == 0)
				continue;

			slave_tx_count = rte_eth_tx_burst(
					dist_slave_port_ids[i],
					bd_tx_q->queue_id, slave_bufs[i],
					slave_nb_bufs[i]);

			total_tx_count += slave_tx_count;

			/* If tx burst fails move packets to end of bufs */
			if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
				int slave_tx_fail_count = slave_nb_bufs[i] -
						slave_tx_count;
				total_tx_fail_count += slave_tx_fail_count;

				memcpy(&bufs[nb_bufs - total_tx_fail_count],
						&slave_bufs[i][slave_tx_count],
						slave_tx_fail_count * sizeof(bufs[0]));
			}
		}
	}

	/* Check for LACP control packets and send if available */
	for (i = 0; i < slave_count; i++) {
		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
		struct rte_mbuf *ctrl_pkt = NULL;

		if (likely(rte_ring_empty(port->tx_ring)))
			continue;

		if (rte_ring_dequeue(port->tx_ring,
				(void **)&ctrl_pkt) != -ENOENT) {
			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
					bd_tx_q->queue_id, &ctrl_pkt, 1);
			/*
			 * re-enqueue LAG control plane packets to buffering
			 * ring if transmission fails so the packet isn't lost.
			 */
			if (slave_tx_count != 1)
				rte_ring_enqueue(port->tx_ring, ctrl_pkt);
		}
	}

	return total_tx_count;
}
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t tx_failed_flag = 0;
	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}
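/*
 * Illustrative sketch (not part of the upstream driver): broadcast mode
 * relies on mbuf reference counting so one packet can sit in several Tx
 * rings at once. Incrementing the refcnt by (N - 1) before N transmits
 * keeps the buffer alive until the last slave frees it:
 */
#if 0
	rte_mbuf_refcnt_update(m, num_of_slaves - 1);	/* refcnt: 1 -> N */
	/* each slave's transmit path eventually decrements it by one; the
	 * buffer returns to its mempool only when the count reaches zero. */
#endif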
static void
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		/**
		 * If in mode 4 then save the link properties of the first
		 * slave, all subsequent slaves must match these properties
		 */
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		bond_link->link_autoneg = slave_link->link_autoneg;
		bond_link->link_duplex = slave_link->link_duplex;
		bond_link->link_speed = slave_link->link_speed;
	} else {
		/**
		 * In any other mode the link properties are set to default
		 * values of AUTONEG/DUPLEX
		 */
		ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
		ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
	}
}
static int
link_properties_valid(struct rte_eth_dev *ethdev,
		struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		if (bond_link->link_duplex != slave_link->link_duplex ||
			bond_link->link_autoneg != slave_link->link_autoneg ||
			bond_link->link_speed != slave_link->link_speed)
			return -1;
	}

	return 0;
}
int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (dst_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	ether_addr_copy(mac_addr, dst_mac_addr);
	return 0;
}
int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (new_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	/* If new MAC is different to current MAC then update */
	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

	return 0;
}
static const struct ether_addr null_mac_addr;

/*
 * Add additional MAC addresses to the slave
 */
int
slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
		uint16_t slave_port_id)
{
	int i, ret;
	struct ether_addr *mac_addr;

	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
		if (is_same_ether_addr(mac_addr, &null_mac_addr))
			break;

		ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
		if (ret < 0) {
			/* roll back the addresses added so far */
			for (i--; i > 0; i--)
				rte_eth_dev_mac_addr_remove(slave_port_id,
					&bonded_eth_dev->data->mac_addrs[i]);
			return ret;
		}
	}

	return 0;
}
/*
 * Remove additional MAC addresses from the slave
 */
int
slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
		uint16_t slave_port_id)
{
	int i, rc, ret;
	struct ether_addr *mac_addr;

	rc = 0;
	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
		if (is_same_ether_addr(mac_addr, &null_mac_addr))
			break;

		ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
		/* save only the first error */
		if (ret < 0 && rc == 0)
			rc = ret;
	}

	return rc;
}
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	int i;

	/* Update slave devices MAC addresses */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++) {
			if (rte_eth_dev_default_mac_addr_set(
					internals->slaves[i].port_id,
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (rte_eth_dev_default_mac_addr_set(
						internals->primary_port,
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					return -1;
				}
			} else {
				if (rte_eth_dev_default_mac_addr_set(
						internals->slaves[i].port_id,
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
					return -1;
				}
			}
		}
	}

	return 0;
}
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_8023AD:
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		if (internals->mode4.dedicated_queues.enabled == 0) {
			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
			RTE_BOND_LOG(WARNING,
				"Using mode 4, it is necessary to do TX burst "
				"and RX burst at least every 100ms.");
		} else {
			/* Use flow director's optimization */
			eth_dev->rx_pkt_burst =
					bond_ethdev_rx_burst_8023ad_fast_queue;
			eth_dev->tx_pkt_burst =
					bond_ethdev_tx_burst_8023ad_fast_queue;
		}
		break;
	case BONDING_MODE_TLB:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_ALB:
		if (bond_mode_alb_enable(eth_dev) != 0)
			return -1;

		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}
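/*
 * Illustrative sketch (not part of the upstream driver): the burst handlers
 * above are installed when an application creates a bonded device and picks
 * a mode through the public API. Names are hypothetical.
 */
#if 0
	int bond_port = rte_eth_bond_create("net_bonding0",
			BONDING_MODE_BALANCE, rte_socket_id());
	if (bond_port < 0)
		rte_exit(EXIT_FAILURE, "Failed to create bonded device\n");

	if (rte_eth_bond_mode_set(bond_port, BONDING_MODE_8023AD) != 0)
		rte_exit(EXIT_FAILURE, "Failed to set bonding mode\n");
#endif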
static int
slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	int errval = 0;
	struct bond_dev_private *internals = (struct bond_dev_private *)
		bonded_eth_dev->data->dev_private;
	struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];

	if (port->slow_pool == NULL) {
		char mem_name[256];
		int slave_id = slave_eth_dev->data->port_id;

		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
				slave_id);
		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			slave_eth_dev->data->numa_node);

		/* Any memory allocation failure in initialization is critical because
		 * resources can't be freed, so reinitialization is impossible. */
		if (port->slow_pool == NULL) {
			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
				slave_id, mem_name, rte_strerror(rte_errno));
		}
	}

	if (internals->mode4.dedicated_queues.enabled == 1) {
		/* Configure slow Rx queue */
		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.rx_qid, 128,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL, port->slow_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.rx_qid,
				errval);
			return errval;
		}

		/* Configure slow Tx queue */
		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid, 512,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid,
				errval);
			return errval;
		}
	}
	return 0;
}
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;
	uint16_t nb_rx_queues;
	uint16_t nb_tx_queues;

	int errval;
	uint16_t q_id;
	struct rte_flow_error flow_error;

	struct bond_dev_private *internals = (struct bond_dev_private *)
			bonded_eth_dev->data->dev_private;

	/* Stop slave */
	rte_eth_dev_stop(slave_eth_dev->data->port_id);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* If RSS is enabled for bonding, try to enable it for slaves */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
		if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
				!= 0) {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
		} else {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
		}

		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
	}

	if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
			DEV_RX_OFFLOAD_VLAN_FILTER)
		slave_eth_dev->data->dev_conf.rxmode.offloads |=
				DEV_RX_OFFLOAD_VLAN_FILTER;
	else
		slave_eth_dev->data->dev_conf.rxmode.offloads &=
				~DEV_RX_OFFLOAD_VLAN_FILTER;

	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			nb_rx_queues++;
			nb_tx_queues++;
		}
	}

	errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
				     bonded_eth_dev->data->mtu);
	if (errval != 0 && errval != -ENOTSUP) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Configure device */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			nb_rx_queues, nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Setup Rx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
				!= 0)
			return errval;

		if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
				slave_eth_dev->data->port_id) != 0) {
			RTE_BOND_LOG(ERR,
				"bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
				slave_eth_dev->data->port_id, errval);
			return -1;
		}

		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
			rte_flow_destroy(slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
					&flow_error);

		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
				slave_eth_dev->data->port_id);
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	/* If RSS is enabled for bonding, synchronize RETA */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		int i;
		struct bond_dev_private *internals;

		internals = bonded_eth_dev->data->dev_private;

		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
				errval = rte_eth_dev_rss_reta_update(
						slave_eth_dev->data->port_id,
						&internals->reta_conf[0],
						internals->slaves[i].reta_size);
				if (errval != 0) {
					RTE_BOND_LOG(WARNING,
						"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
						" RSS Configuration for bonding may be inconsistent.",
						slave_eth_dev->data->port_id, errval);
				}
				break;
			}
		}
	}

	/* If lsc interrupt is set, check initial slave's link status */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
			NULL);
	}

	return 0;
}
static void
slave_remove(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	uint16_t i;

	for (i = 0; i < internals->slave_count; i++)
		if (internals->slaves[i].port_id ==
				slave_eth_dev->data->port_id)
			break;

	if (i < (internals->slave_count - 1)) {
		struct rte_flow *flow;

		memmove(&internals->slaves[i], &internals->slaves[i + 1],
				sizeof(internals->slaves[0]) *
				(internals->slave_count - i - 1));
		TAILQ_FOREACH(flow, &internals->flow_list, next) {
			memmove(&flow->flows[i], &flow->flows[i + 1],
				sizeof(flow->flows[0]) *
				(internals->slave_count - i - 1));
			flow->flows[internals->slave_count - 1] = NULL;
		}
	}

	internals->slave_count--;

	/* force reconfiguration of slave interfaces */
	_rte_eth_dev_reset(slave_eth_dev);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

static void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* Mark slave devices that don't support interrupts so we can
	 * compensate when we start the bond
	 */
	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
		slave_details->link_status_poll_enabled = 1;
	}

	slave_details->link_status_wait_to_complete = 0;
	/* clean tlb_last_obytes when adding port for bonding device */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct ether_addr));
}
void
bond_ethdev_primary_set(struct bond_dev_private *internals,
		uint16_t slave_port_id)
{
	int i;

	if (internals->active_slave_count < 1)
		internals->current_primary_port = slave_port_id;
	else
		/* Search bonded device slave ports for new proposed primary port */
		for (i = 0; i < internals->active_slave_count; i++) {
			if (internals->active_slaves[i] == slave_port_id)
				internals->current_primary_port = slave_port_id;
		}
}
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);

static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (check_for_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		goto out_err;
	}

	if (internals->user_defined_mac == 0) {
		struct ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			goto out_err;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			goto out_err;
		}
	}

	/* If bonded device is configured in promiscuous mode then re-apply config */
	if (internals->promiscuous_en)
		bond_ethdev_promiscuous_enable(eth_dev);

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			internals->mode4.dedicated_queues.rx_qid =
					eth_dev->data->nb_rx_queues;
			internals->mode4.dedicated_queues.tx_qid =
					eth_dev->data->nb_tx_queues;
		}
	}

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		struct rte_eth_dev *slave_ethdev =
				&(rte_eth_devices[internals->slaves[i].port_id]);
		if (slave_configure(eth_dev, slave_ethdev) != 0) {
			RTE_BOND_LOG(ERR,
				"bonded port (%d) failed to reconfigure slave device (%d)",
				eth_dev->data->port_id,
				internals->slaves[i].port_id);
			goto out_err;
		}
		/* We will need to poll for link status if any slave doesn't
		 * support interrupts
		 */
		if (internals->slaves[i].link_status_poll_enabled)
			internals->link_status_polling_enabled = 1;
	}

	/* start polling if needed */
	if (internals->link_status_polling_enabled) {
		rte_eal_alarm_set(
			internals->link_status_polling_interval_ms * 1000,
			bond_ethdev_slave_link_status_change_monitor,
			(void *)&rte_eth_devices[internals->port_id]);
	}

	/* Update all slave devices MACs */
	if (mac_address_slaves_update(eth_dev) != 0)
		goto out_err;

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB)
		bond_tlb_enable(internals);

	return 0;

out_err:
	eth_dev->data->dev_started = 0;
	return -1;
}
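/*
 * Illustrative sketch (not part of the upstream driver): the usual
 * application sequence that ends up in bond_ethdev_start(). Slaves are
 * attached while the bonded port is stopped; starting the bonded port then
 * reconfigures and starts every slave. Names are hypothetical.
 */
#if 0
	rte_eth_dev_configure(bond_port, 1, 1, &port_conf);
	rte_eth_rx_queue_setup(bond_port, 0, 128, rte_socket_id(), NULL, mb_pool);
	rte_eth_tx_queue_setup(bond_port, 0, 512, rte_socket_id(), NULL);

	rte_eth_bond_slave_add(bond_port, slave_port_0);
	rte_eth_bond_slave_add(bond_port, slave_port_1);

	if (rte_eth_dev_start(bond_port) != 0)	/* -> bond_ethdev_start() */
		rte_exit(EXIT_FAILURE, "Failed to start bonded device\n");
#endif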
static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
	uint16_t i;

	if (dev->data->rx_queues != NULL) {
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			rte_free(dev->data->rx_queues[i]);
			dev->data->rx_queues[i] = NULL;
		}
		dev->data->nb_rx_queues = 0;
	}

	if (dev->data->tx_queues != NULL) {
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			rte_free(dev->data->tx_queues[i]);
			dev->data->tx_queues[i] = NULL;
		}
		dev->data->nb_tx_queues = 0;
	}
}
static void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	uint16_t i;

	if (internals->mode == BONDING_MODE_8023AD) {
		struct port *port;
		void *pkt = NULL;

		bond_mode_8023ad_stop(eth_dev);

		/* Discard all messages to/from mode 4 state machines */
		for (i = 0; i < internals->active_slave_count; i++) {
			port = &mode_8023ad_ports[internals->active_slaves[i]];

			RTE_ASSERT(port->rx_ring != NULL);
			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);

			RTE_ASSERT(port->tx_ring != NULL);
			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);
		}
	}
	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB) {
		bond_tlb_disable(internals);
		for (i = 0; i < internals->active_slave_count; i++)
			tlb_last_obytets[internals->active_slaves[i]] = 0;
	}

	internals->link_status_polling_enabled = 0;
	for (i = 0; i < internals->slave_count; i++)
		internals->slaves[i].last_link_status = 0;

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 0;
}
static void
bond_ethdev_close(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	uint8_t bond_port_id = internals->port_id;
	int skipped = 0;
	struct rte_flow_error ferror;

	RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
	while (internals->slave_count != skipped) {
		uint16_t port_id = internals->slaves[skipped].port_id;

		rte_eth_dev_stop(port_id);

		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
			RTE_BOND_LOG(ERR,
				     "Failed to remove port %d from bonded device %s",
				     port_id, dev->device->name);
			skipped++;
		}
	}
	bond_flow_ops.flush(dev, &ferror);
	bond_ethdev_free_queues(dev);
	rte_bitmap_reset(internals->vlan_filter_bmp);
}
/* forward declaration */
static int bond_ethdev_configure(struct rte_eth_dev *dev);
static void
bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	uint16_t max_nb_rx_queues = UINT16_MAX;
	uint16_t max_nb_tx_queues = UINT16_MAX;

	dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;

	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
			internals->candidate_max_rx_pktlen :
			ETHER_MAX_JUMBO_FRAME_LEN;
	/* The maximum number of tx/rx queues that the bonded device can
	 * support is the minimum across all slaves, as every slave must be
	 * capable of supporting the same number of tx/rx queues.
	 */
	if (internals->slave_count > 0) {
		struct rte_eth_dev_info slave_info;
		uint16_t idx;

		for (idx = 0; idx < internals->slave_count; idx++) {
			rte_eth_dev_info_get(internals->slaves[idx].port_id,
					&slave_info);

			if (slave_info.max_rx_queues < max_nb_rx_queues)
				max_nb_rx_queues = slave_info.max_rx_queues;

			if (slave_info.max_tx_queues < max_nb_tx_queues)
				max_nb_tx_queues = slave_info.max_tx_queues;
		}
	}

	dev_info->max_rx_queues = max_nb_rx_queues;
	dev_info->max_tx_queues = max_nb_tx_queues;
	/*
	 * If dedicated HW queues are enabled for the link bonding device in
	 * LACP mode, then the maximum number of data path queues must be
	 * reduced by one, as one queue pair is reserved for LACP control
	 * traffic.
	 */
	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		dev_info->max_rx_queues--;
		dev_info->max_tx_queues--;
	}
	dev_info->min_rx_bufsize = 0;

	dev_info->rx_offload_capa = internals->rx_offload_capa;
	dev_info->tx_offload_capa = internals->tx_offload_capa;
	dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
	dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;

	dev_info->reta_size = internals->reta_size;
}
static int
bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int res, i;

	/* don't do this while a slave is being added */
	rte_spinlock_lock(&internals->lock);

	if (on)
		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
	else
		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);

	for (i = 0; i < internals->slave_count; i++) {
		uint16_t port_id = internals->slaves[i].port_id;

		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
		if (res == ENOTSUP)
			RTE_BOND_LOG(WARNING,
				     "Setting VLAN filter on slave port %u not supported.",
				     port_id);
	}

	rte_spinlock_unlock(&internals->lock);
	return 0;
}
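/*
 * Illustrative sketch only (not driver code): an application enables a VLAN
 * filter once on the bonded port, and the handler above mirrors the setting
 * to every slave. "bond_port" and VLAN id 100 are hypothetical.
 *
 *	if (rte_eth_dev_vlan_filter(bond_port, 100, 1) != 0)
 *		RTE_LOG(WARNING, PMD, "VLAN 100 filter not applied\n");
 */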
static int
bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
					0, dev->data->numa_node);
	if (bd_rx_q == NULL)
		return -1;

	bd_rx_q->queue_id = rx_queue_id;
	bd_rx_q->dev_private = dev->data->dev_private;

	bd_rx_q->nb_rx_desc = nb_rx_desc;

	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
	bd_rx_q->mb_pool = mb_pool;

	dev->data->rx_queues[rx_queue_id] = bd_rx_q;

	return 0;
}
static int
bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_txconf *tx_conf)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
					0, dev->data->numa_node);

	if (bd_tx_q == NULL)
		return -1;

	bd_tx_q->queue_id = tx_queue_id;
	bd_tx_q->dev_private = dev->data->dev_private;

	bd_tx_q->nb_tx_desc = nb_tx_desc;
	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));

	dev->data->tx_queues[tx_queue_id] = bd_tx_q;

	return 0;
}
static void
bond_ethdev_rx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}

static void
bond_ethdev_tx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
	struct bond_dev_private *internals;

	/* Default "polling slave found" to true: we don't want to disable the
	 * polling thread just because we could not take the lock. */
	int i, polling_slave_found = 1;

	if (cb_arg == NULL)
		return;

	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;

	if (!bonded_ethdev->data->dev_started ||
			!internals->link_status_polling_enabled)
		return;

	/* If the device is currently being configured then don't check the
	 * slaves' link status; wait until the next polling period. */
	if (rte_spinlock_trylock(&internals->lock)) {
		if (internals->slave_count > 0)
			polling_slave_found = 0;
		for (i = 0; i < internals->slave_count; i++) {
			if (!internals->slaves[i].link_status_poll_enabled)
				continue;

			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
			polling_slave_found = 1;

			/* Update slave link status */
			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
					internals->slaves[i].link_status_wait_to_complete);

			/* If the link status has changed since it was last
			 * checked then call the lsc event callback. */
			if (slave_ethdev->data->dev_link.link_status !=
					internals->slaves[i].last_link_status) {
				internals->slaves[i].last_link_status =
						slave_ethdev->data->dev_link.link_status;

				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
						RTE_ETH_EVENT_INTR_LSC,
						&bonded_ethdev->data->port_id,
						NULL);
			}
		}
		rte_spinlock_unlock(&internals->lock);
	}
	if (polling_slave_found)
		/* Set alarm to continue monitoring link status of slave ethdevs */
		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
				bond_ethdev_slave_link_status_change_monitor, cb_arg);
}
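/*
 * Illustrative sketch only (not driver code): the polling monitor above feeds
 * bond_ethdev_lsc_event_callback(); an application can additionally watch the
 * bonded port itself through the standard ethdev callback API. "bond_port"
 * and "app_lsc_cb" are hypothetical names.
 *
 *	static int
 *	app_lsc_cb(uint16_t port_id, enum rte_eth_event_type type,
 *			void *cb_arg, void *ret_param)
 *	{
 *		RTE_SET_USED(cb_arg);
 *		RTE_SET_USED(ret_param);
 *		printf("port %u: link state change event %d\n", port_id, type);
 *		return 0;
 *	}
 *
 *	rte_eth_dev_callback_register(bond_port, RTE_ETH_EVENT_INTR_LSC,
 *			app_lsc_cb, NULL);
 */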
static int
bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
{
	void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);

	struct bond_dev_private *bond_ctx;
	struct rte_eth_link slave_link;

	uint32_t idx;

	bond_ctx = ethdev->data->dev_private;

	ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

	if (ethdev->data->dev_started == 0 ||
			bond_ctx->active_slave_count == 0) {
		ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
		return 0;
	}

	ethdev->data->dev_link.link_status = ETH_LINK_UP;

	if (wait_to_complete)
		link_update = rte_eth_link_get;
	else
		link_update = rte_eth_link_get_nowait;
	switch (bond_ctx->mode) {
	case BONDING_MODE_BROADCAST:
		/**
		 * Setting link speed to UINT32_MAX to ensure we pick up the
		 * value of the first compared active slave.
		 */
		ethdev->data->dev_link.link_speed = UINT32_MAX;

		/**
		 * The bonded link speed is the minimum of all the slaves'
		 * link speeds, as packet loss would occur on the slowest
		 * slave if transmission at a higher rate were attempted.
		 */
		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
			link_update(bond_ctx->active_slaves[idx], &slave_link);

			if (slave_link.link_speed <
					ethdev->data->dev_link.link_speed)
				ethdev->data->dev_link.link_speed =
						slave_link.link_speed;
		}
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		/* Current primary slave */
		link_update(bond_ctx->current_primary_port, &slave_link);

		ethdev->data->dev_link.link_speed = slave_link.link_speed;
		break;
	case BONDING_MODE_8023AD:
		ethdev->data->dev_link.link_autoneg =
				bond_ctx->mode4.slave_link.link_autoneg;
		ethdev->data->dev_link.link_duplex =
				bond_ctx->mode4.slave_link.link_duplex;
		/* fall through to update link speed */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		/**
		 * In these modes the maximum theoretical link speed is the
		 * sum of all the slaves' link speeds.
		 */
		ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
			link_update(bond_ctx->active_slaves[idx], &slave_link);

			ethdev->data->dev_link.link_speed +=
					slave_link.link_speed;
		}
	}

	return 0;
}
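/*
 * Worked example (assumed figures, for illustration only): with two active
 * slaves at 10G and 1G, broadcast mode reports min(10G, 1G) = 1G, since every
 * packet must traverse the slowest slave, while round-robin, balance, TLB,
 * ALB and 802.3AD modes report the 11G sum as the theoretical aggregate.
 */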
static int
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_stats slave_stats;
	int i, j;

	for (i = 0; i < internals->slave_count; i++) {
		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);

		stats->ipackets += slave_stats.ipackets;
		stats->opackets += slave_stats.opackets;
		stats->ibytes += slave_stats.ibytes;
		stats->obytes += slave_stats.obytes;
		stats->imissed += slave_stats.imissed;
		stats->ierrors += slave_stats.ierrors;
		stats->oerrors += slave_stats.oerrors;
		stats->rx_nombuf += slave_stats.rx_nombuf;

		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
			stats->q_opackets[j] += slave_stats.q_opackets[j];
			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
			stats->q_obytes[j] += slave_stats.q_obytes[j];
			stats->q_errors[j] += slave_stats.q_errors[j];
		}
	}

	return 0;
}
static void
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	for (i = 0; i < internals->slave_count; i++)
		rte_eth_stats_reset(internals->slaves[i].port_id);
}
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	int i;

	internals->promiscuous_en = 1;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_enable(internals->slaves[i].port_id);
		break;
	/* In mode 4, promiscuous mode is managed when a slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to the primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_enable(internals->current_primary_port);
	}
}
static void
bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	internals->promiscuous_en = 0;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_disable(internals->slaves[i].port_id);
		break;
	/* In mode 4, promiscuous mode is managed when a slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to the primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_disable(internals->current_primary_port);
	}
}
static void
bond_ethdev_delayed_lsc_propagation(void *arg)
{
	if (arg == NULL)
		return;

	_rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
			RTE_ETH_EVENT_INTR_LSC, NULL);
}
int
bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
		void *param, void *ret_param __rte_unused)
{
	struct rte_eth_dev *bonded_eth_dev;
	struct bond_dev_private *internals;
	struct rte_eth_link link;

	int i, valid_slave = 0;
	uint16_t active_pos;
	uint8_t lsc_flag = 0;

	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
		return 0;

	/* param carries the bonded port id; port ids are 16-bit wide */
	bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];

	if (check_for_bonded_ethdev(bonded_eth_dev))
		return 0;

	internals = bonded_eth_dev->data->dev_private;

	/* If the device isn't started don't handle interrupts */
	if (!bonded_eth_dev->data->dev_started)
		return 0;

	/* Verify that port_id is a valid slave of the bonded port */
	for (i = 0; i < internals->slave_count; i++) {
		if (internals->slaves[i].port_id == port_id) {
			valid_slave = 1;
			break;
		}
	}

	if (!valid_slave)
		return 0;
	/* Synchronize lsc callback parallel calls, whether from real link
	 * events raised by the slave PMDs or from the bonding PMD itself.
	 */
	rte_spinlock_lock(&internals->lsc_lock);

	/* Search for port in active port list */
	active_pos = find_slave_by_id(internals->active_slaves,
			internals->active_slave_count, port_id);
	rte_eth_link_get_nowait(port_id, &link);
	if (link.link_status) {
		if (active_pos < internals->active_slave_count)
			goto link_update;

		/* If there are no active slave ports then set this port to be
		 * the primary port. */
		if (internals->active_slave_count < 1) {
			/* If first active slave, then change link status */
			bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
			internals->current_primary_port = port_id;
			lsc_flag = 1;

			mac_address_slaves_update(bonded_eth_dev);
		}

		/* Check the link state properties if the bonded link is up */
		if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
			if (link_properties_valid(bonded_eth_dev, &link) != 0)
				RTE_BOND_LOG(ERR, "Invalid link properties "
						"for slave %d in bonding mode %d",
						port_id, internals->mode);
		} else {
			/* Inherit slave link properties */
			link_properties_set(bonded_eth_dev, &link);
		}

		activate_slave(bonded_eth_dev, port_id);

		/* If the user has defined the primary port then default to
		 * using it. */
		if (internals->user_defined_primary_port &&
				internals->primary_port == port_id)
			bond_ethdev_primary_set(internals, port_id);
	} else {
		if (active_pos == internals->active_slave_count)
			goto link_update;

		/* Remove from active slave list */
		deactivate_slave(bonded_eth_dev, port_id);

		if (internals->active_slave_count < 1)
			lsc_flag = 1;

		/* Update the primary id: take the first active slave from the
		 * list, or fall back to the configured primary port if none
		 * are active. */
		if (port_id == internals->current_primary_port) {
			if (internals->active_slave_count > 0)
				bond_ethdev_primary_set(internals,
						internals->active_slaves[0]);
			else
				internals->current_primary_port = internals->primary_port;
		}
	}
link_update:
	/**
	 * Update bonded device link properties after any change to the set
	 * of active slaves.
	 */
	bond_ethdev_link_update(bonded_eth_dev, 0);

	if (lsc_flag) {
		/* Cancel any possible outstanding interrupts if delays are enabled */
		if (internals->link_up_delay_ms > 0 ||
				internals->link_down_delay_ms > 0)
			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
					bonded_eth_dev);

		if (bonded_eth_dev->data->dev_link.link_status) {
			if (internals->link_up_delay_ms > 0)
				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL);
		} else {
			if (internals->link_down_delay_ms > 0)
				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL);
		}
	}

	rte_spinlock_unlock(&internals->lsc_lock);

	return 0;
}
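/*
 * Illustrative sketch only (not driver code): the delayed propagation above
 * is driven by the per-device up/down delay settings. For example, flapping
 * links can be debounced by holding link-up events for 500 ms and link-down
 * events for 100 ms on a hypothetical "bond_port":
 *
 *	rte_eth_bond_link_up_prop_delay_set(bond_port, 500);
 *	rte_eth_bond_link_down_prop_delay_set(bond_port, 100);
 */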
static int
bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	unsigned int i, j;
	int result = 0;
	int slave_reta_size;
	unsigned reta_count;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	reta_count = reta_size / RTE_RETA_GROUP_SIZE;

	for (i = 0; i < reta_count; i++) {
		internals->reta_conf[i].mask = reta_conf[i].mask;
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
	}

	/* Fill the rest of the array */
	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
				sizeof(internals->reta_conf[0]) * reta_count);

	/* Propagate RETA over slaves */
	for (i = 0; i < internals->slave_count; i++) {
		slave_reta_size = internals->slaves[i].reta_size;
		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
				&internals->reta_conf[0], slave_reta_size);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	int i, j;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];

	return 0;
}
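/*
 * Illustrative sketch only (not driver code): updating the first RETA group
 * on the bonded port; the handlers above mirror the change to every slave.
 * "bond_port" and the two-queue layout are hypothetical.
 *
 *	struct rte_eth_rss_reta_entry64 reta_conf[1];
 *	int j;
 *
 *	memset(reta_conf, 0, sizeof(reta_conf));
 *	reta_conf[0].mask = ~0ULL;
 *	for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
 *		reta_conf[0].reta[j] = j % 2;	// spread over 2 rx queues
 *	rte_eth_dev_rss_reta_update(bond_port, reta_conf, RTE_RETA_GROUP_SIZE);
 */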
static int
bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	int i, result = 0;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_rss_conf bond_rss_conf;

	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));

	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;

	if (bond_rss_conf.rss_hf != 0)
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;

	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
			sizeof(internals->rss_key)) {
		if (bond_rss_conf.rss_key_len == 0)
			bond_rss_conf.rss_key_len = 40;
		internals->rss_key_len = bond_rss_conf.rss_key_len;
		memcpy(internals->rss_key, bond_rss_conf.rss_key,
				internals->rss_key_len);
	}

	for (i = 0; i < internals->slave_count; i++) {
		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
				&bond_rss_conf);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	rss_conf->rss_key_len = internals->rss_key_len;
	if (rss_conf->rss_key)
		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);

	return 0;
}
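/*
 * Illustrative sketch only (not driver code): restricting the bonded port's
 * RSS hash to IP/TCP flows; the update handler above masks the requested
 * types against what every slave supports before propagating them.
 * "bond_port" is hypothetical.
 *
 *	struct rte_eth_rss_conf rss_conf = {
 *		.rss_key = NULL,	// keep the current key
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *	};
 *	rte_eth_dev_rss_hash_update(bond_port, &rss_conf);
 */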
static int
bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct rte_eth_dev *slave_eth_dev;
	struct bond_dev_private *internals = dev->data->dev_private;
	int ret, i;

	rte_spinlock_lock(&internals->lock);

	for (i = 0; i < internals->slave_count; i++) {
		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
		if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
			rte_spinlock_unlock(&internals->lock);
			return -ENOTSUP;
		}
	}
	for (i = 0; i < internals->slave_count; i++) {
		ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
		if (ret < 0) {
			rte_spinlock_unlock(&internals->lock);
			return ret;
		}
	}

	rte_spinlock_unlock(&internals->lock);
	return 0;
}
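/*
 * Illustrative sketch only (not driver code): a single MTU call on the bonded
 * port is applied to all slaves by the handler above, provided every slave
 * implements mtu_set; "bond_port" is hypothetical.
 *
 *	if (rte_eth_dev_set_mtu(bond_port, 9000) != 0)
 *		RTE_LOG(ERR, PMD, "cannot set jumbo MTU on bonded port\n");
 */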
static int
bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
{
	if (mac_address_set(dev, addr)) {
		RTE_BOND_LOG(ERR, "Failed to update MAC address");
		return -EINVAL;
	}

	return 0;
}
static int
bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
		enum rte_filter_type type, enum rte_filter_op op, void *arg)
{
	if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
		*(const void **)arg = &bond_flow_ops;
		return 0;
	}
	return -ENOTSUP;
}
static int
bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
		__rte_unused uint32_t index, uint32_t vmdq)
{
	struct rte_eth_dev *slave_eth_dev;
	struct bond_dev_private *internals = dev->data->dev_private;
	int ret, i;

	rte_spinlock_lock(&internals->lock);

	for (i = 0; i < internals->slave_count; i++) {
		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
		if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
				*slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
			ret = -ENOTSUP;
			goto end;
		}
	}

	for (i = 0; i < internals->slave_count; i++) {
		ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
				mac_addr, vmdq);
		if (ret < 0) {
			/* Roll back the address on slaves already updated */
			for (i--; i >= 0; i--)
				rte_eth_dev_mac_addr_remove(
					internals->slaves[i].port_id, mac_addr);
			goto end;
		}
	}

	ret = 0;
end:
	rte_spinlock_unlock(&internals->lock);
	return ret;
}
static void
bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
{
	struct rte_eth_dev *slave_eth_dev;
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	rte_spinlock_lock(&internals->lock);

	for (i = 0; i < internals->slave_count; i++) {
		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
		if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
			goto end;
	}

	struct ether_addr *mac_addr = &dev->data->mac_addrs[index];

	for (i = 0; i < internals->slave_count; i++)
		rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
				mac_addr);

end:
	rte_spinlock_unlock(&internals->lock);
}
const struct eth_dev_ops default_dev_ops = {
	.dev_start            = bond_ethdev_start,
	.dev_stop             = bond_ethdev_stop,
	.dev_close            = bond_ethdev_close,
	.dev_configure        = bond_ethdev_configure,
	.dev_infos_get        = bond_ethdev_info,
	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
	.rx_queue_release     = bond_ethdev_rx_queue_release,
	.tx_queue_release     = bond_ethdev_tx_queue_release,
	.link_update          = bond_ethdev_link_update,
	.stats_get            = bond_ethdev_stats_get,
	.stats_reset          = bond_ethdev_stats_reset,
	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
	.reta_update          = bond_ethdev_rss_reta_update,
	.reta_query           = bond_ethdev_rss_reta_query,
	.rss_hash_update      = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
	.mtu_set              = bond_ethdev_mtu_set,
	.mac_addr_set         = bond_ethdev_mac_address_set,
	.mac_addr_add         = bond_ethdev_mac_addr_add,
	.mac_addr_remove      = bond_ethdev_mac_addr_remove,
	.filter_ctrl          = bond_filter_ctrl
};
static int
bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
{
	const char *name = rte_vdev_device_name(dev);
	uint8_t socket_id = dev->device.numa_node;
	struct bond_dev_private *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	uint32_t vlan_filter_bmp_size;

	/* now do all data allocation - for eth_dev structure, dummy pci driver
	 * and internal (private) data
	 */

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
		goto err;
	}

	internals = eth_dev->data->dev_private;
	eth_dev->data->nb_rx_queues = (uint16_t)1;
	eth_dev->data->nb_tx_queues = (uint16_t)1;

	/* Allocate memory for storing MAC addresses */
	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
			BOND_MAX_MAC_ADDRS, 0, socket_id);
	if (eth_dev->data->mac_addrs == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to allocate %u bytes needed to store MAC addresses",
			     ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
		goto err;
	}

	eth_dev->dev_ops = &default_dev_ops;
	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
	rte_spinlock_init(&internals->lock);
	rte_spinlock_init(&internals->lsc_lock);

	internals->port_id = eth_dev->data->port_id;
	internals->mode = BONDING_MODE_INVALID;
	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
	internals->burst_xmit_hash = burst_xmit_l2_hash;
	internals->user_defined_mac = 0;

	internals->link_status_polling_enabled = 0;

	internals->link_status_polling_interval_ms =
		DEFAULT_POLLING_INTERVAL_10_MS;
	internals->link_down_delay_ms = 0;
	internals->link_up_delay_ms = 0;

	internals->slave_count = 0;
	internals->active_slave_count = 0;
	internals->rx_offload_capa = 0;
	internals->tx_offload_capa = 0;
	internals->rx_queue_offload_capa = 0;
	internals->tx_queue_offload_capa = 0;
	internals->candidate_max_rx_pktlen = 0;
	internals->max_rx_pktlen = 0;

	/* Initially allow any RSS offload type to be chosen */
	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;

	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
	memset(internals->slaves, 0, sizeof(internals->slaves));

	TAILQ_INIT(&internals->flow_list);
	internals->flow_isolated_valid = 0;

	/* Set mode 4 default configuration */
	bond_mode_8023ad_setup(eth_dev, NULL);
	if (bond_ethdev_mode_set(eth_dev, mode)) {
		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
				eth_dev->data->port_id, mode);
		goto err;
	}
	vlan_filter_bmp_size =
		rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
						   RTE_CACHE_LINE_SIZE);
	if (internals->vlan_filter_bmpmem == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to allocate vlan bitmap for bonded device %u",
			     eth_dev->data->port_id);
		goto err;
	}

	internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
	if (internals->vlan_filter_bmp == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to init vlan bitmap for bonded device %u",
			     eth_dev->data->port_id);
		rte_free(internals->vlan_filter_bmpmem);
		goto err;
	}

	return eth_dev->data->port_id;

err:
	rte_free(internals);
	if (eth_dev != NULL) {
		rte_free(eth_dev->data->mac_addrs);
		rte_eth_dev_release_port(eth_dev);
	}
	return -1;
}
static int
bond_probe(struct rte_vdev_device *dev)
{
	const char *name;
	struct bond_dev_private *internals;
	struct rte_kvargs *kvlist;
	uint8_t bonding_mode, socket_id;
	int arg_count, port_id;
	uint8_t agg_mode = 0;
	struct rte_eth_dev *eth_dev;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
	    strlen(rte_vdev_device_args(dev)) == 0) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			RTE_BOND_LOG(ERR, "Failed to probe %s", name);
			return -1;
		}
		/* TODO: request info from primary to set up Rx and Tx */
		eth_dev->dev_ops = &default_dev_ops;
		eth_dev->device = &dev->device;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
		pmd_bond_init_valid_arguments);
	if (kvlist == NULL)
		return -1;

	/* Parse link bonding mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
				&bond_ethdev_parse_slave_mode_kvarg,
				&bonding_mode) != 0) {
			RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
					name);
			goto parse_error;
		}
	} else {
		RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
				"device %s", name);
		goto parse_error;
	}
	/* Parse socket id to create bonding device on */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
	if (arg_count == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
				!= 0) {
			RTE_BOND_LOG(ERR, "Invalid socket id specified for "
					"bonded device %s", name);
			goto parse_error;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(ERR, "Socket id can be specified only once for "
				"bonded device %s", name);
		goto parse_error;
	} else {
		socket_id = rte_socket_id();
	}
	dev->device.numa_node = socket_id;

	/* Create link bonding eth device */
	port_id = bond_alloc(dev, bonding_mode);
	if (port_id < 0) {
		RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
				"socket %u.", name, bonding_mode, socket_id);
		goto parse_error;
	}
	internals = rte_eth_devices[port_id].data->dev_private;
	internals->kvlist = kvlist;

	rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to parse agg selection mode for bonded device %s",
					name);
			goto parse_error;
		}
		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
	} else {
		rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
	}

	RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
			"socket %u.", name, port_id, bonding_mode, socket_id);
	return 0;

parse_error:
	rte_kvargs_free(kvlist);
	return -1;
}
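/*
 * Illustrative sketch only (not driver code): the probe path above is usually
 * exercised from the EAL command line. The PCI addresses below are
 * hypothetical; the accepted keys match the parameter string registered at
 * the bottom of this file:
 *
 *	testpmd -l 0-3 -n 4 \
 *		--vdev 'net_bonding0,mode=2,slave=0000:02:00.0,slave=0000:03:00.0,xmit_policy=l34'
 */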
static int
bond_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;
	struct bond_dev_private *internals;
	const char *name;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);

	/* now free all data allocation - for eth_dev structure,
	 * dummy pci driver and internal (private) data
	 */

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return -ENODEV;

	RTE_ASSERT(eth_dev->device == &dev->device);

	internals = eth_dev->data->dev_private;
	if (internals->slave_count != 0)
		return -EBUSY;

	if (eth_dev->data->dev_started == 1) {
		bond_ethdev_stop(eth_dev);
		bond_ethdev_close(eth_dev);
	}
	eth_dev->dev_ops = NULL;
	eth_dev->rx_pkt_burst = NULL;
	eth_dev->tx_pkt_burst = NULL;

	internals = eth_dev->data->dev_private;
	/* Try to release the mempool used in mode 6; if the bonded device
	 * is not in mode 6, freeing a NULL pointer is not a problem.
	 */
	rte_mempool_free(internals->mode6.mempool);
	rte_bitmap_free(internals->vlan_filter_bmp);
	rte_free(internals->vlan_filter_bmpmem);
	rte_free(eth_dev->data->dev_private);
	rte_free(eth_dev->data->mac_addrs);

	rte_eth_dev_release_port(eth_dev);

	return 0;
}
/* This part will resolve the slave port ids after all the other pdevs and
 * vdevs have been allocated */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	const char *name = dev->device->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	int arg_count;
	uint16_t port_id = dev - rte_eth_devices;
	uint8_t agg_mode;

	static const uint8_t default_rss_key[40] = {
		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
		0xBE, 0xAC, 0x01, 0xFA
	};

	unsigned int i, j;
	/* If RSS is enabled, fill table and key with default values */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
		memcpy(internals->rss_key, default_rss_key,
		       sizeof(default_rss_key));

		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] =
					(i * RTE_RETA_GROUP_SIZE + j) %
					dev->data->nb_rx_queues;
		}
	}
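/*
 * Worked example (assumed figures, for illustration only): with
 * nb_rx_queues == 4, the loop above initializes the redirection table
 * round-robin as 0, 1, 2, 3, 0, 1, ..., so by default every rx queue
 * receives an equal share of the RSS hash space.
 */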
	/* set the max_rx_pktlen */
	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

	/*
	 * If there is no kvlist, this bonded device was created through the
	 * bonding API rather than from EAL device arguments, so there is
	 * nothing further to configure here.
	 */
	if (!kvlist)
		return 0;
	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
			RTE_BOND_LOG(INFO, "Invalid MAC address for bonded device %s",
					name);
			return -1;
		}

		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set MAC address on bonded device %s",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(ERR,
				"MAC address can be specified only once for bonded device %s",
				name);
		return -1;
	}
	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
						0) {
			RTE_BOND_LOG(INFO,
					"Invalid xmit policy specified for bonded device %s",
					name);
			return -1;
		}

		/* Set balance mode transmit policy */
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set balance xmit policy on bonded device %s",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(ERR,
				"Transmit policy can be specified only once for bonded device %s",
				name);
		return -1;
	}
	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to parse agg selection mode for bonded device %s",
					name);
			return -1;
		}
		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
	}
	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;
		unsigned int i;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to parse slave ports for bonded device %s",
					name);
			return -1;
		}

		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
				RTE_BOND_LOG(ERR,
						"Failed to add port %d as slave to bonded device %s",
						slave_ports.slaves[i], name);
			}
		}
	} else {
		RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
		return -1;
	}
	/* Parse/set primary slave port id */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint16_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
			RTE_BOND_LOG(INFO,
					"Invalid primary slave port id specified for bonded device %s",
					name);
			return -1;
		}

		/* Set the primary slave port */
		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
				!= 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set primary slave port %d on bonded device %s",
					primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
				"Primary slave can be specified only once for bonded device %s",
				name);
		return -1;
	}
	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_BOND_LOG(INFO,
					"Invalid lsc polling interval value specified for bonded"
					" device %s", name);
			return -1;
		}

		/* Set the link status monitoring polling interval */
		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
					lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
				"LSC polling interval can be specified only once for bonded"
				" device %s", name);
		return -1;
	}
	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_BOND_LOG(INFO,
					"Invalid link up propagation delay value specified for"
					" bonded device %s", name);
			return -1;
		}

		/* Set the link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set link up propagation delay (%u ms) on bonded"
					" device %s", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
				"Link up propagation delay can be specified only once for"
				" bonded device %s", name);
		return -1;
	}
	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_BOND_LOG(INFO,
					"Invalid link down propagation delay value specified for"
					" bonded device %s", name);
			return -1;
		}

		/* Set the link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set link down propagation delay (%u ms) on bonded device %s",
					link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(ERR,
				"Link down propagation delay can be specified only once for bonded device %s",
				name);
		return -1;
	}

	return 0;
}
static struct rte_vdev_driver pmd_bond_drv = {
	.probe = bond_probe,
	.remove = bond_remove,
};

RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
	"slave=<ifc> "
	"primary=<ifc> "
	"mode=[0-6] "
	"xmit_policy=[l2 | l23 | l34] "
	"agg_mode=[count | stable | bandwidth] "
	"socket_id=<int> "
	"mac=<mac addr> "
	"lsc_poll_period_ms=<int> "
	"up_delay=<int> "
	"down_delay=<int>");
RTE_INIT(bond_init_log)
{
	bond_logtype = rte_log_register("pmd.net.bond");
	if (bond_logtype >= 0)
		rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
}