1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
5 #include <netinet/in.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_ethdev_vdev.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
20 #include <rte_string_fns.h>
22 #include "rte_eth_bond.h"
23 #include "rte_eth_bond_private.h"
24 #include "rte_eth_bond_8023ad_private.h"
/* Mode-5 (TLB) tuning constants and the per-port TX byte-count snapshot
 * table used to estimate each slave's consumed bandwidth. */
26 #define REORDER_PERIOD_MS 10
27 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
28 #define BOND_MAX_MAC_ADDRS 16
/* XOR of L4 source/destination ports; used by the l34 transmit hash. */
30 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
32 /* Table for statistics in mode 5 TLB */
33 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
/*
 * Skip over up to two stacked VLAN tags (802.1Q single tag, or QinQ outer +
 * inner tag) at the start of the Ethernet payload.  *proto is updated to the
 * innermost EtherType; callers use the returned offset relative to
 * (eth_hdr + 1) to locate the L3 header.
 */
36 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
38 size_t vlan_offset = 0;
40 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
41 rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
/* Outer tag found: step past it and read the encapsulated type. */
42 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
44 vlan_offset = sizeof(struct vlan_hdr);
45 *proto = vlan_hdr->eth_proto;
/* A second 802.1Q tag may follow a QinQ outer tag. */
47 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
48 vlan_hdr = vlan_hdr + 1;
49 *proto = vlan_hdr->eth_proto;
50 vlan_offset += sizeof(struct vlan_hdr);
/*
 * Basic RX burst: poll each active slave in array order, appending received
 * mbufs to bufs until nb_pkts have been gathered or all slaves are drained.
 * Returns the total number of packets received (num_rx_total).
 */
57 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
59 struct bond_dev_private *internals;
61 uint16_t num_rx_slave = 0;
62 uint16_t num_rx_total = 0;
66 /* Cast to structure, containing bonded device's port id and queue id */
67 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
69 internals = bd_rx_q->dev_private;
/* nb_pkts is decremented below, so it doubles as "remaining budget". */
72 for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
73 /* Offset of pointer to *bufs increases as packets are received
74 * from other slaves */
75 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
76 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
78 num_rx_total += num_rx_slave;
79 nb_pkts -= num_rx_slave;
/*
 * Active/backup RX burst: all traffic is received on the current primary
 * slave only; backups are never polled.
 */
87 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
90 struct bond_dev_private *internals;
92 /* Cast to structure, containing bonded device's port id and queue id */
93 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
95 internals = bd_rx_q->dev_private;
97 return rte_eth_rx_burst(internals->current_primary_port,
98 bd_rx_q->queue_id, bufs, nb_pkts);
/*
 * Return non-zero when the mbuf is an untagged (no VLAN TCI) slow-protocol
 * frame carrying a LACPDU or a marker PDU — i.e. a frame the 802.3ad state
 * machine must consume rather than deliver to the application.
 */
101 static inline uint8_t
102 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
104 const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
/* VLAN-tagged frames are never treated as slow-protocol frames. */
106 return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
107 (ethertype == ether_type_slow_be &&
108 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
111 /*****************************************************************************
112 * Flow director's setup for mode 4 optimization
/* rte_flow spec/mask matching only on EtherType == slow protocol (0x8809),
 * used to steer LACP/marker frames to a dedicated RX queue. */
115 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
116 .dst.addr_bytes = { 0 },
117 .src.addr_bytes = { 0 },
118 .type = RTE_BE16(ETHER_TYPE_SLOW),
/* Mask: MAC addresses ignored; only the EtherType participates in the match. */
121 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
122 .dst.addr_bytes = { 0 },
123 .src.addr_bytes = { 0 },
/* Single-item pattern (ETH then END) shared by flow verify/create below. */
127 static struct rte_flow_item flow_item_8023ad[] = {
129 .type = RTE_FLOW_ITEM_TYPE_ETH,
130 .spec = &flow_item_eth_type_8023ad,
132 .mask = &flow_item_eth_mask_type_8023ad,
135 .type = RTE_FLOW_ITEM_TYPE_END,
/* Attributes for the slow-frame redirection rule. */
142 const struct rte_flow_attr flow_attr_8023ad = {
/*
 * Check that a slave can support mode-4 dedicated queues: the slow-frame
 * redirection rule must validate on the slave, and the slave must be able to
 * expose at least one RX/TX queue beyond what the bonded device itself uses.
 */
151 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
152 uint16_t slave_port) {
153 struct rte_eth_dev_info slave_info;
154 struct rte_flow_error error;
155 struct bond_dev_private *internals = (struct bond_dev_private *)
156 (bond_dev->data->dev_private);
/* Action: steer matched slow frames into the dedicated LACP RX queue. */
158 const struct rte_flow_action_queue lacp_queue_conf = {
162 const struct rte_flow_action actions[] = {
164 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
165 .conf = &lacp_queue_conf
168 .type = RTE_FLOW_ACTION_TYPE_END,
/* Dry-run the rule on the slave; failure means no HW filtering support. */
172 int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
173 flow_item_8023ad, actions, &error);
175 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
176 __func__, error.message, slave_port,
177 internals->mode4.dedicated_queues.rx_qid);
/* The slave must leave room for the extra dedicated queue pair. */
181 rte_eth_dev_info_get(slave_port, &slave_info);
182 if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
183 slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
185 "%s: Slave %d capabilities doesn't allow to allocate additional queues",
186 __func__, slave_port);
/*
 * Return whether every current slave of the bonded port supports hardware
 * filtering of slow (LACP) frames into a dedicated queue pair.  Also records
 * the queue ids to use for that pair (one past the bonded device's own
 * configured queue counts).
 */
194 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
195 struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
196 struct bond_dev_private *internals = (struct bond_dev_private *)
197 (bond_dev->data->dev_private);
198 struct rte_eth_dev_info bond_info;
201 /* Verify if all slaves in bonding supports flow director and */
202 if (internals->slave_count > 0) {
203 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
/* Dedicated queues sit just past the application-visible queues. */
205 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
206 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
208 for (idx = 0; idx < internals->slave_count; idx++) {
209 if (bond_ethdev_8023ad_flow_verify(bond_dev,
210 internals->slaves[idx].port_id) != 0)
/*
 * Install the slow-frame redirection rule on one slave: matched LACP/marker
 * frames are queued to the mode-4 dedicated RX queue.  The created flow
 * handle is stored per slave so it can be destroyed later.
 */
219 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
221 struct rte_flow_error error;
222 struct bond_dev_private *internals = (struct bond_dev_private *)
223 (bond_dev->data->dev_private);
225 struct rte_flow_action_queue lacp_queue_conf = {
226 .index = internals->mode4.dedicated_queues.rx_qid,
229 const struct rte_flow_action actions[] = {
231 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
232 .conf = &lacp_queue_conf
235 .type = RTE_FLOW_ACTION_TYPE_END,
239 internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
240 &flow_attr_8023ad, flow_item_8023ad, actions, &error);
241 if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
242 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
243 "(slave_port=%d queue_id=%d)",
244 error.message, slave_port,
245 internals->mode4.dedicated_queues.rx_qid);
/*
 * Mode-4 RX burst for the "fast queue" path (HW steers LACP frames away, so
 * no slow-protocol filtering is needed here).  Polls the active slaves
 * round-robin starting from the slave saved in internals->active_slave, and
 * saves the position back so the next burst resumes fairly.
 */
253 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
256 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
257 struct bond_dev_private *internals = bd_rx_q->dev_private;
258 uint16_t num_rx_total = 0; /* Total number of received packets */
259 uint16_t slaves[RTE_MAX_ETHPORTS];
260 uint16_t slave_count;
264 /* Copy slave list to protect against slave up/down changes during tx
266 slave_count = internals->active_slave_count;
267 memcpy(slaves, internals->active_slaves,
268 sizeof(internals->active_slaves[0]) * slave_count);
270 for (i = 0, idx = internals->active_slave;
271 i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
272 idx = idx % slave_count;
274 /* Read packets from this slave */
275 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
276 &bufs[num_rx_total], nb_pkts - num_rx_total);
/* Remember where we stopped so the next burst starts here. */
279 internals->active_slave = idx;
/*
 * Mode-4 TX burst for the "fast queue" path: hash each mbuf onto one of the
 * slaves currently in DISTRIBUTING state, transmit per-slave bursts, and move
 * any untransmitted mbufs to the tail of bufs so the caller can retry them.
 *
 * Fixes vs. previous revision:
 *  - bufs_slave_port_idxs held one entry PER MBUF (it is indexed by i over
 *    nb_bufs below) but was sized RTE_MAX_ETHPORTS, overflowing the stack
 *    whenever nb_bufs > RTE_MAX_ETHPORTS.  It is now a VLA of nb_bufs
 *    entries, matching bond_ethdev_tx_burst_balance (VLAs cannot carry an
 *    initializer, and every element is written by burst_xmit_hash anyway).
 *  - slave_idx was uint8_t, silently truncating the uint16_t values produced
 *    by burst_xmit_hash; it is now uint16_t.
 */
285 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
288 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
289 struct bond_dev_private *internals = bd_tx_q->dev_private;
291 uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
292 uint16_t slave_count;
294 uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
295 uint16_t dist_slave_count;
297 /* 2-D array to sort mbufs for transmission on each slave into */
298 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
299 /* Number of mbufs for transmission on each slave */
300 uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
301 /* Mapping array generated by hash function to map mbufs to slaves */
302 uint16_t bufs_slave_port_idxs[nb_bufs];
304 uint16_t slave_tx_count;
305 uint16_t total_tx_count = 0, total_tx_fail_count = 0;
309 if (unlikely(nb_bufs == 0))
312 /* Copy slave list to protect against slave up/down changes during tx
314 slave_count = internals->active_slave_count;
315 if (unlikely(slave_count < 1))
318 memcpy(slave_port_ids, internals->active_slaves,
319 sizeof(slave_port_ids[0]) * slave_count);
/* Only slaves whose 802.3ad actor is DISTRIBUTING may carry data. */
322 dist_slave_count = 0;
323 for (i = 0; i < slave_count; i++) {
324 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
326 if (ACTOR_STATE(port, DISTRIBUTING))
327 dist_slave_port_ids[dist_slave_count++] =
331 if (unlikely(dist_slave_count < 1))
335 * Populate slaves mbuf with the packets which are to be sent on it
336 * selecting output slave using hash based on xmit policy
338 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
339 bufs_slave_port_idxs);
341 for (i = 0; i < nb_bufs; i++) {
342 /* Populate slave mbuf arrays with mbufs for that slave. */
343 uint16_t slave_idx = bufs_slave_port_idxs[i];
345 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
349 /* Send packet burst on each slave device */
350 for (i = 0; i < dist_slave_count; i++) {
351 if (slave_nb_bufs[i] == 0)
354 slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
355 bd_tx_q->queue_id, slave_bufs[i],
358 total_tx_count += slave_tx_count;
360 /* If tx burst fails move packets to end of bufs */
361 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
362 int slave_tx_fail_count = slave_nb_bufs[i] -
364 total_tx_fail_count += slave_tx_fail_count;
365 memcpy(&bufs[nb_bufs - total_tx_fail_count],
366 &slave_bufs[i][slave_tx_count],
367 slave_tx_fail_count * sizeof(bufs[0]));
371 return total_tx_count;
/*
 * Mode-4 RX burst (software filtering path).  Polls active slaves starting
 * from the remembered index, then walks the received mbufs: LACP/marker
 * frames are handed to the 802.3ad state machine, and frames from slaves not
 * in COLLECTING state (or unicast frames not addressed to the bond, outside
 * promiscuous mode) are dropped.  Surviving mbufs are compacted in place.
 */
376 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
379 /* Cast to structure, containing bonded device's port id and queue id */
380 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
381 struct bond_dev_private *internals = bd_rx_q->dev_private;
382 struct ether_addr bond_mac;
384 struct ether_hdr *hdr;
386 const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
387 uint16_t num_rx_total = 0; /* Total number of received packets */
388 uint16_t slaves[RTE_MAX_ETHPORTS];
389 uint16_t slave_count, idx;
391 uint8_t collecting; /* current slave collecting status */
392 const uint8_t promisc = internals->promiscuous_en;
396 rte_eth_macaddr_get(internals->port_id, &bond_mac);
397 /* Copy slave list to protect against slave up/down changes during tx
399 slave_count = internals->active_slave_count;
400 memcpy(slaves, internals->active_slaves,
401 sizeof(internals->active_slaves[0]) * slave_count);
/* Resume from the previously saved slave; reset if it became stale. */
403 idx = internals->active_slave;
404 if (idx >= slave_count) {
405 internals->active_slave = 0;
408 for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
410 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
413 /* Read packets from this slave */
414 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
415 &bufs[num_rx_total], nb_pkts - num_rx_total);
417 for (k = j; k < 2 && k < num_rx_total; k++)
418 rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
420 /* Handle slow protocol packets. */
421 while (j < num_rx_total) {
423 /* If packet is not pure L2 and is known, skip it */
424 if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
429 if (j + 3 < num_rx_total)
430 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
432 hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
433 subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
435 /* Remove packet from array if it is slow packet or slave is not
436 * in collecting state or bonding interface is not in promiscuous
437 * mode and packet address does not match. */
438 if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
439 !collecting || (!promisc &&
440 !is_multicast_ether_addr(&hdr->d_addr) &&
441 !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
/* Slow frames go to the 802.3ad state machine, others are freed. */
443 if (hdr->ether_type == ether_type_slow_be) {
444 bond_mode_8023ad_handle_slow_pkt(
445 internals, slaves[idx], bufs[j]);
447 rte_pktmbuf_free(bufs[j]);
449 /* Packet is managed by mode 4 or dropped, shift the array */
451 if (j < num_rx_total) {
452 memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
458 if (unlikely(++idx == slave_count))
/* Persist round-robin position for the next burst. */
462 internals->active_slave = idx;
/* Debug-only counters and helpers for mode-6 (ALB) tracing. */
466 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
467 uint32_t burstnumberRX;
468 uint32_t burstnumberTX;
470 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
/*
 * Render an ARP opcode as a human-readable string into buf.
 * buf must be large enough for the longest name ("Peer Identify Request").
 */
473 arp_op_name(uint16_t arp_op, char *buf)
477 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
480 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
482 case ARP_OP_REVREQUEST:
483 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
484 "Reverse ARP Request");
486 case ARP_OP_REVREPLY:
487 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
488 "Reverse ARP Reply");
490 case ARP_OP_INVREQUEST:
491 snprintf(buf, sizeof("Peer Identify Request"), "%s",
492 "Peer Identify Request");
494 case ARP_OP_INVREPLY:
495 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
496 "Peer Identify Reply");
/* Unrecognised opcode falls through to a generic label. */
501 snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
/* Buffer size sufficient for "255.255.255.255" plus NUL. */
505 #define MaxIPv4String 16
/*
 * Format a big-endian IPv4 address into dotted-quad notation in buf
 * (at most buf_size bytes, NUL-terminated by snprintf).
 */
507 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
511 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
512 snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
513 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
/* Debug-only per-client RX/TX packet counters for mode-6 tracing, keyed by
 * (IPv4 address, slave port). */
517 #define MAX_CLIENTS_NUMBER 128
518 uint8_t active_clients;
519 struct client_stats_t {
522 uint32_t ipv4_rx_packets;
523 uint32_t ipv4_tx_packets;
525 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
/*
 * Bump the RX or TX counter for (addr, port); the direction is inferred from
 * which global burst counter the caller passed.  Unknown clients are
 * appended to the table.
 * NOTE(review): no bound check on active_clients against MAX_CLIENTS_NUMBER
 * is visible on the insertion path below — confirm the table cannot overflow.
 */
528 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
532 for (; i < MAX_CLIENTS_NUMBER; i++) {
533 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
534 /* Just update RX packets number for this client */
535 if (TXorRXindicator == &burstnumberRX)
536 client_stats[i].ipv4_rx_packets++;
538 client_stats[i].ipv4_tx_packets++;
542 /* We have a new client. Insert him to the table, and increment stats */
543 if (TXorRXindicator == &burstnumberRX)
544 client_stats[active_clients].ipv4_rx_packets++;
546 client_stats[active_clients].ipv4_tx_packets++;
547 client_stats[active_clients].ipv4_addr = addr;
548 client_stats[active_clients].port = port;
/*
 * Debug-only macro: log one mode-6 frame with source/destination MAC and IP,
 * the operation label (arp_op), the slave port, and an incrementing burst
 * counter.  Note: ++burstnumber makes the macro side-effecting — pass a real
 * counter lvalue, and only once per frame.
 */
553 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
554 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
555 rte_log(RTE_LOG_DEBUG, bond_logtype, \
556 "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
557 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
560 eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
561 eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
562 eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
564 eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
565 eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
566 eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
568 arp_op, ++burstnumber)
/*
 * Debug helper for mode 6: decode the frame behind eth_h (skipping VLAN
 * tags), log IPv4/ARP details via MODE6_DEBUG, and update per-client stats.
 *
 * Fix vs. previous revision: "get_vlan_offset(eth_h, ðer_type)" contained
 * a mis-encoded "&eth" HTML entity; restored to "&ether_type", matching the
 * identical call sites elsewhere in this file.
 */
572 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
573 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
575 struct ipv4_hdr *ipv4_h;
576 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
577 struct arp_hdr *arp_h;
584 uint16_t ether_type = eth_h->ether_type;
585 uint16_t offset = get_vlan_offset(eth_h, &ether_type);
587 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
588 strlcpy(buf, info, 16);
591 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
592 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
593 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
594 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
595 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
596 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
598 update_client_stats(ipv4_h->src_addr, port, burstnumber);
600 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
601 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
602 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
603 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
604 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
605 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
606 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
/*
 * Mode-6 (ALB) RX burst: receive via the basic burst, then feed every ARP
 * frame to the ALB module so it can learn/answer client mappings.
 *
 * Fix vs. previous revision: "get_vlan_offset(eth_h, ðer_type)" contained
 * a mis-encoded "&eth" HTML entity; restored to "&ether_type".
 *
 * NOTE(review): this RX handler casts queue to struct bond_tx_queue; it only
 * dereferences dev_private, so this works only while both queue structs keep
 * dev_private at the same offset — consider using bond_rx_queue here.
 */
613 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
615 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
616 struct bond_dev_private *internals = bd_tx_q->dev_private;
617 struct ether_hdr *eth_h;
618 uint16_t ether_type, offset;
619 uint16_t nb_recv_pkts;
622 nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
624 for (i = 0; i < nb_recv_pkts; i++) {
625 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
626 ether_type = eth_h->ether_type;
627 offset = get_vlan_offset(eth_h, &ether_type);
629 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
630 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
631 mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
633 bond_mode_alb_arp_recv(eth_h, offset, internals);
635 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
636 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
637 mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
/*
 * Mode-0 TX burst: spread mbufs across active slaves round-robin, transmit a
 * per-slave burst each, and move any untransmitted mbufs to the end of bufs.
 * NOTE(review): slave_idx is a function-static shared by all TX queues and
 * lcores, so concurrent callers race on it; the result is only an uneven
 * starting offset, but confirm this is acceptable for multi-queue use.
 */
645 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
648 struct bond_dev_private *internals;
649 struct bond_tx_queue *bd_tx_q;
651 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
652 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
654 uint16_t num_of_slaves;
655 uint16_t slaves[RTE_MAX_ETHPORTS];
657 uint16_t num_tx_total = 0, num_tx_slave;
/* Persistent rotation offset so successive bursts start on different slaves. */
659 static int slave_idx = 0;
660 int i, cslave_idx = 0, tx_fail_total = 0;
662 bd_tx_q = (struct bond_tx_queue *)queue;
663 internals = bd_tx_q->dev_private;
665 /* Copy slave list to protect against slave up/down changes during tx
667 num_of_slaves = internals->active_slave_count;
668 memcpy(slaves, internals->active_slaves,
669 sizeof(internals->active_slaves[0]) * num_of_slaves);
671 if (num_of_slaves < 1)
674 /* Populate slaves mbuf with which packets are to be sent on it */
675 for (i = 0; i < nb_pkts; i++) {
676 cslave_idx = (slave_idx + i) % num_of_slaves;
677 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
680 /* increment current slave index so the next call to tx burst starts on the
682 slave_idx = ++cslave_idx;
684 /* Send packet burst on each slave device */
685 for (i = 0; i < num_of_slaves; i++) {
686 if (slave_nb_pkts[i] > 0) {
687 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
688 slave_bufs[i], slave_nb_pkts[i]);
690 /* if tx burst fails move packets to end of bufs */
691 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
692 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
694 tx_fail_total += tx_fail_slave;
696 memcpy(&bufs[nb_pkts - tx_fail_total],
697 &slave_bufs[i][num_tx_slave],
698 tx_fail_slave * sizeof(bufs[0]));
700 num_tx_total += num_tx_slave;
/*
 * Active/backup TX burst: everything is transmitted on the current primary
 * slave; nothing is sent when no slave is active.
 */
708 bond_ethdev_tx_burst_active_backup(void *queue,
709 struct rte_mbuf **bufs, uint16_t nb_pkts)
711 struct bond_dev_private *internals;
712 struct bond_tx_queue *bd_tx_q;
714 bd_tx_q = (struct bond_tx_queue *)queue;
715 internals = bd_tx_q->dev_private;
717 if (internals->active_slave_count < 1)
720 return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
724 static inline uint16_t
725 ether_hash(struct ether_hdr *eth_hdr)
727 unaligned_uint16_t *word_src_addr =
728 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
729 unaligned_uint16_t *word_dst_addr =
730 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
732 return (word_src_addr[0] ^ word_dst_addr[0]) ^
733 (word_src_addr[1] ^ word_dst_addr[1]) ^
734 (word_src_addr[2] ^ word_dst_addr[2]);
737 static inline uint32_t
738 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
740 return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
743 static inline uint32_t
744 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
746 unaligned_uint32_t *word_src_addr =
747 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
748 unaligned_uint32_t *word_dst_addr =
749 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
751 return (word_src_addr[0] ^ word_dst_addr[0]) ^
752 (word_src_addr[1] ^ word_dst_addr[1]) ^
753 (word_src_addr[2] ^ word_dst_addr[2]) ^
754 (word_src_addr[3] ^ word_dst_addr[3]);
/*
 * Layer-2 transmit hash policy: for each mbuf, hash the Ethernet MAC pair
 * and map it onto a slave index in [0, slave_count), written to slaves[i].
 */
759 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
760 uint8_t slave_count, uint16_t *slaves)
762 struct ether_hdr *eth_hdr;
766 for (i = 0; i < nb_pkts; i++) {
767 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
769 hash = ether_hash(eth_hdr);
/* Mix the high byte down before reducing modulo slave_count. */
771 slaves[i] = (hash ^= hash >> 8) % slave_count;
/*
 * Layer-2+3 transmit hash policy: combine the Ethernet MAC hash with an
 * IPv4/IPv6 address hash (when present, after skipping VLAN tags) and map
 * each mbuf onto a slave index written to slaves[i].
 */
776 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
777 uint8_t slave_count, uint16_t *slaves)
780 struct ether_hdr *eth_hdr;
783 uint32_t hash, l3hash;
785 for (i = 0; i < nb_pkts; i++) {
786 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
789 proto = eth_hdr->ether_type;
790 hash = ether_hash(eth_hdr);
792 vlan_offset = get_vlan_offset(eth_hdr, &proto);
794 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
795 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
796 ((char *)(eth_hdr + 1) + vlan_offset);
797 l3hash = ipv4_hash(ipv4_hdr);
799 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
800 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
801 ((char *)(eth_hdr + 1) + vlan_offset);
802 l3hash = ipv6_hash(ipv6_hdr);
/* Fold the L3 contribution into the L2 hash. */
805 hash = hash ^ l3hash;
809 slaves[i] = hash % slave_count;
/*
 * Layer-3+4 transmit hash policy: combine an IPv4/IPv6 address hash with a
 * TCP/UDP port hash (skipping VLAN tags first) and map each mbuf onto a
 * slave index written to slaves[i].  The L4 ports are only consulted when an
 * L4 header is actually present.
 */
814 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
815 uint8_t slave_count, uint16_t *slaves)
817 struct ether_hdr *eth_hdr;
822 struct udp_hdr *udp_hdr;
823 struct tcp_hdr *tcp_hdr;
824 uint32_t hash, l3hash, l4hash;
826 for (i = 0; i < nb_pkts; i++) {
827 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
828 proto = eth_hdr->ether_type;
829 vlan_offset = get_vlan_offset(eth_hdr, &proto);
833 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
834 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
835 ((char *)(eth_hdr + 1) + vlan_offset);
836 size_t ip_hdr_offset;
838 l3hash = ipv4_hash(ipv4_hdr);
840 /* there is no L4 header in fragmented packet */
841 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
/* Locate L4 using the IHL field (header length in 32-bit words). */
843 ip_hdr_offset = (ipv4_hdr->version_ihl
844 & IPV4_HDR_IHL_MASK) *
847 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
848 tcp_hdr = (struct tcp_hdr *)
851 l4hash = HASH_L4_PORTS(tcp_hdr);
852 } else if (ipv4_hdr->next_proto_id ==
854 udp_hdr = (struct udp_hdr *)
857 l4hash = HASH_L4_PORTS(udp_hdr);
860 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
861 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
862 ((char *)(eth_hdr + 1) + vlan_offset);
863 l3hash = ipv6_hash(ipv6_hdr);
/* NOTE(review): assumes no IPv6 extension headers before L4 — confirm. */
865 if (ipv6_hdr->proto == IPPROTO_TCP) {
866 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
867 l4hash = HASH_L4_PORTS(tcp_hdr);
868 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
869 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
870 l4hash = HASH_L4_PORTS(udp_hdr);
874 hash = l3hash ^ l4hash;
878 slaves[i] = hash % slave_count;
/* Presumably members of struct bwg_slave (used by bandwidth_cmp /
 * bandwidth_left below): integer quotient and remainder of a slave's
 * remaining bandwidth budget — struct declaration opens above this view. */
883 uint64_t bwg_left_int;
884 uint64_t bwg_left_remainder;
/*
 * Reset the TLB TX byte-count snapshots for all active slaves so bandwidth
 * estimation restarts cleanly after slave (re)activation.
 */
889 bond_tlb_activate_slave(struct bond_dev_private *internals) {
892 for (i = 0; i < internals->active_slave_count; i++) {
893 tlb_last_obytets[internals->active_slaves[i]] = 0;
/*
 * qsort comparator for struct bwg_slave: diff/diff2 are computed as (b - a),
 * ordering slaves by descending remaining bandwidth, with bwg_left_remainder
 * as the tie-breaker on equal bwg_left_int.
 */
898 bandwidth_cmp(const void *a, const void *b)
900 const struct bwg_slave *bwg_a = a;
901 const struct bwg_slave *bwg_b = b;
902 int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
903 int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
904 (int64_t)bwg_a->bwg_left_remainder;
/*
 * Estimate the bandwidth a slave has left: scale the link capacity (bytes/ms
 * from link_speed in Mbps) over the elapsed update window, subtract the
 * observed load, and store quotient/remainder into *bwg_slave.
 * NOTE(review): no guard against link_speed == 0 (link down) is visible here,
 * which would divide by zero below — confirm callers exclude down links.
 */
918 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
919 struct bwg_slave *bwg_slave)
921 struct rte_eth_link link_status;
923 rte_eth_link_get_nowait(port_id, &link_status);
/* Mbps -> bytes per millisecond. */
924 uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
927 link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
928 bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
929 bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
/*
 * Periodic EAL alarm callback for mode-5 (TLB): measure each active slave's
 * TX byte delta since the last run, compute remaining bandwidth, sort slaves
 * by it, publish the order in internals->tlb_slaves_order, and re-arm itself
 * every REORDER_PERIOD_MS milliseconds.
 */
933 bond_ethdev_update_tlb_slave_cb(void *arg)
935 struct bond_dev_private *internals = arg;
936 struct rte_eth_stats slave_stats;
937 struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
941 uint8_t update_stats = 0;
944 internals->slave_update_idx++;
/* Refresh the byte-count snapshots once per full reorder period. */
947 if (internals->slave_update_idx >= REORDER_PERIOD_MS)
950 for (i = 0; i < internals->active_slave_count; i++) {
951 slave_id = internals->active_slaves[i];
952 rte_eth_stats_get(slave_id, &slave_stats);
953 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
954 bandwidth_left(slave_id, tx_bytes,
955 internals->slave_update_idx, &bwg_array[i]);
956 bwg_array[i].slave = slave_id;
959 tlb_last_obytets[slave_id] = slave_stats.obytes;
963 if (update_stats == 1)
964 internals->slave_update_idx = 0;
/* Most-idle slave first; publish the new transmit order. */
967 qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
968 for (i = 0; i < slave_count; i++)
969 internals->tlb_slaves_order[i] = bwg_array[i].slave;
971 rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
972 (struct bond_dev_private *)internals);
/*
 * Mode-5 (TLB) TX burst: walk slaves in the bandwidth-sorted tlb_slaves_order,
 * rewrite the source MAC of frames that carry the primary's MAC to the MAC of
 * the slave about to transmit them, and burst the remainder of bufs on each
 * slave until everything is sent or slaves are exhausted.
 *
 * Fix vs. previous revision: "ðer_hdr->s_addr" (two occurrences) was a
 * mis-encoded "&eth" HTML entity; restored to "&ether_hdr->s_addr".
 */
976 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
978 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
979 struct bond_dev_private *internals = bd_tx_q->dev_private;
981 struct rte_eth_dev *primary_port =
982 &rte_eth_devices[internals->primary_port];
983 uint16_t num_tx_total = 0;
986 uint16_t num_of_slaves = internals->active_slave_count;
987 uint16_t slaves[RTE_MAX_ETHPORTS];
989 struct ether_hdr *ether_hdr;
990 struct ether_addr primary_slave_addr;
991 struct ether_addr active_slave_addr;
993 if (num_of_slaves < 1)
996 memcpy(slaves, internals->tlb_slaves_order,
997 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
1000 ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
1003 for (i = 0; i < 3; i++)
1004 rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
1007 for (i = 0; i < num_of_slaves; i++) {
1008 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
1009 for (j = num_tx_total; j < nb_pkts; j++) {
1010 if (j + 3 < nb_pkts)
1011 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1013 ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1014 if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
1015 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
1016 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1017 mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1021 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1022 bufs + num_tx_total, nb_pkts - num_tx_total);
1024 if (num_tx_total == nb_pkts)
1028 return num_tx_total;
/* Stop the periodic TLB reorder callback for this bonded device. */
1032 bond_tlb_disable(struct bond_dev_private *internals)
1034 rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
/* Kick off TLB reordering: the callback re-arms itself via rte_eal_alarm_set. */
1038 bond_tlb_enable(struct bond_dev_private *internals)
1040 bond_ethdev_update_tlb_slave_cb(internals);
/*
 * Mode-6 (ALB) TX burst: ARP frames are assigned to slaves by the ALB module
 * (with their source MAC rewritten to the chosen slave), periodic ARP update
 * packets are generated for known clients when mode6.ntt is set, and all
 * non-ARP traffic is transmitted via the TLB policy.  Untransmitted mbufs are
 * moved to the end of bufs; ARP update packets are freed on TX failure since
 * they are not owned by the caller.
 *
 * Fix vs. previous revision: "ðer_type" and "ð_h->s_addr" were
 * mis-encoded "&eth" HTML entities; restored to "&ether_type" and
 * "&eth_h->s_addr", matching the identical call sites elsewhere in this file.
 */
1044 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1046 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1047 struct bond_dev_private *internals = bd_tx_q->dev_private;
1049 struct ether_hdr *eth_h;
1050 uint16_t ether_type, offset;
1052 struct client_data *client_info;
1055 * We create transmit buffers for every slave and one additional to send
1056 * through tlb. In worst case every packet will be send on one port.
1058 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1059 uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1062 * We create separate transmit buffers for update packets as they won't
1063 * be counted in num_tx_total.
1065 struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1066 uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1068 struct rte_mbuf *upd_pkt;
1071 uint16_t num_send, num_not_send = 0;
1072 uint16_t num_tx_total = 0;
1077 /* Search tx buffer for ARP packets and forward them to alb */
1078 for (i = 0; i < nb_pkts; i++) {
1079 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1080 ether_type = eth_h->ether_type;
1081 offset = get_vlan_offset(eth_h, &ether_type);
1083 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1084 slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1086 /* Change src mac in eth header */
1087 rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1089 /* Add packet to slave tx buffer */
1090 slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1091 slave_bufs_pkts[slave_idx]++;
1093 /* If packet is not ARP, send it with TLB policy */
1094 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1096 slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1100 /* Update connected client ARP tables */
1101 if (internals->mode6.ntt) {
1102 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1103 client_info = &internals->mode6.client_table[i];
1105 if (client_info->in_use) {
1106 /* Allocate new packet to send ARP update on current slave */
1107 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1108 if (upd_pkt == NULL) {
1110 "Failed to allocate ARP packet from pool");
1113 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1114 + client_info->vlan_count * sizeof(struct vlan_hdr);
1115 upd_pkt->data_len = pkt_size;
1116 upd_pkt->pkt_len = pkt_size;
1118 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1121 /* Add packet to update tx buffer */
1122 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1123 update_bufs_pkts[slave_idx]++;
1126 internals->mode6.ntt = 0;
1129 /* Send ARP packets on proper slaves */
1130 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1131 if (slave_bufs_pkts[i] > 0) {
1132 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1133 slave_bufs[i], slave_bufs_pkts[i]);
1134 for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1135 bufs[nb_pkts - 1 - num_not_send - j] =
1136 slave_bufs[i][nb_pkts - 1 - j];
1139 num_tx_total += num_send;
1140 num_not_send += slave_bufs_pkts[i] - num_send;
1142 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1143 /* Print TX stats including update packets */
1144 for (j = 0; j < slave_bufs_pkts[i]; j++) {
1145 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1146 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1152 /* Send update packets on proper slaves */
1153 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1154 if (update_bufs_pkts[i] > 0) {
1155 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1156 update_bufs_pkts[i]);
1157 for (j = num_send; j < update_bufs_pkts[i]; j++) {
1158 rte_pktmbuf_free(update_bufs[i][j]);
1160 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1161 for (j = 0; j < update_bufs_pkts[i]; j++) {
1162 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1163 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1169 /* Send non-ARP packets using tlb policy */
1170 if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1171 num_send = bond_ethdev_tx_burst_tlb(queue,
1172 slave_bufs[RTE_MAX_ETHPORTS],
1173 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1175 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1176 bufs[nb_pkts - 1 - num_not_send - j] =
1177 slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1180 num_tx_total += num_send;
1183 return num_tx_total;
/*
 * Mode-2 (balance) TX burst: hash each mbuf onto an active slave via the
 * configured xmit policy, transmit per-slave bursts, and move any
 * untransmitted mbufs to the tail of bufs so the caller can retry them.
 *
 * Fix vs. previous revision: slave_idx was uint8_t, silently truncating the
 * uint16_t values stored in bufs_slave_port_idxs (and indexing arrays sized
 * RTE_MAX_ETHPORTS, which may exceed 256); it is now uint16_t.
 */
1187 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1190 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1191 struct bond_dev_private *internals = bd_tx_q->dev_private;
1193 uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1194 uint16_t slave_count;
1196 /* Array to sort mbufs for transmission on each slave into */
1197 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1198 /* Number of mbufs for transmission on each slave */
1199 uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1200 /* Mapping array generated by hash function to map mbufs to slaves */
1201 uint16_t bufs_slave_port_idxs[nb_bufs];
1203 uint16_t slave_tx_count;
1204 uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1208 if (unlikely(nb_bufs == 0))
1211 /* Copy slave list to protect against slave up/down changes during tx
1213 slave_count = internals->active_slave_count;
1214 if (unlikely(slave_count < 1))
1217 memcpy(slave_port_ids, internals->active_slaves,
1218 sizeof(slave_port_ids[0]) * slave_count);
1221 * Populate slaves mbuf with the packets which are to be sent on it
1222 * selecting output slave using hash based on xmit policy
1224 internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1225 bufs_slave_port_idxs);
1227 for (i = 0; i < nb_bufs; i++) {
1228 /* Populate slave mbuf arrays with mbufs for that slave. */
1229 uint16_t slave_idx = bufs_slave_port_idxs[i];
1231 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1234 /* Send packet burst on each slave device */
1235 for (i = 0; i < slave_count; i++) {
1236 if (slave_nb_bufs[i] == 0)
1239 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1240 bd_tx_q->queue_id, slave_bufs[i],
1243 total_tx_count += slave_tx_count;
1245 /* If tx burst fails move packets to end of bufs */
1246 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1247 int slave_tx_fail_count = slave_nb_bufs[i] -
1249 total_tx_fail_count += slave_tx_fail_count;
1250 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1251 &slave_bufs[i][slave_tx_count],
1252 slave_tx_fail_count * sizeof(bufs[0]));
1256 return total_tx_count;
1260 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1263 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1264 struct bond_dev_private *internals = bd_tx_q->dev_private;
1266 uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1267 uint16_t slave_count;
1269 uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1270 uint16_t dist_slave_count;
1272 /* 2-D array to sort mbufs for transmission on each slave into */
1273 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1274 /* Number of mbufs for transmission on each slave */
1275 uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1276 /* Mapping array generated by hash function to map mbufs to slaves */
1277 uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1279 uint16_t slave_tx_count;
1280 uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1284 if (unlikely(nb_bufs == 0))
1287 /* Copy slave list to protect against slave up/down changes during tx
1289 slave_count = internals->active_slave_count;
1290 if (unlikely(slave_count < 1))
1293 memcpy(slave_port_ids, internals->active_slaves,
1294 sizeof(slave_port_ids[0]) * slave_count);
1296 dist_slave_count = 0;
1297 for (i = 0; i < slave_count; i++) {
1298 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1300 if (ACTOR_STATE(port, DISTRIBUTING))
1301 dist_slave_port_ids[dist_slave_count++] =
1305 if (likely(dist_slave_count > 1)) {
1308 * Populate slaves mbuf with the packets which are to be sent
1309 * on it, selecting output slave using hash based on xmit policy
1311 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1312 bufs_slave_port_idxs);
1314 for (i = 0; i < nb_bufs; i++) {
1316 * Populate slave mbuf arrays with mbufs for that
1319 uint8_t slave_idx = bufs_slave_port_idxs[i];
1321 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1326 /* Send packet burst on each slave device */
1327 for (i = 0; i < dist_slave_count; i++) {
1328 if (slave_nb_bufs[i] == 0)
1331 slave_tx_count = rte_eth_tx_burst(
1332 dist_slave_port_ids[i],
1333 bd_tx_q->queue_id, slave_bufs[i],
1336 total_tx_count += slave_tx_count;
1338 /* If tx burst fails move packets to end of bufs */
1339 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1340 int slave_tx_fail_count = slave_nb_bufs[i] -
1342 total_tx_fail_count += slave_tx_fail_count;
1344 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1345 &slave_bufs[i][slave_tx_count],
1346 slave_tx_fail_count * sizeof(bufs[0]));
1351 /* Check for LACP control packets and send if available */
1352 for (i = 0; i < slave_count; i++) {
1353 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1354 struct rte_mbuf *ctrl_pkt = NULL;
1356 if (likely(rte_ring_empty(port->tx_ring)))
1359 if (rte_ring_dequeue(port->tx_ring,
1360 (void **)&ctrl_pkt) != -ENOENT) {
1361 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1362 bd_tx_q->queue_id, &ctrl_pkt, 1);
1364 * re-enqueue LAG control plane packets to buffering
1365 * ring if transmission fails so the packet isn't lost.
1367 if (slave_tx_count != 1)
1368 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1372 return total_tx_count;
/*
 * Transmit burst in broadcast (mode 3): every packet is transmitted on
 * every active slave. Returns the count achieved by the most successful
 * slave. Each mbuf's refcount is bumped by (num_of_slaves - 1) so each
 * slave's tx consumes one reference; on partial failure, the extra
 * references of every slave except the most successful one are freed.
 */
1376 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1379 struct bond_dev_private *internals;
1380 struct bond_tx_queue *bd_tx_q;
/* tx_failed_flag is raised when any slave accepts fewer than nb_pkts */
1382 uint8_t tx_failed_flag = 0, num_of_slaves;
1383 uint16_t slaves[RTE_MAX_ETHPORTS];
1385 uint16_t max_nb_of_tx_pkts = 0;
/* per-slave count of packets accepted by rte_eth_tx_burst() */
1387 int slave_tx_total[RTE_MAX_ETHPORTS];
1388 int i, most_successful_tx_slave = -1;
1390 bd_tx_q = (struct bond_tx_queue *)queue;
1391 internals = bd_tx_q->dev_private;
1393 /* Copy slave list to protect against slave up/down changes during tx
1395 num_of_slaves = internals->active_slave_count;
1396 memcpy(slaves, internals->active_slaves,
1397 sizeof(internals->active_slaves[0]) * num_of_slaves);
1399 if (num_of_slaves < 1)
1402 /* Increment reference count on mbufs */
1403 for (i = 0; i < nb_pkts; i++)
1404 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1406 /* Transmit burst on each active slave */
1407 for (i = 0; i < num_of_slaves; i++) {
1408 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1411 if (unlikely(slave_tx_total[i] < nb_pkts))
1414 /* record the value and slave index for the slave which transmits the
1415 * maximum number of packets */
1416 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1417 max_nb_of_tx_pkts = slave_tx_total[i];
1418 most_successful_tx_slave = i;
1422 /* if slaves fail to transmit packets from burst, the calling application
1423 * is not expected to know about multiple references to packets so we must
1424 * handle failures of all packets except those of the most successful slave
1426 if (unlikely(tx_failed_flag))
1427 for (i = 0; i < num_of_slaves; i++)
1428 if (i != most_successful_tx_slave)
/* free the untransmitted references held for slave i */
1429 while (slave_tx_total[i] < nb_pkts)
1430 rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1432 return max_nb_of_tx_pkts;
/*
 * Record link properties for the bonded device from a slave's link.
 * In mode 4 the first slave's speed/duplex/autoneg are saved in
 * mode4.slave_link as the reference all later slaves must match
 * (see link_properties_valid()); in other modes defaults are applied.
 */
1436 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1438 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1440 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1442 * If in mode 4 then save the link properties of the first
1443 * slave, all subsequent slaves must match these properties
1445 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1447 bond_link->link_autoneg = slave_link->link_autoneg;
1448 bond_link->link_duplex = slave_link->link_duplex;
1449 bond_link->link_speed = slave_link->link_speed;
1452 * In any other mode the link properties are set to default
1453 * values of AUTONEG/DUPLEX
1455 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1456 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
/*
 * Check a slave's link against the mode-4 reference link saved by
 * link_properties_set(). Only meaningful in BONDING_MODE_8023AD, where
 * all slaves must share speed/duplex/autoneg; a mismatch is rejected.
 */
1461 link_properties_valid(struct rte_eth_dev *ethdev,
1462 struct rte_eth_link *slave_link)
1464 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1466 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1467 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
/* any property differing from the saved reference is a mismatch */
1469 if (bond_link->link_duplex != slave_link->link_duplex ||
1470 bond_link->link_autoneg != slave_link->link_autoneg ||
1471 bond_link->link_speed != slave_link->link_speed)
/*
 * Copy the device's primary MAC address (mac_addrs[0]) into
 * @dst_mac_addr. NULL arguments are rejected with an error log.
 */
1479 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1481 struct ether_addr *mac_addr;
1483 if (eth_dev == NULL) {
1484 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1488 if (dst_mac_addr == NULL) {
1489 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1493 mac_addr = eth_dev->data->mac_addrs;
1495 ether_addr_copy(mac_addr, dst_mac_addr);
/*
 * Overwrite the device's primary MAC address (mac_addrs[0]) with
 * @new_mac_addr. NULL arguments are rejected; the copy is skipped when
 * the address is already current.
 */
1500 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1502 struct ether_addr *mac_addr;
1504 if (eth_dev == NULL) {
1505 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1509 if (new_mac_addr == NULL) {
1510 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1514 mac_addr = eth_dev->data->mac_addrs;
1516 /* If new MAC is different to current MAC then update */
1517 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1518 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr))&#x3B;
/* All-zero MAC; zero-initialized by static storage. Used as the sentinel
 * for unused entries in the bonded device's mac_addrs[] table. */
1523 static const struct ether_addr null_mac_addr;
/*
1526 * Add additional MAC addresses to the slave
 *
 * Programs every secondary MAC (slots 1..BOND_MAX_MAC_ADDRS-1 of the
 * bonded device, skipping empty/null entries) onto @slave_port_id.
 * On failure, addresses added so far are rolled back in reverse order.
 */
1529 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1530 uint16_t slave_port_id)
1533 struct ether_addr *mac_addr;
/* slot 0 is the primary MAC, handled elsewhere — start at 1 */
1535 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1536 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1537 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1540 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
/* rollback: remove the addresses that were added before the failure */
1543 for (i--; i > 0; i--)
1544 rte_eth_dev_mac_addr_remove(slave_port_id,
1545 &bonded_eth_dev->data->mac_addrs[i]);
/*
1554 * Remove additional MAC addresses from the slave
 *
 * Counterpart of slave_add_mac_addresses(): removes every non-null
 * secondary MAC from @slave_port_id. All entries are attempted even
 * after an error; only the first error code is reported.
 */
1557 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1558 uint16_t slave_port_id)
1561 struct ether_addr *mac_addr;
1564 for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1565 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1566 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1569 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1570 /* save only the first error */
1571 if (ret < 0 && rc == 0)
/*
 * Propagate the bonded device's MAC configuration to all slaves,
 * according to the bonding mode: in round-robin/balance/broadcast every
 * slave takes the bond MAC; in 8023AD the mode-4 helper handles it; in
 * active-backup/TLB/ALB only the primary takes the bond MAC while the
 * other slaves are restored to their persisted (original) MACs.
 */
1579 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1581 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1584 /* Update slave devices MAC addresses */
1585 if (internals->slave_count < 1)
1588 switch (internals->mode) {
1589 case BONDING_MODE_ROUND_ROBIN:
1590 case BONDING_MODE_BALANCE:
1591 case BONDING_MODE_BROADCAST:
1592 for (i = 0; i < internals->slave_count; i++) {
1593 if (rte_eth_dev_default_mac_addr_set(
1594 internals->slaves[i].port_id,
1595 bonded_eth_dev->data->mac_addrs)) {
1596 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1597 internals->slaves[i].port_id);
1602 case BONDING_MODE_8023AD:
1603 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1605 case BONDING_MODE_ACTIVE_BACKUP:
1606 case BONDING_MODE_TLB:
1607 case BONDING_MODE_ALB:
1609 for (i = 0; i < internals->slave_count; i++) {
1610 if (internals->slaves[i].port_id ==
1611 internals->current_primary_port) {
/* NOTE(review): sets the MAC on internals->primary_port while the match
 * and the error log use current_primary_port — looks inconsistent when
 * the two differ; confirm intended target before changing. */
1612 if (rte_eth_dev_default_mac_addr_set(
1613 internals->primary_port,
1614 bonded_eth_dev->data->mac_addrs)) {
1615 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1616 internals->current_primary_port);
/* non-primary slaves get their original MAC back */
1620 if (rte_eth_dev_default_mac_addr_set(
1621 internals->slaves[i].port_id,
1622 &internals->slaves[i].persisted_mac_addr)) {
1623 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1624 internals->slaves[i].port_id);
/*
 * Select the rx/tx burst handlers for the requested bonding mode and
 * record the mode in internals. Modes 4 (802.3ad) and 6 (ALB) require an
 * extra enable step which may fail before the handlers are installed.
 */
1635 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1637 struct bond_dev_private *internals;
1639 internals = eth_dev->data->dev_private;
1642 case BONDING_MODE_ROUND_ROBIN:
1643 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1644 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1646 case BONDING_MODE_ACTIVE_BACKUP:
1647 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1648 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1650 case BONDING_MODE_BALANCE:
1651 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1652 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1654 case BONDING_MODE_BROADCAST:
1655 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1656 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1658 case BONDING_MODE_8023AD:
1659 if (bond_mode_8023ad_enable(eth_dev) != 0)
/* without dedicated queues, data-path bursts also drive the LACP
 * state machines, hence the 100ms polling requirement below */
1662 if (internals->mode4.dedicated_queues.enabled == 0) {
1663 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1664 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1665 RTE_BOND_LOG(WARNING,
1666 "Using mode 4, it is necessary to do TX burst "
1667 "and RX burst at least every 100ms.");
1669 /* Use flow director's optimization */
1670 eth_dev->rx_pkt_burst =
1671 bond_ethdev_rx_burst_8023ad_fast_queue;
1672 eth_dev->tx_pkt_burst =
1673 bond_ethdev_tx_burst_8023ad_fast_queue;
1676 case BONDING_MODE_TLB:
1677 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1678 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1680 case BONDING_MODE_ALB:
1681 if (bond_mode_alb_enable(eth_dev) != 0)
1684 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1685 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1691 internals->mode = mode;
/*
 * Set up the dedicated "slow" (LACP control) path for a mode-4 slave:
 * lazily create the per-slave mbuf pool for control traffic, then, when
 * dedicated queues are enabled, configure the reserved slow rx/tx queues
 * on the slave port.
 */
1698 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1699 struct rte_eth_dev *slave_eth_dev)
1702 struct bond_dev_private *internals = (struct bond_dev_private *)
1703 bonded_eth_dev->data->dev_private;
1704 struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
/* pool is created once per slave and reused across reconfigurations */
1706 if (port->slow_pool == NULL) {
1708 int slave_id = slave_eth_dev->data->port_id;
1710 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1712 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1713 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1714 slave_eth_dev->data->numa_node);
1716 /* Any memory allocation failure in initialization is critical because
1717 * resources can't be free, so reinitialization is impossible. */
1718 if (port->slow_pool == NULL) {
1719 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1720 slave_id, mem_name, rte_strerror(rte_errno));
1724 if (internals->mode4.dedicated_queues.enabled == 1) {
1725 /* Configure slow Rx queue */
1727 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1728 internals->mode4.dedicated_queues.rx_qid, 128,
1729 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1730 NULL, port->slow_pool);
1733 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1734 slave_eth_dev->data->port_id,
1735 internals->mode4.dedicated_queues.rx_qid,
/* Configure slow Tx queue */
1740 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1741 internals->mode4.dedicated_queues.tx_qid, 512,
1742 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1746 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1747 slave_eth_dev->data->port_id,
1748 internals->mode4.dedicated_queues.tx_qid,
/*
 * (Re)configure a slave port to mirror the bonded device: stop it, copy
 * RSS/VLAN-filter/MTU settings, configure it with the bond's queue
 * counts, replicate every rx/tx queue, set up the mode-4 slow path when
 * dedicated queues are enabled, start the port, sync the RETA table, and
 * finally fire an initial LSC callback when the slave supports link
 * interrupts.
 */
1757 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1758 struct rte_eth_dev *slave_eth_dev)
1760 struct bond_rx_queue *bd_rx_q;
1761 struct bond_tx_queue *bd_tx_q;
1762 uint16_t nb_rx_queues;
1763 uint16_t nb_tx_queues;
1767 struct rte_flow_error flow_error;
1769 struct bond_dev_private *internals = (struct bond_dev_private *)
1770 bonded_eth_dev->data->dev_private;
/* Stop slave */
1773 rte_eth_dev_stop(slave_eth_dev->data->port_id);
1775 /* Enable interrupts on slave device if supported */
1776 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1777 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1779 /* If RSS is enabled for bonding, try to enable it for slaves */
1780 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1781 if (internals->rss_key_len != 0) {
1782 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1783 internals->rss_key_len;
1784 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
/* NULL key lets the slave PMD use its own default RSS key */
1787 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1790 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1791 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1792 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1793 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
/* mirror the bond's VLAN-filter offload setting onto the slave */
1796 if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1797 DEV_RX_OFFLOAD_VLAN_FILTER)
1798 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1799 DEV_RX_OFFLOAD_VLAN_FILTER;
1801 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1802 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1804 nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1805 nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1807 if (internals->mode == BONDING_MODE_8023AD) {
1808 if (internals->mode4.dedicated_queues.enabled == 1) {
/* propagate the bond's MTU; ENOTSUP from the slave PMD is tolerated */
1814 errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1815 bonded_eth_dev->data->mtu);
1816 if (errval != 0 && errval != -ENOTSUP) {
1817 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1818 slave_eth_dev->data->port_id, errval);
1822 /* Configure device */
1823 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1824 nb_rx_queues, nb_tx_queues,
1825 &(slave_eth_dev->data->dev_conf));
1827 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1828 slave_eth_dev->data->port_id, errval);
1832 /* Setup Rx Queues */
1833 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1834 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1836 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1837 bd_rx_q->nb_rx_desc,
1838 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1839 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1842 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1843 slave_eth_dev->data->port_id, q_id, errval);
1848 /* Setup Tx Queues */
1849 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1850 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1852 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1853 bd_tx_q->nb_tx_desc,
1854 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1858 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1859 slave_eth_dev->data->port_id, q_id, errval);
/* mode-4 dedicated-queue slow path: slow queues + LACP steering flow */
1864 if (internals->mode == BONDING_MODE_8023AD &&
1865 internals->mode4.dedicated_queues.enabled == 1) {
1866 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
1870 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1871 slave_eth_dev->data->port_id) != 0) {
/* NOTE(review): this error string says "rte_eth_tx_queue_setup" but the
 * failing call is bond_ethdev_8023ad_flow_verify — looks copy-pasted. */
1873 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1874 slave_eth_dev->data->port_id, q_id, errval);
/* drop a stale steering flow before installing a fresh one */
1878 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1879 rte_flow_destroy(slave_eth_dev->data->port_id,
1880 internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1883 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1884 slave_eth_dev->data->port_id);
/* Start device */
1888 errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1890 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1891 slave_eth_dev->data->port_id, errval);
1895 /* If RSS is enabled for bonding, synchronize RETA */
1896 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1898 struct bond_dev_private *internals;
1900 internals = bonded_eth_dev->data->dev_private;
1902 for (i = 0; i < internals->slave_count; i++) {
1903 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1904 errval = rte_eth_dev_rss_reta_update(
1905 slave_eth_dev->data->port_id,
1906 &internals->reta_conf[0],
1907 internals->slaves[i].reta_size);
/* RETA failure is non-fatal: warn and carry on */
1909 RTE_BOND_LOG(WARNING,
1910 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1911 " RSS Configuration for bonding may be inconsistent.",
1912 slave_eth_dev->data->port_id, errval);
1919 /* If lsc interrupt is set, check initial slave's link status */
1920 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1921 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1922 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1923 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
/*
 * Remove a slave from internals->slaves: locate it by port id, compact
 * the slave array and every per-flow slave-flow array over the gap,
 * decrement the count, and reset the slave ethdev so it must be fully
 * reconfigured before reuse.
 */
1931 slave_remove(struct bond_dev_private *internals,
1932 struct rte_eth_dev *slave_eth_dev)
1936 for (i = 0; i < internals->slave_count; i++)
1937 if (internals->slaves[i].port_id ==
1938 slave_eth_dev->data->port_id)
/* shift the tail down only when the slave isn't already last */
1941 if (i < (internals->slave_count - 1)) {
1942 struct rte_flow *flow;
1944 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1945 sizeof(internals->slaves[0]) *
1946 (internals->slave_count - i - 1));
/* keep each flow's per-slave handle array in sync with the slave array */
1947 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1948 memmove(&flow->flows[i], &flow->flows[i + 1],
1949 sizeof(flow->flows[0]) *
1950 (internals->slave_count - i - 1));
1951 flow->flows[internals->slave_count - 1] = NULL;
1955 internals->slave_count--;
1957 /* force reconfiguration of slave interfaces */
1958 _rte_eth_dev_reset(slave_eth_dev);
1962 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
/*
 * Record a new slave in the next free internals->slaves slot: store its
 * port id, reset link-status tracking, flag it for polling when it lacks
 * LSC interrupt support, and snapshot its original MAC so it can be
 * restored when the slave is released.
 */
1965 slave_add(struct bond_dev_private *internals,
1966 struct rte_eth_dev *slave_eth_dev)
1968 struct bond_slave_details *slave_details =
1969 &internals->slaves[internals->slave_count];
1971 slave_details->port_id = slave_eth_dev->data->port_id;
1972 slave_details->last_link_status = 0;
1974 /* Mark slave devices that don't support interrupts so we can
1975 * compensate when we start the bond
1977 if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1978 slave_details->link_status_poll_enabled = 1;
1981 slave_details->link_status_wait_to_complete = 0;
1982 /* clean tlb_last_obytes when adding port for bonding device */
/* NOTE(review): comment above looks stale — the statement below saves
 * the slave's current MAC, it does not touch tlb_last_obytets. */
1983 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1984 sizeof(struct ether_addr));
/*
 * Set the current primary slave port. With no active slaves the request
 * is accepted unconditionally; otherwise it takes effect only when
 * @slave_port_id is found among the active slaves.
 */
1988 bond_ethdev_primary_set(struct bond_dev_private *internals,
1989 uint16_t slave_port_id)
1993 if (internals->active_slave_count < 1)
1994 internals->current_primary_port = slave_port_id;
1996 /* Search bonded device slave ports for new proposed primary port */
1997 for (i = 0; i < internals->active_slave_count; i++) {
1998 if (internals->active_slaves[i] == slave_port_id)
1999 internals->current_primary_port = slave_port_id;
2004 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
/*
 * dev_start op for the bonded device: validate that slaves exist, adopt
 * the primary slave's MAC when the user did not set one, re-apply
 * promiscuous config, reserve mode-4 dedicated queue ids, reconfigure
 * and start every slave, kick off link polling where needed, push MACs
 * to the slaves, and start mode-specific machinery (802.3ad, TLB/ALB).
 */
2007 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2009 struct bond_dev_private *internals;
2012 /* slave eth dev will be started by bonded device */
2013 if (check_for_bonded_ethdev(eth_dev)) {
2014 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2015 eth_dev->data->port_id);
2019 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2020 eth_dev->data->dev_started = 1;
2022 internals = eth_dev->data->dev_private;
2024 if (internals->slave_count == 0) {
2025 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
/* no user-defined MAC: bond inherits the primary slave's persisted MAC */
2029 if (internals->user_defined_mac == 0) {
2030 struct ether_addr *new_mac_addr = NULL;
2032 for (i = 0; i < internals->slave_count; i++)
2033 if (internals->slaves[i].port_id == internals->primary_port)
2034 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2036 if (new_mac_addr == NULL)
2039 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2040 RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2041 eth_dev->data->port_id);
2046 /* If bonded device is configure in promiscuous mode then re-apply config */
2047 if (internals->promiscuous_en)
2048 bond_ethdev_promiscuous_enable(eth_dev);
/* dedicated mode-4 slow queues take the ids just past the data queues */
2050 if (internals->mode == BONDING_MODE_8023AD) {
2051 if (internals->mode4.dedicated_queues.enabled == 1) {
2052 internals->mode4.dedicated_queues.rx_qid =
2053 eth_dev->data->nb_rx_queues;
2054 internals->mode4.dedicated_queues.tx_qid =
2055 eth_dev->data->nb_tx_queues;
2060 /* Reconfigure each slave device if starting bonded device */
2061 for (i = 0; i < internals->slave_count; i++) {
2062 struct rte_eth_dev *slave_ethdev =
2063 &(rte_eth_devices[internals->slaves[i].port_id]);
2064 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2066 "bonded port (%d) failed to reconfigure slave device (%d)",
2067 eth_dev->data->port_id,
2068 internals->slaves[i].port_id);
2071 /* We will need to poll for link status if any slave doesn't
2072 * support interrupts
2074 if (internals->slaves[i].link_status_poll_enabled)
2075 internals->link_status_polling_enabled = 1;
2078 /* start polling if needed */
2079 if (internals->link_status_polling_enabled) {
2081 internals->link_status_polling_interval_ms * 1000,
2082 bond_ethdev_slave_link_status_change_monitor,
2083 (void *)&rte_eth_devices[internals->port_id]);
2086 /* Update all slave devices MACs*/
2087 if (mac_address_slaves_update(eth_dev) != 0)
2090 if (internals->user_defined_primary_port)
2091 bond_ethdev_primary_set(internals, internals->primary_port);
2093 if (internals->mode == BONDING_MODE_8023AD)
2094 bond_mode_8023ad_start(eth_dev);
2096 if (internals->mode == BONDING_MODE_TLB ||
2097 internals->mode == BONDING_MODE_ALB)
2098 bond_tlb_enable(internals);
/* error path: roll back the started flag */
2103 eth_dev->data->dev_started = 0;
2108 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2112 if (dev->data->rx_queues != NULL) {
2113 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2114 rte_free(dev->data->rx_queues[i]);
2115 dev->data->rx_queues[i] = NULL;
2117 dev->data->nb_rx_queues = 0;
2120 if (dev->data->tx_queues != NULL) {
2121 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2122 rte_free(dev->data->tx_queues[i]);
2123 dev->data->tx_queues[i] = NULL;
2125 dev->data->nb_tx_queues = 0;
/*
 * dev_stop op: halt mode-specific machinery (drain the mode-4 control
 * rings, disable TLB/ALB and clear its byte counters), disable link
 * polling, reset per-slave link tracking, and mark the bond link down.
 */
2130 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2132 struct bond_dev_private *internals = eth_dev->data->dev_private;
2135 if (internals->mode == BONDING_MODE_8023AD) {
2139 bond_mode_8023ad_stop(eth_dev);
2141 /* Discard all messages to/from mode 4 state machines */
2142 for (i = 0; i < internals->active_slave_count; i++) {
2143 port = &mode_8023ad_ports[internals->active_slaves[i]];
2145 RTE_ASSERT(port->rx_ring != NULL);
2146 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2147 rte_pktmbuf_free(pkt);
2149 RTE_ASSERT(port->tx_ring != NULL);
2150 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2151 rte_pktmbuf_free(pkt);
2155 if (internals->mode == BONDING_MODE_TLB ||
2156 internals->mode == BONDING_MODE_ALB) {
2157 bond_tlb_disable(internals);
/* reset TLB byte counters so a restart balances from scratch */
2158 for (i = 0; i < internals->active_slave_count; i++)
2159 tlb_last_obytets[internals->active_slaves[i]] = 0;
2162 internals->link_status_polling_enabled = 0;
2163 for (i = 0; i < internals->slave_count; i++)
2164 internals->slaves[i].last_link_status = 0;
2166 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2167 eth_dev->data->dev_started = 0;
/*
 * dev_close op: stop and detach every slave (slaves whose removal fails
 * are skipped rather than retried forever), flush installed flows, free
 * the bond's queues, and reset the VLAN filter bitmap.
 */
2171 bond_ethdev_close(struct rte_eth_dev *dev)
2173 struct bond_dev_private *internals = dev->data->dev_private;
2174 uint8_t bond_port_id = internals->port_id;
2176 struct rte_flow_error ferror;
2178 RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
/* "skipped" counts slaves whose removal failed; loop ends when only
 * those remain */
2179 while (internals->slave_count != skipped) {
2180 uint16_t port_id = internals->slaves[skipped].port_id;
2182 rte_eth_dev_stop(port_id);
2184 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2186 "Failed to remove port %d from bonded device %s",
2187 port_id, dev->device->name);
2191 bond_flow_ops.flush(dev, &ferror);
2192 bond_ethdev_free_queues(dev);
2193 rte_bitmap_reset(internals->vlan_filter_bmp);
2196 /* forward declaration */
2197 static int bond_ethdev_configure(struct rte_eth_dev *dev);
/*
 * dev_infos_get op: report the bond's capabilities. Queue limits are the
 * minimum across all slaves (every slave must support the bond's queue
 * counts), reduced by one per direction when mode-4 dedicated slow
 * queues are enabled; offload capabilities are the aggregates cached in
 * internals.
 */
2200 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2202 struct bond_dev_private *internals = dev->data->dev_private;
2204 uint16_t max_nb_rx_queues = UINT16_MAX;
2205 uint16_t max_nb_tx_queues = UINT16_MAX;
2207 dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2209 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2210 internals->candidate_max_rx_pktlen :
2211 ETHER_MAX_JUMBO_FRAME_LEN;
2213 /* Max number of tx/rx queues that the bonded device can support is the
2214 * minimum values of the bonded slaves, as all slaves must be capable
2215 * of supporting the same number of tx/rx queues.
2217 if (internals->slave_count > 0) {
2218 struct rte_eth_dev_info slave_info;
2221 for (idx = 0; idx < internals->slave_count; idx++) {
2222 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2225 if (slave_info.max_rx_queues < max_nb_rx_queues)
2226 max_nb_rx_queues = slave_info.max_rx_queues;
2228 if (slave_info.max_tx_queues < max_nb_tx_queues)
2229 max_nb_tx_queues = slave_info.max_tx_queues;
2233 dev_info->max_rx_queues = max_nb_rx_queues;
2234 dev_info->max_tx_queues = max_nb_tx_queues;
2237 * If dedicated hw queues enabled for link bonding device in LACP mode
2238 * then we need to reduce the maximum number of data path queues by 1.
2240 if (internals->mode == BONDING_MODE_8023AD &&
2241 internals->mode4.dedicated_queues.enabled == 1) {
2242 dev_info->max_rx_queues--;
2243 dev_info->max_tx_queues--;
2246 dev_info->min_rx_bufsize = 0;
2248 dev_info->rx_offload_capa = internals->rx_offload_capa;
2249 dev_info->tx_offload_capa = internals->tx_offload_capa;
2250 dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2251 dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2252 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2254 dev_info->reta_size = internals->reta_size;
/*
 * vlan_filter_set op: record the VLAN id in the bond's bitmap (so newly
 * added slaves can be synchronized later) and apply the filter to every
 * current slave under the internals lock; per-slave failures only warn.
 */
2258 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2262 struct bond_dev_private *internals = dev->data->dev_private;
2264 /* don't do this while a slave is being added */
2265 rte_spinlock_lock(&internals->lock);
2268 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2270 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2272 for (i = 0; i < internals->slave_count; i++) {
2273 uint16_t port_id = internals->slaves[i].port_id;
2275 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2277 RTE_BOND_LOG(WARNING,
2278 "Setting VLAN filter on slave port %u not supported.",
2282 rte_spinlock_unlock(&internals->lock);
/*
 * rx_queue_setup op: allocate a bond_rx_queue on the device's NUMA node
 * (socket_id argument ignored) and stash the queue id, descriptor count,
 * rx_conf and mempool so slave_configure() can replicate this queue on
 * each slave.
 */
2287 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2288 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2289 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2291 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2292 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2293 0, dev->data->numa_node);
2294 if (bd_rx_q == NULL)
2297 bd_rx_q->queue_id = rx_queue_id;
2298 bd_rx_q->dev_private = dev->data->dev_private;
2300 bd_rx_q->nb_rx_desc = nb_rx_desc;
2302 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2303 bd_rx_q->mb_pool = mb_pool;
2305 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
/*
 * tx_queue_setup op: allocate a bond_tx_queue on the device's NUMA node
 * (socket_id argument ignored) and record the queue id, descriptor count
 * and tx_conf for later replication onto each slave.
 */
2311 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2312 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2313 const struct rte_eth_txconf *tx_conf)
2315 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2316 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2317 0, dev->data->numa_node);
2319 if (bd_tx_q == NULL)
2322 bd_tx_q->queue_id = tx_queue_id;
2323 bd_tx_q->dev_private = dev->data->dev_private;
2325 bd_tx_q->nb_tx_desc = nb_tx_desc;
2326 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2328 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
/* rx_queue_release op: free a bond_rx_queue allocated by
 * bond_ethdev_rx_queue_setup(). */
2334 bond_ethdev_rx_queue_release(void *queue)
/* tx_queue_release op: free a bond_tx_queue allocated by
 * bond_ethdev_tx_queue_setup(). */
2343 bond_ethdev_tx_queue_release(void *queue)
/*
 * Periodic alarm callback that polls link status of slaves which do not
 * support LSC interrupts. Runs under a trylock of internals->lock so it
 * never blocks a concurrent reconfiguration; when a slave's status
 * changed since the last poll, the LSC event callback is invoked. The
 * alarm re-arms itself while any polled slave remains (or the lock was
 * contended).
 */
2352 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2354 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2355 struct bond_dev_private *internals;
2357 /* Default value for polling slave found is true as we don't want to
2358 * disable the polling thread if we cannot get the lock */
2359 int i, polling_slave_found = 1;
2364 bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2365 internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2367 if (!bonded_ethdev->data->dev_started ||
2368 !internals->link_status_polling_enabled)
2371 /* If device is currently being configured then don't check slaves link
2372 * status, wait until next period */
2373 if (rte_spinlock_trylock(&internals->lock)) {
2374 if (internals->slave_count > 0)
2375 polling_slave_found = 0;
2377 for (i = 0; i < internals->slave_count; i++) {
2378 if (!internals->slaves[i].link_status_poll_enabled)
2381 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2382 polling_slave_found = 1;
2384 /* Update slave link status */
2385 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2386 internals->slaves[i].link_status_wait_to_complete);
2388 /* if link status has changed since last checked then call lsc
2390 if (slave_ethdev->data->dev_link.link_status !=
2391 internals->slaves[i].last_link_status) {
2392 internals->slaves[i].last_link_status =
2393 slave_ethdev->data->dev_link.link_status;
2395 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2396 RTE_ETH_EVENT_INTR_LSC,
2397 &bonded_ethdev->data->port_id,
2401 rte_spinlock_unlock(&internals->lock);
2404 if (polling_slave_found)
2405 /* Set alarm to continue monitoring link status of slave ethdev's */
2406 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2407 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2411 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2413 void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2415 struct bond_dev_private *bond_ctx;
2416 struct rte_eth_link slave_link;
2420 bond_ctx = ethdev->data->dev_private;
2422 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2424 if (ethdev->data->dev_started == 0 ||
2425 bond_ctx->active_slave_count == 0) {
2426 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2430 ethdev->data->dev_link.link_status = ETH_LINK_UP;
2432 if (wait_to_complete)
2433 link_update = rte_eth_link_get;
2435 link_update = rte_eth_link_get_nowait;
2437 switch (bond_ctx->mode) {
2438 case BONDING_MODE_BROADCAST:
2440 * Setting link speed to UINT32_MAX to ensure we pick up the
2441 * value of the first active slave
2443 ethdev->data->dev_link.link_speed = UINT32_MAX;
2446 * link speed is minimum value of all the slaves link speed as
2447 * packet loss will occur on this slave if transmission at rates
2448 * greater than this are attempted
2450 for (idx = 1; idx < bond_ctx->active_slave_count; idx++) {
2451 link_update(bond_ctx->active_slaves[0], &slave_link);
2453 if (slave_link.link_speed <
2454 ethdev->data->dev_link.link_speed)
2455 ethdev->data->dev_link.link_speed =
2456 slave_link.link_speed;
2459 case BONDING_MODE_ACTIVE_BACKUP:
2460 /* Current primary slave */
2461 link_update(bond_ctx->current_primary_port, &slave_link);
2463 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2465 case BONDING_MODE_8023AD:
2466 ethdev->data->dev_link.link_autoneg =
2467 bond_ctx->mode4.slave_link.link_autoneg;
2468 ethdev->data->dev_link.link_duplex =
2469 bond_ctx->mode4.slave_link.link_duplex;
2470 /* fall through to update link speed */
2471 case BONDING_MODE_ROUND_ROBIN:
2472 case BONDING_MODE_BALANCE:
2473 case BONDING_MODE_TLB:
2474 case BONDING_MODE_ALB:
2477 * In theses mode the maximum theoretical link speed is the sum
2480 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2482 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2483 link_update(bond_ctx->active_slaves[idx], &slave_link);
2485 ethdev->data->dev_link.link_speed +=
2486 slave_link.link_speed;
2496 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2498 struct bond_dev_private *internals = dev->data->dev_private;
2499 struct rte_eth_stats slave_stats;
2502 for (i = 0; i < internals->slave_count; i++) {
2503 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2505 stats->ipackets += slave_stats.ipackets;
2506 stats->opackets += slave_stats.opackets;
2507 stats->ibytes += slave_stats.ibytes;
2508 stats->obytes += slave_stats.obytes;
2509 stats->imissed += slave_stats.imissed;
2510 stats->ierrors += slave_stats.ierrors;
2511 stats->oerrors += slave_stats.oerrors;
2512 stats->rx_nombuf += slave_stats.rx_nombuf;
2514 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2515 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2516 stats->q_opackets[j] += slave_stats.q_opackets[j];
2517 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2518 stats->q_obytes[j] += slave_stats.q_obytes[j];
2519 stats->q_errors[j] += slave_stats.q_errors[j];
2528 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2530 struct bond_dev_private *internals = dev->data->dev_private;
2533 for (i = 0; i < internals->slave_count; i++)
2534 rte_eth_stats_reset(internals->slaves[i].port_id);
2538 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2540 struct bond_dev_private *internals = eth_dev->data->dev_private;
2543 internals->promiscuous_en = 1;
2545 switch (internals->mode) {
2546 /* Promiscuous mode is propagated to all slaves */
2547 case BONDING_MODE_ROUND_ROBIN:
2548 case BONDING_MODE_BALANCE:
2549 case BONDING_MODE_BROADCAST:
2550 for (i = 0; i < internals->slave_count; i++)
2551 rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2553 /* In mode4 promiscus mode is managed when slave is added/removed */
2554 case BONDING_MODE_8023AD:
2556 /* Promiscuous mode is propagated only to primary slave */
2557 case BONDING_MODE_ACTIVE_BACKUP:
2558 case BONDING_MODE_TLB:
2559 case BONDING_MODE_ALB:
2561 rte_eth_promiscuous_enable(internals->current_primary_port);
2566 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2568 struct bond_dev_private *internals = dev->data->dev_private;
2571 internals->promiscuous_en = 0;
2573 switch (internals->mode) {
2574 /* Promiscuous mode is propagated to all slaves */
2575 case BONDING_MODE_ROUND_ROBIN:
2576 case BONDING_MODE_BALANCE:
2577 case BONDING_MODE_BROADCAST:
2578 for (i = 0; i < internals->slave_count; i++)
2579 rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2581 /* In mode4 promiscus mode is set managed when slave is added/removed */
2582 case BONDING_MODE_8023AD:
2584 /* Promiscuous mode is propagated only to primary slave */
2585 case BONDING_MODE_ACTIVE_BACKUP:
2586 case BONDING_MODE_TLB:
2587 case BONDING_MODE_ALB:
2589 rte_eth_promiscuous_disable(internals->current_primary_port);
2594 bond_ethdev_delayed_lsc_propagation(void *arg)
2599 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2600 RTE_ETH_EVENT_INTR_LSC, NULL);
2604 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2605 void *param, void *ret_param __rte_unused)
2607 struct rte_eth_dev *bonded_eth_dev;
2608 struct bond_dev_private *internals;
2609 struct rte_eth_link link;
2612 int i, valid_slave = 0;
2614 uint8_t lsc_flag = 0;
2616 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2619 bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
2621 if (check_for_bonded_ethdev(bonded_eth_dev))
2624 internals = bonded_eth_dev->data->dev_private;
2626 /* If the device isn't started don't handle interrupts */
2627 if (!bonded_eth_dev->data->dev_started)
2630 /* verify that port_id is a valid slave of bonded port */
2631 for (i = 0; i < internals->slave_count; i++) {
2632 if (internals->slaves[i].port_id == port_id) {
2641 /* Synchronize lsc callback parallel calls either by real link event
2642 * from the slaves PMDs or by the bonding PMD itself.
2644 rte_spinlock_lock(&internals->lsc_lock);
2646 /* Search for port in active port list */
2647 active_pos = find_slave_by_id(internals->active_slaves,
2648 internals->active_slave_count, port_id);
2650 rte_eth_link_get_nowait(port_id, &link);
2651 if (link.link_status) {
2652 if (active_pos < internals->active_slave_count)
2655 /* if no active slave ports then set this port to be primary port */
2656 if (internals->active_slave_count < 1) {
2657 /* If first active slave, then change link status */
2658 bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2659 internals->current_primary_port = port_id;
2662 mac_address_slaves_update(bonded_eth_dev);
2665 /* check link state properties if bonded link is up*/
2666 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2667 if (link_properties_valid(bonded_eth_dev, &link) != 0)
2668 RTE_BOND_LOG(ERR, "Invalid link properties "
2669 "for slave %d in bonding mode %d",
2670 port_id, internals->mode);
2672 /* inherit slave link properties */
2673 link_properties_set(bonded_eth_dev, &link);
2676 activate_slave(bonded_eth_dev, port_id);
2678 /* If user has defined the primary port then default to using it */
2679 if (internals->user_defined_primary_port &&
2680 internals->primary_port == port_id)
2681 bond_ethdev_primary_set(internals, port_id);
2683 if (active_pos == internals->active_slave_count)
2686 /* Remove from active slave list */
2687 deactivate_slave(bonded_eth_dev, port_id);
2689 if (internals->active_slave_count < 1)
2692 /* Update primary id, take first active slave from list or if none
2693 * available set to -1 */
2694 if (port_id == internals->current_primary_port) {
2695 if (internals->active_slave_count > 0)
2696 bond_ethdev_primary_set(internals,
2697 internals->active_slaves[0]);
2699 internals->current_primary_port = internals->primary_port;
2705 * Update bonded device link properties after any change to active
2708 bond_ethdev_link_update(bonded_eth_dev, 0);
2711 /* Cancel any possible outstanding interrupts if delays are enabled */
2712 if (internals->link_up_delay_ms > 0 ||
2713 internals->link_down_delay_ms > 0)
2714 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2717 if (bonded_eth_dev->data->dev_link.link_status) {
2718 if (internals->link_up_delay_ms > 0)
2719 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2720 bond_ethdev_delayed_lsc_propagation,
2721 (void *)bonded_eth_dev);
2723 _rte_eth_dev_callback_process(bonded_eth_dev,
2724 RTE_ETH_EVENT_INTR_LSC,
2728 if (internals->link_down_delay_ms > 0)
2729 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2730 bond_ethdev_delayed_lsc_propagation,
2731 (void *)bonded_eth_dev);
2733 _rte_eth_dev_callback_process(bonded_eth_dev,
2734 RTE_ETH_EVENT_INTR_LSC,
2739 rte_spinlock_unlock(&internals->lsc_lock);
2745 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2746 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2750 int slave_reta_size;
2751 unsigned reta_count;
2752 struct bond_dev_private *internals = dev->data->dev_private;
2754 if (reta_size != internals->reta_size)
2757 /* Copy RETA table */
2758 reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2760 for (i = 0; i < reta_count; i++) {
2761 internals->reta_conf[i].mask = reta_conf[i].mask;
2762 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2763 if ((reta_conf[i].mask >> j) & 0x01)
2764 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2767 /* Fill rest of array */
2768 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2769 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2770 sizeof(internals->reta_conf[0]) * reta_count);
2772 /* Propagate RETA over slaves */
2773 for (i = 0; i < internals->slave_count; i++) {
2774 slave_reta_size = internals->slaves[i].reta_size;
2775 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2776 &internals->reta_conf[0], slave_reta_size);
2785 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2786 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2789 struct bond_dev_private *internals = dev->data->dev_private;
2791 if (reta_size != internals->reta_size)
2794 /* Copy RETA table */
2795 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2796 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2797 if ((reta_conf[i].mask >> j) & 0x01)
2798 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2804 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2805 struct rte_eth_rss_conf *rss_conf)
2808 struct bond_dev_private *internals = dev->data->dev_private;
2809 struct rte_eth_rss_conf bond_rss_conf;
2811 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2813 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2815 if (bond_rss_conf.rss_hf != 0)
2816 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2818 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2819 sizeof(internals->rss_key)) {
2820 if (bond_rss_conf.rss_key_len == 0)
2821 bond_rss_conf.rss_key_len = 40;
2822 internals->rss_key_len = bond_rss_conf.rss_key_len;
2823 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2824 internals->rss_key_len);
2827 for (i = 0; i < internals->slave_count; i++) {
2828 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2838 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2839 struct rte_eth_rss_conf *rss_conf)
2841 struct bond_dev_private *internals = dev->data->dev_private;
2843 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2844 rss_conf->rss_key_len = internals->rss_key_len;
2845 if (rss_conf->rss_key)
2846 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2852 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2854 struct rte_eth_dev *slave_eth_dev;
2855 struct bond_dev_private *internals = dev->data->dev_private;
2858 rte_spinlock_lock(&internals->lock);
2860 for (i = 0; i < internals->slave_count; i++) {
2861 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2862 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2863 rte_spinlock_unlock(&internals->lock);
2867 for (i = 0; i < internals->slave_count; i++) {
2868 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2870 rte_spinlock_unlock(&internals->lock);
2875 rte_spinlock_unlock(&internals->lock);
2880 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2882 if (mac_address_set(dev, addr)) {
2883 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2891 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2892 enum rte_filter_type type, enum rte_filter_op op, void *arg)
2894 if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2895 *(const void **)arg = &bond_flow_ops;
2902 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2903 __rte_unused uint32_t index, uint32_t vmdq)
2905 struct rte_eth_dev *slave_eth_dev;
2906 struct bond_dev_private *internals = dev->data->dev_private;
2909 rte_spinlock_lock(&internals->lock);
2911 for (i = 0; i < internals->slave_count; i++) {
2912 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2913 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2914 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2920 for (i = 0; i < internals->slave_count; i++) {
2921 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2925 for (i--; i >= 0; i--)
2926 rte_eth_dev_mac_addr_remove(
2927 internals->slaves[i].port_id, mac_addr);
2934 rte_spinlock_unlock(&internals->lock);
2939 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2941 struct rte_eth_dev *slave_eth_dev;
2942 struct bond_dev_private *internals = dev->data->dev_private;
2945 rte_spinlock_lock(&internals->lock);
2947 for (i = 0; i < internals->slave_count; i++) {
2948 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2949 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2953 struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
2955 for (i = 0; i < internals->slave_count; i++)
2956 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2960 rte_spinlock_unlock(&internals->lock);
2963 const struct eth_dev_ops default_dev_ops = {
2964 .dev_start = bond_ethdev_start,
2965 .dev_stop = bond_ethdev_stop,
2966 .dev_close = bond_ethdev_close,
2967 .dev_configure = bond_ethdev_configure,
2968 .dev_infos_get = bond_ethdev_info,
2969 .vlan_filter_set = bond_ethdev_vlan_filter_set,
2970 .rx_queue_setup = bond_ethdev_rx_queue_setup,
2971 .tx_queue_setup = bond_ethdev_tx_queue_setup,
2972 .rx_queue_release = bond_ethdev_rx_queue_release,
2973 .tx_queue_release = bond_ethdev_tx_queue_release,
2974 .link_update = bond_ethdev_link_update,
2975 .stats_get = bond_ethdev_stats_get,
2976 .stats_reset = bond_ethdev_stats_reset,
2977 .promiscuous_enable = bond_ethdev_promiscuous_enable,
2978 .promiscuous_disable = bond_ethdev_promiscuous_disable,
2979 .reta_update = bond_ethdev_rss_reta_update,
2980 .reta_query = bond_ethdev_rss_reta_query,
2981 .rss_hash_update = bond_ethdev_rss_hash_update,
2982 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get,
2983 .mtu_set = bond_ethdev_mtu_set,
2984 .mac_addr_set = bond_ethdev_mac_address_set,
2985 .mac_addr_add = bond_ethdev_mac_addr_add,
2986 .mac_addr_remove = bond_ethdev_mac_addr_remove,
2987 .filter_ctrl = bond_filter_ctrl
2991 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2993 const char *name = rte_vdev_device_name(dev);
2994 uint8_t socket_id = dev->device.numa_node;
2995 struct bond_dev_private *internals = NULL;
2996 struct rte_eth_dev *eth_dev = NULL;
2997 uint32_t vlan_filter_bmp_size;
2999 /* now do all data allocation - for eth_dev structure, dummy pci driver
3000 * and internal (private) data
3003 /* reserve an ethdev entry */
3004 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3005 if (eth_dev == NULL) {
3006 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3010 internals = eth_dev->data->dev_private;
3011 eth_dev->data->nb_rx_queues = (uint16_t)1;
3012 eth_dev->data->nb_tx_queues = (uint16_t)1;
3014 /* Allocate memory for storing MAC addresses */
3015 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3016 BOND_MAX_MAC_ADDRS, 0, socket_id);
3017 if (eth_dev->data->mac_addrs == NULL) {
3019 "Failed to allocate %u bytes needed to store MAC addresses",
3020 ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3024 eth_dev->dev_ops = &default_dev_ops;
3025 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3027 rte_spinlock_init(&internals->lock);
3028 rte_spinlock_init(&internals->lsc_lock);
3030 internals->port_id = eth_dev->data->port_id;
3031 internals->mode = BONDING_MODE_INVALID;
3032 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3033 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3034 internals->burst_xmit_hash = burst_xmit_l2_hash;
3035 internals->user_defined_mac = 0;
3037 internals->link_status_polling_enabled = 0;
3039 internals->link_status_polling_interval_ms =
3040 DEFAULT_POLLING_INTERVAL_10_MS;
3041 internals->link_down_delay_ms = 0;
3042 internals->link_up_delay_ms = 0;
3044 internals->slave_count = 0;
3045 internals->active_slave_count = 0;
3046 internals->rx_offload_capa = 0;
3047 internals->tx_offload_capa = 0;
3048 internals->rx_queue_offload_capa = 0;
3049 internals->tx_queue_offload_capa = 0;
3050 internals->candidate_max_rx_pktlen = 0;
3051 internals->max_rx_pktlen = 0;
3053 /* Initially allow to choose any offload type */
3054 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3056 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3057 memset(internals->slaves, 0, sizeof(internals->slaves));
3059 TAILQ_INIT(&internals->flow_list);
3060 internals->flow_isolated_valid = 0;
3062 /* Set mode 4 default configuration */
3063 bond_mode_8023ad_setup(eth_dev, NULL);
3064 if (bond_ethdev_mode_set(eth_dev, mode)) {
3065 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3066 eth_dev->data->port_id, mode);
3070 vlan_filter_bmp_size =
3071 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3072 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3073 RTE_CACHE_LINE_SIZE);
3074 if (internals->vlan_filter_bmpmem == NULL) {
3076 "Failed to allocate vlan bitmap for bonded device %u",
3077 eth_dev->data->port_id);
3081 internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3082 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3083 if (internals->vlan_filter_bmp == NULL) {
3085 "Failed to init vlan bitmap for bonded device %u",
3086 eth_dev->data->port_id);
3087 rte_free(internals->vlan_filter_bmpmem);
3091 return eth_dev->data->port_id;
3094 rte_free(internals);
3095 if (eth_dev != NULL) {
3096 rte_free(eth_dev->data->mac_addrs);
3097 rte_eth_dev_release_port(eth_dev);
3103 bond_probe(struct rte_vdev_device *dev)
3106 struct bond_dev_private *internals;
3107 struct rte_kvargs *kvlist;
3108 uint8_t bonding_mode, socket_id/*, agg_mode*/;
3109 int arg_count, port_id;
3111 struct rte_eth_dev *eth_dev;
3116 name = rte_vdev_device_name(dev);
3117 RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3119 if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
3120 strlen(rte_vdev_device_args(dev)) == 0) {
3121 eth_dev = rte_eth_dev_attach_secondary(name);
3123 RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3126 /* TODO: request info from primary to set up Rx and Tx */
3127 eth_dev->dev_ops = &default_dev_ops;
3128 eth_dev->device = &dev->device;
3129 rte_eth_dev_probing_finish(eth_dev);
3133 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3134 pmd_bond_init_valid_arguments);
3138 /* Parse link bonding mode */
3139 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3140 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3141 &bond_ethdev_parse_slave_mode_kvarg,
3142 &bonding_mode) != 0) {
3143 RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3148 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3153 /* Parse socket id to create bonding device on */
3154 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3155 if (arg_count == 1) {
3156 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3157 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3159 RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3160 "bonded device %s", name);
3163 } else if (arg_count > 1) {
3164 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3165 "bonded device %s", name);
3168 socket_id = rte_socket_id();
3171 dev->device.numa_node = socket_id;
3173 /* Create link bonding eth device */
3174 port_id = bond_alloc(dev, bonding_mode);
3176 RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on "
3177 "socket %u.", name, bonding_mode, socket_id);
3180 internals = rte_eth_devices[port_id].data->dev_private;
3181 internals->kvlist = kvlist;
3183 rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3185 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3186 if (rte_kvargs_process(kvlist,
3187 PMD_BOND_AGG_MODE_KVARG,
3188 &bond_ethdev_parse_slave_agg_mode_kvarg,
3191 "Failed to parse agg selection mode for bonded device %s",
3196 if (internals->mode == BONDING_MODE_8023AD)
3197 rte_eth_bond_8023ad_agg_selection_set(port_id,
3200 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3203 RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3204 "socket %u.", name, port_id, bonding_mode, socket_id);
3208 rte_kvargs_free(kvlist);
3214 bond_remove(struct rte_vdev_device *dev)
3216 struct rte_eth_dev *eth_dev;
3217 struct bond_dev_private *internals;
3223 name = rte_vdev_device_name(dev);
3224 RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3226 /* now free all data allocation - for eth_dev structure,
3227 * dummy pci driver and internal (private) data
3230 /* find an ethdev entry */
3231 eth_dev = rte_eth_dev_allocated(name);
3232 if (eth_dev == NULL)
3235 RTE_ASSERT(eth_dev->device == &dev->device);
3237 internals = eth_dev->data->dev_private;
3238 if (internals->slave_count != 0)
3241 if (eth_dev->data->dev_started == 1) {
3242 bond_ethdev_stop(eth_dev);
3243 bond_ethdev_close(eth_dev);
3246 eth_dev->dev_ops = NULL;
3247 eth_dev->rx_pkt_burst = NULL;
3248 eth_dev->tx_pkt_burst = NULL;
3250 internals = eth_dev->data->dev_private;
3251 /* Try to release mempool used in mode6. If the bond
3252 * device is not mode6, free the NULL is not problem.
3254 rte_mempool_free(internals->mode6.mempool);
3255 rte_bitmap_free(internals->vlan_filter_bmp);
3256 rte_free(internals->vlan_filter_bmpmem);
3257 rte_free(eth_dev->data->dev_private);
3258 rte_free(eth_dev->data->mac_addrs);
3260 rte_eth_dev_release_port(eth_dev);
3265 /* this part will resolve the slave portids after all the other pdev and vdev
3266 * have been allocated */
/*
 * dev_configure hook: resolve the kvargs saved by bond_probe() (MAC,
 * xmit policy, agg mode, slaves, primary, LSC poll period, up/down
 * propagation delays) now that all pdev/vdev ports exist, and set up
 * the RSS defaults. NOTE(review): returns/braces elided in this
 * extraction — presumably 0 on success, -1 on any parse/apply failure.
 */
bond_ethdev_configure(struct rte_eth_dev *dev)
	const char *name = dev->device->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	/* Back-compute the port id from the device's position in the
	 * global rte_eth_devices[] array. */
	uint16_t port_id = dev - rte_eth_devices;
	/* Default 40-byte RSS hash key, used when the application enables
	 * RSS without supplying its own key. */
	static const uint8_t default_rss_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
	0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
	0xBE, 0xAC, 0x01, 0xFA
	/*
	 * If RSS is enabled, fill table with default values and
	 * set key to the value specified in port RSS configuration.
	 * Fall back to default RSS key if the key is not specified
	 */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
			internals->rss_key_len =
				dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
			memcpy(internals->rss_key,
			       dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
			       internals->rss_key_len);
			internals->rss_key_len = sizeof(default_rss_key);
			memcpy(internals->rss_key, default_rss_key,
			       internals->rss_key_len);
		/* Spread the Rx queues round-robin over the whole RETA. */
		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] =
					(i * RTE_RETA_GROUP_SIZE + j) %
					dev->data->nb_rx_queues;
	/* set the max_rx_pktlen */
	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
	/*
	 * if no kvlist, it means that this bonded device has been created
	 * through the bonding api.
	 */
	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct ether_addr bond_mac;
		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				       &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
			RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
				"Failed to set mac address on bonded device %s",
	} else if (arg_count > 1) {
			"MAC address can be specified only once for bonded device %s",
	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;
		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				       &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
				"Invalid xmit policy specified for bonded device %s",
		/* Set balance mode transmit policy*/
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
				"Failed to set balance xmit policy on bonded device %s",
	} else if (arg_count > 1) {
			"Transmit policy can be specified only once for bonded device %s",
	/* Parse/set 802.3ad aggregator selection mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				       PMD_BOND_AGG_MODE_KVARG,
				       &bond_ethdev_parse_slave_agg_mode_kvarg,
				"Failed to parse agg selection mode for bonded device %s",
		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;
		memset(&slave_ports, 0, sizeof(slave_ports));
		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				       &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
				"Failed to parse slave ports for bonded device %s",
		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
					"Failed to add port %d as slave to bonded device %s",
					slave_ports.slaves[i], name);
		RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
	/* Parse/set primary slave port id*/
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint16_t primary_slave_port_id;
		if (rte_kvargs_process(kvlist,
				       PMD_BOND_PRIMARY_SLAVE_KVARG,
				       &bond_ethdev_parse_primary_slave_port_id_kvarg,
				       &primary_slave_port_id) < 0) {
				"Invalid primary slave port id specified for bonded device %s",
		/* Set the primary slave port */
		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
				"Failed to set primary slave port %d on bonded device %s",
				primary_slave_port_id, name);
	} else if (arg_count > 1) {
			"Primary slave can be specified only once for bonded device %s",
	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;
		if (rte_kvargs_process(kvlist,
				       PMD_BOND_LSC_POLL_PERIOD_KVARG,
				       &bond_ethdev_parse_time_ms_kvarg,
				       &lsc_poll_interval_ms) < 0) {
				"Invalid lsc polling interval value specified for bonded"
				" device %s", name);
		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				"Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
				lsc_poll_interval_ms, name);
	} else if (arg_count > 1) {
			"LSC polling interval can be specified only once for bonded"
			" device %s", name);
	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;
		if (rte_kvargs_process(kvlist,
				       PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				       &bond_ethdev_parse_time_ms_kvarg,
				       &link_up_delay_ms) < 0) {
				"Invalid link up propagation delay value specified for"
				" bonded device %s", name);
		/* Set the link-up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				"Failed to set link up propagation delay (%u ms) on bonded"
				" device %s", link_up_delay_ms, name);
	} else if (arg_count > 1) {
			"Link up propagation delay can be specified only once for"
			" bonded device %s", name);
	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;
		if (rte_kvargs_process(kvlist,
				       PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				       &bond_ethdev_parse_time_ms_kvarg,
				       &link_down_delay_ms) < 0) {
				"Invalid link down propagation delay value specified for"
				" bonded device %s", name);
		/* Set the link-down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				"Failed to set link down propagation delay (%u ms) on bonded device %s",
				link_down_delay_ms, name);
	} else if (arg_count > 1) {
			"Link down propagation delay can be specified only once for bonded device %s",
3541 struct rte_vdev_driver pmd_bond_drv = {
3542 .probe = bond_probe,
3543 .remove = bond_remove,
3546 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3547 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
/* Advertise the accepted devargs for net_bonding.
 * NOTE(review): several argument patterns (e.g. slave/primary/mode/
 * socket_id/mac/up_delay lines) appear elided in this extraction —
 * verify the full list against the kvarg definitions. */
RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
	"xmit_policy=[l2 | l23 | l34] "
	"agg_mode=[count | stable | bandwidth] "
	"lsc_poll_period_ms=<int> "
	"down_delay=<int>");
3563 RTE_INIT(bond_init_log)
3565 bond_logtype = rte_log_register("pmd.net.bon");
3566 if (bond_logtype >= 0)
3567 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);