/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <netinet/in.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
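/*
 * XOR of the L4 source and destination ports, used as the L4 component of
 * the transmit hash below. It works for both struct tcp_hdr and struct
 * udp_hdr because src_port/dst_port are the first two 16-bit fields of
 * each, and it is symmetric, so both directions of a flow hash alike.
 */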
#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
    size_t vlan_offset = 0;

    if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
        struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

        vlan_offset = sizeof(struct vlan_hdr);
        *proto = vlan_hdr->eth_proto;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
            vlan_hdr = vlan_hdr + 1;
            *proto = vlan_hdr->eth_proto;
            vlan_offset += sizeof(struct vlan_hdr);
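            /* Note: the nested check above lets the function skip up to two
             * VLAN tags (QinQ), leaving *proto at the inner EtherType so
             * callers can locate and hash the encapsulated L3 header. */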
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
    struct bond_dev_private *internals;

    uint16_t num_rx_slave = 0;
    uint16_t num_rx_total = 0;
    /* Cast to structure containing the bonded device's port id and queue id */
    struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

    internals = bd_rx_q->dev_private;

    for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
        /* Offset of pointer to *bufs increases as packets are received
         * from other slaves */
        num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);

        num_rx_total += num_rx_slave;
        nb_pkts -= num_rx_slave;

bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
    struct bond_dev_private *internals;

    /* Cast to structure containing the bonded device's port id and queue id */
    struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

    internals = bd_rx_q->dev_private;

    return rte_eth_rx_burst(internals->current_primary_port,
            bd_rx_q->queue_id, bufs, nb_pkts);

is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
    const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

    return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
        (ethertype == ether_type_slow_be &&
        (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
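    /* A frame counts as LACP traffic only when it is untagged and carries
     * the slow-protocols EtherType (0x8809) with a LACP or marker subtype;
     * such frames are consumed by the mode 4 state machine instead of being
     * delivered to the application. */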
/*****************************************************************************
 * Flow director's setup for mode 4 optimization

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
    .dst.addr_bytes = { 0 },
    .src.addr_bytes = { 0 },
    .type = RTE_BE16(ETHER_TYPE_SLOW),

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
    .dst.addr_bytes = { 0 },
    .src.addr_bytes = { 0 },

static struct rte_flow_item flow_item_8023ad[] = {
        .type = RTE_FLOW_ITEM_TYPE_ETH,
        .spec = &flow_item_eth_type_8023ad,
        .mask = &flow_item_eth_mask_type_8023ad,

        .type = RTE_FLOW_ITEM_TYPE_END,
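/*
 * The pattern above matches on the slow-protocols EtherType alone, so one
 * flow rule per slave is enough to steer every LACP/marker frame into the
 * dedicated mode 4 control queue configured below.
 */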
const struct rte_flow_attr flow_attr_8023ad = {

bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
        uint16_t slave_port) {
    struct rte_eth_dev_info slave_info;
    struct rte_flow_error error;
    struct bond_dev_private *internals = (struct bond_dev_private *)
            (bond_dev->data->dev_private);

    const struct rte_flow_action_queue lacp_queue_conf = {

    const struct rte_flow_action actions[] = {
            .type = RTE_FLOW_ACTION_TYPE_QUEUE,
            .conf = &lacp_queue_conf

            .type = RTE_FLOW_ACTION_TYPE_END,

    int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
            flow_item_8023ad, actions, &error);
        RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                __func__, error.message, slave_port,
                internals->mode4.dedicated_queues.rx_qid);

    rte_eth_dev_info_get(slave_port, &slave_info);
    if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
            slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                "%s: Slave %d capabilities don't allow allocating additional queues",
                __func__, slave_port);
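    /* One extra rx/tx queue pair per slave is claimed for the dedicated
     * LACP control path, hence the capability check against the bonded
     * device's queue counts above. */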
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
    struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
    struct bond_dev_private *internals = (struct bond_dev_private *)
            (bond_dev->data->dev_private);
    struct rte_eth_dev_info bond_info;
    /* Verify that all slaves in the bonding device support flow director */
    if (internals->slave_count > 0) {
        rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

        internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
        internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

        for (idx = 0; idx < internals->slave_count; idx++) {
            if (bond_ethdev_8023ad_flow_verify(bond_dev,
                    internals->slaves[idx].port_id) != 0)

bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
    struct rte_flow_error error;
    struct bond_dev_private *internals = (struct bond_dev_private *)
            (bond_dev->data->dev_private);

    struct rte_flow_action_queue lacp_queue_conf = {
        .index = internals->mode4.dedicated_queues.rx_qid,

    const struct rte_flow_action actions[] = {
            .type = RTE_FLOW_ACTION_TYPE_QUEUE,
            .conf = &lacp_queue_conf

            .type = RTE_FLOW_ACTION_TYPE_END,

    internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
            &flow_attr_8023ad, flow_item_8023ad, actions, &error);
    if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
        RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                "(slave_port=%d queue_id=%d)",
                error.message, slave_port,
                internals->mode4.dedicated_queues.rx_qid);

bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
    struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
    struct bond_dev_private *internals = bd_rx_q->dev_private;
    uint16_t num_rx_total = 0; /* Total number of received packets */
    uint16_t slaves[RTE_MAX_ETHPORTS];
    uint16_t slave_count;

    /* Copy slave list to protect against slave up/down changes during tx
    slave_count = internals->active_slave_count;
    memcpy(slaves, internals->active_slaves,
            sizeof(internals->active_slaves[0]) * slave_count);

    for (i = 0, idx = internals->active_slave;
            i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
        idx = idx % slave_count;

        /* Read packets from this slave */
        num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                &bufs[num_rx_total], nb_pkts - num_rx_total);

    internals->active_slave = idx;
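    /* Remember where the scan stopped so the next burst resumes on the
     * following slave, spreading receive work round-robin across slaves. */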
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
    struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
    struct bond_dev_private *internals = bd_tx_q->dev_private;

    uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
    uint16_t slave_count;

    uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
    uint16_t dist_slave_count;

    /* 2-D array to sort mbufs for transmission on each slave into */
    struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
    /* Number of mbufs for transmission on each slave */
    uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
    /* Mapping array generated by hash function to map mbufs to slaves */
    uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };

    uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
    uint16_t total_tx_count = 0, total_tx_fail_count = 0;

    if (unlikely(nb_bufs == 0))

    /* Copy slave list to protect against slave up/down changes during tx
    slave_count = internals->active_slave_count;
    if (unlikely(slave_count < 1))

    memcpy(slave_port_ids, internals->active_slaves,
            sizeof(slave_port_ids[0]) * slave_count);

    dist_slave_count = 0;
    for (i = 0; i < slave_count; i++) {
        struct port *port = &mode_8023ad_ports[slave_port_ids[i]];

        if (ACTOR_STATE(port, DISTRIBUTING))
            dist_slave_port_ids[dist_slave_count++] =

    if (unlikely(dist_slave_count < 1))
     * Populate each slave's mbuf array with the packets to be sent on it,
     * selecting the output slave with a hash based on the xmit policy
    internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
            bufs_slave_port_idxs);

    for (i = 0; i < nb_bufs; i++) {
        /* Populate slave mbuf arrays with mbufs for that slave. */
        uint8_t slave_idx = bufs_slave_port_idxs[i];

        slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];

    /* Send packet burst on each slave device */
    for (i = 0; i < dist_slave_count; i++) {
        if (slave_nb_bufs[i] == 0)

        slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
                bd_tx_q->queue_id, slave_bufs[i],

        total_tx_count += slave_tx_count;

        /* If tx burst fails move packets to end of bufs */
        if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
            slave_tx_fail_count[i] = slave_nb_bufs[i] -
            total_tx_fail_count += slave_tx_fail_count[i];

             * Shift bufs to beginning of array to allow reordering
            for (j = 0; j < slave_tx_fail_count[i]; j++) {
                    slave_bufs[i][(slave_tx_count - 1) + j];
     * If there are tx burst failures we move packets to the end of bufs to
     * preserve the expected PMD behaviour of all failed transmits being
     * at the end of the input mbuf array
    if (unlikely(total_tx_fail_count > 0)) {
        int bufs_idx = nb_bufs - total_tx_fail_count - 1;

        for (i = 0; i < slave_count; i++) {
            if (slave_tx_fail_count[i] > 0) {
                for (j = 0; j < slave_tx_fail_count[i]; j++)
                    bufs[bufs_idx++] = slave_bufs[i][j];

    return total_tx_count;
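    /* Callers therefore see the usual tx_burst contract: the first
     * total_tx_count mbufs were consumed, and any mbufs that failed to
     * transmit have been compacted to the tail of bufs[] for retry. */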
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
    /* Cast to structure containing the bonded device's port id and queue id */
    struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
    struct bond_dev_private *internals = bd_rx_q->dev_private;
    struct ether_addr bond_mac;

    struct ether_hdr *hdr;

    const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
    uint16_t num_rx_total = 0; /* Total number of received packets */
    uint16_t slaves[RTE_MAX_ETHPORTS];
    uint16_t slave_count, idx;

    uint8_t collecting;  /* current slave collecting status */
    const uint8_t promisc = internals->promiscuous_en;

    rte_eth_macaddr_get(internals->port_id, &bond_mac);
    /* Copy slave list to protect against slave up/down changes during tx
    slave_count = internals->active_slave_count;
    memcpy(slaves, internals->active_slaves,
            sizeof(internals->active_slaves[0]) * slave_count);

    idx = internals->active_slave;
    if (idx >= slave_count) {
        internals->active_slave = 0;

    for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
        collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],

        /* Read packets from this slave */
        num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                &bufs[num_rx_total], nb_pkts - num_rx_total);

        for (k = j; k < 2 && k < num_rx_total; k++)
            rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

        /* Handle slow protocol packets. */
        while (j < num_rx_total) {

            /* If packet is not pure L2 and is known, skip it */
            if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {

            if (j + 3 < num_rx_total)
                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

            hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
            subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

            /* Remove the packet from the array if it is a slow packet, or if
             * the slave is not in collecting state, or if the bonding
             * interface is not in promiscuous mode and the destination
             * address does not match. */
            if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                    !collecting || (!promisc &&
                    !is_multicast_ether_addr(&hdr->d_addr) &&
                    !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                if (hdr->ether_type == ether_type_slow_be) {
                    bond_mode_8023ad_handle_slow_pkt(
                            internals, slaves[idx], bufs[j]);
                    rte_pktmbuf_free(bufs[j]);

                /* Packet is managed by mode 4 or dropped, shift the array */
                if (j < num_rx_total) {
                    memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *

        if (unlikely(++idx == slave_count))

    internals->active_slave = idx;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

arp_op_name(uint16_t arp_op, char *buf)
        snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
        snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
    case ARP_OP_REVREQUEST:
        snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                "Reverse ARP Request");
    case ARP_OP_REVREPLY:
        snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                "Reverse ARP Reply");
    case ARP_OP_INVREQUEST:
        snprintf(buf, sizeof("Peer Identify Request"), "%s",
                "Peer Identify Request");
    case ARP_OP_INVREPLY:
        snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                "Peer Identify Reply");

        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");

#define MaxIPv4String 16
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
    ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
    snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
        (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,

#define MAX_CLIENTS_NUMBER 128
uint8_t active_clients;
struct client_stats_t {
    uint32_t ipv4_rx_packets;
    uint32_t ipv4_tx_packets;

struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
    for (; i < MAX_CLIENTS_NUMBER; i++) {
        if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
            /* Just update RX packets number for this client */
            if (TXorRXindicator == &burstnumberRX)
                client_stats[i].ipv4_rx_packets++;
                client_stats[i].ipv4_tx_packets++;
    /* We have a new client. Insert it into the table and update its stats. */
    if (TXorRXindicator == &burstnumberRX)
        client_stats[active_clients].ipv4_rx_packets++;
        client_stats[active_clients].ipv4_tx_packets++;
    client_stats[active_clients].ipv4_addr = addr;
    client_stats[active_clients].port = port;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
    rte_log(RTE_LOG_DEBUG, bond_logtype, \
        "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
        "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
        eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
        eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
        eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
        eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
        eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
        eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
        arp_op, ++burstnumber)

mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
        uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
    struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
    struct arp_hdr *arp_h;

    uint16_t ether_type = eth_h->ether_type;
    uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
    strlcpy(buf, info, 16);

    if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
        ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
        ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
        MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);

        update_client_stats(ipv4_h->src_addr, port, burstnumber);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
    else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
        arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
        ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
        ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
        arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
        MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);

bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
    struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
    struct bond_dev_private *internals = bd_tx_q->dev_private;
    struct ether_hdr *eth_h;
    uint16_t ether_type, offset;
    uint16_t nb_recv_pkts;

    nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

    for (i = 0; i < nb_recv_pkts; i++) {
        eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
        ether_type = eth_h->ether_type;
        offset = get_vlan_offset(eth_h, &ether_type);
        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
            mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
            bond_mode_alb_arp_recv(eth_h, offset, internals);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
            mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);

bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
    struct bond_dev_private *internals;
    struct bond_tx_queue *bd_tx_q;

    struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
    uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

    uint16_t num_of_slaves;
    uint16_t slaves[RTE_MAX_ETHPORTS];

    uint16_t num_tx_total = 0, num_tx_slave;

    static int slave_idx = 0;
    int i, cslave_idx = 0, tx_fail_total = 0;

    bd_tx_q = (struct bond_tx_queue *)queue;
    internals = bd_tx_q->dev_private;

    /* Copy slave list to protect against slave up/down changes during tx
    num_of_slaves = internals->active_slave_count;
    memcpy(slaves, internals->active_slaves,
            sizeof(internals->active_slaves[0]) * num_of_slaves);

    if (num_of_slaves < 1)
    /* Populate each slave's mbuf array with the packets to be sent on it */
    for (i = 0; i < nb_pkts; i++) {
        cslave_idx = (slave_idx + i) % num_of_slaves;
        slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];

    /* increment current slave index so the next call to tx burst starts on the
    slave_idx = ++cslave_idx;

    /* Send packet burst on each slave device */
    for (i = 0; i < num_of_slaves; i++) {
        if (slave_nb_pkts[i] > 0) {
            num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                    slave_bufs[i], slave_nb_pkts[i]);

            /* if tx burst fails move packets to end of bufs */
            if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                tx_fail_total += tx_fail_slave;

                memcpy(&bufs[nb_pkts - tx_fail_total],
                        &slave_bufs[i][num_tx_slave],
                        tx_fail_slave * sizeof(bufs[0]));

            num_tx_total += num_tx_slave;

bond_ethdev_tx_burst_active_backup(void *queue,
        struct rte_mbuf **bufs, uint16_t nb_pkts)
    struct bond_dev_private *internals;
    struct bond_tx_queue *bd_tx_q;

    bd_tx_q = (struct bond_tx_queue *)queue;
    internals = bd_tx_q->dev_private;

    if (internals->active_slave_count < 1)

    return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
    unaligned_uint16_t *word_src_addr =
        (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
    unaligned_uint16_t *word_dst_addr =
        (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

    return (word_src_addr[0] ^ word_dst_addr[0]) ^
            (word_src_addr[1] ^ word_dst_addr[1]) ^
            (word_src_addr[2] ^ word_dst_addr[2]);

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
    return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
    unaligned_uint32_t *word_src_addr =
        (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
    unaligned_uint32_t *word_dst_addr =
        (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

    return (word_src_addr[0] ^ word_dst_addr[0]) ^
            (word_src_addr[1] ^ word_dst_addr[1]) ^
            (word_src_addr[2] ^ word_dst_addr[2]) ^
            (word_src_addr[3] ^ word_dst_addr[3]);
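/*
 * The burst_xmit_*_hash() helpers below all reduce header fields with cheap
 * XOR folds. Because each fold XORs source against destination, the result
 * is symmetric: both directions of the same L2/L3/L4 conversation map to
 * the same slave.
 */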
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
        uint8_t slave_count, uint16_t *slaves)
    struct ether_hdr *eth_hdr;

    for (i = 0; i < nb_pkts; i++) {
        eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

        hash = ether_hash(eth_hdr);

        slaves[i] = (hash ^= hash >> 8) % slave_count;
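        /* Folding the upper byte into the lower byte before the modulo
         * spreads MAC-derived entropy across the low bits that actually
         * select one of slave_count output ports. */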
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
        uint8_t slave_count, uint16_t *slaves)
    struct ether_hdr *eth_hdr;

    uint32_t hash, l3hash;

    for (i = 0; i < nb_pkts; i++) {
        eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

        proto = eth_hdr->ether_type;
        hash = ether_hash(eth_hdr);

        vlan_offset = get_vlan_offset(eth_hdr, &proto);

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
            struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                    ((char *)(eth_hdr + 1) + vlan_offset);
            l3hash = ipv4_hash(ipv4_hdr);

        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
            struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                    ((char *)(eth_hdr + 1) + vlan_offset);
            l3hash = ipv6_hash(ipv6_hdr);

        hash = hash ^ l3hash;

        slaves[i] = hash % slave_count;

burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
        uint8_t slave_count, uint16_t *slaves)
    struct ether_hdr *eth_hdr;

    struct udp_hdr *udp_hdr;
    struct tcp_hdr *tcp_hdr;
    uint32_t hash, l3hash, l4hash;

    for (i = 0; i < nb_pkts; i++) {
        eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
        proto = eth_hdr->ether_type;
        vlan_offset = get_vlan_offset(eth_hdr, &proto);

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
            struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                    ((char *)(eth_hdr + 1) + vlan_offset);
            size_t ip_hdr_offset;

            l3hash = ipv4_hash(ipv4_hdr);
            /* there is no L4 header in a fragmented packet */
            if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                ip_hdr_offset = (ipv4_hdr->version_ihl
                        & IPV4_HDR_IHL_MASK) *

                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                    tcp_hdr = (struct tcp_hdr *)

                    l4hash = HASH_L4_PORTS(tcp_hdr);
                } else if (ipv4_hdr->next_proto_id ==
                    udp_hdr = (struct udp_hdr *)

                    l4hash = HASH_L4_PORTS(udp_hdr);

        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
            struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                    ((char *)(eth_hdr + 1) + vlan_offset);
            l3hash = ipv6_hash(ipv6_hdr);

            if (ipv6_hdr->proto == IPPROTO_TCP) {
                tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                l4hash = HASH_L4_PORTS(tcp_hdr);
            } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                l4hash = HASH_L4_PORTS(udp_hdr);

        hash = l3hash ^ l4hash;

        slaves[i] = hash % slave_count;
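        /* Fragmented IPv4 packets fall back to an L3-only hash (no L4
         * contribution), so every fragment of a datagram still selects the
         * same slave. */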
    uint64_t bwg_left_int;
    uint64_t bwg_left_remainder;

bond_tlb_activate_slave(struct bond_dev_private *internals) {
    for (i = 0; i < internals->active_slave_count; i++) {
        tlb_last_obytets[internals->active_slaves[i]] = 0;

bandwidth_cmp(const void *a, const void *b)
    const struct bwg_slave *bwg_a = a;
    const struct bwg_slave *bwg_b = b;
    int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
    int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
            (int64_t)bwg_a->bwg_left_remainder;

bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
        struct bwg_slave *bwg_slave)
    struct rte_eth_link link_status;

    rte_eth_link_get_nowait(port_id, &link_status);
    uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;

    link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
    bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
    bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
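    /* Rough arithmetic of the estimate: link_bwg starts as the link's byte
     * rate (speed in Mbps * 1e6 / 8) and is scaled by the measurement
     * window, and the quotient/remainder pair of (capacity - load) over
     * capacity is stored so bandwidth_cmp() can rank slaves by spare
     * bandwidth without floating point. */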
bond_ethdev_update_tlb_slave_cb(void *arg)
    struct bond_dev_private *internals = arg;
    struct rte_eth_stats slave_stats;

    struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
    uint8_t update_stats = 0;

    internals->slave_update_idx++;

    if (internals->slave_update_idx >= REORDER_PERIOD_MS)

    for (i = 0; i < internals->active_slave_count; i++) {
        slave_id = internals->active_slaves[i];
        rte_eth_stats_get(slave_id, &slave_stats);
        tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
        bandwidth_left(slave_id, tx_bytes,
                internals->slave_update_idx, &bwg_array[i]);
        bwg_array[i].slave = slave_id;

            tlb_last_obytets[slave_id] = slave_stats.obytes;

    if (update_stats == 1)
        internals->slave_update_idx = 0;

    qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
    for (i = 0; i < slave_count; i++)
        internals->tlb_slaves_order[i] = bwg_array[i].slave;

    rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
            (struct bond_dev_private *)internals);

bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
    struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
    struct bond_dev_private *internals = bd_tx_q->dev_private;

    struct rte_eth_dev *primary_port =
            &rte_eth_devices[internals->primary_port];
    uint16_t num_tx_total = 0;

    uint16_t num_of_slaves = internals->active_slave_count;
    uint16_t slaves[RTE_MAX_ETHPORTS];

    struct ether_hdr *ether_hdr;
    struct ether_addr primary_slave_addr;
    struct ether_addr active_slave_addr;

    if (num_of_slaves < 1)
        return num_tx_total;

    memcpy(slaves, internals->tlb_slaves_order,
            sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

    ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

    for (i = 0; i < 3; i++)
        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));

    for (i = 0; i < num_of_slaves; i++) {
        rte_eth_macaddr_get(slaves[i], &active_slave_addr);
        for (j = num_tx_total; j < nb_pkts; j++) {
            if (j + 3 < nb_pkts)
                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

            ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
            if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
            mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);

        num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                bufs + num_tx_total, nb_pkts - num_tx_total);

        if (num_tx_total == nb_pkts)

    return num_tx_total;
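    /* TLB sends through several slaves at once, so frames still carrying
     * the primary MAC are rewritten above to the transmitting slave's own
     * address; each peer then learns exactly one source MAC per slave
     * instead of seeing the bond's MAC flap between switch ports. */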
bond_tlb_disable(struct bond_dev_private *internals)
    rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);

bond_tlb_enable(struct bond_dev_private *internals)
    bond_ethdev_update_tlb_slave_cb(internals);

bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
    struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
    struct bond_dev_private *internals = bd_tx_q->dev_private;

    struct ether_hdr *eth_h;
    uint16_t ether_type, offset;

    struct client_data *client_info;
     * We create transmit buffers for every slave and one additional to send
     * through tlb. In the worst case every packet will be sent on one port.
    struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
    uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

     * We create separate transmit buffers for update packets as they won't
     * be counted in num_tx_total.
    struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
    uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

    struct rte_mbuf *upd_pkt;

    uint16_t num_send, num_not_send = 0;
    uint16_t num_tx_total = 0;

    /* Search tx buffer for ARP packets and forward them to alb */
    for (i = 0; i < nb_pkts; i++) {
        eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
        ether_type = eth_h->ether_type;
        offset = get_vlan_offset(eth_h, &ether_type);

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
            slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

            /* Change src mac in eth header */
            rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

            /* Add packet to slave tx buffer */
            slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
            slave_bufs_pkts[slave_idx]++;

            /* If packet is not ARP, send it with TLB policy */
            slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
            slave_bufs_pkts[RTE_MAX_ETHPORTS]++;

    /* Update connected client ARP tables */
    if (internals->mode6.ntt) {
        for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
            client_info = &internals->mode6.client_table[i];

            if (client_info->in_use) {
                /* Allocate new packet to send ARP update on current slave */
                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                if (upd_pkt == NULL) {
                        "Failed to allocate ARP packet from pool");

                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                        + client_info->vlan_count * sizeof(struct vlan_hdr);
                upd_pkt->data_len = pkt_size;
                upd_pkt->pkt_len = pkt_size;

                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,

                /* Add packet to update tx buffer */
                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                update_bufs_pkts[slave_idx]++;

        internals->mode6.ntt = 0;
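        /* ntt ("need to transmit") is cleared once a gratuitous ARP update
         * has been queued for every active client, re-steering client
         * receive traffic after the hash-to-slave assignment changed. */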
    /* Send ARP packets on proper slaves */
    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
        if (slave_bufs_pkts[i] > 0) {
            num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                    slave_bufs[i], slave_bufs_pkts[i]);
            for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                bufs[nb_pkts - 1 - num_not_send - j] =
                        slave_bufs[i][nb_pkts - 1 - j];

            num_tx_total += num_send;
            num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
            /* Print TX stats including update packets */
            for (j = 0; j < slave_bufs_pkts[i]; j++) {
                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);

    /* Send update packets on proper slaves */
    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
        if (update_bufs_pkts[i] > 0) {
            num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                    update_bufs_pkts[i]);
            for (j = num_send; j < update_bufs_pkts[i]; j++) {
                rte_pktmbuf_free(update_bufs[i][j]);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
            for (j = 0; j < update_bufs_pkts[i]; j++) {
                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);

    /* Send non-ARP packets using tlb policy */
    if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
        num_send = bond_ethdev_tx_burst_tlb(queue,
                slave_bufs[RTE_MAX_ETHPORTS],
                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

        for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
            bufs[nb_pkts - 1 - num_not_send - j] =
                    slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];

        num_tx_total += num_send;

    return num_tx_total;

bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
    struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
    struct bond_dev_private *internals = bd_tx_q->dev_private;

    uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
    uint16_t slave_count;

    /* Array to sort mbufs for transmission on each slave into */
    struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
    /* Number of mbufs for transmission on each slave */
    uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
    /* Mapping array generated by hash function to map mbufs to slaves */
    uint16_t bufs_slave_port_idxs[nb_bufs];

    uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
    uint16_t total_tx_count = 0, total_tx_fail_count = 0;

    if (unlikely(nb_bufs == 0))

    /* Copy slave list to protect against slave up/down changes during tx
    slave_count = internals->active_slave_count;
    if (unlikely(slave_count < 1))

    memcpy(slave_port_ids, internals->active_slaves,
            sizeof(slave_port_ids[0]) * slave_count);
     * Populate each slave's mbuf array with the packets to be sent on it,
     * selecting the output slave with a hash based on the xmit policy
    internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
            bufs_slave_port_idxs);

    for (i = 0; i < nb_bufs; i++) {
        /* Populate slave mbuf arrays with mbufs for that slave. */
        uint8_t slave_idx = bufs_slave_port_idxs[i];

        slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];

    /* Send packet burst on each slave device */
    for (i = 0; i < slave_count; i++) {
        if (slave_nb_bufs[i] == 0)

        slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                bd_tx_q->queue_id, slave_bufs[i],

        total_tx_count += slave_tx_count;

        /* If tx burst fails move packets to end of bufs */
        if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
            slave_tx_fail_count[i] = slave_nb_bufs[i] -
            total_tx_fail_count += slave_tx_fail_count[i];

             * Shift bufs to beginning of array to allow reordering
            for (j = 0; j < slave_tx_fail_count[i]; j++) {
                    slave_bufs[i][(slave_tx_count - 1) + j];
     * If there are tx burst failures we move packets to the end of bufs to
     * preserve the expected PMD behaviour of all failed transmits being
     * at the end of the input mbuf array
    if (unlikely(total_tx_fail_count > 0)) {
        int bufs_idx = nb_bufs - total_tx_fail_count - 1;

        for (i = 0; i < slave_count; i++) {
            if (slave_tx_fail_count[i] > 0) {
                for (j = 0; j < slave_tx_fail_count[i]; j++)
                    bufs[bufs_idx++] = slave_bufs[i][j];

    return total_tx_count;

bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
    struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
    struct bond_dev_private *internals = bd_tx_q->dev_private;

    uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
    uint16_t slave_count;

    uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
    uint16_t dist_slave_count;

    /* 2-D array to sort mbufs for transmission on each slave into */
    struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
    /* Number of mbufs for transmission on each slave */
    uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
    /* Mapping array generated by hash function to map mbufs to slaves */
    uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };

    uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
    uint16_t total_tx_count = 0, total_tx_fail_count = 0;

    if (unlikely(nb_bufs == 0))

    /* Copy slave list to protect against slave up/down changes during tx
    slave_count = internals->active_slave_count;
    if (unlikely(slave_count < 1))

    memcpy(slave_port_ids, internals->active_slaves,
            sizeof(slave_port_ids[0]) * slave_count);

    dist_slave_count = 0;
    for (i = 0; i < slave_count; i++) {
        struct port *port = &mode_8023ad_ports[slave_port_ids[i]];

        if (ACTOR_STATE(port, DISTRIBUTING))
            dist_slave_port_ids[dist_slave_count++] =
    if (likely(dist_slave_count > 1)) {
         * Populate each slave's mbuf array with the packets to be sent
         * on it, selecting the output slave with a hash based on the
         * xmit policy
        internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
             * Populate slave mbuf arrays with mbufs for that
            uint8_t slave_idx = bufs_slave_port_idxs[i];

            slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =

        /* Send packet burst on each slave device */
        for (i = 0; i < dist_slave_count; i++) {
            if (slave_nb_bufs[i] == 0)

            slave_tx_count = rte_eth_tx_burst(
                    dist_slave_port_ids[i],
                    bd_tx_q->queue_id, slave_bufs[i],

            total_tx_count += slave_tx_count;

            /* If tx burst fails move packets to end of bufs */
            if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                slave_tx_fail_count[i] = slave_nb_bufs[i] -
                total_tx_fail_count += slave_tx_fail_count[i];

                 * Shift bufs to beginning of array to allow
                for (j = 0; j < slave_tx_fail_count[i]; j++)
                        [(slave_tx_count - 1)
         * If there are tx burst failures we move packets to the end of
         * bufs to preserve the expected PMD behaviour of all failed
         * transmits being at the end of the input mbuf array
        if (unlikely(total_tx_fail_count > 0)) {
            int bufs_idx = nb_bufs - total_tx_fail_count - 1;

            for (i = 0; i < slave_count; i++) {
                if (slave_tx_fail_count[i] > 0) {
                        j < slave_tx_fail_count[i];

    /* Check for LACP control packets and send if available */
    for (i = 0; i < slave_count; i++) {
        struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
        struct rte_mbuf *ctrl_pkt = NULL;

        if (likely(rte_ring_empty(port->tx_ring)))

        if (rte_ring_dequeue(port->tx_ring,
                (void **)&ctrl_pkt) != -ENOENT) {
            slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                    bd_tx_q->queue_id, &ctrl_pkt, 1);
             * re-enqueue LAG control plane packets to buffering
             * ring if transmission fails so the packet isn't lost.
            if (slave_tx_count != 1)
                rte_ring_enqueue(port->tx_ring, ctrl_pkt);

    return total_tx_count;

bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
    struct bond_dev_private *internals;
    struct bond_tx_queue *bd_tx_q;

    uint8_t tx_failed_flag = 0, num_of_slaves;
    uint16_t slaves[RTE_MAX_ETHPORTS];

    uint16_t max_nb_of_tx_pkts = 0;

    int slave_tx_total[RTE_MAX_ETHPORTS];
    int i, most_successful_tx_slave = -1;

    bd_tx_q = (struct bond_tx_queue *)queue;
    internals = bd_tx_q->dev_private;

    /* Copy slave list to protect against slave up/down changes during tx
    num_of_slaves = internals->active_slave_count;
    memcpy(slaves, internals->active_slaves,
            sizeof(internals->active_slaves[0]) * num_of_slaves);

    if (num_of_slaves < 1)

    /* Increment reference count on mbufs */
    for (i = 0; i < nb_pkts; i++)
        rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

    /* Transmit burst on each active slave */
    for (i = 0; i < num_of_slaves; i++) {
        slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,

        if (unlikely(slave_tx_total[i] < nb_pkts))

        /* record the value and slave index for the slave which transmits the
         * maximum number of packets */
        if (slave_tx_total[i] > max_nb_of_tx_pkts) {
            max_nb_of_tx_pkts = slave_tx_total[i];
            most_successful_tx_slave = i;

    /* if slaves fail to transmit packets from burst, the calling application
     * is not expected to know about multiple references to packets so we must
     * handle failures of all packets except those of the most successful slave
    if (unlikely(tx_failed_flag))
        for (i = 0; i < num_of_slaves; i++)
            if (i != most_successful_tx_slave)
                while (slave_tx_total[i] < nb_pkts)
                    rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

    return max_nb_of_tx_pkts;
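    /* Each mbuf's refcount was raised by num_of_slaves - 1 above, so every
     * per-slave transmit (or the explicit free on failure) releases exactly
     * one reference and no packet is leaked or double-freed. */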
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
    struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

    if (bond_ctx->mode == BONDING_MODE_8023AD) {
         * If in mode 4 then save the link properties of the first
         * slave, all subsequent slaves must match these properties
        struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

        bond_link->link_autoneg = slave_link->link_autoneg;
        bond_link->link_duplex = slave_link->link_duplex;
        bond_link->link_speed = slave_link->link_speed;

         * In any other mode the link properties are set to default
         * values of AUTONEG/DUPLEX
        ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
        ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;

link_properties_valid(struct rte_eth_dev *ethdev,
        struct rte_eth_link *slave_link)
    struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

    if (bond_ctx->mode == BONDING_MODE_8023AD) {
        struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

        if (bond_link->link_duplex != slave_link->link_duplex ||
                bond_link->link_autoneg != slave_link->link_autoneg ||
                bond_link->link_speed != slave_link->link_speed)
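    /* In mode 4 every subsequent slave must match the first slave's speed,
     * duplex and autoneg settings: 802.3ad only aggregates links that share
     * identical link properties. */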
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
    struct ether_addr *mac_addr;

    if (eth_dev == NULL) {
        RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");

    if (dst_mac_addr == NULL) {
        RTE_BOND_LOG(ERR, "NULL pointer MAC specified");

    mac_addr = eth_dev->data->mac_addrs;

    ether_addr_copy(mac_addr, dst_mac_addr);

mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
    struct ether_addr *mac_addr;

    if (eth_dev == NULL) {
        RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");

    if (new_mac_addr == NULL) {
        RTE_BOND_LOG(ERR, "NULL pointer MAC specified");

    mac_addr = eth_dev->data->mac_addrs;

    /* If new MAC is different to current MAC then update */
    if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
        memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
    struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

    /* Update slave devices MAC addresses */
    if (internals->slave_count < 1)

    switch (internals->mode) {
    case BONDING_MODE_ROUND_ROBIN:
    case BONDING_MODE_BALANCE:
    case BONDING_MODE_BROADCAST:
        for (i = 0; i < internals->slave_count; i++) {
            if (rte_eth_dev_default_mac_addr_set(
                    internals->slaves[i].port_id,
                    bonded_eth_dev->data->mac_addrs)) {
                RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                        internals->slaves[i].port_id);

    case BONDING_MODE_8023AD:
        bond_mode_8023ad_mac_address_update(bonded_eth_dev);

    case BONDING_MODE_ACTIVE_BACKUP:
    case BONDING_MODE_TLB:
    case BONDING_MODE_ALB:
        for (i = 0; i < internals->slave_count; i++) {
            if (internals->slaves[i].port_id ==
                    internals->current_primary_port) {
                if (rte_eth_dev_default_mac_addr_set(
                        internals->primary_port,
                        bonded_eth_dev->data->mac_addrs)) {
                    RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                            internals->current_primary_port);

                if (rte_eth_dev_default_mac_addr_set(
                        internals->slaves[i].port_id,
                        &internals->slaves[i].persisted_mac_addr)) {
                    RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                            internals->slaves[i].port_id);

bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
    struct bond_dev_private *internals;

    internals = eth_dev->data->dev_private;

    case BONDING_MODE_ROUND_ROBIN:
        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
    case BONDING_MODE_ACTIVE_BACKUP:
        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
    case BONDING_MODE_BALANCE:
        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
    case BONDING_MODE_BROADCAST:
        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
    case BONDING_MODE_8023AD:
        if (bond_mode_8023ad_enable(eth_dev) != 0)

        if (internals->mode4.dedicated_queues.enabled == 0) {
            eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
            eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
            RTE_BOND_LOG(WARNING,
                    "Using mode 4, it is necessary to do TX burst "
                    "and RX burst at least every 100ms.");
            /* Use flow director's optimization */
            eth_dev->rx_pkt_burst =
                    bond_ethdev_rx_burst_8023ad_fast_queue;
            eth_dev->tx_pkt_burst =
                    bond_ethdev_tx_burst_8023ad_fast_queue;
    case BONDING_MODE_TLB:
        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
    case BONDING_MODE_ALB:
        if (bond_mode_alb_enable(eth_dev) != 0)

        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;

    internals->mode = mode;

slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
        struct rte_eth_dev *slave_eth_dev)
    struct bond_dev_private *internals = (struct bond_dev_private *)
            bonded_eth_dev->data->dev_private;
    struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];

    if (port->slow_pool == NULL) {
        int slave_id = slave_eth_dev->data->port_id;

        snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
        port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
                250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
                slave_eth_dev->data->numa_node);
        /* Any memory allocation failure in initialization is critical because
         * resources can't be freed, so reinitialization is impossible. */
        if (port->slow_pool == NULL) {
            rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
                    slave_id, mem_name, rte_strerror(rte_errno));
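        /* The slow pool is per slave and carries only slow-protocol
         * (LACP/marker) traffic for the dedicated queue, so its size is
         * fixed here rather than taken from the application's mempools. */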
    if (internals->mode4.dedicated_queues.enabled == 1) {
        /* Configure slow Rx queue */

        errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
                internals->mode4.dedicated_queues.rx_qid, 128,
                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                NULL, port->slow_pool);
                "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
                slave_eth_dev->data->port_id,
                internals->mode4.dedicated_queues.rx_qid,

        errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
                internals->mode4.dedicated_queues.tx_qid, 512,
                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
                slave_eth_dev->data->port_id,
                internals->mode4.dedicated_queues.tx_qid,

slave_configure(struct rte_eth_dev *bonded_eth_dev,
        struct rte_eth_dev *slave_eth_dev)
    struct bond_rx_queue *bd_rx_q;
    struct bond_tx_queue *bd_tx_q;
    uint16_t nb_rx_queues;
    uint16_t nb_tx_queues;

    struct rte_flow_error flow_error;

    struct bond_dev_private *internals = (struct bond_dev_private *)
            bonded_eth_dev->data->dev_private;

    rte_eth_dev_stop(slave_eth_dev->data->port_id);

    /* Enable interrupts on slave device if supported */
    if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
        slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

    /* If RSS is enabled for bonding, try to enable it for slaves */
    if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
        if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
            slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
                bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
            slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
                bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
            slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;

        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
                bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
        slave_eth_dev->data->dev_conf.rxmode.mq_mode =
                bonded_eth_dev->data->dev_conf.rxmode.mq_mode;

    if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
            DEV_RX_OFFLOAD_VLAN_FILTER)
        slave_eth_dev->data->dev_conf.rxmode.offloads |=
                DEV_RX_OFFLOAD_VLAN_FILTER;
        slave_eth_dev->data->dev_conf.rxmode.offloads &=
                ~DEV_RX_OFFLOAD_VLAN_FILTER;

    nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
    nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;

    if (internals->mode == BONDING_MODE_8023AD) {
        if (internals->mode4.dedicated_queues.enabled == 1) {

    errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
            bonded_eth_dev->data->mtu);
    if (errval != 0 && errval != -ENOTSUP) {
        RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
                slave_eth_dev->data->port_id, errval);

    /* Configure device */
    errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
            nb_rx_queues, nb_tx_queues,
            &(slave_eth_dev->data->dev_conf));
        RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
                slave_eth_dev->data->port_id, errval);

    /* Setup Rx Queues */
    for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
        bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

        errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
                bd_rx_q->nb_rx_desc,
                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
                "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
                slave_eth_dev->data->port_id, q_id, errval);

    /* Setup Tx Queues */
    for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
        bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

        errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
                bd_tx_q->nb_tx_desc,
                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
                slave_eth_dev->data->port_id, q_id, errval);

    if (internals->mode == BONDING_MODE_8023AD &&
            internals->mode4.dedicated_queues.enabled == 1) {
        if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)

        if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
                slave_eth_dev->data->port_id) != 0) {
                "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
                slave_eth_dev->data->port_id, q_id, errval);

        if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
            rte_flow_destroy(slave_eth_dev->data->port_id,
                    internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],

        bond_ethdev_8023ad_flow_set(bonded_eth_dev,
                slave_eth_dev->data->port_id);

    errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
        RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
                slave_eth_dev->data->port_id, errval);

    /* If RSS is enabled for bonding, synchronize RETA */
    if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
        struct bond_dev_private *internals;

        internals = bonded_eth_dev->data->dev_private;

        for (i = 0; i < internals->slave_count; i++) {
            if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
                errval = rte_eth_dev_rss_reta_update(
                        slave_eth_dev->data->port_id,
                        &internals->reta_conf[0],
                        internals->slaves[i].reta_size);
                    RTE_BOND_LOG(WARNING,
                            "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
                            " RSS Configuration for bonding may be inconsistent.",
                            slave_eth_dev->data->port_id, errval);

    /* If lsc interrupt is set, check initial slave's link status */
    if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
        slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
        bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
                RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,

slave_remove(struct bond_dev_private *internals,
        struct rte_eth_dev *slave_eth_dev)
    for (i = 0; i < internals->slave_count; i++)
        if (internals->slaves[i].port_id ==
                slave_eth_dev->data->port_id)

    if (i < (internals->slave_count - 1)) {
        struct rte_flow *flow;

        memmove(&internals->slaves[i], &internals->slaves[i + 1],
                sizeof(internals->slaves[0]) *
                (internals->slave_count - i - 1));
        TAILQ_FOREACH(flow, &internals->flow_list, next) {
            memmove(&flow->flows[i], &flow->flows[i + 1],
                    sizeof(flow->flows[0]) *
                    (internals->slave_count - i - 1));
            flow->flows[internals->slave_count - 1] = NULL;

    internals->slave_count--;

    /* force reconfiguration of slave interfaces */
    _rte_eth_dev_reset(slave_eth_dev);

bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

slave_add(struct bond_dev_private *internals,
        struct rte_eth_dev *slave_eth_dev)
    struct bond_slave_details *slave_details =
            &internals->slaves[internals->slave_count];

    slave_details->port_id = slave_eth_dev->data->port_id;
    slave_details->last_link_status = 0;

    /* Mark slave devices that don't support interrupts so we can
     * compensate when we start the bond
    if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
        slave_details->link_status_poll_enabled = 1;

    slave_details->link_status_wait_to_complete = 0;
    /* clean tlb_last_obytes when adding port for bonding device */
    memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
            sizeof(struct ether_addr));
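    /* The slave's original MAC is persisted so it can be restored when the
     * slave is removed from the bond or is no longer the primary port. */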
bond_ethdev_primary_set(struct bond_dev_private *internals,
        uint16_t slave_port_id)
    if (internals->active_slave_count < 1)
        internals->current_primary_port = slave_port_id;

        /* Search bonded device slave ports for new proposed primary port */
        for (i = 0; i < internals->active_slave_count; i++) {
            if (internals->active_slaves[i] == slave_port_id)
                internals->current_primary_port = slave_port_id;

bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);

bond_ethdev_start(struct rte_eth_dev *eth_dev)
    struct bond_dev_private *internals;

    /* slave eth dev will be started by bonded device */
    if (check_for_bonded_ethdev(eth_dev)) {
        RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
                eth_dev->data->port_id);

    eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
    eth_dev->data->dev_started = 1;

    internals = eth_dev->data->dev_private;

    if (internals->slave_count == 0) {
        RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");

    if (internals->user_defined_mac == 0) {
        struct ether_addr *new_mac_addr = NULL;

        for (i = 0; i < internals->slave_count; i++)
            if (internals->slaves[i].port_id == internals->primary_port)
                new_mac_addr = &internals->slaves[i].persisted_mac_addr;

        if (new_mac_addr == NULL)

        if (mac_address_set(eth_dev, new_mac_addr) != 0) {
            RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
                    eth_dev->data->port_id);
    /* Update all slave devices' MACs */
    if (mac_address_slaves_update(eth_dev) != 0)

    /* If the bonded device is configured in promiscuous mode then re-apply the config */
2065 if (internals->promiscuous_en)
2066 bond_ethdev_promiscuous_enable(eth_dev);
2068 if (internals->mode == BONDING_MODE_8023AD) {
2069 if (internals->mode4.dedicated_queues.enabled == 1) {
2070 internals->mode4.dedicated_queues.rx_qid =
2071 eth_dev->data->nb_rx_queues;
2072 internals->mode4.dedicated_queues.tx_qid =
2073 eth_dev->data->nb_tx_queues;
2078 /* Reconfigure each slave device if starting bonded device */
2079 for (i = 0; i < internals->slave_count; i++) {
2080 struct rte_eth_dev *slave_ethdev =
2081 &(rte_eth_devices[internals->slaves[i].port_id]);
2082 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2084 "bonded port (%d) failed to reconfigure slave device (%d)",
2085 eth_dev->data->port_id,
2086 internals->slaves[i].port_id);
2089 /* We will need to poll for link status if any slave doesn't
2090 * support interrupts
2092 if (internals->slaves[i].link_status_poll_enabled)
2093 internals->link_status_polling_enabled = 1;
2096 /* start polling if needed */
2097 if (internals->link_status_polling_enabled) {
2099 internals->link_status_polling_interval_ms * 1000,
2100 bond_ethdev_slave_link_status_change_monitor,
2101 (void *)&rte_eth_devices[internals->port_id]);
2104 if (internals->user_defined_primary_port)
2105 bond_ethdev_primary_set(internals, internals->primary_port);
2107 if (internals->mode == BONDING_MODE_8023AD)
2108 bond_mode_8023ad_start(eth_dev);
2110 if (internals->mode == BONDING_MODE_TLB ||
2111 internals->mode == BONDING_MODE_ALB)
2112 bond_tlb_enable(internals);
2117 eth_dev->data->dev_started = 0;
2122 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2126 if (dev->data->rx_queues != NULL) {
2127 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2128 rte_free(dev->data->rx_queues[i]);
2129 dev->data->rx_queues[i] = NULL;
2131 dev->data->nb_rx_queues = 0;
2134 if (dev->data->tx_queues != NULL) {
2135 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2136 rte_free(dev->data->tx_queues[i]);
2137 dev->data->tx_queues[i] = NULL;
2139 dev->data->nb_tx_queues = 0;
void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	uint8_t i;

	if (internals->mode == BONDING_MODE_8023AD) {
		struct port *port;
		void *pkt = NULL;

		bond_mode_8023ad_stop(eth_dev);

		/* Discard all messages to/from mode 4 state machines */
		for (i = 0; i < internals->active_slave_count; i++) {
			port = &mode_8023ad_ports[internals->active_slaves[i]];

			RTE_ASSERT(port->rx_ring != NULL);
			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);

			RTE_ASSERT(port->tx_ring != NULL);
			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);
		}
	}

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB) {
		bond_tlb_disable(internals);
		for (i = 0; i < internals->active_slave_count; i++)
			tlb_last_obytets[internals->active_slaves[i]] = 0;
	}

	internals->active_slave_count = 0;
	internals->link_status_polling_enabled = 0;
	for (i = 0; i < internals->slave_count; i++)
		internals->slaves[i].last_link_status = 0;

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 0;
}
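/*
 * dev_close handler: stops and removes every slave, flushes the rte_flow
 * rules, frees the queues and clears the VLAN filter bitmap.
 */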
void
bond_ethdev_close(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	uint8_t bond_port_id = internals->port_id;
	int skipped = 0;
	struct rte_flow_error ferror;

	RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
	while (internals->slave_count != skipped) {
		uint16_t port_id = internals->slaves[skipped].port_id;

		rte_eth_dev_stop(port_id);

		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
			RTE_BOND_LOG(ERR,
				"Failed to remove port %d from bonded device %s",
				port_id, dev->device->name);
			skipped++;
		}
	}
	bond_flow_ops.flush(dev, &ferror);
	bond_ethdev_free_queues(dev);
	rte_bitmap_reset(internals->vlan_filter_bmp);
}
/* forward declaration */
static int bond_ethdev_configure(struct rte_eth_dev *dev);
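/*
 * dev_infos_get handler. The advertised queue limits are the minimum over
 * all slaves, since every slave must be able to support the bonded device's
 * configuration.
 */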
static void
bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	uint16_t max_nb_rx_queues = UINT16_MAX;
	uint16_t max_nb_tx_queues = UINT16_MAX;

	dev_info->max_mac_addrs = 1;

	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
			internals->candidate_max_rx_pktlen :
			ETHER_MAX_JUMBO_FRAME_LEN;

	/* The max number of tx/rx queues that the bonded device can support
	 * is the minimum over the bonded slaves, as all slaves must be
	 * capable of supporting the same number of tx/rx queues.
	 */
	if (internals->slave_count > 0) {
		struct rte_eth_dev_info slave_info;
		uint16_t idx;

		for (idx = 0; idx < internals->slave_count; idx++) {
			rte_eth_dev_info_get(internals->slaves[idx].port_id,
					&slave_info);

			if (slave_info.max_rx_queues < max_nb_rx_queues)
				max_nb_rx_queues = slave_info.max_rx_queues;

			if (slave_info.max_tx_queues < max_nb_tx_queues)
				max_nb_tx_queues = slave_info.max_tx_queues;
		}
	}

	dev_info->max_rx_queues = max_nb_rx_queues;
	dev_info->max_tx_queues = max_nb_tx_queues;

	/*
	 * If dedicated hw queues are enabled for the link bonding device in
	 * LACP mode then we need to reduce the maximum number of data path
	 * queues by 1.
	 */
	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		dev_info->max_rx_queues--;
		dev_info->max_tx_queues--;
	}

	dev_info->min_rx_bufsize = 0;

	dev_info->rx_offload_capa = internals->rx_offload_capa;
	dev_info->tx_offload_capa = internals->tx_offload_capa;
	dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
	dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;

	dev_info->reta_size = internals->reta_size;
}
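/*
 * Record the VLAN filter in the bonded device's bitmap and propagate it to
 * every slave; a slave that does not support VLAN filtering only triggers a
 * warning.
 */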
static int
bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int res;
	uint16_t i;

	/* don't do this while a slave is being added */
	rte_spinlock_lock(&internals->lock);

	if (on)
		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
	else
		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);

	for (i = 0; i < internals->slave_count; i++) {
		uint16_t port_id = internals->slaves[i].port_id;

		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
		if (res == -ENOTSUP)
			RTE_BOND_LOG(WARNING,
					"Setting VLAN filter on slave port %u not supported.",
					port_id);
	}

	rte_spinlock_unlock(&internals->lock);
	return 0;
}
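/*
 * The queue setup handlers below only allocate bookkeeping structures for
 * the bonded device itself; the slaves' queues are (re)configured when the
 * bonded device is started (see bond_ethdev_start() above).
 */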
static int
bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
					0, dev->data->numa_node);
	if (bd_rx_q == NULL)
		return -1;

	bd_rx_q->queue_id = rx_queue_id;
	bd_rx_q->dev_private = dev->data->dev_private;

	bd_rx_q->nb_rx_desc = nb_rx_desc;

	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
	bd_rx_q->mb_pool = mb_pool;

	dev->data->rx_queues[rx_queue_id] = bd_rx_q;

	return 0;
}
static int
bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_txconf *tx_conf)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
					0, dev->data->numa_node);

	if (bd_tx_q == NULL)
		return -1;

	bd_tx_q->queue_id = tx_queue_id;
	bd_tx_q->dev_private = dev->data->dev_private;

	bd_tx_q->nb_tx_desc = nb_tx_desc;
	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));

	dev->data->tx_queues[tx_queue_id] = bd_tx_q;

	return 0;
}
static void
bond_ethdev_rx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}

static void
bond_ethdev_tx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}
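/*
 * Periodic alarm callback that polls the link status of slaves whose PMDs
 * do not support link-status interrupts and synthesises LSC events when a
 * change is observed. It re-arms itself while at least one slave still
 * needs polling.
 */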
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
	struct bond_dev_private *internals;

	/* Default value for polling slave found is true as we don't want to
	 * disable the polling thread if we cannot get the lock */
	int i, polling_slave_found = 1;

	if (cb_arg == NULL)
		return;

	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;

	if (!bonded_ethdev->data->dev_started ||
			!internals->link_status_polling_enabled)
		return;

	/* If device is currently being configured then don't check slaves'
	 * link status, wait until next period */
	if (rte_spinlock_trylock(&internals->lock)) {
		if (internals->slave_count > 0)
			polling_slave_found = 0;

		for (i = 0; i < internals->slave_count; i++) {
			if (!internals->slaves[i].link_status_poll_enabled)
				continue;

			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
			polling_slave_found = 1;

			/* Update slave link status */
			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
					internals->slaves[i].link_status_wait_to_complete);

			/* if link status has changed since last checked then call lsc
			 * event callback */
			if (slave_ethdev->data->dev_link.link_status !=
					internals->slaves[i].last_link_status) {
				internals->slaves[i].last_link_status =
						slave_ethdev->data->dev_link.link_status;

				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
						RTE_ETH_EVENT_INTR_LSC,
						&bonded_ethdev->data->port_id,
						NULL);
			}
		}
		rte_spinlock_unlock(&internals->lock);
	}

	if (polling_slave_found)
		/* Set alarm to continue monitoring link status of slave ethdevs */
		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
				bond_ethdev_slave_link_status_change_monitor, cb_arg);
}
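/*
 * link_update handler. The reported speed is mode dependent: active-backup
 * reports the primary slave's speed; broadcast reports the minimum speed of
 * the active slaves; the remaining modes report the theoretical aggregate,
 * e.g. two active 10G slaves in balance mode yield a 20G link speed.
 */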
static int
bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
{
	void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);

	struct bond_dev_private *bond_ctx;
	struct rte_eth_link slave_link;

	uint32_t idx;

	bond_ctx = ethdev->data->dev_private;

	ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

	if (ethdev->data->dev_started == 0 ||
			bond_ctx->active_slave_count == 0) {
		ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
		return 0;
	}

	ethdev->data->dev_link.link_status = ETH_LINK_UP;

	if (wait_to_complete)
		link_update = rte_eth_link_get;
	else
		link_update = rte_eth_link_get_nowait;

	switch (bond_ctx->mode) {
	case BONDING_MODE_BROADCAST:
		/*
		 * Setting link speed to UINT32_MAX to ensure we pick up the
		 * value of the first active slave
		 */
		ethdev->data->dev_link.link_speed = UINT32_MAX;

		/*
		 * link speed is the minimum of all the slaves' link speeds,
		 * as packet loss will occur on a slave if transmission at a
		 * rate greater than its link speed is attempted
		 */
		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
			link_update(bond_ctx->active_slaves[idx], &slave_link);

			if (slave_link.link_speed <
					ethdev->data->dev_link.link_speed)
				ethdev->data->dev_link.link_speed =
						slave_link.link_speed;
		}
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		/* Current primary slave */
		link_update(bond_ctx->current_primary_port, &slave_link);

		ethdev->data->dev_link.link_speed = slave_link.link_speed;
		break;
	case BONDING_MODE_8023AD:
		ethdev->data->dev_link.link_autoneg =
				bond_ctx->mode4.slave_link.link_autoneg;
		ethdev->data->dev_link.link_duplex =
				bond_ctx->mode4.slave_link.link_duplex;
		/* fall through to update link speed */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		/*
		 * In these modes the maximum theoretical link speed is the
		 * sum of all the slaves' link speeds
		 */
		ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
			link_update(bond_ctx->active_slaves[idx], &slave_link);

			ethdev->data->dev_link.link_speed +=
					slave_link.link_speed;
		}
	}

	return 0;
}
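/* stats_get handler: the bonded device's statistics are the sums of the
 * slaves' statistics, including the per-queue counters.
 */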
static int
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_stats slave_stats;
	int i, j;

	for (i = 0; i < internals->slave_count; i++) {
		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);

		stats->ipackets += slave_stats.ipackets;
		stats->opackets += slave_stats.opackets;
		stats->ibytes += slave_stats.ibytes;
		stats->obytes += slave_stats.obytes;
		stats->imissed += slave_stats.imissed;
		stats->ierrors += slave_stats.ierrors;
		stats->oerrors += slave_stats.oerrors;
		stats->rx_nombuf += slave_stats.rx_nombuf;

		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
			stats->q_opackets[j] += slave_stats.q_opackets[j];
			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
			stats->q_obytes[j] += slave_stats.q_obytes[j];
			stats->q_errors[j] += slave_stats.q_errors[j];
		}
	}

	return 0;
}
static void
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	for (i = 0; i < internals->slave_count; i++)
		rte_eth_stats_reset(internals->slaves[i].port_id);
}
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	int i;

	internals->promiscuous_en = 1;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_enable(internals->slaves[i].port_id);
		break;
	/* In mode 4 promiscuous mode is managed when a slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to the primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_enable(internals->current_primary_port);
	}
}
static void
bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	internals->promiscuous_en = 0;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_disable(internals->slaves[i].port_id);
		break;
	/* In mode 4 promiscuous mode is managed when a slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to the primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_disable(internals->current_primary_port);
	}
}
static void
bond_ethdev_delayed_lsc_propagation(void *arg)
{
	if (arg == NULL)
		return;

	_rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
			RTE_ETH_EVENT_INTR_LSC, NULL);
}
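/*
 * LSC callback registered for every slave port. On link up the slave is
 * activated (becoming primary if none is active); on link down it is
 * deactivated and a new primary is elected. Propagation of the event to
 * the application may be deferred through the up_delay/down_delay alarms.
 */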
static int
bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
		void *param, void *ret_param __rte_unused)
{
	struct rte_eth_dev *bonded_eth_dev;
	struct bond_dev_private *internals;
	struct rte_eth_link link;

	uint8_t active_pos;
	int i, valid_slave = 0;
	uint8_t lsc_flag = 0;

	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
		return 0;

	bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];

	if (check_for_bonded_ethdev(bonded_eth_dev))
		return 0;

	internals = bonded_eth_dev->data->dev_private;

	/* If the device isn't started, don't handle interrupts */
	if (!bonded_eth_dev->data->dev_started)
		return 0;

	/* verify that port_id is a valid slave of bonded port */
	for (i = 0; i < internals->slave_count; i++) {
		if (internals->slaves[i].port_id == port_id) {
			valid_slave = 1;
			break;
		}
	}

	if (!valid_slave)
		return 0;

	/* Synchronize lsc callback parallel calls either by real link event
	 * from the slave PMDs or by the bonding PMD itself.
	 */
	rte_spinlock_lock(&internals->lsc_lock);

	/* Search for port in active port list */
	active_pos = find_slave_by_id(internals->active_slaves,
			internals->active_slave_count, port_id);

	rte_eth_link_get_nowait(port_id, &link);
	if (link.link_status) {
		if (active_pos < internals->active_slave_count) {
			rte_spinlock_unlock(&internals->lsc_lock);
			return 0;
		}

		/* if no active slave ports then set this port to be primary port */
		if (internals->active_slave_count < 1) {
			/* If first active slave, then change link status */
			bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
			internals->current_primary_port = port_id;
			lsc_flag = 1;

			mac_address_slaves_update(bonded_eth_dev);
		}

		activate_slave(bonded_eth_dev, port_id);

		/* If user has defined the primary port then default to using it */
		if (internals->user_defined_primary_port &&
				internals->primary_port == port_id)
			bond_ethdev_primary_set(internals, port_id);
	} else {
		if (active_pos == internals->active_slave_count) {
			rte_spinlock_unlock(&internals->lsc_lock);
			return 0;
		}

		/* Remove from active slave list */
		deactivate_slave(bonded_eth_dev, port_id);

		if (internals->active_slave_count < 1)
			lsc_flag = 1;

		/* Update primary id, take first active slave from list or if none
		 * available fall back to the configured primary port */
		if (port_id == internals->current_primary_port) {
			if (internals->active_slave_count > 0)
				bond_ethdev_primary_set(internals,
						internals->active_slaves[0]);
			else
				internals->current_primary_port = internals->primary_port;
		}
	}

	/*
	 * Update bonded device link properties after any change to active
	 * slaves
	 */
	bond_ethdev_link_update(bonded_eth_dev, 0);

	if (lsc_flag) {
		/* Cancel any possible outstanding interrupts if delays are enabled */
		if (internals->link_up_delay_ms > 0 ||
				internals->link_down_delay_ms > 0)
			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
					bonded_eth_dev);

		if (bonded_eth_dev->data->dev_link.link_status) {
			if (internals->link_up_delay_ms > 0)
				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL);
		} else {
			if (internals->link_down_delay_ms > 0)
				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL);
		}
	}

	rte_spinlock_unlock(&internals->lsc_lock);

	return 0;
}
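/*
 * reta_update handler: the RETA is stored locally, replicated to fill the
 * whole table and then pushed to each slave using that slave's own RETA
 * size.
 */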
static int
bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	unsigned i, j;
	int result = 0;
	int slave_reta_size;
	unsigned reta_count;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	reta_count = reta_size / RTE_RETA_GROUP_SIZE;

	for (i = 0; i < reta_count; i++) {
		internals->reta_conf[i].mask = reta_conf[i].mask;
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
	}

	/* Fill rest of array */
	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
				sizeof(internals->reta_conf[0]) * reta_count);

	/* Propagate RETA over slaves */
	for (i = 0; i < internals->slave_count; i++) {
		slave_reta_size = internals->slaves[i].reta_size;
		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
				&internals->reta_conf[0], slave_reta_size);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	int i, j;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];

	return 0;
}
static int
bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	int i, result = 0;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_rss_conf bond_rss_conf;

	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));

	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;

	if (bond_rss_conf.rss_hf != 0)
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;

	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
			sizeof(internals->rss_key)) {
		if (bond_rss_conf.rss_key_len == 0)
			bond_rss_conf.rss_key_len = 40;
		internals->rss_key_len = bond_rss_conf.rss_key_len;
		memcpy(internals->rss_key, bond_rss_conf.rss_key,
				internals->rss_key_len);
	}

	for (i = 0; i < internals->slave_count; i++) {
		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
				&bond_rss_conf);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	rss_conf->rss_key_len = internals->rss_key_len;
	if (rss_conf->rss_key)
		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);

	return 0;
}
static int
bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct rte_eth_dev *slave_eth_dev;
	struct bond_dev_private *internals = dev->data->dev_private;
	int ret, i;

	rte_spinlock_lock(&internals->lock);

	for (i = 0; i < internals->slave_count; i++) {
		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
		if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
			rte_spinlock_unlock(&internals->lock);
			return -ENOTSUP;
		}
	}

	for (i = 0; i < internals->slave_count; i++) {
		ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
		if (ret < 0) {
			rte_spinlock_unlock(&internals->lock);
			return ret;
		}
	}

	rte_spinlock_unlock(&internals->lock);
	return 0;
}
static int
bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
{
	if (mac_address_set(dev, addr)) {
		RTE_BOND_LOG(ERR, "Failed to update MAC address");
		return -EINVAL;
	}

	return 0;
}
static int
bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
		enum rte_filter_type type, enum rte_filter_op op, void *arg)
{
	if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
		*(const void **)arg = &bond_flow_ops;
		return 0;
	}
	return -ENOTSUP;
}
const struct eth_dev_ops default_dev_ops = {
	.dev_start            = bond_ethdev_start,
	.dev_stop             = bond_ethdev_stop,
	.dev_close            = bond_ethdev_close,
	.dev_configure        = bond_ethdev_configure,
	.dev_infos_get        = bond_ethdev_info,
	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
	.rx_queue_release     = bond_ethdev_rx_queue_release,
	.tx_queue_release     = bond_ethdev_tx_queue_release,
	.link_update          = bond_ethdev_link_update,
	.stats_get            = bond_ethdev_stats_get,
	.stats_reset          = bond_ethdev_stats_reset,
	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
	.reta_update          = bond_ethdev_rss_reta_update,
	.reta_query           = bond_ethdev_rss_reta_query,
	.rss_hash_update      = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
	.mtu_set              = bond_ethdev_mtu_set,
	.mac_addr_set         = bond_ethdev_mac_address_set,
	.filter_ctrl          = bond_filter_ctrl
};
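/*
 * Allocate and initialise a bonded ethdev with safe defaults (no slaves,
 * layer 2 balance policy, link-status polling disabled) and the requested
 * bonding mode.
 */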
static int
bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
{
	const char *name = rte_vdev_device_name(dev);
	uint8_t socket_id = dev->device.numa_node;
	struct bond_dev_private *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	uint32_t vlan_filter_bmp_size;

	/* now do all data allocation - for eth_dev structure, dummy pci driver
	 * and internal (private) data
	 */

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
		goto err;
	}

	internals = eth_dev->data->dev_private;
	eth_dev->data->nb_rx_queues = (uint16_t)1;
	eth_dev->data->nb_tx_queues = (uint16_t)1;

	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
			socket_id);
	if (eth_dev->data->mac_addrs == NULL) {
		RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
		goto err;
	}

	eth_dev->dev_ops = &default_dev_ops;
	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;

	rte_spinlock_init(&internals->lock);
	rte_spinlock_init(&internals->lsc_lock);

	internals->port_id = eth_dev->data->port_id;
	internals->mode = BONDING_MODE_INVALID;
	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
	internals->burst_xmit_hash = burst_xmit_l2_hash;
	internals->user_defined_mac = 0;

	internals->link_status_polling_enabled = 0;

	internals->link_status_polling_interval_ms =
			DEFAULT_POLLING_INTERVAL_10_MS;
	internals->link_down_delay_ms = 0;
	internals->link_up_delay_ms = 0;

	internals->slave_count = 0;
	internals->active_slave_count = 0;
	internals->rx_offload_capa = 0;
	internals->tx_offload_capa = 0;
	internals->rx_queue_offload_capa = 0;
	internals->tx_queue_offload_capa = 0;
	internals->candidate_max_rx_pktlen = 0;
	internals->max_rx_pktlen = 0;

	/* Initially allow any RSS offload type to be chosen */
	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;

	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
	memset(internals->slaves, 0, sizeof(internals->slaves));

	TAILQ_INIT(&internals->flow_list);
	internals->flow_isolated_valid = 0;

	/* Set mode 4 default configuration */
	bond_mode_8023ad_setup(eth_dev, NULL);
	if (bond_ethdev_mode_set(eth_dev, mode)) {
		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
				eth_dev->data->port_id, mode);
		goto err;
	}

	vlan_filter_bmp_size =
			rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
			RTE_CACHE_LINE_SIZE);
	if (internals->vlan_filter_bmpmem == NULL) {
		RTE_BOND_LOG(ERR,
				"Failed to allocate vlan bitmap for bonded device %u",
				eth_dev->data->port_id);
		goto err;
	}

	internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
	if (internals->vlan_filter_bmp == NULL) {
		RTE_BOND_LOG(ERR,
				"Failed to init vlan bitmap for bonded device %u",
				eth_dev->data->port_id);
		rte_free(internals->vlan_filter_bmpmem);
		goto err;
	}

	return eth_dev->data->port_id;

err:
	rte_free(internals);
	if (eth_dev != NULL) {
		rte_free(eth_dev->data->mac_addrs);
		rte_eth_dev_release_port(eth_dev);
	}
	return -1;
}
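/*
 * vdev probe entry point. A bonded device is typically created from the EAL
 * command line, for example (illustrative only, slave port ids assumed):
 *
 *   --vdev 'net_bonding0,mode=1,slave=0,slave=1,primary=0'
 *
 * Only the mode, socket_id and agg_mode kvargs are consumed here; the rest
 * are stashed in internals->kvlist and applied in bond_ethdev_configure().
 */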
static int
bond_probe(struct rte_vdev_device *dev)
{
	const char *name;
	struct bond_dev_private *internals;
	struct rte_kvargs *kvlist;
	uint8_t bonding_mode, socket_id/*, agg_mode*/;
	int arg_count, port_id;
	uint8_t agg_mode;
	struct rte_eth_dev *eth_dev;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
			strlen(rte_vdev_device_args(dev)) == 0) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			RTE_BOND_LOG(ERR, "Failed to probe %s", name);
			return -1;
		}
		/* TODO: request info from primary to set up Rx and Tx */
		eth_dev->dev_ops = &default_dev_ops;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
			pmd_bond_init_valid_arguments);
	if (kvlist == NULL)
		return -1;

	/* Parse link bonding mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
				&bond_ethdev_parse_slave_mode_kvarg,
				&bonding_mode) != 0) {
			RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
					name);
			goto parse_error;
		}
	} else {
		RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
				"device %s", name);
		goto parse_error;
	}

	/* Parse socket id to create bonding device on */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
	if (arg_count == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
				!= 0) {
			RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
					"bonded device %s", name);
			goto parse_error;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
				"bonded device %s", name);
		goto parse_error;
	} else {
		socket_id = rte_socket_id();
	}

	dev->device.numa_node = socket_id;

	/* Create link bonding eth device */
	port_id = bond_alloc(dev, bonding_mode);
	if (port_id < 0) {
		RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
				"socket %u.", name, bonding_mode, socket_id);
		goto parse_error;
	}
	internals = rte_eth_devices[port_id].data->dev_private;
	internals->kvlist = kvlist;

	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to parse agg selection mode for bonded device %s",
					name);
			goto parse_error;
		}

		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
	} else {
		rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
	}

	rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
	RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
			"socket %u.", name, port_id, bonding_mode, socket_id);
	return 0;

parse_error:
	rte_kvargs_free(kvlist);
	return -1;
}
static int
bond_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;
	struct bond_dev_private *internals;
	const char *name;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);

	/* now free all data allocation - for eth_dev structure,
	 * dummy pci driver and internal (private) data
	 */

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return -ENODEV;

	RTE_ASSERT(eth_dev->device == &dev->device);

	internals = eth_dev->data->dev_private;
	if (internals->slave_count != 0)
		return -EBUSY;

	if (eth_dev->data->dev_started == 1) {
		bond_ethdev_stop(eth_dev);
		bond_ethdev_close(eth_dev);
	}

	eth_dev->dev_ops = NULL;
	eth_dev->rx_pkt_burst = NULL;
	eth_dev->tx_pkt_burst = NULL;

	/* Try to release the mempool used in mode 6 (ALB); if the bonded
	 * device is not in mode 6, freeing the NULL pointer is harmless.
	 */
	rte_mempool_free(internals->mode6.mempool);
	rte_bitmap_free(internals->vlan_filter_bmp);
	rte_free(internals->vlan_filter_bmpmem);
	rte_free(eth_dev->data->dev_private);
	rte_free(eth_dev->data->mac_addrs);

	rte_eth_dev_release_port(eth_dev);

	return 0;
}
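/*
 * Example (illustrative, port ids assumed): a device created with
 *   --vdev 'net_bonding0,mode=2,xmit_policy=l34,slave=1,slave=2'
 * reaches bond_ethdev_configure() with the xmit_policy and slave kvargs
 * still pending in internals->kvlist.
 */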
/* This function resolves the slave port ids after all the other pdevs and
 * vdevs have been allocated */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	const char *name = dev->device->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	int arg_count;
	uint16_t port_id = dev - rte_eth_devices;
	uint8_t agg_mode;

	static const uint8_t default_rss_key[40] = {
		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
		0xBE, 0xAC, 0x01, 0xFA
	};

	unsigned i, j;

	/* If RSS is enabled, fill table and key with default values */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
		memcpy(internals->rss_key, default_rss_key, 40);

		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] =
						j % dev->data->nb_rx_queues;
		}
	}

	/* set the max_rx_pktlen */
	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

	/*
	 * if no kvlist, it means that this bonded device has been created
	 * through the bonding API.
	 */
	if (kvlist == NULL)
		return 0;

	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
			RTE_BOND_LOG(INFO, "Invalid MAC address for bonded device %s",
					name);
			return -1;
		}

		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set MAC address on bonded device %s",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(ERR,
				"MAC address can be specified only once for bonded device %s",
				name);
		return -1;
	}

	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg,
				&xmit_policy) != 0) {
			RTE_BOND_LOG(INFO,
					"Invalid xmit policy specified for bonded device %s",
					name);
			return -1;
		}

		/* Set balance mode transmit policy */
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set balance xmit policy on bonded device %s",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(ERR,
				"Transmit policy can be specified only once for bonded device %s",
				name);
		return -1;
	}

	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to parse agg selection mode for bonded device %s",
					name);
		}
		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
	}

	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
			RTE_BOND_LOG(ERR,
					"Failed to parse slave ports for bonded device %s",
					name);
			return -1;
		}

		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
				RTE_BOND_LOG(ERR,
						"Failed to add port %d as slave to bonded device %s",
						slave_ports.slaves[i], name);
			}
		}
	} else {
		RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
		return -1;
	}

	/* Parse/set primary slave port id */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint16_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
			RTE_BOND_LOG(INFO,
					"Invalid primary slave port id specified for bonded device %s",
					name);
			return -1;
		}

		/* Set primary slave port id */
		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
				!= 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set primary slave port %d on bonded device %s",
					primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
				"Primary slave can be specified only once for bonded device %s",
				name);
		return -1;
	}

	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_BOND_LOG(INFO,
					"Invalid lsc polling interval value specified for bonded"
					" device %s", name);
			return -1;
		}

		/* Set lsc monitor polling interval */
		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
					lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
				"LSC polling interval can be specified only once for bonded"
				" device %s", name);
		return -1;
	}

	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_BOND_LOG(INFO,
					"Invalid link up propagation delay value specified for"
					" bonded device %s", name);
			return -1;
		}

		/* Set link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set link up propagation delay (%u ms) on bonded"
					" device %s", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(INFO,
				"Link up propagation delay can be specified only once for"
				" bonded device %s", name);
		return -1;
	}

	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_BOND_LOG(INFO,
					"Invalid link down propagation delay value specified for"
					" bonded device %s", name);
			return -1;
		}

		/* Set link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_BOND_LOG(ERR,
					"Failed to set link down propagation delay (%u ms) on bonded device %s",
					link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_BOND_LOG(ERR,
				"Link down propagation delay can be specified only once for bonded device %s",
				name);
		return -1;
	}

	return 0;
}
struct rte_vdev_driver pmd_bond_drv = {
	.probe = bond_probe,
	.remove = bond_remove,
};

RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
	"slave=<ifc> "
	"primary=<ifc> "
	"mode=[0-6] "
	"xmit_policy=[l2 | l23 | l34] "
	"agg_mode=[count | stable | bandwidth] "
	"socket_id=<int> "
	"mac=<mac addr> "
	"lsc_poll_period_ms=<int> "
	"up_delay=<int> "
	"down_delay=<int>");
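/* Register the driver's log type from a constructor at startup. */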
int bond_logtype;

RTE_INIT(bond_init_log);
static void
bond_init_log(void)
{
	bond_logtype = rte_log_register("pmd.net.bond");
	if (bond_logtype >= 0)
		rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
}