1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
5 #include <netinet/in.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_ethdev_vdev.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
21 #include "rte_eth_bond.h"
22 #include "rte_eth_bond_private.h"
23 #include "rte_eth_bond_8023ad_private.h"
/* Period (ms) of the TLB slave-reordering alarm callback. */
#define REORDER_PERIOD_MS 10
/* Default 802.3ad slow-protocol polling interval, in ms. */
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
/* XOR of L4 src/dst ports; L4 contribution to the transmit hash
 * (layout is identical for TCP and UDP headers). */
#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
/*
 * Skip up to two VLAN tags (QinQ) following the Ethernet header.
 * Updates *proto to the encapsulated EtherType (kept big-endian) and
 * returns the byte offset to add past the Ethernet header.
 */
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
size_t vlan_offset = 0;
if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
vlan_offset = sizeof(struct vlan_hdr);
*proto = vlan_hdr->eth_proto;
/* Second, inner tag (QinQ): skip it as well. */
if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
vlan_hdr = vlan_hdr + 1;
*proto = vlan_hdr->eth_proto;
vlan_offset += sizeof(struct vlan_hdr);
/*
 * Basic RX burst: poll each active slave in order, appending received
 * mbufs to bufs until nb_pkts have been gathered or every slave has
 * been polled once.
 */
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
struct bond_dev_private *internals;
uint16_t num_rx_slave = 0;
uint16_t num_rx_total = 0;
/* Cast to structure, containing bonded device's port id and queue id */
struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
internals = bd_rx_q->dev_private;
/* nb_pkts is decremented as packets arrive, so it doubles as the
 * remaining-space budget for each per-slave burst. */
for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
/* Offset of pointer to *bufs increases as packets are received
 * from other slaves */
num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
num_rx_total += num_rx_slave;
nb_pkts -= num_rx_slave;
/*
 * Active/backup RX: receive exclusively from the current primary
 * slave; backup slaves are not polled.
 */
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
struct bond_dev_private *internals;
/* Cast to structure, containing bonded device's port id and queue id */
struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
internals = bd_rx_q->dev_private;
return rte_eth_rx_burst(internals->current_primary_port,
bd_rx_q->queue_id, bufs, nb_pkts);
/*
 * Return non-zero when the mbuf is an untagged slow-protocol frame
 * (LACP or marker). VLAN-tagged frames are never treated as LACP.
 */
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
(ethertype == ether_type_slow_be &&
(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
/*****************************************************************************
 * Flow director's setup for mode 4 optimization
/* Match pattern: any MACs, EtherType == slow protocol (802.3ad). */
static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
.dst.addr_bytes = { 0 },
.src.addr_bytes = { 0 },
.type = RTE_BE16(ETHER_TYPE_SLOW),
/* Mask: only the EtherType field participates in the match. */
static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
.dst.addr_bytes = { 0 },
.src.addr_bytes = { 0 },
/* Single-item pattern used to steer LACP frames to a dedicated queue. */
static struct rte_flow_item flow_item_8023ad[] = {
.type = RTE_FLOW_ITEM_TYPE_ETH,
.spec = &flow_item_eth_type_8023ad,
.mask = &flow_item_eth_mask_type_8023ad,
.type = RTE_FLOW_ITEM_TYPE_END,
/* Shared attributes for the 802.3ad steering flow rule. */
const struct rte_flow_attr flow_attr_8023ad = {
/*
 * Check that a slave can support the dedicated LACP RX queue: the
 * steering flow rule must validate on the slave, and the slave must
 * have spare RX/TX queue capacity beyond what the bond already uses.
 */
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
uint16_t slave_port) {
struct rte_eth_dev_info slave_info;
struct rte_flow_error error;
struct bond_dev_private *internals = (struct bond_dev_private *)
(bond_dev->data->dev_private);
const struct rte_flow_action_queue lacp_queue_conf = {
const struct rte_flow_action actions[] = {
.type = RTE_FLOW_ACTION_TYPE_QUEUE,
.conf = &lacp_queue_conf
.type = RTE_FLOW_ACTION_TYPE_END,
/* Dry-run the flow rule on the slave without creating it. */
int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
flow_item_8023ad, actions, &error);
RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
__func__, error.message, slave_port,
internals->mode4.dedicated_queues.rx_qid);
rte_eth_dev_info_get(slave_port, &slave_info);
/* The slave needs at least as many queues as the bond exposes. */
if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
"%s: Slave %d capabilities doesn't allow to allocate additional queues",
__func__, slave_port);
/*
 * Report whether every current slave supports HW filtering of slow
 * (LACP) packets to a dedicated queue. Also records the queue ids
 * that would be used (one past the bond's configured queue counts).
 */
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
struct bond_dev_private *internals = (struct bond_dev_private *)
(bond_dev->data->dev_private);
struct rte_eth_dev_info bond_info;
/* Verify if all slaves in bonding supports flow director and */
if (internals->slave_count > 0) {
rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
/* Dedicated queues sit just past the bond's own queue ranges. */
internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
for (idx = 0; idx < internals->slave_count; idx++) {
if (bond_ethdev_8023ad_flow_verify(bond_dev,
internals->slaves[idx].port_id) != 0)
/*
 * Install on the slave the flow rule that steers slow-protocol (LACP)
 * frames to the dedicated RX queue. The created handle is stored per
 * slave port for later teardown.
 */
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
struct rte_flow_error error;
struct bond_dev_private *internals = (struct bond_dev_private *)
(bond_dev->data->dev_private);
struct rte_flow_action_queue lacp_queue_conf = {
.index = internals->mode4.dedicated_queues.rx_qid,
const struct rte_flow_action actions[] = {
.type = RTE_FLOW_ACTION_TYPE_QUEUE,
.conf = &lacp_queue_conf
.type = RTE_FLOW_ACTION_TYPE_END,
internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
&flow_attr_8023ad, flow_item_8023ad, actions, &error);
if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
"(slave_port=%d queue_id=%d)",
error.message, slave_port,
internals->mode4.dedicated_queues.rx_qid);
/*
 * 802.3ad RX when LACP frames are HW-filtered to a dedicated queue:
 * no slow-packet inspection is needed here, so simply round-robin
 * across the active slaves, resuming from the last-used slave index.
 */
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
struct bond_dev_private *internals = bd_rx_q->dev_private;
uint16_t num_rx_total = 0; /* Total number of received packets */
uint16_t slaves[RTE_MAX_ETHPORTS];
uint16_t slave_count;
/* Copy slave list to protect against slave up/down changes during tx
slave_count = internals->active_slave_count;
memcpy(slaves, internals->active_slaves,
sizeof(internals->active_slaves[0]) * slave_count);
/* Resume polling at the slave after the one used last burst. */
for (i = 0, idx = internals->active_slave;
i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
idx = idx % slave_count;
/* Read packets from this slave */
num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
&bufs[num_rx_total], nb_pkts - num_rx_total);
/* Remember where to start next time for fairness. */
internals->active_slave = idx;
/*
 * 802.3ad TX (dedicated-queue variant): hash each mbuf onto a slave
 * that is currently DISTRIBUTING, burst the per-slave groups, and
 * compact any untransmitted mbufs to the tail of bufs so callers see
 * the standard PMD failure convention.
 */
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
struct bond_dev_private *internals = bd_tx_q->dev_private;
uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
uint16_t slave_count;
uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
uint16_t dist_slave_count;
/* 2-D array to sort mbufs for transmission on each slave into */
struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
/* Number of mbufs for transmission on each slave */
uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
/* Mapping array generated by hash function to map mbufs to slaves */
/* NOTE(review): this array is indexed per packet below but sized
 * RTE_MAX_ETHPORTS; the balance TX path sizes it by nb_bufs —
 * verify against upstream whether nb_bufs > RTE_MAX_ETHPORTS can
 * overrun here. */
uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
uint16_t total_tx_count = 0, total_tx_fail_count = 0;
if (unlikely(nb_bufs == 0))
/* Copy slave list to protect against slave up/down changes during tx
slave_count = internals->active_slave_count;
if (unlikely(slave_count < 1))
memcpy(slave_port_ids, internals->active_slaves,
sizeof(slave_port_ids[0]) * slave_count);
/* Keep only slaves whose mode-4 actor state is DISTRIBUTING. */
dist_slave_count = 0;
for (i = 0; i < slave_count; i++) {
struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
if (ACTOR_STATE(port, DISTRIBUTING))
dist_slave_port_ids[dist_slave_count++] =
if (unlikely(dist_slave_count < 1))
 * Populate slaves mbuf with the packets which are to be sent on it
 * selecting output slave using hash based on xmit policy
internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
bufs_slave_port_idxs);
for (i = 0; i < nb_bufs; i++) {
/* Populate slave mbuf arrays with mbufs for that slave. */
/* NOTE(review): uint8_t truncates the hash-selected index; the
 * balance path uses the same type — confirm slave counts here
 * cannot exceed 255. */
uint8_t slave_idx = bufs_slave_port_idxs[i];
slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
/* Send packet burst on each slave device */
for (i = 0; i < dist_slave_count; i++) {
if (slave_nb_bufs[i] == 0)
slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
bd_tx_q->queue_id, slave_bufs[i],
total_tx_count += slave_tx_count;
/* If tx burst fails move packets to end of bufs */
if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
slave_tx_fail_count[i] = slave_nb_bufs[i] -
total_tx_fail_count += slave_tx_fail_count[i];
 * Shift bufs to beginning of array to allow reordering
for (j = 0; j < slave_tx_fail_count[i]; j++) {
slave_bufs[i][(slave_tx_count - 1) + j];
 * If there are tx burst failures we move packets to end of bufs to
 * preserve expected PMD behaviour of all failed transmitted being
 * at the end of the input mbuf array
if (unlikely(total_tx_fail_count > 0)) {
int bufs_idx = nb_bufs - total_tx_fail_count - 1;
for (i = 0; i < slave_count; i++) {
if (slave_tx_fail_count[i] > 0) {
for (j = 0; j < slave_tx_fail_count[i]; j++)
bufs[bufs_idx++] = slave_bufs[i][j];
return total_tx_count;
/*
 * 802.3ad RX (software slow-packet handling): round-robin across
 * active slaves and filter the received burst in place — LACP/marker
 * frames are handed to the mode-4 state machine, and frames from
 * non-COLLECTING slaves or with a foreign unicast DMAC (when not in
 * promiscuous mode) are dropped; survivors are compacted in bufs.
 */
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
/* Cast to structure, containing bonded device's port id and queue id */
struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
struct bond_dev_private *internals = bd_rx_q->dev_private;
struct ether_addr bond_mac;
struct ether_hdr *hdr;
const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
uint16_t num_rx_total = 0; /* Total number of received packets */
uint16_t slaves[RTE_MAX_ETHPORTS];
uint16_t slave_count, idx;
uint8_t collecting; /* current slave collecting status */
const uint8_t promisc = internals->promiscuous_en;
rte_eth_macaddr_get(internals->port_id, &bond_mac);
/* Copy slave list to protect against slave up/down changes during tx
slave_count = internals->active_slave_count;
memcpy(slaves, internals->active_slaves,
sizeof(internals->active_slaves[0]) * slave_count);
/* Clamp the resume index in case the slave set shrank. */
idx = internals->active_slave;
if (idx >= slave_count) {
internals->active_slave = 0;
for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
/* Read packets from this slave */
num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
&bufs[num_rx_total], nb_pkts - num_rx_total);
for (k = j; k < 2 && k < num_rx_total; k++)
rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
/* Handle slow protocol packets. */
while (j < num_rx_total) {
/* If packet is not pure L2 and is known, skip it */
if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
if (j + 3 < num_rx_total)
rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
/* Remove packet from array if it is slow packet or slave is not
 * in collecting state or bonding interface is not in promiscuous
 * mode and packet address does not match. */
if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
!collecting || (!promisc &&
!is_multicast_ether_addr(&hdr->d_addr) &&
!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
if (hdr->ether_type == ether_type_slow_be) {
/* mode 4 takes ownership of slow frames */
bond_mode_8023ad_handle_slow_pkt(
internals, slaves[idx], bufs[j]);
rte_pktmbuf_free(bufs[j]);
/* Packet is managed by mode 4 or dropped, shift the array */
if (j < num_rx_total) {
memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
if (unlikely(++idx == slave_count))
internals->active_slave = idx;
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
/* Debug burst counters; their addresses also tag RX vs TX direction
 * in update_client_stats()/mode6_debug(). */
uint32_t burstnumberRX;
uint32_t burstnumberTX;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
489 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
492 arp_op_name(uint16_t arp_op, char *buf)
496 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
499 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
501 case ARP_OP_REVREQUEST:
502 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
503 "Reverse ARP Request");
505 case ARP_OP_REVREPLY:
506 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
507 "Reverse ARP Reply");
509 case ARP_OP_INVREQUEST:
510 snprintf(buf, sizeof("Peer Identify Request"), "%s",
511 "Peer Identify Request");
513 case ARP_OP_INVREPLY:
514 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
515 "Peer Identify Reply");
520 snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
/* Max length of a dotted-quad string including the terminator. */
#define MaxIPv4String 16
/*
 * Format a big-endian IPv4 address as dotted-quad text into buf.
 */
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
/* Debug-only per-client RX/TX packet accounting for mode 6 (ALB). */
#define MAX_CLIENTS_NUMBER 128
uint8_t active_clients;
struct client_stats_t {
/* counters of IPv4 packets seen from/to this client */
uint32_t ipv4_rx_packets;
uint32_t ipv4_tx_packets;
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
/*
 * Update (or create) the debug stats entry for a client keyed by
 * (IPv4 address, port). Direction is signalled by passing the address
 * of burstnumberRX or burstnumberTX as TXorRXindicator.
 */
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
for (; i < MAX_CLIENTS_NUMBER; i++) {
if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
/* Just update RX packets number for this client */
if (TXorRXindicator == &burstnumberRX)
client_stats[i].ipv4_rx_packets++;
client_stats[i].ipv4_tx_packets++;
/* We have a new client. Insert him to the table, and increment stats */
if (TXorRXindicator == &burstnumberRX)
client_stats[active_clients].ipv4_rx_packets++;
client_stats[active_clients].ipv4_tx_packets++;
client_stats[active_clients].ipv4_addr = addr;
client_stats[active_clients].port = port;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
/* Debug log line for a mode-6 frame: direction tag, src/dst IPs,
 * src/dst MACs, ARP op, port and burst counter. (Comments must stay
 * outside the macro body because of the line continuations.) */
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
RTE_LOG(DEBUG, PMD, \
"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
eth_h->s_addr.addr_bytes[0], \
eth_h->s_addr.addr_bytes[1], \
eth_h->s_addr.addr_bytes[2], \
eth_h->s_addr.addr_bytes[3], \
eth_h->s_addr.addr_bytes[4], \
eth_h->s_addr.addr_bytes[5], \
eth_h->d_addr.addr_bytes[0], \
eth_h->d_addr.addr_bytes[1], \
eth_h->d_addr.addr_bytes[2], \
eth_h->d_addr.addr_bytes[3], \
eth_h->d_addr.addr_bytes[4], \
eth_h->d_addr.addr_bytes[5], \
/*
 * Debug hook for mode 6: for IPv4 frames log/count per-client stats;
 * for ARP frames (full-debug build only) log addresses and the ARP op.
 */
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
struct arp_hdr *arp_h;
uint16_t ether_type = eth_h->ether_type;
uint16_t offset = get_vlan_offset(eth_h, &ether_type);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
snprintf(buf, 16, "%s", info);
if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
/* L3 header sits after the Ethernet header plus any VLAN tags. */
ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
update_client_stats(ipv4_h->src_addr, port, burstnumber);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
/*
 * Mode 6 (ALB) RX: receive via the basic round-robin path, then hand
 * every ARP frame to the ALB module so the client table stays current.
 */
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
/* NOTE(review): casts an RX queue to bond_tx_queue; only dev_private
 * is read, which presumably lies at the same offset in both structs —
 * confirm, or use bond_rx_queue. */
struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
struct bond_dev_private *internals = bd_tx_q->dev_private;
struct ether_hdr *eth_h;
uint16_t ether_type, offset;
uint16_t nb_recv_pkts;
nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
for (i = 0; i < nb_recv_pkts; i++) {
eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
ether_type = eth_h->ether_type;
offset = get_vlan_offset(eth_h, &ether_type);
if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
bond_mode_alb_arp_recv(eth_h, offset, internals);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
/*
 * Mode 0 TX: distribute the burst across active slaves round-robin,
 * then burst each per-slave group; untransmitted mbufs are moved to
 * the end of bufs per PMD convention.
 */
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
struct bond_dev_private *internals;
struct bond_tx_queue *bd_tx_q;
struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
uint16_t num_of_slaves;
uint16_t slaves[RTE_MAX_ETHPORTS];
uint16_t num_tx_total = 0, num_tx_slave;
/* NOTE(review): function-static rotation index is shared by all
 * bonded devices and TX queues — confirm whether per-queue state
 * is intended here. */
static int slave_idx = 0;
int i, cslave_idx = 0, tx_fail_total = 0;
bd_tx_q = (struct bond_tx_queue *)queue;
internals = bd_tx_q->dev_private;
/* Copy slave list to protect against slave up/down changes during tx
num_of_slaves = internals->active_slave_count;
memcpy(slaves, internals->active_slaves,
sizeof(internals->active_slaves[0]) * num_of_slaves);
if (num_of_slaves < 1)
/* Populate slaves mbuf with which packets are to be sent on it */
for (i = 0; i < nb_pkts; i++) {
cslave_idx = (slave_idx + i) % num_of_slaves;
slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
/* increment current slave index so the next call to tx burst starts on the
slave_idx = ++cslave_idx;
/* Send packet burst on each slave device */
for (i = 0; i < num_of_slaves; i++) {
if (slave_nb_pkts[i] > 0) {
num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
slave_bufs[i], slave_nb_pkts[i]);
/* if tx burst fails move packets to end of bufs */
if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
tx_fail_total += tx_fail_slave;
memcpy(&bufs[nb_pkts - tx_fail_total],
&slave_bufs[i][num_tx_slave],
tx_fail_slave * sizeof(bufs[0]));
num_tx_total += num_tx_slave;
/*
 * Active/backup TX: transmit only on the current primary slave;
 * returns 0 (nothing sent) if no slave is active.
 */
bond_ethdev_tx_burst_active_backup(void *queue,
struct rte_mbuf **bufs, uint16_t nb_pkts)
struct bond_dev_private *internals;
struct bond_tx_queue *bd_tx_q;
bd_tx_q = (struct bond_tx_queue *)queue;
internals = bd_tx_q->dev_private;
if (internals->active_slave_count < 1)
return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
/*
 * 16-bit XOR fold of the source and destination MAC addresses.
 * Unaligned loads are used since the header need not be aligned.
 */
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
unaligned_uint16_t *word_src_addr =
(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
unaligned_uint16_t *word_dst_addr =
(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
return (word_src_addr[0] ^ word_dst_addr[0]) ^
(word_src_addr[1] ^ word_dst_addr[1]) ^
(word_src_addr[2] ^ word_dst_addr[2]);
/* XOR of IPv4 source and destination addresses (kept big-endian). */
static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
/*
 * 32-bit XOR fold of the 128-bit IPv6 source and destination
 * addresses; unaligned loads tolerate arbitrary header alignment.
 */
static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
unaligned_uint32_t *word_src_addr =
(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
unaligned_uint32_t *word_dst_addr =
(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
return (word_src_addr[0] ^ word_dst_addr[0]) ^
(word_src_addr[1] ^ word_dst_addr[1]) ^
(word_src_addr[2] ^ word_dst_addr[2]) ^
(word_src_addr[3] ^ word_dst_addr[3]);
/*
 * L2 transmit policy: map each mbuf to a slave index from a folded
 * hash of its MAC addresses; results written to slaves[i].
 */
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
uint8_t slave_count, uint16_t *slaves)
struct ether_hdr *eth_hdr;
for (i = 0; i < nb_pkts; i++) {
eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
hash = ether_hash(eth_hdr);
/* fold the upper byte in before reducing modulo slave count */
slaves[i] = (hash ^= hash >> 8) % slave_count;
/*
 * L2+L3 transmit policy: combine the MAC hash with an IPv4/IPv6
 * address hash (VLAN tags skipped) and reduce modulo slave count.
 */
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
uint8_t slave_count, uint16_t *slaves)
struct ether_hdr *eth_hdr;
uint32_t hash, l3hash;
for (i = 0; i < nb_pkts; i++) {
eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
proto = eth_hdr->ether_type;
hash = ether_hash(eth_hdr);
vlan_offset = get_vlan_offset(eth_hdr, &proto);
if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
((char *)(eth_hdr + 1) + vlan_offset);
l3hash = ipv4_hash(ipv4_hdr);
} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
((char *)(eth_hdr + 1) + vlan_offset);
l3hash = ipv6_hash(ipv6_hdr);
/* mix L2 and L3 contributions */
hash = hash ^ l3hash;
slaves[i] = hash % slave_count;
/*
 * L3+L4 transmit policy: hash IP addresses and TCP/UDP ports (VLAN
 * tags skipped; IPv4 options handled via IHL) and reduce modulo the
 * slave count.
 */
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
uint8_t slave_count, uint16_t *slaves)
struct ether_hdr *eth_hdr;
struct udp_hdr *udp_hdr;
struct tcp_hdr *tcp_hdr;
uint32_t hash, l3hash, l4hash;
for (i = 0; i < nb_pkts; i++) {
eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
proto = eth_hdr->ether_type;
vlan_offset = get_vlan_offset(eth_hdr, &proto);
if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
((char *)(eth_hdr + 1) + vlan_offset);
size_t ip_hdr_offset;
l3hash = ipv4_hash(ipv4_hdr);
/* there is no L4 header in fragmented packet */
if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
/* IHL is in 32-bit words; gives the real L4 offset */
ip_hdr_offset = (ipv4_hdr->version_ihl
& IPV4_HDR_IHL_MASK) *
if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
tcp_hdr = (struct tcp_hdr *)
l4hash = HASH_L4_PORTS(tcp_hdr);
} else if (ipv4_hdr->next_proto_id ==
udp_hdr = (struct udp_hdr *)
l4hash = HASH_L4_PORTS(udp_hdr);
} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
((char *)(eth_hdr + 1) + vlan_offset);
l3hash = ipv6_hash(ipv6_hdr);
/* NOTE(review): assumes no IPv6 extension headers between the
 * fixed header and L4 — confirm acceptable for this policy. */
if (ipv6_hdr->proto == IPPROTO_TCP) {
tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
l4hash = HASH_L4_PORTS(tcp_hdr);
} else if (ipv6_hdr->proto == IPPROTO_UDP) {
udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
l4hash = HASH_L4_PORTS(udp_hdr);
hash = l3hash ^ l4hash;
slaves[i] = hash % slave_count;
/* integer and remainder parts of the slave's remaining bandwidth,
 * used as a two-word sort key in bandwidth_cmp() */
uint64_t bwg_left_int;
uint64_t bwg_left_remainder;
/*
 * Reset the TLB byte counters for every active slave so bandwidth
 * estimation restarts cleanly when slaves are (re)activated.
 */
bond_tlb_activate_slave(struct bond_dev_private *internals) {
for (i = 0; i < internals->active_slave_count; i++) {
tlb_last_obytets[internals->active_slaves[i]] = 0;
/*
 * qsort comparator: order slaves by descending remaining bandwidth,
 * comparing the integer part first, then the remainder.
 */
bandwidth_cmp(const void *a, const void *b)
const struct bwg_slave *bwg_a = a;
const struct bwg_slave *bwg_b = b;
int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
(int64_t)bwg_a->bwg_left_remainder;
/*
 * Estimate the slave's remaining bandwidth given the bytes sent
 * (load) over the elapsed update intervals; link speed is read
 * without waiting and converted from Mb/s to bytes/s.
 */
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
struct bwg_slave *bwg_slave)
struct rte_eth_link link_status;
rte_eth_link_get_nowait(port_id, &link_status);
uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
/* scale capacity to the number of elapsed reorder periods */
link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
/*
 * Periodic alarm callback for mode 5 (TLB): recompute each active
 * slave's remaining bandwidth from its TX byte delta, sort slaves by
 * descending headroom into tlb_slaves_order, and re-arm the alarm.
 */
bond_ethdev_update_tlb_slave_cb(void *arg)
struct bond_dev_private *internals = arg;
struct rte_eth_stats slave_stats;
struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
uint8_t update_stats = 0;
internals->slave_update_idx++;
/* refresh the stored byte counters once per full reorder period */
if (internals->slave_update_idx >= REORDER_PERIOD_MS)
for (i = 0; i < internals->active_slave_count; i++) {
slave_id = internals->active_slaves[i];
rte_eth_stats_get(slave_id, &slave_stats);
tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
bandwidth_left(slave_id, tx_bytes,
internals->slave_update_idx, &bwg_array[i]);
bwg_array[i].slave = slave_id;
tlb_last_obytets[slave_id] = slave_stats.obytes;
if (update_stats == 1)
internals->slave_update_idx = 0;
qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
for (i = 0; i < slave_count; i++)
internals->tlb_slaves_order[i] = bwg_array[i].slave;
rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
(struct bond_dev_private *)internals);
/*
 * Mode 5 (TLB) TX: try slaves in bandwidth order; before sending,
 * rewrite the source MAC of frames carrying the primary's address to
 * the egress slave's address so return traffic balances correctly.
 */
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
struct bond_dev_private *internals = bd_tx_q->dev_private;
struct rte_eth_dev *primary_port =
&rte_eth_devices[internals->primary_port];
uint16_t num_tx_total = 0;
uint16_t num_of_slaves = internals->active_slave_count;
uint16_t slaves[RTE_MAX_ETHPORTS];
struct ether_hdr *ether_hdr;
struct ether_addr primary_slave_addr;
struct ether_addr active_slave_addr;
if (num_of_slaves < 1)
return num_tx_total;
/* snapshot the bandwidth-sorted order maintained by the alarm cb */
memcpy(slaves, internals->tlb_slaves_order,
sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
/* warm the cache for the first few headers */
for (i = 0; i < 3; i++)
rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
for (i = 0; i < num_of_slaves; i++) {
rte_eth_macaddr_get(slaves[i], &active_slave_addr);
for (j = num_tx_total; j < nb_pkts; j++) {
if (j + 3 < nb_pkts)
rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
bufs + num_tx_total, nb_pkts - num_tx_total);
if (num_tx_total == nb_pkts)
return num_tx_total;
/* Stop the periodic TLB slave-reordering alarm for this bond. */
bond_tlb_disable(struct bond_dev_private *internals)
rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
/* Kick off TLB slave reordering; the callback re-arms itself. */
bond_tlb_enable(struct bond_dev_private *internals)
bond_ethdev_update_tlb_slave_cb(internals);
/*
 * Mode 6 (ALB) TX: ARP frames are assigned a slave by the ALB table
 * (with src MAC rewritten to that slave); generated ARP-update frames
 * are sent separately and not counted; all other traffic goes through
 * the TLB policy. Untransmitted mbufs end up at the tail of bufs.
 */
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
struct bond_dev_private *internals = bd_tx_q->dev_private;
struct ether_hdr *eth_h;
uint16_t ether_type, offset;
struct client_data *client_info;
 * We create transmit buffers for every slave and one additional to send
 * through tlb. In worst case every packet will be send on one port.
struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
 * We create separate transmit buffers for update packets as they won't
 * be counted in num_tx_total.
struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
struct rte_mbuf *upd_pkt;
uint16_t num_send, num_not_send = 0;
uint16_t num_tx_total = 0;
/* Search tx buffer for ARP packets and forward them to alb */
for (i = 0; i < nb_pkts; i++) {
eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
ether_type = eth_h->ether_type;
offset = get_vlan_offset(eth_h, &ether_type);
if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
/* Change src mac in eth header */
rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
/* Add packet to slave tx buffer */
slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
slave_bufs_pkts[slave_idx]++;
/* If packet is not ARP, send it with TLB policy */
slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
/* Update connected client ARP tables */
if (internals->mode6.ntt) {
for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
client_info = &internals->mode6.client_table[i];
if (client_info->in_use) {
/* Allocate new packet to send ARP update on current slave */
upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
if (upd_pkt == NULL) {
RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
+ client_info->vlan_count * sizeof(struct vlan_hdr);
upd_pkt->data_len = pkt_size;
upd_pkt->pkt_len = pkt_size;
slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
/* Add packet to update tx buffer */
update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
update_bufs_pkts[slave_idx]++;
internals->mode6.ntt = 0;
/* Send ARP packets on proper slaves */
for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
if (slave_bufs_pkts[i] > 0) {
num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
slave_bufs[i], slave_bufs_pkts[i]);
/* move unsent ARP frames back to the tail of bufs */
for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
bufs[nb_pkts - 1 - num_not_send - j] =
slave_bufs[i][nb_pkts - 1 - j];
num_tx_total += num_send;
num_not_send += slave_bufs_pkts[i] - num_send;
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
/* Print TX stats including update packets */
for (j = 0; j < slave_bufs_pkts[i]; j++) {
eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
/* Send update packets on proper slaves */
for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
if (update_bufs_pkts[i] > 0) {
num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
update_bufs_pkts[i]);
/* update frames are owned here; free any that didn't go out */
for (j = num_send; j < update_bufs_pkts[i]; j++) {
rte_pktmbuf_free(update_bufs[i][j]);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
for (j = 0; j < update_bufs_pkts[i]; j++) {
eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
/* Send non-ARP packets using tlb policy */
if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
num_send = bond_ethdev_tx_burst_tlb(queue,
slave_bufs[RTE_MAX_ETHPORTS],
slave_bufs_pkts[RTE_MAX_ETHPORTS]);
for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
bufs[nb_pkts - 1 - num_not_send - j] =
slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
num_tx_total += num_send;
return num_tx_total;
/*
 * Mode 2 (balance) TX: assign each mbuf to a slave via the configured
 * xmit hash policy, burst the per-slave groups, and compact any
 * untransmitted mbufs to the tail of bufs (standard PMD convention).
 */
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
struct bond_dev_private *internals = bd_tx_q->dev_private;
uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
uint16_t slave_count;
/* Array to sort mbufs for transmission on each slave into */
struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
/* Number of mbufs for transmission on each slave */
uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
/* Mapping array generated by hash function to map mbufs to slaves */
uint16_t bufs_slave_port_idxs[nb_bufs];
uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
uint16_t total_tx_count = 0, total_tx_fail_count = 0;
if (unlikely(nb_bufs == 0))
/* Copy slave list to protect against slave up/down changes during tx
slave_count = internals->active_slave_count;
if (unlikely(slave_count < 1))
memcpy(slave_port_ids, internals->active_slaves,
sizeof(slave_port_ids[0]) * slave_count);
 * Populate slaves mbuf with the packets which are to be sent on it
 * selecting output slave using hash based on xmit policy
internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
bufs_slave_port_idxs);
for (i = 0; i < nb_bufs; i++) {
/* Populate slave mbuf arrays with mbufs for that slave. */
uint8_t slave_idx = bufs_slave_port_idxs[i];
slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
/* Send packet burst on each slave device */
for (i = 0; i < slave_count; i++) {
if (slave_nb_bufs[i] == 0)
slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
bd_tx_q->queue_id, slave_bufs[i],
total_tx_count += slave_tx_count;
/* If tx burst fails move packets to end of bufs */
if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
slave_tx_fail_count[i] = slave_nb_bufs[i] -
total_tx_fail_count += slave_tx_fail_count[i];
 * Shift bufs to beginning of array to allow reordering
for (j = 0; j < slave_tx_fail_count[i]; j++) {
slave_bufs[i][(slave_tx_count - 1) + j];
 * If there are tx burst failures we move packets to end of bufs to
 * preserve expected PMD behaviour of all failed transmitted being
 * at the end of the input mbuf array
if (unlikely(total_tx_fail_count > 0)) {
int bufs_idx = nb_bufs - total_tx_fail_count - 1;
for (i = 0; i < slave_count; i++) {
if (slave_tx_fail_count[i] > 0) {
for (j = 0; j < slave_tx_fail_count[i]; j++)
bufs[bufs_idx++] = slave_bufs[i][j];
return total_tx_count;
1313 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1316 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1317 struct bond_dev_private *internals = bd_tx_q->dev_private;
1319 uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1320 uint16_t slave_count;
1322 uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1323 uint16_t dist_slave_count;
1325 /* 2-D array to sort mbufs for transmission on each slave into */
1326 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1327 /* Number of mbufs for transmission on each slave */
1328 uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1329 /* Mapping array generated by hash function to map mbufs to slaves */
1330 uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1332 uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1333 uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1337 if (unlikely(nb_bufs == 0))
1340 /* Copy slave list to protect against slave up/down changes during tx
1342 slave_count = internals->active_slave_count;
1343 if (unlikely(slave_count < 1))
1346 memcpy(slave_port_ids, internals->active_slaves,
1347 sizeof(slave_port_ids[0]) * slave_count);
1349 dist_slave_count = 0;
1350 for (i = 0; i < slave_count; i++) {
1351 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1353 if (ACTOR_STATE(port, DISTRIBUTING))
1354 dist_slave_port_ids[dist_slave_count++] =
1358 if (likely(dist_slave_count > 1)) {
1361 * Populate slaves mbuf with the packets which are to be sent
1362 * on it, selecting output slave using hash based on xmit policy
1364 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1365 bufs_slave_port_idxs);
1367 for (i = 0; i < nb_bufs; i++) {
1369 * Populate slave mbuf arrays with mbufs for that
1372 uint8_t slave_idx = bufs_slave_port_idxs[i];
1374 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1379 /* Send packet burst on each slave device */
1380 for (i = 0; i < dist_slave_count; i++) {
1381 if (slave_nb_bufs[i] == 0)
1384 slave_tx_count = rte_eth_tx_burst(
1385 dist_slave_port_ids[i],
1386 bd_tx_q->queue_id, slave_bufs[i],
1389 total_tx_count += slave_tx_count;
1391 /* If tx burst fails move packets to end of bufs */
1392 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1393 slave_tx_fail_count[i] = slave_nb_bufs[i] -
1395 total_tx_fail_count += slave_tx_fail_count[i];
1398 * Shift bufs to beginning of array to allow
1401 for (j = 0; j < slave_tx_fail_count[i]; j++)
1404 [(slave_tx_count - 1)
1410 * If there are tx burst failures we move packets to end of
1411 * bufs to preserve expected PMD behaviour of all failed
1412 * transmitted being at the end of the input mbuf array
1414 if (unlikely(total_tx_fail_count > 0)) {
1415 int bufs_idx = nb_bufs - total_tx_fail_count - 1;
1417 for (i = 0; i < slave_count; i++) {
1418 if (slave_tx_fail_count[i] > 0) {
1420 j < slave_tx_fail_count[i];
1430 /* Check for LACP control packets and send if available */
1431 for (i = 0; i < slave_count; i++) {
1432 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1433 struct rte_mbuf *ctrl_pkt = NULL;
1435 if (likely(rte_ring_empty(port->tx_ring)))
1438 if (rte_ring_dequeue(port->tx_ring,
1439 (void **)&ctrl_pkt) != -ENOENT) {
1440 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1441 bd_tx_q->queue_id, &ctrl_pkt, 1);
1443 * re-enqueue LAG control plane packets to buffering
1444 * ring if transmission fails so the packet isn't lost.
1446 if (slave_tx_count != 1)
1447 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1451 return total_tx_count;
1455 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1458 struct bond_dev_private *internals;
1459 struct bond_tx_queue *bd_tx_q;
1461 uint8_t tx_failed_flag = 0, num_of_slaves;
1462 uint16_t slaves[RTE_MAX_ETHPORTS];
1464 uint16_t max_nb_of_tx_pkts = 0;
1466 int slave_tx_total[RTE_MAX_ETHPORTS];
1467 int i, most_successful_tx_slave = -1;
1469 bd_tx_q = (struct bond_tx_queue *)queue;
1470 internals = bd_tx_q->dev_private;
1472 /* Copy slave list to protect against slave up/down changes during tx
1474 num_of_slaves = internals->active_slave_count;
1475 memcpy(slaves, internals->active_slaves,
1476 sizeof(internals->active_slaves[0]) * num_of_slaves);
1478 if (num_of_slaves < 1)
1481 /* Increment reference count on mbufs */
1482 for (i = 0; i < nb_pkts; i++)
1483 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1485 /* Transmit burst on each active slave */
1486 for (i = 0; i < num_of_slaves; i++) {
1487 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1490 if (unlikely(slave_tx_total[i] < nb_pkts))
1493 /* record the value and slave index for the slave which transmits the
1494 * maximum number of packets */
1495 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1496 max_nb_of_tx_pkts = slave_tx_total[i];
1497 most_successful_tx_slave = i;
1501 /* if slaves fail to transmit packets from burst, the calling application
1502 * is not expected to know about multiple references to packets so we must
1503 * handle failures of all packets except those of the most successful slave
1505 if (unlikely(tx_failed_flag))
1506 for (i = 0; i < num_of_slaves; i++)
1507 if (i != most_successful_tx_slave)
1508 while (slave_tx_total[i] < nb_pkts)
1509 rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1511 return max_nb_of_tx_pkts;
1515 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1517 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1519 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1521 * If in mode 4 then save the link properties of the first
1522 * slave, all subsequent slaves must match these properties
1524 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1526 bond_link->link_autoneg = slave_link->link_autoneg;
1527 bond_link->link_duplex = slave_link->link_duplex;
1528 bond_link->link_speed = slave_link->link_speed;
1531 * In any other mode the link properties are set to default
1532 * values of AUTONEG/DUPLEX
1534 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1535 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1540 link_properties_valid(struct rte_eth_dev *ethdev,
1541 struct rte_eth_link *slave_link)
1543 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1545 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1546 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1548 if (bond_link->link_duplex != slave_link->link_duplex ||
1549 bond_link->link_autoneg != slave_link->link_autoneg ||
1550 bond_link->link_speed != slave_link->link_speed)
1558 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1560 struct ether_addr *mac_addr;
1562 if (eth_dev == NULL) {
1563 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1567 if (dst_mac_addr == NULL) {
1568 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1572 mac_addr = eth_dev->data->mac_addrs;
1574 ether_addr_copy(mac_addr, dst_mac_addr);
1579 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1581 struct ether_addr *mac_addr;
1583 if (eth_dev == NULL) {
1584 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1588 if (new_mac_addr == NULL) {
1589 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1593 mac_addr = eth_dev->data->mac_addrs;
1595 /* If new MAC is different to current MAC then update */
1596 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1597 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
/*
 * Propagate the bonded device's MAC address configuration to all slaves.
 * How each slave is programmed depends on the bonding mode: in the
 * "all-slaves-active" modes every slave gets the bonded MAC; in the
 * primary-based modes only the primary gets the bonded MAC and the other
 * slaves are restored to their persisted (original) addresses.
 */
1603 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1605 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1608 	/* Update slave devices MAC addresses */
1609 	if (internals->slave_count < 1)
1612 	switch (internals->mode) {
/* Modes where every slave carries traffic: all slaves take the bonded MAC */
1613 	case BONDING_MODE_ROUND_ROBIN:
1614 	case BONDING_MODE_BALANCE:
1615 	case BONDING_MODE_BROADCAST:
1616 		for (i = 0; i < internals->slave_count; i++) {
1617 			if (rte_eth_dev_default_mac_addr_set(
1618 					internals->slaves[i].port_id,
1619 					bonded_eth_dev->data->mac_addrs)) {
1620 				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1621 						internals->slaves[i].port_id);
/* Mode 4 delegates MAC handling to the 802.3ad state machine */
1626 	case BONDING_MODE_8023AD:
1627 		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
/* Primary-based modes: only the primary uses the bonded MAC, other slaves
 * are restored to their own persisted addresses.
 */
1629 	case BONDING_MODE_ACTIVE_BACKUP:
1630 	case BONDING_MODE_TLB:
1631 	case BONDING_MODE_ALB:
1633 		for (i = 0; i < internals->slave_count; i++) {
1634 			if (internals->slaves[i].port_id ==
1635 					internals->current_primary_port) {
1636 				if (rte_eth_dev_default_mac_addr_set(
1637 						internals->primary_port,
1638 						bonded_eth_dev->data->mac_addrs)) {
1639 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1640 							internals->current_primary_port);
/* NOTE(review): the set call above targets internals->primary_port while the
 * match is against current_primary_port — confirm these agree when a
 * non-user-defined primary has failed over.
 */
1644 				if (rte_eth_dev_default_mac_addr_set(
1645 						internals->slaves[i].port_id,
1646 						&internals->slaves[i].persisted_mac_addr)) {
1647 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1648 							internals->slaves[i].port_id);
/*
 * Install the Rx/Tx burst handlers matching the requested bonding mode and
 * record the mode in the device's private data.  Modes 4 (802.3ad) and 6
 * (ALB) additionally run their enable hooks before handlers are installed.
 */
1659 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1661 	struct bond_dev_private *internals;
1663 	internals = eth_dev->data->dev_private;
1666 	case BONDING_MODE_ROUND_ROBIN:
1667 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1668 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1670 	case BONDING_MODE_ACTIVE_BACKUP:
1671 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1672 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1674 	case BONDING_MODE_BALANCE:
1675 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1676 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1678 	case BONDING_MODE_BROADCAST:
1679 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1680 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1682 	case BONDING_MODE_8023AD:
/* Mode 4 must be enabled first; LACP state machines depend on it */
1683 		if (bond_mode_8023ad_enable(eth_dev) != 0)
1686 		if (internals->mode4.dedicated_queues.enabled == 0) {
1687 			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1688 			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1689 			RTE_LOG(WARNING, PMD,
1690 				"Using mode 4, it is necessary to do TX burst "
1691 				"and RX burst at least every 100ms.\n");
/* Dedicated queues: LACP control traffic is steered by flow rules, so the
 * fast-path burst handlers can skip the slow-packet filtering.
 */
1693 			/* Use flow director's optimization */
1694 			eth_dev->rx_pkt_burst =
1695 					bond_ethdev_rx_burst_8023ad_fast_queue;
1696 			eth_dev->tx_pkt_burst =
1697 					bond_ethdev_tx_burst_8023ad_fast_queue;
1700 	case BONDING_MODE_TLB:
1701 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1702 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1704 	case BONDING_MODE_ALB:
1705 		if (bond_mode_alb_enable(eth_dev) != 0)
1708 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1709 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1715 	internals->mode = mode;
/*
 * Prepare a slave's dedicated "slow" (LACP control) path for mode 4 with
 * dedicated queues: lazily create the slave's slow-packet mempool, then set
 * up the reserved Rx (128 desc) and Tx (512 desc) queues on the slave.
 */
1722 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1723 		struct rte_eth_dev *slave_eth_dev)
1726 	struct bond_dev_private *internals = (struct bond_dev_private *)
1727 			bonded_eth_dev->data->dev_private;
1728 	struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
/* Create the slow-packet pool only once per slave */
1730 	if (port->slow_pool == NULL) {
1732 		int slave_id = slave_eth_dev->data->port_id;
1734 		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1736 		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1737 			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1738 			slave_eth_dev->data->numa_node);
1740 		/* Any memory allocation failure in initialization is critical because
1741 		 * resources can't be free, so reinitialization is impossible. */
1742 		if (port->slow_pool == NULL) {
1743 			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1744 				slave_id, mem_name, rte_strerror(rte_errno));
1748 	if (internals->mode4.dedicated_queues.enabled == 1) {
1749 		/* Configure slow Rx queue */
1751 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1752 				internals->mode4.dedicated_queues.rx_qid, 128,
1753 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1754 				NULL, port->slow_pool);
1757 				"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1758 				slave_eth_dev->data->port_id,
1759 				internals->mode4.dedicated_queues.rx_qid,
/* Dedicated slow Tx queue for LACPDUs */
1764 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1765 				internals->mode4.dedicated_queues.tx_qid, 512,
1766 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1770 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1771 				slave_eth_dev->data->port_id,
1772 				internals->mode4.dedicated_queues.tx_qid,
/*
 * (Re)configure a slave device so it mirrors the bonded device's settings:
 * stop the slave, copy over LSC/RSS/VLAN/MTU configuration, reconfigure it
 * with the bonded queue counts, recreate all Rx/Tx queues, set up mode-4
 * dedicated queues and flow rules when enabled, restart the slave, sync the
 * RSS RETA, and finally seed the initial link status via the LSC callback.
 */
1781 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1782 		struct rte_eth_dev *slave_eth_dev)
1784 	struct bond_rx_queue *bd_rx_q;
1785 	struct bond_tx_queue *bd_tx_q;
1786 	uint16_t nb_rx_queues;
1787 	uint16_t nb_tx_queues;
1791 	struct rte_flow_error flow_error;
1793 	struct bond_dev_private *internals = (struct bond_dev_private *)
1794 			bonded_eth_dev->data->dev_private;
/* Slave must be stopped before it can be reconfigured */
1797 	rte_eth_dev_stop(slave_eth_dev->data->port_id);
1799 	/* Enable interrupts on slave device if supported */
1800 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1801 		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1803 	/* If RSS is enabled for bonding, try to enable it for slaves */
1804 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1805 		if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1807 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1808 					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1809 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1810 					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
/* No user key: let the slave PMD pick its own default RSS key */
1812 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1815 		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1816 				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1817 		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1818 				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
/* Mirror the bonded device's VLAN filter setting */
1821 	slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1822 			bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1824 	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1825 	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
/* Mode 4 with dedicated queues reserves one extra Rx/Tx queue per slave
 * for LACP control traffic (queue ids set in bond_ethdev_start).
 */
1827 	if (internals->mode == BONDING_MODE_8023AD) {
1828 		if (internals->mode4.dedicated_queues.enabled == 1) {
1834 	errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1835 			bonded_eth_dev->data->mtu);
/* -ENOTSUP is tolerated: some PMDs simply have no MTU-set support */
1836 	if (errval != 0 && errval != -ENOTSUP) {
1837 		RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1838 				slave_eth_dev->data->port_id, errval);
1842 	/* Configure device */
1843 	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1844 			nb_rx_queues, nb_tx_queues,
1845 			&(slave_eth_dev->data->dev_conf));
1847 		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1848 				slave_eth_dev->data->port_id, errval);
1852 	/* Setup Rx Queues */
1853 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1854 		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1856 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1857 				bd_rx_q->nb_rx_desc,
1858 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1859 				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1862 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1863 					slave_eth_dev->data->port_id, q_id, errval);
1868 	/* Setup Tx Queues */
1869 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1870 		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1872 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1873 				bd_tx_q->nb_tx_desc,
1874 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1878 					"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1879 					slave_eth_dev->data->port_id, q_id, errval);
1884 	if (internals->mode == BONDING_MODE_8023AD &&
1885 			internals->mode4.dedicated_queues.enabled == 1) {
1886 		if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
1890 		if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1891 				slave_eth_dev->data->port_id) != 0) {
/* NOTE(review): this message was copy-pasted from the Tx queue setup error
 * above; it misreports a flow-verify failure as "rte_eth_tx_queue_setup".
 */
1893 					"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1894 					slave_eth_dev->data->port_id, q_id, errval);
/* Replace any stale dedicated-queue flow rule before installing a new one */
1898 		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1899 			rte_flow_destroy(slave_eth_dev->data->port_id,
1900 					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1903 		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1904 				slave_eth_dev->data->port_id);
1908 	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1910 		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1911 				slave_eth_dev->data->port_id, errval);
1915 	/* If RSS is enabled for bonding, synchronize RETA */
1916 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1918 		struct bond_dev_private *internals;
1920 		internals = bonded_eth_dev->data->dev_private;
1922 		for (i = 0; i < internals->slave_count; i++) {
1923 			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1924 				errval = rte_eth_dev_rss_reta_update(
1925 						slave_eth_dev->data->port_id,
1926 						&internals->reta_conf[0],
1927 						internals->slaves[i].reta_size);
/* RETA mismatch is non-fatal: warn and carry on */
1929 					RTE_LOG(WARNING, PMD,
1930 							"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1931 							" RSS Configuration for bonding may be inconsistent.\n",
1932 							slave_eth_dev->data->port_id, errval);
1939 	/* If lsc interrupt is set, check initial slave's link status */
1940 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1941 		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1942 		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1943 				RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
/*
 * Remove a slave from the bond's slave table: locate its entry, close the
 * gap with memmove, shrink the count, and reset the slave ethdev so it must
 * be fully reconfigured before any future use.
 * NOTE(review): no explicit not-found handling is visible here — callers
 * presumably guarantee the slave is present; confirm before reuse.
 */
1951 slave_remove(struct bond_dev_private *internals,
1952 		struct rte_eth_dev *slave_eth_dev)
1956 	for (i = 0; i < internals->slave_count; i++)
1957 		if (internals->slaves[i].port_id ==
1958 				slave_eth_dev->data->port_id)
/* Shift the remaining entries down unless the last slot was removed */
1961 	if (i < (internals->slave_count - 1))
1962 		memmove(&internals->slaves[i], &internals->slaves[i + 1],
1963 			sizeof(internals->slaves[0]) *
1964 			(internals->slave_count - i - 1));
1966 	internals->slave_count--;
1968 	/* force reconfiguration of slave interfaces */
1969 	_rte_eth_dev_reset(slave_eth_dev);
1973 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
/*
 * Append a slave to the bond's slave table, recording its port id, initial
 * link status, whether it must be polled for link changes (no LSC interrupt
 * support), and a copy of its original MAC address so it can be restored
 * when the slave is released.
 */
1976 slave_add(struct bond_dev_private *internals,
1977 		struct rte_eth_dev *slave_eth_dev)
1979 	struct bond_slave_details *slave_details =
1980 			&internals->slaves[internals->slave_count];
1982 	slave_details->port_id = slave_eth_dev->data->port_id;
1983 	slave_details->last_link_status = 0;
1985 	/* Mark slave devices that don't support interrupts so we can
1986 	 * compensate when we start the bond
1988 	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1989 		slave_details->link_status_poll_enabled = 1;
1992 	slave_details->link_status_wait_to_complete = 0;
1993 	/* clean tlb_last_obytes when adding port for bonding device */
/* Persist the slave's original MAC so it can be restored on removal */
1994 	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1995 		sizeof(struct ether_addr));
/*
 * Select the bond's current primary slave.  When no slave is active the
 * proposed port becomes primary unconditionally; otherwise it is adopted
 * only if it is found in the active-slave list.
 */
1999 bond_ethdev_primary_set(struct bond_dev_private *internals,
2000 		uint16_t slave_port_id)
2004 	if (internals->active_slave_count < 1)
2005 		internals->current_primary_port = slave_port_id;
2007 	/* Search bonded device slave ports for new proposed primary port */
2008 	for (i = 0; i < internals->active_slave_count; i++) {
2009 		if (internals->active_slaves[i] == slave_port_id)
2010 			internals->current_primary_port = slave_port_id;
2015 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
/*
 * dev_ops start handler for the bonded device: validate that this really is
 * a bonded port with at least one slave, resolve and apply the bonded MAC,
 * re-apply promiscuous mode, assign mode-4 dedicated queue ids, reconfigure
 * and start every slave, kick off link-status polling if any slave lacks
 * LSC interrupts, then start the mode-specific machinery (802.3ad / TLB).
 */
2018 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2020 	struct bond_dev_private *internals;
2023 	/* slave eth dev will be started by bonded device */
2024 	if (check_for_bonded_ethdev(eth_dev)) {
2025 		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2026 				eth_dev->data->port_id);
2030 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2031 	eth_dev->data->dev_started = 1;
2033 	internals = eth_dev->data->dev_private;
2035 	if (internals->slave_count == 0) {
2036 		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
/* Without a user-defined MAC, the bond inherits the primary slave's
 * persisted MAC address.
 */
2040 	if (internals->user_defined_mac == 0) {
2041 		struct ether_addr *new_mac_addr = NULL;
2043 		for (i = 0; i < internals->slave_count; i++)
2044 			if (internals->slaves[i].port_id == internals->primary_port)
2045 				new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2047 		if (new_mac_addr == NULL)
2050 		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2051 			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2052 					eth_dev->data->port_id);
2057 	/* Update all slave devices MACs*/
2058 	if (mac_address_slaves_update(eth_dev) != 0)
2061 	/* If bonded device is configure in promiscuous mode then re-apply config */
2062 	if (internals->promiscuous_en)
2063 		bond_ethdev_promiscuous_enable(eth_dev);
/* Mode-4 dedicated queues use the ids just past the data-path queues */
2065 	if (internals->mode == BONDING_MODE_8023AD) {
2066 		if (internals->mode4.dedicated_queues.enabled == 1) {
2067 			internals->mode4.dedicated_queues.rx_qid =
2068 					eth_dev->data->nb_rx_queues;
2069 			internals->mode4.dedicated_queues.tx_qid =
2070 					eth_dev->data->nb_tx_queues;
2075 	/* Reconfigure each slave device if starting bonded device */
2076 	for (i = 0; i < internals->slave_count; i++) {
2077 		struct rte_eth_dev *slave_ethdev =
2078 				&(rte_eth_devices[internals->slaves[i].port_id]);
2079 		if (slave_configure(eth_dev, slave_ethdev) != 0) {
2081 					"bonded port (%d) failed to reconfigure slave device (%d)",
2082 					eth_dev->data->port_id,
2083 					internals->slaves[i].port_id);
2086 		/* We will need to poll for link status if any slave doesn't
2087 		 * support interrupts
2089 		if (internals->slaves[i].link_status_poll_enabled)
2090 			internals->link_status_polling_enabled = 1;
2092 	/* start polling if needed */
2093 	if (internals->link_status_polling_enabled) {
2095 			internals->link_status_polling_interval_ms * 1000,
2096 			bond_ethdev_slave_link_status_change_monitor,
2097 			(void *)&rte_eth_devices[internals->port_id]);
2100 	if (internals->user_defined_primary_port)
2101 		bond_ethdev_primary_set(internals, internals->primary_port);
2103 	if (internals->mode == BONDING_MODE_8023AD)
2104 		bond_mode_8023ad_start(eth_dev);
2106 	if (internals->mode == BONDING_MODE_TLB ||
2107 			internals->mode == BONDING_MODE_ALB)
2108 		bond_tlb_enable(internals);
2114 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2118 if (dev->data->rx_queues != NULL) {
2119 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2120 rte_free(dev->data->rx_queues[i]);
2121 dev->data->rx_queues[i] = NULL;
2123 dev->data->nb_rx_queues = 0;
2126 if (dev->data->tx_queues != NULL) {
2127 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2128 rte_free(dev->data->tx_queues[i]);
2129 dev->data->tx_queues[i] = NULL;
2131 dev->data->nb_tx_queues = 0;
/*
 * dev_ops stop handler for the bonded device: shut down the mode-specific
 * machinery (drain mode-4 control rings, disable TLB stats), clear all
 * slave/link bookkeeping and mark the bonded link down.
 */
2136 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2138 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2141 	if (internals->mode == BONDING_MODE_8023AD) {
2145 		bond_mode_8023ad_stop(eth_dev);
2147 		/* Discard all messages to/from mode 4 state machines */
2148 		for (i = 0; i < internals->active_slave_count; i++) {
2149 			port = &mode_8023ad_ports[internals->active_slaves[i]];
2151 			RTE_ASSERT(port->rx_ring != NULL);
2152 			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2153 				rte_pktmbuf_free(pkt);
2155 			RTE_ASSERT(port->tx_ring != NULL);
2156 			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2157 				rte_pktmbuf_free(pkt);
/* TLB/ALB: stop the tlb timer and zero the per-slave byte counters */
2161 	if (internals->mode == BONDING_MODE_TLB ||
2162 			internals->mode == BONDING_MODE_ALB) {
2163 		bond_tlb_disable(internals);
2164 		for (i = 0; i < internals->active_slave_count; i++)
2165 			tlb_last_obytets[internals->active_slaves[i]] = 0;
2168 	internals->active_slave_count = 0;
2169 	internals->link_status_polling_enabled = 0;
2170 	for (i = 0; i < internals->slave_count; i++)
2171 		internals->slaves[i].last_link_status = 0;
2173 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2174 	eth_dev->data->dev_started = 0;
/*
 * dev_ops close handler: stop and detach every slave from the bond, free
 * the bonded queue structures, and clear the VLAN filter bitmap.
 * NOTE(review): slaves that fail rte_eth_bond_slave_remove() are apparently
 * skipped and retried via the `skipped` index (its increment is handled in
 * lines not shown here) — confirm the loop cannot spin on a failing slave.
 */
2178 bond_ethdev_close(struct rte_eth_dev *dev)
2180 	struct bond_dev_private *internals = dev->data->dev_private;
2181 	uint8_t bond_port_id = internals->port_id;
2184 	RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
2185 	while (internals->slave_count != skipped) {
2186 		uint16_t port_id = internals->slaves[skipped].port_id;
2188 		rte_eth_dev_stop(port_id);
2190 		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2192 					"Failed to remove port %d from bonded device "
2193 					"%s\n", port_id, dev->device->name);
2197 	bond_ethdev_free_queues(dev);
2198 	rte_bitmap_reset(internals->vlan_filter_bmp);
2201 /* forward declaration */
2202 static int bond_ethdev_configure(struct rte_eth_dev *dev);
/*
 * dev_ops info handler: advertise the bonded device's capabilities.  Queue
 * limits are the minimum over all slaves (every slave must support the same
 * queue counts), reduced by one per direction when mode-4 dedicated queues
 * are enabled; offload capabilities come from the accumulated slave caps.
 */
2205 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2207 	struct bond_dev_private *internals = dev->data->dev_private;
/* Start from "unlimited" and narrow down to the most restrictive slave */
2209 	uint16_t max_nb_rx_queues = UINT16_MAX;
2210 	uint16_t max_nb_tx_queues = UINT16_MAX;
2212 	dev_info->max_mac_addrs = 1;
2214 	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2215 				  internals->candidate_max_rx_pktlen :
2216 				  ETHER_MAX_JUMBO_FRAME_LEN;
2218 	/* Max number of tx/rx queues that the bonded device can support is the
2219 	 * minimum values of the bonded slaves, as all slaves must be capable
2220 	 * of supporting the same number of tx/rx queues.
2222 	if (internals->slave_count > 0) {
2223 		struct rte_eth_dev_info slave_info;
2226 		for (idx = 0; idx < internals->slave_count; idx++) {
2227 			rte_eth_dev_info_get(internals->slaves[idx].port_id,
2230 			if (slave_info.max_rx_queues < max_nb_rx_queues)
2231 				max_nb_rx_queues = slave_info.max_rx_queues;
2233 			if (slave_info.max_tx_queues < max_nb_tx_queues)
2234 				max_nb_tx_queues = slave_info.max_tx_queues;
2238 	dev_info->max_rx_queues = max_nb_rx_queues;
2239 	dev_info->max_tx_queues = max_nb_tx_queues;
2242 	 * If dedicated hw queues enabled for link bonding device in LACP mode
2243 	 * then we need to reduce the maximum number of data path queues by 1.
2245 	if (internals->mode == BONDING_MODE_8023AD &&
2246 		internals->mode4.dedicated_queues.enabled == 1) {
2247 		dev_info->max_rx_queues--;
2248 		dev_info->max_tx_queues--;
2251 	dev_info->min_rx_bufsize = 0;
/* Offload/RSS capabilities are maintained as slaves are added/removed */
2253 	dev_info->rx_offload_capa = internals->rx_offload_capa;
2254 	dev_info->tx_offload_capa = internals->tx_offload_capa;
2255 	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2257 	dev_info->reta_size = internals->reta_size;
/*
 * dev_ops VLAN filter handler: record the VLAN id in the bond's bitmap
 * (so it can be replayed onto slaves added later) and apply the filter to
 * every current slave.  Slaves without VLAN filter support only produce a
 * warning.  Runs under internals->lock to serialize with slave add.
 */
2261 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2265 	struct bond_dev_private *internals = dev->data->dev_private;
2267 	/* don't do this while a slave is being added */
2268 	rte_spinlock_lock(&internals->lock);
/* Track the filter state so new slaves inherit it */
2271 		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2273 		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2275 	for (i = 0; i < internals->slave_count; i++) {
2276 		uint16_t port_id = internals->slaves[i].port_id;
2278 		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2280 			RTE_LOG(WARNING, PMD,
2281 			       "Setting VLAN filter on slave port %u not supported.\n",
2285 	rte_spinlock_unlock(&internals->lock);
2290 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2291 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2292 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2294 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2295 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2296 0, dev->data->numa_node);
2297 if (bd_rx_q == NULL)
2300 bd_rx_q->queue_id = rx_queue_id;
2301 bd_rx_q->dev_private = dev->data->dev_private;
2303 bd_rx_q->nb_rx_desc = nb_rx_desc;
2305 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2306 bd_rx_q->mb_pool = mb_pool;
2308 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2314 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2315 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2316 const struct rte_eth_txconf *tx_conf)
2318 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2319 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2320 0, dev->data->numa_node);
2322 if (bd_tx_q == NULL)
2325 bd_tx_q->queue_id = tx_queue_id;
2326 bd_tx_q->dev_private = dev->data->dev_private;
2328 bd_tx_q->nb_tx_desc = nb_tx_desc;
2329 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2331 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2337 bond_ethdev_rx_queue_release(void *queue)
2346 bond_ethdev_tx_queue_release(void *queue)
/*
 * Periodic EAL alarm callback that polls link status for slaves lacking LSC
 * interrupt support.  Takes internals->lock opportunistically: if the bond
 * is being reconfigured the whole check is skipped until the next period.
 * Re-arms itself as long as at least one polled slave exists (or the lock
 * could not be taken).
 */
2355 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2357 	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2358 	struct bond_dev_private *internals;
2360 	/* Default value for polling slave found is true as we don't want to
2361 	 * disable the polling thread if we cannot get the lock */
2362 	int i, polling_slave_found = 1;
2367 	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2368 	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
/* Stop silently (no re-arm) once the bond is stopped or polling disabled */
2370 	if (!bonded_ethdev->data->dev_started ||
2371 		!internals->link_status_polling_enabled)
2374 	/* If device is currently being configured then don't check slaves link
2375 	 * status, wait until next period */
2376 	if (rte_spinlock_trylock(&internals->lock)) {
2377 		if (internals->slave_count > 0)
2378 			polling_slave_found = 0;
2380 		for (i = 0; i < internals->slave_count; i++) {
2381 			if (!internals->slaves[i].link_status_poll_enabled)
2384 			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2385 			polling_slave_found = 1;
2387 			/* Update slave link status */
2388 			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2389 					internals->slaves[i].link_status_wait_to_complete);
2391 			/* if link status has changed since last checked then call lsc
/* Forward the change through the same LSC path interrupt-capable slaves use */
2393 			if (slave_ethdev->data->dev_link.link_status !=
2394 					internals->slaves[i].last_link_status) {
2395 				internals->slaves[i].last_link_status =
2396 						slave_ethdev->data->dev_link.link_status;
2398 				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2399 						RTE_ETH_EVENT_INTR_LSC,
2400 						&bonded_ethdev->data->port_id,
2404 		rte_spinlock_unlock(&internals->lock);
2407 	if (polling_slave_found)
2408 		/* Set alarm to continue monitoring link status of slave ethdev's */
2409 		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2410 				bond_ethdev_slave_link_status_change_monitor, cb_arg);
2414 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2416 void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2418 struct bond_dev_private *bond_ctx;
2419 struct rte_eth_link slave_link;
2423 bond_ctx = ethdev->data->dev_private;
2425 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2427 if (ethdev->data->dev_started == 0 ||
2428 bond_ctx->active_slave_count == 0) {
2429 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2433 ethdev->data->dev_link.link_status = ETH_LINK_UP;
2435 if (wait_to_complete)
2436 link_update = rte_eth_link_get;
2438 link_update = rte_eth_link_get_nowait;
2440 switch (bond_ctx->mode) {
2441 case BONDING_MODE_BROADCAST:
2443 * Setting link speed to UINT32_MAX to ensure we pick up the
2444 * value of the first active slave
2446 ethdev->data->dev_link.link_speed = UINT32_MAX;
2449 * link speed is minimum value of all the slaves link speed as
2450 * packet loss will occur on this slave if transmission at rates
2451 * greater than this are attempted
2453 for (idx = 1; idx < bond_ctx->active_slave_count; idx++) {
2454 link_update(bond_ctx->active_slaves[0], &slave_link);
2456 if (slave_link.link_speed <
2457 ethdev->data->dev_link.link_speed)
2458 ethdev->data->dev_link.link_speed =
2459 slave_link.link_speed;
2462 case BONDING_MODE_ACTIVE_BACKUP:
2463 /* Current primary slave */
2464 link_update(bond_ctx->current_primary_port, &slave_link);
2466 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2468 case BONDING_MODE_8023AD:
2469 ethdev->data->dev_link.link_autoneg =
2470 bond_ctx->mode4.slave_link.link_autoneg;
2471 ethdev->data->dev_link.link_duplex =
2472 bond_ctx->mode4.slave_link.link_duplex;
2473 /* fall through to update link speed */
2474 case BONDING_MODE_ROUND_ROBIN:
2475 case BONDING_MODE_BALANCE:
2476 case BONDING_MODE_TLB:
2477 case BONDING_MODE_ALB:
2480 * In theses mode the maximum theoretical link speed is the sum
2483 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2485 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2486 link_update(bond_ctx->active_slaves[idx], &slave_link);
2488 ethdev->data->dev_link.link_speed +=
2489 slave_link.link_speed;
2499 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2501 struct bond_dev_private *internals = dev->data->dev_private;
2502 struct rte_eth_stats slave_stats;
2505 for (i = 0; i < internals->slave_count; i++) {
2506 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2508 stats->ipackets += slave_stats.ipackets;
2509 stats->opackets += slave_stats.opackets;
2510 stats->ibytes += slave_stats.ibytes;
2511 stats->obytes += slave_stats.obytes;
2512 stats->imissed += slave_stats.imissed;
2513 stats->ierrors += slave_stats.ierrors;
2514 stats->oerrors += slave_stats.oerrors;
2515 stats->rx_nombuf += slave_stats.rx_nombuf;
2517 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2518 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2519 stats->q_opackets[j] += slave_stats.q_opackets[j];
2520 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2521 stats->q_obytes[j] += slave_stats.q_obytes[j];
2522 stats->q_errors[j] += slave_stats.q_errors[j];
2531 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2533 struct bond_dev_private *internals = dev->data->dev_private;
2536 for (i = 0; i < internals->slave_count; i++)
2537 rte_eth_stats_reset(internals->slaves[i].port_id);
2541 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2543 struct bond_dev_private *internals = eth_dev->data->dev_private;
2546 internals->promiscuous_en = 1;
2548 switch (internals->mode) {
2549 /* Promiscuous mode is propagated to all slaves */
2550 case BONDING_MODE_ROUND_ROBIN:
2551 case BONDING_MODE_BALANCE:
2552 case BONDING_MODE_BROADCAST:
2553 for (i = 0; i < internals->slave_count; i++)
2554 rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2556 /* In mode4 promiscus mode is managed when slave is added/removed */
2557 case BONDING_MODE_8023AD:
2559 /* Promiscuous mode is propagated only to primary slave */
2560 case BONDING_MODE_ACTIVE_BACKUP:
2561 case BONDING_MODE_TLB:
2562 case BONDING_MODE_ALB:
2564 rte_eth_promiscuous_enable(internals->current_primary_port);
2569 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2571 struct bond_dev_private *internals = dev->data->dev_private;
2574 internals->promiscuous_en = 0;
2576 switch (internals->mode) {
2577 /* Promiscuous mode is propagated to all slaves */
2578 case BONDING_MODE_ROUND_ROBIN:
2579 case BONDING_MODE_BALANCE:
2580 case BONDING_MODE_BROADCAST:
2581 for (i = 0; i < internals->slave_count; i++)
2582 rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2584 /* In mode4 promiscus mode is set managed when slave is added/removed */
2585 case BONDING_MODE_8023AD:
2587 /* Promiscuous mode is propagated only to primary slave */
2588 case BONDING_MODE_ACTIVE_BACKUP:
2589 case BONDING_MODE_TLB:
2590 case BONDING_MODE_ALB:
2592 rte_eth_promiscuous_disable(internals->current_primary_port);
2597 bond_ethdev_delayed_lsc_propagation(void *arg)
2602 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2603 RTE_ETH_EVENT_INTR_LSC, NULL);
/*
 * LSC event handler registered on each slave port.  'param' carries the
 * bonded device's port id.  On a slave link change this activates or
 * deactivates the slave, re-elects the primary port when necessary,
 * refreshes the bonded link properties and finally propagates the event
 * to the application — immediately, or via an EAL alarm when link up/down
 * propagation delays are configured.
 */
bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
		void *param, void *ret_param __rte_unused)
{
	struct rte_eth_dev *bonded_eth_dev;
	struct bond_dev_private *internals;
	struct rte_eth_link link;

	int i, valid_slave = 0;

	uint8_t lsc_flag = 0;

	/* Only handle LSC events that carry the owning bonded port id */
	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)

	bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];

	if (check_for_bonded_ethdev(bonded_eth_dev))

	internals = bonded_eth_dev->data->dev_private;

	/* If the device isn't started don't handle interrupts */
	if (!bonded_eth_dev->data->dev_started)

	/* verify that port_id is a valid slave of bonded port */
	for (i = 0; i < internals->slave_count; i++) {
		if (internals->slaves[i].port_id == port_id) {

	/* Search for port in active port list */
	active_pos = find_slave_by_id(internals->active_slaves,
			internals->active_slave_count, port_id);

	rte_eth_link_get_nowait(port_id, &link);
	if (link.link_status) {
		/* Slave link came up: nothing to do if already active */
		if (active_pos < internals->active_slave_count)

		/* if no active slave ports then set this port to be primary port */
		if (internals->active_slave_count < 1) {
			/* If first active slave, then change link status */
			bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
			internals->current_primary_port = port_id;

			mac_address_slaves_update(bonded_eth_dev);

		activate_slave(bonded_eth_dev, port_id);

		/* If user has defined the primary port then default to using it */
		if (internals->user_defined_primary_port &&
				internals->primary_port == port_id)
			bond_ethdev_primary_set(internals, port_id);

		/* Slave link went down: nothing to do if it was not active */
		if (active_pos == internals->active_slave_count)

		/* Remove from active slave list */
		deactivate_slave(bonded_eth_dev, port_id);

		if (internals->active_slave_count < 1)

		/* Update primary id, take first active slave from list or if none
		 * available set to -1 */
		if (port_id == internals->current_primary_port) {
			if (internals->active_slave_count > 0)
				bond_ethdev_primary_set(internals,
						internals->active_slaves[0]);
			internals->current_primary_port = internals->primary_port;

	/*
	 * Update bonded device link properties after any change to active
	 * slaves
	 */
	bond_ethdev_link_update(bonded_eth_dev, 0);

	/* Cancel any possible outstanding interrupts if delays are enabled */
	if (internals->link_up_delay_ms > 0 ||
			internals->link_down_delay_ms > 0)
		rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,

	if (bonded_eth_dev->data->dev_link.link_status) {
		/* Link up: defer propagation when an up-delay is configured */
		if (internals->link_up_delay_ms > 0)
			rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
					bond_ethdev_delayed_lsc_propagation,
					(void *)bonded_eth_dev);

		_rte_eth_dev_callback_process(bonded_eth_dev,
				RTE_ETH_EVENT_INTR_LSC,

	/* Link down: defer propagation when a down-delay is configured */
	if (internals->link_down_delay_ms > 0)
		rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
				bond_ethdev_delayed_lsc_propagation,
				(void *)bonded_eth_dev);

	_rte_eth_dev_callback_process(bonded_eth_dev,
			RTE_ETH_EVENT_INTR_LSC,
2728 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2729 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2733 int slave_reta_size;
2734 unsigned reta_count;
2735 struct bond_dev_private *internals = dev->data->dev_private;
2737 if (reta_size != internals->reta_size)
2740 /* Copy RETA table */
2741 reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2743 for (i = 0; i < reta_count; i++) {
2744 internals->reta_conf[i].mask = reta_conf[i].mask;
2745 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2746 if ((reta_conf[i].mask >> j) & 0x01)
2747 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2750 /* Fill rest of array */
2751 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2752 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2753 sizeof(internals->reta_conf[0]) * reta_count);
2755 /* Propagate RETA over slaves */
2756 for (i = 0; i < internals->slave_count; i++) {
2757 slave_reta_size = internals->slaves[i].reta_size;
2758 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2759 &internals->reta_conf[0], slave_reta_size);
2768 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2769 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2772 struct bond_dev_private *internals = dev->data->dev_private;
2774 if (reta_size != internals->reta_size)
2777 /* Copy RETA table */
2778 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2779 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2780 if ((reta_conf[i].mask >> j) & 0x01)
2781 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2787 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2788 struct rte_eth_rss_conf *rss_conf)
2791 struct bond_dev_private *internals = dev->data->dev_private;
2792 struct rte_eth_rss_conf bond_rss_conf;
2794 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2796 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2798 if (bond_rss_conf.rss_hf != 0)
2799 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2801 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2802 sizeof(internals->rss_key)) {
2803 if (bond_rss_conf.rss_key_len == 0)
2804 bond_rss_conf.rss_key_len = 40;
2805 internals->rss_key_len = bond_rss_conf.rss_key_len;
2806 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2807 internals->rss_key_len);
2810 for (i = 0; i < internals->slave_count; i++) {
2811 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2821 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2822 struct rte_eth_rss_conf *rss_conf)
2824 struct bond_dev_private *internals = dev->data->dev_private;
2826 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2827 rss_conf->rss_key_len = internals->rss_key_len;
2828 if (rss_conf->rss_key)
2829 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2835 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2837 struct rte_eth_dev *slave_eth_dev;
2838 struct bond_dev_private *internals = dev->data->dev_private;
2841 rte_spinlock_lock(&internals->lock);
2843 for (i = 0; i < internals->slave_count; i++) {
2844 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2845 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2846 rte_spinlock_unlock(&internals->lock);
2850 for (i = 0; i < internals->slave_count; i++) {
2851 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2853 rte_spinlock_unlock(&internals->lock);
2858 rte_spinlock_unlock(&internals->lock);
2863 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2865 if (mac_address_set(dev, addr))
2866 RTE_BOND_LOG(ERR, "Failed to update MAC address");
/* eth_dev_ops table installed on every bonded device created by this PMD
 * (see bond_alloc).  Rx/Tx burst handlers are set separately per mode. */
const struct eth_dev_ops default_dev_ops = {
	.dev_start = bond_ethdev_start,
	.dev_stop = bond_ethdev_stop,
	.dev_close = bond_ethdev_close,
	.dev_configure = bond_ethdev_configure,
	.dev_infos_get = bond_ethdev_info,
	.vlan_filter_set = bond_ethdev_vlan_filter_set,
	.rx_queue_setup = bond_ethdev_rx_queue_setup,
	.tx_queue_setup = bond_ethdev_tx_queue_setup,
	.rx_queue_release = bond_ethdev_rx_queue_release,
	.tx_queue_release = bond_ethdev_tx_queue_release,
	.link_update = bond_ethdev_link_update,
	.stats_get = bond_ethdev_stats_get,
	.stats_reset = bond_ethdev_stats_reset,
	.promiscuous_enable = bond_ethdev_promiscuous_enable,
	.promiscuous_disable = bond_ethdev_promiscuous_disable,
	.reta_update = bond_ethdev_rss_reta_update,
	.reta_query = bond_ethdev_rss_reta_query,
	.rss_hash_update = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get = bond_ethdev_rss_hash_conf_get,
	.mtu_set = bond_ethdev_mtu_set,
	.mac_addr_set = bond_ethdev_mac_address_set
/*
 * Allocate and initialise a bonded ethdev for the given vdev in the given
 * bonding mode.  Reserves the ethdev entry, allocates the MAC address
 * store and the VLAN-filter bitmap, and seeds every field of the private
 * data with its default.
 *
 * @return the new port id on success; on failure resources allocated so
 *         far are released on the error path at the bottom.
 */
bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
{
	const char *name = rte_vdev_device_name(dev);
	uint8_t socket_id = dev->device.numa_node;
	struct bond_dev_private *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	uint32_t vlan_filter_bmp_size;

	/* now do all data allocation - for eth_dev structure, dummy pci driver
	 * and internal (private) data
	 */

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");

	internals = eth_dev->data->dev_private;
	eth_dev->data->nb_rx_queues = (uint16_t)1;
	eth_dev->data->nb_tx_queues = (uint16_t)1;

	/* MAC store for the bonded port itself, on its NUMA socket */
	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
	if (eth_dev->data->mac_addrs == NULL) {
		RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");

	eth_dev->dev_ops = &default_dev_ops;
	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;

	rte_spinlock_init(&internals->lock);

	internals->port_id = eth_dev->data->port_id;
	/* Real mode is applied below through bond_ethdev_mode_set() */
	internals->mode = BONDING_MODE_INVALID;
	/* Out-of-range sentinel: no primary port selected yet */
	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
	internals->burst_xmit_hash = burst_xmit_l2_hash;
	internals->user_defined_mac = 0;

	internals->link_status_polling_enabled = 0;

	internals->link_status_polling_interval_ms =
		DEFAULT_POLLING_INTERVAL_10_MS;
	internals->link_down_delay_ms = 0;
	internals->link_up_delay_ms = 0;

	internals->slave_count = 0;
	internals->active_slave_count = 0;
	internals->rx_offload_capa = 0;
	internals->tx_offload_capa = 0;
	internals->candidate_max_rx_pktlen = 0;
	internals->max_rx_pktlen = 0;

	/* Initially allow to choose any offload type */
	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;

	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
	memset(internals->slaves, 0, sizeof(internals->slaves));

	/* Set mode 4 default configuration */
	bond_mode_8023ad_setup(eth_dev, NULL);
	if (bond_ethdev_mode_set(eth_dev, mode)) {
		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
				eth_dev->data->port_id, mode);

	/* VLAN-filter bitmap covering every valid VLAN id */
	vlan_filter_bmp_size =
		rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
		RTE_CACHE_LINE_SIZE);
	if (internals->vlan_filter_bmpmem == NULL) {
			"Failed to allocate vlan bitmap for bonded device %u\n",
			eth_dev->data->port_id);

	internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
	if (internals->vlan_filter_bmp == NULL) {
			"Failed to init vlan bitmap for bonded device %u\n",
			eth_dev->data->port_id);
		rte_free(internals->vlan_filter_bmpmem);

	return eth_dev->data->port_id;

	/* error path: unwind everything allocated above */
	rte_free(internals);

	if (eth_dev != NULL) {
		rte_free(eth_dev->data->mac_addrs);
		rte_eth_dev_release_port(eth_dev);
/*
 * vdev probe hook.  Parses the device kvargs that must be known at
 * creation time (bonding mode, socket id), creates the bonded ethdev via
 * bond_alloc(), and applies the optional 802.3ad aggregator selection
 * mode.  The remaining kvargs are stashed in internals->kvlist and are
 * resolved later by bond_ethdev_configure().
 */
bond_probe(struct rte_vdev_device *dev)
{
	struct bond_dev_private *internals;
	struct rte_kvargs *kvlist;
	uint8_t bonding_mode, socket_id/*, agg_mode*/;
	int arg_count, port_id;

	name = rte_vdev_device_name(dev);
	RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
		pmd_bond_init_valid_arguments);

	/* Parse link bonding mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
				&bond_ethdev_parse_slave_mode_kvarg,
				&bonding_mode) != 0) {
			RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",

		/* mode= given zero or multiple times is rejected */
		RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
				"device %s\n", name);

	/* Parse socket id to create bonding device on */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
	if (arg_count == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
			RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
					"bonded device %s\n", name);

	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
				"bonded device %s\n", name);

		/* No explicit socket id: default to the caller's socket */
		socket_id = rte_socket_id();

	dev->device.numa_node = socket_id;

	/* Create link bonding eth device */
	port_id = bond_alloc(dev, bonding_mode);
		RTE_LOG(ERR, EAL, "Failed to create socket %s in mode %u on "
				"socket %u.\n", name, bonding_mode, socket_id);

	internals = rte_eth_devices[port_id].data->dev_private;
	/* Keep the kvlist; bond_ethdev_configure() consumes the rest of it */
	internals->kvlist = kvlist;

	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
			"Failed to parse agg selection mode for bonded device %s\n",

		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,

		/* agg_mode not supplied: fall back to stable selection */
		rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);

	RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
			"socket %u.\n", name, port_id, bonding_mode, socket_id);

	/* error path: release the kvargs list before reporting failure */
	rte_kvargs_free(kvlist);
3092 bond_remove(struct rte_vdev_device *dev)
3094 struct rte_eth_dev *eth_dev;
3095 struct bond_dev_private *internals;
3101 name = rte_vdev_device_name(dev);
3102 RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
3104 /* now free all data allocation - for eth_dev structure,
3105 * dummy pci driver and internal (private) data
3108 /* find an ethdev entry */
3109 eth_dev = rte_eth_dev_allocated(name);
3110 if (eth_dev == NULL)
3113 RTE_ASSERT(eth_dev->device == &dev->device);
3115 internals = eth_dev->data->dev_private;
3116 if (internals->slave_count != 0)
3119 if (eth_dev->data->dev_started == 1) {
3120 bond_ethdev_stop(eth_dev);
3121 bond_ethdev_close(eth_dev);
3124 eth_dev->dev_ops = NULL;
3125 eth_dev->rx_pkt_burst = NULL;
3126 eth_dev->tx_pkt_burst = NULL;
3128 internals = eth_dev->data->dev_private;
3129 rte_bitmap_free(internals->vlan_filter_bmp);
3130 rte_free(internals->vlan_filter_bmpmem);
3131 rte_free(eth_dev->data->dev_private);
3132 rte_free(eth_dev->data->mac_addrs);
3134 rte_eth_dev_release_port(eth_dev);
3139 /* this part will resolve the slave portids after all the other pdev and vdev
3140 * have been allocated */
/*
 * dev_configure dev-op.  Runs after all pdev/vdev probing, so the slave
 * port ids named in the kvargs can finally be resolved.  Seeds the RSS
 * key/RETA defaults when RSS is requested, then walks the kvlist stashed
 * by bond_probe(): MAC address, xmit policy, agg mode, slave ports,
 * primary slave, LSC polling interval and link up/down delays.
 */
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	const char *name = dev->device->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	/* Port id recovered from the device's index in the global array */
	uint16_t port_id = dev - rte_eth_devices;

	/* Default 40-byte RSS key, used when the application enables RSS
	 * without supplying its own key */
	static const uint8_t default_rss_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
	0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
	0xBE, 0xAC, 0x01, 0xFA

	/* If RSS is enabled, fill table and key with default values */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
		memcpy(internals->rss_key, default_rss_key, 40);

		/* Spread RETA entries round-robin over the Rx queues */
		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;

	/* set the max_rx_pktlen */
	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

	/*
	 * if no kvlist, it means that this bonded device has been created
	 * through the bonding api.
	 */

	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
			RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",

		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
				"Failed to set mac address on bonded device %s\n",

	} else if (arg_count > 1) {
			"MAC address can be specified only once for bonded device %s\n",

	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
				"Invalid xmit policy specified for bonded device %s\n",

		/* Set balance mode transmit policy*/
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
				"Failed to set balance xmit policy on bonded device %s\n",

	} else if (arg_count > 1) {
			"Transmit policy can be specified only once for bonded device"

	/* Parse/set the 802.3ad aggregator selection mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				"Failed to parse agg selection mode for bonded device %s\n",

		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,

	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
				"Failed to parse slave ports for bonded device %s\n",

		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
					"Failed to add port %d as slave to bonded device %s\n",
					slave_ports.slaves[i], name);

		RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);

	/* Parse/set primary slave port id*/
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint16_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
				"Invalid primary slave port id specified for bonded device"

		/* Set the primary slave port */
		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
				"Failed to set primary slave port %d on bonded device %s\n",
				primary_slave_port_id, name);

	} else if (arg_count > 1) {
			"Primary slave can be specified only once for bonded device"

	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
				"Invalid lsc polling interval value specified for bonded"
				" device %s\n", name);

		/* Set the link status monitoring polling interval */
		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				"Failed to set lsc monitor polling interval (%u ms) on"
				" bonded device %s\n", lsc_poll_interval_ms, name);

	} else if (arg_count > 1) {
			"LSC polling interval can be specified only once for bonded"
			" device %s\n", name);

	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
				"Invalid link up propagation delay value specified for"
				" bonded device %s\n", name);

		/* Set the link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				"Failed to set link up propagation delay (%u ms) on bonded"
				" device %s\n", link_up_delay_ms, name);

	} else if (arg_count > 1) {
			"Link up propagation delay can be specified only once for"
			" bonded device %s\n", name);

	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
				"Invalid link down propagation delay value specified for"
				" bonded device %s\n", name);

		/* Set the link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				"Failed to set link down propagation delay (%u ms) on"
				" bonded device %s\n", link_down_delay_ms, name);

	} else if (arg_count > 1) {
			"Link down propagation delay can be specified only once for"
			" bonded device %s\n", name);
/* vdev driver hooks: probe creates the bonded port, remove tears it down */
struct rte_vdev_driver pmd_bond_drv = {
	.probe = bond_probe,
	.remove = bond_remove,

/* Registered as "net_bonding", with the legacy "eth_bond" alias */
RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

/* Devargs accepted by this PMD (consumed in bond_probe/bond_ethdev_configure) */
RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
	"xmit_policy=[l2 | l23 | l34] "
	"agg_mode=[count | stable | bandwidth] "
	"lsc_poll_period_ms=<int> "
	"down_delay=<int>");