/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <netinet/in.h>

#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
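/*
 * Usage sketch for get_vlan_offset() (illustrative only; "m" is a
 * hypothetical received mbuf, not part of this driver). The helper skips
 * up to two VLAN tags and rewrites *proto with the inner ethertype, so
 * callers can locate the L3 header uniformly:
 *
 *	struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
 *	uint16_t proto = eth->ether_type;
 *	size_t off = get_vlan_offset(eth, &proto);
 *
 *	if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
 *		struct ipv4_hdr *ip4 = (struct ipv4_hdr *)
 *				((char *)(eth + 1) + off);
 *		... parse the IPv4 header ...
 *	}
 */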
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        struct bond_dev_private *internals;

        uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;

        for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);

                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);

is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
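/*
 * Sketch of how the check above is typically driven (illustrative; "m"
 * is a hypothetical received mbuf). A frame counts as a slow-protocol
 * packet only when it is untagged, carries ETHER_TYPE_SLOW and its first
 * payload byte -- the slow-protocol subtype -- is LACP or marker:
 *
 *	struct ether_hdr *hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
 *	uint8_t subtype =
 *		((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
 *
 *	if (is_lacp_packets(hdr->ether_type, subtype, m))
 *		... hand the frame to the mode 4 state machine ...
 */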
/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },

static struct rte_flow_item flow_item_8023ad[] = {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .mask = &flow_item_eth_mask_type_8023ad,
                .type = RTE_FLOW_ITEM_TYPE_END,

const struct rte_flow_attr flow_attr_8023ad = {

bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {

        const struct rte_flow_action actions[] = {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                        .type = RTE_FLOW_ACTION_TYPE_END,

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);

bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;

        /* Verify that all bonding slaves support flow director */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)

bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,

        const struct rte_flow_action actions[] = {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                        .type = RTE_FLOW_ACTION_TYPE_END,

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
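/*
 * Application-side sketch (assumption: the experimental
 * rte_eth_bond_8023ad_dedicated_queues_enable() API from
 * rte_eth_bond_8023ad.h is available, and "bond_port" is a hypothetical
 * port id). Dedicated queues must be enabled before the bonded device is
 * configured and started, so that the extra slow-path Rx/Tx queue exists
 * for the flow created above to steer LACP frames into:
 *
 *	if (rte_eth_bond_8023ad_dedicated_queues_enable(bond_port) != 0)
 *		rte_exit(EXIT_FAILURE, "dedicated queues not supported\n");
 *	... then rte_eth_dev_configure()/rte_eth_dev_start() as usual ...
 */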
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0, idx = internals->active_slave;
                        i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
                idx = idx % slave_count;

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

        internals->active_slave = idx;
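/*
 * Note on the rotation above (descriptive, not new behaviour):
 * internals->active_slave persists between bursts, so consecutive calls
 * resume polling where the previous one stopped. With three active
 * slaves, e.g., successive bursts visit them in the orders 0,1,2 then
 * 1,2,0 then 2,0,1 (idx = (start + i) % slave_count), which spreads the
 * Rx work evenly instead of always draining slave 0 first.
 */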
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        /* 2-D array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        if (unlikely(nb_bufs == 0))

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =

        if (unlikely(dist_slave_count < 1))

        /*
         * Populate slave mbuf arrays with the packets to be sent on each slave,
         * selecting the output slave using a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint8_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];

        /* Send packet burst on each slave device */
        for (i = 0; i < dist_slave_count; i++) {
                if (slave_nb_bufs[i] == 0)

                slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        slave_tx_fail_count[i] = slave_nb_bufs[i] -
                        total_tx_fail_count += slave_tx_fail_count[i];

                        /* Shift bufs to beginning of array to allow reordering */
                        for (j = 0; j < slave_tx_fail_count[i]; j++) {
                                        slave_bufs[i][(slave_tx_count - 1) + j];

        /*
         * If there are tx burst failures we move packets to the end of bufs to
         * preserve the expected PMD behaviour of all failed packets being at
         * the end of the input mbuf array
         */
        if (unlikely(total_tx_fail_count > 0)) {
                int bufs_idx = nb_bufs - total_tx_fail_count - 1;

                for (i = 0; i < slave_count; i++) {
                        if (slave_tx_fail_count[i] > 0) {
                                for (j = 0; j < slave_tx_fail_count[i]; j++)
                                        bufs[bufs_idx++] = slave_bufs[i][j];

        return total_tx_count;
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;     /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;

        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {
                        /* If packet is not pure L2 and is known, skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow packet,
                         * the slave is not in collecting state, or the bonding
                         * interface is not in promiscuous mode and the packet's
                         * destination address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                        !collecting || (!promisc &&
                                        !is_multicast_ether_addr(&hdr->d_addr) &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                                        internals, slaves[idx], bufs[j]);
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *

                if (unlikely(++idx == slave_count))

        internals->active_slave = idx;
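/*
 * Mode 4 without dedicated queues relies on the application calling the
 * Rx/Tx burst functions often enough for the LACP state machines to run
 * (the PMD warns elsewhere that at least every 100 ms is required). A
 * minimal application loop might look like this (illustrative;
 * "bond_port" is a hypothetical port id):
 *
 *	struct rte_mbuf *pkts[32];
 *
 *	for (;;) {
 *		uint16_t n = rte_eth_rx_burst(bond_port, 0, pkts, 32);
 *		... process n packets, call rte_eth_tx_burst() as needed ...
 *	}
 */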
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

arp_op_name(uint16_t arp_op, char *buf)
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");

        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");

#define MaxIPv4String 16
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,

#define MAX_CLIENTS_NUMBER 128
uint8_t active_clients;
struct client_stats_t {
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;

struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Just update RX packets number for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                                client_stats[i].ipv4_tx_packets++;

        /* We have a new client. Insert it into the table and update its stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        RTE_LOG(DEBUG, PMD, \
                "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                eth_h->s_addr.addr_bytes[0], \
                eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], \
                eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], \
                eth_h->s_addr.addr_bytes[5], \
                eth_h->d_addr.addr_bytes[0], \
                eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], \
                eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], \
                eth_h->d_addr.addr_bytes[5], \

mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)

        /* Populate the slave mbuf arrays with the packets to be sent on each slave */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];

        /* Increment the current slave index so the next call to tx burst starts
         * on the next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                tx_fail_slave * sizeof(bufs[0]));

                        num_tx_total += num_tx_slave;
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);

burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
        struct ether_hdr *eth_hdr;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
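/*
 * Worked example for the L2 policy above (illustrative numbers):
 * ether_hash() XORs the three 16-bit words of the source and destination
 * MACs, and the extra "hash ^= hash >> 8" folds the high byte in before
 * the modulo so slave selection is not decided by the low byte alone.
 * E.g. for hash = 0x1234 and slave_count = 2:
 *
 *	hash ^= hash >> 8;	0x1234 ^ 0x0012 = 0x1226
 *	slaves[i] = 0x1226 % 2 = 0;
 */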
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
        struct ether_hdr *eth_hdr;

        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                hash = hash ^ l3hash;

                slaves[i] = hash % slave_count;

burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
        struct ether_hdr *eth_hdr;

        struct udp_hdr *udp_hdr;
        struct tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in a fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                                & IPV4_HDR_IHL_MASK) *

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct tcp_hdr *)
                                        l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                        udp_hdr = (struct udp_hdr *)
                                        l4hash = HASH_L4_PORTS(udp_hdr);

                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);

                hash = l3hash ^ l4hash;

                slaves[i] = hash % slave_count;
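/*
 * The hash callback used by the balance and mode 4 tx paths is selected
 * through the transmit policy. Application-side sketch (public bonding
 * API from rte_eth_bond.h; "bond_port" is a hypothetical port id):
 *
 *	rte_eth_bond_mode_set(bond_port, BONDING_MODE_BALANCE);
 *	rte_eth_bond_xmit_policy_set(bond_port, BALANCE_XMIT_POLICY_LAYER34);
 *
 * LAYER2 maps to burst_xmit_l2_hash(), LAYER23 to burst_xmit_l23_hash()
 * and LAYER34 to burst_xmit_l34_hash().
 */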
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;

bond_tlb_activate_slave(struct bond_dev_private *internals) {
        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;

bandwidth_cmp(const void *a, const void *b)
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;

bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;

        link_bwg = link_bwg * (update_idx + 1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000 * load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000 * load) % link_bwg;
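/*
 * Worked example for bandwidth_left() (illustrative numbers): for a 10G
 * slave, link_speed = 10000 (Mbps), so the raw byte rate is
 * 10000 * 1000000 / 8 = 1.25e9 bytes/s before the REORDER_PERIOD_MS
 * scaling. Since "load" is the number of bytes sent since the last
 * reorder, a slave that transmitted less is left with a larger
 * bwg_left_int/bwg_left_remainder pair, so bandwidth_cmp() sorts it
 * toward the front of tlb_slaves_order and it is preferred for the
 * next TLB bursts.
 */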
bond_ethdev_update_tlb_slave_cb(void *arg)
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];

        uint8_t update_stats = 0;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                        tlb_last_obytets[slave_id] = slave_stats.obytes;

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);

bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                        sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        for (i = 0; i < 3; i++)
                rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void *));

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)

        return num_tx_total;
bond_tlb_disable(struct bond_dev_private *internals)
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);

bond_tlb_enable(struct bond_dev_private *internals)
        bond_ethdev_update_tlb_slave_cb(internals);

bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;

                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");

                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;

                internals->mode6.ntt = 0;

        /* Send ARP packets on the proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);

        /* Send update packets on the proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];

                num_tx_total += num_send;

        return num_tx_total;
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        if (unlikely(nb_bufs == 0))

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        /*
         * Populate slave mbuf arrays with the packets to be sent on each slave,
         * selecting the output slave using a hash based on the xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint8_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        slave_tx_fail_count[i] = slave_nb_bufs[i] -
                        total_tx_fail_count += slave_tx_fail_count[i];

                        /* Shift bufs to beginning of array to allow reordering */
                        for (j = 0; j < slave_tx_fail_count[i]; j++) {
                                        slave_bufs[i][(slave_tx_count - 1) + j];

        /*
         * If there are tx burst failures we move packets to the end of bufs to
         * preserve the expected PMD behaviour of all failed packets being at
         * the end of the input mbuf array
         */
        if (unlikely(total_tx_fail_count > 0)) {
                int bufs_idx = nb_bufs - total_tx_fail_count - 1;

                for (i = 0; i < slave_count; i++) {
                        if (slave_tx_fail_count[i] > 0) {
                                for (j = 0; j < slave_tx_fail_count[i]; j++)
                                        bufs[bufs_idx++] = slave_bufs[i][j];

        return total_tx_count;
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        /* 2-D array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        if (unlikely(nb_bufs == 0))

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =

        if (likely(dist_slave_count > 1)) {
                /*
                 * Populate slave mbuf arrays with the packets to be sent
                 * on them, selecting the output slave using a hash based on
                 * the xmit policy
                 */
                internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                                bufs_slave_port_idxs);

                for (i = 0; i < nb_bufs; i++) {
                        /*
                         * Populate slave mbuf arrays with mbufs for that
                         * slave
                         */
                        uint8_t slave_idx = bufs_slave_port_idxs[i];

                        slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =

                /* Send packet burst on each slave device */
                for (i = 0; i < dist_slave_count; i++) {
                        if (slave_nb_bufs[i] == 0)

                        slave_tx_count = rte_eth_tx_burst(
                                        dist_slave_port_ids[i],
                                        bd_tx_q->queue_id, slave_bufs[i],

                        total_tx_count += slave_tx_count;

                        /* If tx burst fails move packets to end of bufs */
                        if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                                slave_tx_fail_count[i] = slave_nb_bufs[i] -
                                total_tx_fail_count += slave_tx_fail_count[i];

                                /*
                                 * Shift bufs to beginning of array to allow
                                 * reordering later
                                 */
                                for (j = 0; j < slave_tx_fail_count[i]; j++)
                                                [(slave_tx_count - 1)

                /*
                 * If there are tx burst failures we move packets to the end
                 * of bufs to preserve the expected PMD behaviour of all
                 * failed packets being at the end of the input mbuf array
                 */
                if (unlikely(total_tx_fail_count > 0)) {
                        int bufs_idx = nb_bufs - total_tx_fail_count - 1;

                        for (i = 0; i < slave_count; i++) {
                                if (slave_tx_fail_count[i] > 0) {
                                                j < slave_tx_fail_count[i];

        /* Check for LACP control packets and send if available */
        for (i = 0; i < slave_count; i++) {
                struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
                struct rte_mbuf *ctrl_pkt = NULL;

                if (likely(rte_ring_empty(port->tx_ring)))

                if (rte_ring_dequeue(port->tx_ring,
                                (void **)&ctrl_pkt) != -ENOENT) {
                        slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                        bd_tx_q->queue_id, &ctrl_pkt, 1);
                        /*
                         * Re-enqueue LAG control plane packets to the buffering
                         * ring if transmission fails so the packet isn't lost.
                         */
                        if (slave_tx_count != 1)
                                rte_ring_enqueue(port->tx_ring, ctrl_pkt);

        return total_tx_count;
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t tx_failed_flag = 0, num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t max_nb_of_tx_pkts = 0;

        int slave_tx_total[RTE_MAX_ETHPORTS];
        int i, most_successful_tx_slave = -1;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)

        /* Increment reference count on mbufs */
        for (i = 0; i < nb_pkts; i++)
                rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

        /* Transmit burst on each active slave */
        for (i = 0; i < num_of_slaves; i++) {
                slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,

                if (unlikely(slave_tx_total[i] < nb_pkts))

                /* Record the value and slave index for the slave which transmits
                 * the maximum number of packets */
                if (slave_tx_total[i] > max_nb_of_tx_pkts) {
                        max_nb_of_tx_pkts = slave_tx_total[i];
                        most_successful_tx_slave = i;

        /* If slaves fail to transmit packets from the burst, the calling
         * application is not expected to know about multiple references to
         * packets, so we must handle failures of all packets except those of
         * the most successful slave */
        if (unlikely(tx_failed_flag))
                for (i = 0; i < num_of_slaves; i++)
                        if (i != most_successful_tx_slave)
                                while (slave_tx_total[i] < nb_pkts)
                                        rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

        return max_nb_of_tx_pkts;
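/*
 * Sketch of the mbuf reference contract used by the broadcast path above
 * (descriptive; "m" is a hypothetical mbuf from the burst): each packet
 * is transmitted on every active slave, so its reference count is raised
 * first and every successful transmit consumes one reference:
 *
 *	rte_mbuf_refcnt_update(m, num_of_slaves - 1);
 *	... each slave that sends m later drops one reference ...
 *	... rte_pktmbuf_free(m) in the failure loop only drops the
 *	    reference held for a slave that failed to send it ...
 */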
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
        struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

        if (bond_ctx->mode == BONDING_MODE_8023AD) {
                /*
                 * If in mode 4 then save the link properties of the first
                 * slave; all subsequent slaves must match these properties
                 */
                struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

                bond_link->link_autoneg = slave_link->link_autoneg;
                bond_link->link_duplex = slave_link->link_duplex;
                bond_link->link_speed = slave_link->link_speed;

                /*
                 * In any other mode the link properties are set to default
                 * values of AUTONEG/DUPLEX
                 */
                ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
                ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;

link_properties_valid(struct rte_eth_dev *ethdev,
                struct rte_eth_link *slave_link)
        struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

        if (bond_ctx->mode == BONDING_MODE_8023AD) {
                struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

                if (bond_link->link_duplex != slave_link->link_duplex ||
                                bond_link->link_autoneg != slave_link->link_autoneg ||
                                bond_link->link_speed != slave_link->link_speed)

mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
        struct ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);

        if (dst_mac_addr == NULL) {
                RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);

        mac_addr = eth_dev->data->mac_addrs;

        ether_addr_copy(mac_addr, dst_mac_addr);

mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
        struct ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");

        if (new_mac_addr == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer MAC specified");

        mac_addr = eth_dev->data->mac_addrs;

        /* If the new MAC is different from the current MAC then update */
        if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
                memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        /* Update slave devices MAC addresses */
        if (internals->slave_count < 1)

        switch (internals->mode) {
        case BONDING_MODE_ROUND_ROBIN:
        case BONDING_MODE_BALANCE:
        case BONDING_MODE_BROADCAST:
                for (i = 0; i < internals->slave_count; i++) {
                        if (rte_eth_dev_default_mac_addr_set(
                                        internals->slaves[i].port_id,
                                        bonded_eth_dev->data->mac_addrs)) {
                                RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                internals->slaves[i].port_id);
        case BONDING_MODE_8023AD:
                bond_mode_8023ad_mac_address_update(bonded_eth_dev);
        case BONDING_MODE_ACTIVE_BACKUP:
        case BONDING_MODE_TLB:
        case BONDING_MODE_ALB:
                for (i = 0; i < internals->slave_count; i++) {
                        if (internals->slaves[i].port_id ==
                                        internals->current_primary_port) {
                                if (rte_eth_dev_default_mac_addr_set(
                                                internals->primary_port,
                                                bonded_eth_dev->data->mac_addrs)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->current_primary_port);
                                if (rte_eth_dev_default_mac_addr_set(
                                                internals->slaves[i].port_id,
                                                &internals->slaves[i].persisted_mac_addr)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->slaves[i].port_id);
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
        struct bond_dev_private *internals;

        internals = eth_dev->data->dev_private;

        case BONDING_MODE_ROUND_ROBIN:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
        case BONDING_MODE_ACTIVE_BACKUP:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
        case BONDING_MODE_BALANCE:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
        case BONDING_MODE_BROADCAST:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
        case BONDING_MODE_8023AD:
                if (bond_mode_8023ad_enable(eth_dev) != 0)

                if (internals->mode4.dedicated_queues.enabled == 0) {
                        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
                        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
                        RTE_LOG(WARNING, PMD,
                                "Using mode 4, it is necessary to do TX burst "
                                "and RX burst at least every 100ms.\n");
                        /* Use flow director's optimization */
                        eth_dev->rx_pkt_burst =
                                        bond_ethdev_rx_burst_8023ad_fast_queue;
                        eth_dev->tx_pkt_burst =
                                        bond_ethdev_tx_burst_8023ad_fast_queue;
        case BONDING_MODE_TLB:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
        case BONDING_MODE_ALB:
                if (bond_mode_alb_enable(eth_dev) != 0)

                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;

        internals->mode = mode;
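/*
 * Application-side sketch: the burst handlers installed above are picked
 * when the mode is set through the public API, e.g. ("bond_port" is a
 * hypothetical port id):
 *
 *	if (rte_eth_bond_mode_set(bond_port, BONDING_MODE_8023AD) != 0)
 *		rte_exit(EXIT_FAILURE, "cannot set bonding mode\n");
 */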
slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
                struct rte_eth_dev *slave_eth_dev)
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        bonded_eth_dev->data->dev_private;
        struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];

        if (port->slow_pool == NULL) {
                int slave_id = slave_eth_dev->data->port_id;

                snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
                port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
                        250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
                        slave_eth_dev->data->numa_node);

                /* Any memory allocation failure in initialization is critical because
                 * resources can't be freed, so reinitialization is impossible. */
                if (port->slow_pool == NULL) {
                        rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
                                slave_id, mem_name, rte_strerror(rte_errno));

        if (internals->mode4.dedicated_queues.enabled == 1) {
                /* Configure slow Rx queue */
                errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
                                internals->mode4.dedicated_queues.rx_qid, 128,
                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                                NULL, port->slow_pool);
                                "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
                                slave_eth_dev->data->port_id,
                                internals->mode4.dedicated_queues.rx_qid,

                errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
                                internals->mode4.dedicated_queues.tx_qid, 512,
                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                                "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
                                slave_eth_dev->data->port_id,
                                internals->mode4.dedicated_queues.tx_qid,
slave_configure(struct rte_eth_dev *bonded_eth_dev,
                struct rte_eth_dev *slave_eth_dev)
        struct bond_rx_queue *bd_rx_q;
        struct bond_tx_queue *bd_tx_q;
        uint16_t nb_rx_queues;
        uint16_t nb_tx_queues;

        struct rte_flow_error flow_error;

        struct bond_dev_private *internals = (struct bond_dev_private *)
                        bonded_eth_dev->data->dev_private;

        rte_eth_dev_stop(slave_eth_dev->data->port_id);

        /* Enable interrupts on slave device if supported */
        if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
                slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

        /* If RSS is enabled for bonding, try to enable it for slaves */
        if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
                if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
                                bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
                                bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;

                slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
                                bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
                slave_eth_dev->data->dev_conf.rxmode.mq_mode =
                                bonded_eth_dev->data->dev_conf.rxmode.mq_mode;

        if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
                        DEV_RX_OFFLOAD_VLAN_FILTER)
                slave_eth_dev->data->dev_conf.rxmode.offloads |=
                                DEV_RX_OFFLOAD_VLAN_FILTER;
                slave_eth_dev->data->dev_conf.rxmode.offloads &=
                                ~DEV_RX_OFFLOAD_VLAN_FILTER;

        nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
        nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;

        if (internals->mode == BONDING_MODE_8023AD) {
                if (internals->mode4.dedicated_queues.enabled == 1) {

        errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
                        bonded_eth_dev->data->mtu);
        if (errval != 0 && errval != -ENOTSUP) {
                RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
                                slave_eth_dev->data->port_id, errval);

        /* Configure device */
        errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
                        nb_rx_queues, nb_tx_queues,
                        &(slave_eth_dev->data->dev_conf));
                RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
                                slave_eth_dev->data->port_id, errval);

        /* Setup Rx Queues */
        for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
                bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

                errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
                                bd_rx_q->nb_rx_desc,
                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                                &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
                                "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
                                slave_eth_dev->data->port_id, q_id, errval);

        /* Setup Tx Queues */
        for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
                bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

                errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
                                bd_tx_q->nb_tx_desc,
                                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                                "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
                                slave_eth_dev->data->port_id, q_id, errval);

        if (internals->mode == BONDING_MODE_8023AD &&
                        internals->mode4.dedicated_queues.enabled == 1) {
                if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)

                if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
                                slave_eth_dev->data->port_id) != 0) {
                                "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
                                slave_eth_dev->data->port_id, q_id, errval);

                if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
                        rte_flow_destroy(slave_eth_dev->data->port_id,
                                        internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],

                bond_ethdev_8023ad_flow_set(bonded_eth_dev,
                                slave_eth_dev->data->port_id);

        errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
                RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
                                slave_eth_dev->data->port_id, errval);

        /* If RSS is enabled for bonding, synchronize RETA */
        if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
                struct bond_dev_private *internals;

                internals = bonded_eth_dev->data->dev_private;

                for (i = 0; i < internals->slave_count; i++) {
                        if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
                                errval = rte_eth_dev_rss_reta_update(
                                                slave_eth_dev->data->port_id,
                                                &internals->reta_conf[0],
                                                internals->slaves[i].reta_size);
                                        RTE_LOG(WARNING, PMD,
                                                "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
                                                " RSS Configuration for bonding may be inconsistent.\n",
                                                slave_eth_dev->data->port_id, errval);

        /* If lsc interrupt is set, check initial slave's link status */
        if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
                slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
                bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
                                RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
slave_remove(struct bond_dev_private *internals,
                struct rte_eth_dev *slave_eth_dev)
        for (i = 0; i < internals->slave_count; i++)
                if (internals->slaves[i].port_id ==
                                slave_eth_dev->data->port_id)

        if (i < (internals->slave_count - 1))
                memmove(&internals->slaves[i], &internals->slaves[i + 1],
                                sizeof(internals->slaves[0]) *
                                (internals->slave_count - i - 1));

        internals->slave_count--;

        /* force reconfiguration of slave interfaces */
        _rte_eth_dev_reset(slave_eth_dev);

bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

slave_add(struct bond_dev_private *internals,
                struct rte_eth_dev *slave_eth_dev)
        struct bond_slave_details *slave_details =
                        &internals->slaves[internals->slave_count];

        slave_details->port_id = slave_eth_dev->data->port_id;
        slave_details->last_link_status = 0;

        /* Mark slave devices that don't support interrupts so we can
         * compensate when we start the bond
         */
        if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
                slave_details->link_status_poll_enabled = 1;

        slave_details->link_status_wait_to_complete = 0;
        /* clear tlb_last_obytets when adding a port to the bonding device */
        memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
                        sizeof(struct ether_addr));

bond_ethdev_primary_set(struct bond_dev_private *internals,
                uint16_t slave_port_id)
        if (internals->active_slave_count < 1)
                internals->current_primary_port = slave_port_id;

        /* Search bonded device slave ports for new proposed primary port */
        for (i = 0; i < internals->active_slave_count; i++) {
                if (internals->active_slaves[i] == slave_port_id)
                        internals->current_primary_port = slave_port_id;
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);

bond_ethdev_start(struct rte_eth_dev *eth_dev)
        struct bond_dev_private *internals;

        /* slave eth dev will be started by bonded device */
        if (check_for_bonded_ethdev(eth_dev)) {
                RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
                                eth_dev->data->port_id);

        eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
        eth_dev->data->dev_started = 1;

        internals = eth_dev->data->dev_private;

        if (internals->slave_count == 0) {
                RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");

        if (internals->user_defined_mac == 0) {
                struct ether_addr *new_mac_addr = NULL;

                for (i = 0; i < internals->slave_count; i++)
                        if (internals->slaves[i].port_id == internals->primary_port)
                                new_mac_addr = &internals->slaves[i].persisted_mac_addr;

                if (new_mac_addr == NULL)

                if (mac_address_set(eth_dev, new_mac_addr) != 0) {
                        RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
                                        eth_dev->data->port_id);

        /* Update all slave devices MACs */
        if (mac_address_slaves_update(eth_dev) != 0)

        /* If bonded device is configured in promiscuous mode then re-apply config */
        if (internals->promiscuous_en)
                bond_ethdev_promiscuous_enable(eth_dev);

        if (internals->mode == BONDING_MODE_8023AD) {
                if (internals->mode4.dedicated_queues.enabled == 1) {
                        internals->mode4.dedicated_queues.rx_qid =
                                        eth_dev->data->nb_rx_queues;
                        internals->mode4.dedicated_queues.tx_qid =
                                        eth_dev->data->nb_tx_queues;

        /* Reconfigure each slave device if starting bonded device */
        for (i = 0; i < internals->slave_count; i++) {
                struct rte_eth_dev *slave_ethdev =
                                &(rte_eth_devices[internals->slaves[i].port_id]);
                if (slave_configure(eth_dev, slave_ethdev) != 0) {
                                "bonded port (%d) failed to reconfigure slave device (%d)",
                                eth_dev->data->port_id,
                                internals->slaves[i].port_id);
                /* We will need to poll for link status if any slave doesn't
                 * support interrupts
                 */
                if (internals->slaves[i].link_status_poll_enabled)
                        internals->link_status_polling_enabled = 1;

        /* start polling if needed */
        if (internals->link_status_polling_enabled) {
                        internals->link_status_polling_interval_ms * 1000,
                        bond_ethdev_slave_link_status_change_monitor,
                        (void *)&rte_eth_devices[internals->port_id]);

        if (internals->user_defined_primary_port)
                bond_ethdev_primary_set(internals, internals->primary_port);

        if (internals->mode == BONDING_MODE_8023AD)
                bond_mode_8023ad_start(eth_dev);

        if (internals->mode == BONDING_MODE_TLB ||
                        internals->mode == BONDING_MODE_ALB)
                bond_tlb_enable(internals);

        eth_dev->data->dev_started = 0;
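/*
 * End-to-end sketch of bringing up a bonded device with this PMD
 * (public API from rte_eth_bond.h; "slave0", "slave1" and "port_conf"
 * are hypothetical application-side names):
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *
 *	rte_eth_bond_slave_add(bond_port, slave0);
 *	rte_eth_bond_slave_add(bond_port, slave1);
 *
 *	rte_eth_dev_configure(bond_port, 1, 1, &port_conf);
 *	... rte_eth_rx_queue_setup()/rte_eth_tx_queue_setup() ...
 *	rte_eth_dev_start(bond_port);	triggers bond_ethdev_start() above
 */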
2125 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2129 if (dev->data->rx_queues != NULL) {
2130 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2131 rte_free(dev->data->rx_queues[i]);
2132 dev->data->rx_queues[i] = NULL;
2134 dev->data->nb_rx_queues = 0;
2137 if (dev->data->tx_queues != NULL) {
2138 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2139 rte_free(dev->data->tx_queues[i]);
2140 dev->data->tx_queues[i] = NULL;
2142 dev->data->nb_tx_queues = 0;
void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
    struct bond_dev_private *internals = eth_dev->data->dev_private;
    uint8_t i;

    if (internals->mode == BONDING_MODE_8023AD) {
        struct port *port;
        void *pkt = NULL;

        bond_mode_8023ad_stop(eth_dev);

        /* Discard all messages to/from mode 4 state machines */
        for (i = 0; i < internals->active_slave_count; i++) {
            port = &mode_8023ad_ports[internals->active_slaves[i]];

            RTE_ASSERT(port->rx_ring != NULL);
            while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
                rte_pktmbuf_free(pkt);

            RTE_ASSERT(port->tx_ring != NULL);
            while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
                rte_pktmbuf_free(pkt);
        }
    }

    if (internals->mode == BONDING_MODE_TLB ||
            internals->mode == BONDING_MODE_ALB) {
        bond_tlb_disable(internals);
        for (i = 0; i < internals->active_slave_count; i++)
            tlb_last_obytets[internals->active_slaves[i]] = 0;
    }

    internals->active_slave_count = 0;
    internals->link_status_polling_enabled = 0;
    for (i = 0; i < internals->slave_count; i++)
        internals->slaves[i].last_link_status = 0;

    eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
    eth_dev->data->dev_started = 0;
}
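
/* Close the bonded device: stop and detach every slave, then release the
 * bonded device's own queues and VLAN filter state. */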
void
bond_ethdev_close(struct rte_eth_dev *dev)
{
    struct bond_dev_private *internals = dev->data->dev_private;
    uint8_t bond_port_id = internals->port_id;
    int skipped = 0;

    RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
    while (internals->slave_count != skipped) {
        uint16_t port_id = internals->slaves[skipped].port_id;

        rte_eth_dev_stop(port_id);

        if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
            RTE_LOG(ERR, EAL,
                "Failed to remove port %d from bonded device "
                "%s\n", port_id, dev->device->name);

            /* Removal failed; skip this slave so the loop cannot spin
             * forever on it */
            skipped++;
        }
    }
    bond_ethdev_free_queues(dev);
    rte_bitmap_reset(internals->vlan_filter_bmp);
}
/* forward declaration */
static int bond_ethdev_configure(struct rte_eth_dev *dev);
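
/* dev_infos_get: the capabilities reported for the bonded device are derived
 * from its slaves, so the advertised limits can shrink as slaves are added. */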
static void
bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
    struct bond_dev_private *internals = dev->data->dev_private;

    uint16_t max_nb_rx_queues = UINT16_MAX;
    uint16_t max_nb_tx_queues = UINT16_MAX;

    dev_info->max_mac_addrs = 1;

    dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
            internals->candidate_max_rx_pktlen :
            ETHER_MAX_JUMBO_FRAME_LEN;

    /* Max number of tx/rx queues that the bonded device can support is the
     * minimum value across the bonded slaves, as all slaves must be capable
     * of supporting the same number of tx/rx queues.
     */
    if (internals->slave_count > 0) {
        struct rte_eth_dev_info slave_info;
        uint16_t idx;

        for (idx = 0; idx < internals->slave_count; idx++) {
            rte_eth_dev_info_get(internals->slaves[idx].port_id,
                    &slave_info);

            if (slave_info.max_rx_queues < max_nb_rx_queues)
                max_nb_rx_queues = slave_info.max_rx_queues;

            if (slave_info.max_tx_queues < max_nb_tx_queues)
                max_nb_tx_queues = slave_info.max_tx_queues;
        }
    }

    dev_info->max_rx_queues = max_nb_rx_queues;
    dev_info->max_tx_queues = max_nb_tx_queues;

    /* If dedicated hw queues are enabled for the link bonding device in
     * LACP mode then we need to reduce the maximum number of data path
     * queues by 1.
     */
    if (internals->mode == BONDING_MODE_8023AD &&
            internals->mode4.dedicated_queues.enabled == 1) {
        dev_info->max_rx_queues--;
        dev_info->max_tx_queues--;
    }

    dev_info->min_rx_bufsize = 0;

    dev_info->rx_offload_capa = internals->rx_offload_capa;
    dev_info->tx_offload_capa = internals->tx_offload_capa;
    dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
    dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
    dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;

    dev_info->reta_size = internals->reta_size;
}
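
/* Record the VLAN filter in the bonded device's bitmap (so it can be
 * replayed onto slaves added later) and propagate it to the current slaves. */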
static int
bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
    int res;
    uint16_t i;
    struct bond_dev_private *internals = dev->data->dev_private;

    /* don't do this while a slave is being added */
    rte_spinlock_lock(&internals->lock);

    if (on)
        rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
    else
        rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);

    for (i = 0; i < internals->slave_count; i++) {
        uint16_t port_id = internals->slaves[i].port_id;

        res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
        if (res == -ENOTSUP)
            RTE_LOG(WARNING, PMD,
                "Setting VLAN filter on slave port %u not supported.\n",
                port_id);
    }

    rte_spinlock_unlock(&internals->lock);
    return 0;
}
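
/* Queue setup only records the requested configuration; it is applied to
 * each slave when the slaves are (re)configured at device start. */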
static int
bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
        uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
        const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
{
    struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
            rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
                    0, dev->data->numa_node);
    if (bd_rx_q == NULL)
        return -1;

    bd_rx_q->queue_id = rx_queue_id;
    bd_rx_q->dev_private = dev->data->dev_private;

    bd_rx_q->nb_rx_desc = nb_rx_desc;

    memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
    bd_rx_q->mb_pool = mb_pool;

    dev->data->rx_queues[rx_queue_id] = bd_rx_q;

    return 0;
}
static int
bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
        uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
        const struct rte_eth_txconf *tx_conf)
{
    struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
            rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
                    0, dev->data->numa_node);

    if (bd_tx_q == NULL)
        return -1;

    bd_tx_q->queue_id = tx_queue_id;
    bd_tx_q->dev_private = dev->data->dev_private;

    bd_tx_q->nb_tx_desc = nb_tx_desc;
    memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));

    dev->data->tx_queues[tx_queue_id] = bd_tx_q;

    return 0;
}
static void
bond_ethdev_rx_queue_release(void *queue)
{
    if (queue == NULL)
        return;

    rte_free(queue);
}

static void
bond_ethdev_tx_queue_release(void *queue)
{
    if (queue == NULL)
        return;

    rte_free(queue);
}
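
/* Alarm callback that polls the link status of slaves whose drivers do not
 * support LSC interrupts, and re-arms itself while any such slave exists. */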
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
    struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
    struct bond_dev_private *internals;

    /* Default value for polling slave found is true as we don't want to
     * disable the polling thread if we cannot get the lock */
    int i, polling_slave_found = 1;

    if (cb_arg == NULL)
        return;

    bonded_ethdev = (struct rte_eth_dev *)cb_arg;
    internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;

    if (!bonded_ethdev->data->dev_started ||
            !internals->link_status_polling_enabled)
        return;

    /* If device is currently being configured then don't check slaves link
     * status, wait until next period */
    if (rte_spinlock_trylock(&internals->lock)) {
        if (internals->slave_count > 0)
            polling_slave_found = 0;

        for (i = 0; i < internals->slave_count; i++) {
            if (!internals->slaves[i].link_status_poll_enabled)
                continue;

            slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
            polling_slave_found = 1;

            /* Update slave link status */
            (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
                    internals->slaves[i].link_status_wait_to_complete);

            /* if link status has changed since last checked then call lsc
             * event callback */
            if (slave_ethdev->data->dev_link.link_status !=
                    internals->slaves[i].last_link_status) {
                internals->slaves[i].last_link_status =
                        slave_ethdev->data->dev_link.link_status;

                bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
                        RTE_ETH_EVENT_INTR_LSC,
                        &bonded_ethdev->data->port_id,
                        NULL);
            }
        }
        rte_spinlock_unlock(&internals->lock);
    }

    if (polling_slave_found)
        /* Set alarm to continue monitoring link status of slave ethdev's */
        rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
                bond_ethdev_slave_link_status_change_monitor, cb_arg);
}
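
/* Derive the bonded device's link properties from its active slaves: the
 * minimum slave speed in broadcast mode, the primary's speed in
 * active-backup, and the sum of slave speeds in the load-sharing modes. */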
static int
bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
{
    void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);

    struct bond_dev_private *bond_ctx;
    struct rte_eth_link slave_link;

    uint32_t idx;

    bond_ctx = ethdev->data->dev_private;

    ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

    if (ethdev->data->dev_started == 0 ||
            bond_ctx->active_slave_count == 0) {
        ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
        return 0;
    }

    ethdev->data->dev_link.link_status = ETH_LINK_UP;

    if (wait_to_complete)
        link_update = rte_eth_link_get;
    else
        link_update = rte_eth_link_get_nowait;

    switch (bond_ctx->mode) {
    case BONDING_MODE_BROADCAST:
        /* Setting link speed to UINT32_MAX to ensure we pick up the
         * value of the first active slave
         */
        ethdev->data->dev_link.link_speed = UINT32_MAX;

        /* link speed is the minimum of all the slaves' link speeds, as
         * packet loss will occur on a slave if transmission at rates
         * greater than its own is attempted
         */
        for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
            link_update(bond_ctx->active_slaves[idx], &slave_link);

            if (slave_link.link_speed <
                    ethdev->data->dev_link.link_speed)
                ethdev->data->dev_link.link_speed =
                        slave_link.link_speed;
        }
        break;
    case BONDING_MODE_ACTIVE_BACKUP:
        /* Current primary slave */
        link_update(bond_ctx->current_primary_port, &slave_link);

        ethdev->data->dev_link.link_speed = slave_link.link_speed;
        break;
    case BONDING_MODE_8023AD:
        ethdev->data->dev_link.link_autoneg =
                bond_ctx->mode4.slave_link.link_autoneg;
        ethdev->data->dev_link.link_duplex =
                bond_ctx->mode4.slave_link.link_duplex;
        /* fall through to update link speed */
    case BONDING_MODE_ROUND_ROBIN:
    case BONDING_MODE_BALANCE:
    case BONDING_MODE_TLB:
    case BONDING_MODE_ALB:
    default:
        /* In these modes the maximum theoretical link speed is the sum
         * of all the slaves
         */
        ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

        for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
            link_update(bond_ctx->active_slaves[idx], &slave_link);

            ethdev->data->dev_link.link_speed +=
                    slave_link.link_speed;
        }
    }

    return 0;
}
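
/* Statistics for the bonded device are the sums of the corresponding
 * counters across all slaves (the ethdev layer zeroes *stats beforehand). */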
static int
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
    struct bond_dev_private *internals = dev->data->dev_private;
    struct rte_eth_stats slave_stats;
    int i, j;

    for (i = 0; i < internals->slave_count; i++) {
        rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);

        stats->ipackets += slave_stats.ipackets;
        stats->opackets += slave_stats.opackets;
        stats->ibytes += slave_stats.ibytes;
        stats->obytes += slave_stats.obytes;
        stats->imissed += slave_stats.imissed;
        stats->ierrors += slave_stats.ierrors;
        stats->oerrors += slave_stats.oerrors;
        stats->rx_nombuf += slave_stats.rx_nombuf;

        for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
            stats->q_ipackets[j] += slave_stats.q_ipackets[j];
            stats->q_opackets[j] += slave_stats.q_opackets[j];
            stats->q_ibytes[j] += slave_stats.q_ibytes[j];
            stats->q_obytes[j] += slave_stats.q_obytes[j];
            stats->q_errors[j] += slave_stats.q_errors[j];
        }
    }

    return 0;
}
static void
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
    struct bond_dev_private *internals = dev->data->dev_private;
    int i;

    for (i = 0; i < internals->slave_count; i++)
        rte_eth_stats_reset(internals->slaves[i].port_id);
}
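
/* How promiscuous mode is propagated depends on the mode: to every slave
 * where any slave may receive traffic, only to the primary where a single
 * slave carries traffic, and not at all in 802.3ad, where it is handled
 * when a slave is added or removed. */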
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
    struct bond_dev_private *internals = eth_dev->data->dev_private;
    int i;

    internals->promiscuous_en = 1;

    switch (internals->mode) {
    /* Promiscuous mode is propagated to all slaves */
    case BONDING_MODE_ROUND_ROBIN:
    case BONDING_MODE_BALANCE:
    case BONDING_MODE_BROADCAST:
        for (i = 0; i < internals->slave_count; i++)
            rte_eth_promiscuous_enable(internals->slaves[i].port_id);
        break;
    /* In mode4 promiscuous mode is managed when a slave is added/removed */
    case BONDING_MODE_8023AD:
        break;
    /* Promiscuous mode is propagated only to the primary slave */
    case BONDING_MODE_ACTIVE_BACKUP:
    case BONDING_MODE_TLB:
    case BONDING_MODE_ALB:
    default:
        rte_eth_promiscuous_enable(internals->current_primary_port);
    }
}
static void
bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
    struct bond_dev_private *internals = dev->data->dev_private;
    int i;

    internals->promiscuous_en = 0;

    switch (internals->mode) {
    /* Promiscuous mode is propagated to all slaves */
    case BONDING_MODE_ROUND_ROBIN:
    case BONDING_MODE_BALANCE:
    case BONDING_MODE_BROADCAST:
        for (i = 0; i < internals->slave_count; i++)
            rte_eth_promiscuous_disable(internals->slaves[i].port_id);
        break;
    /* In mode4 promiscuous mode is managed when a slave is added/removed */
    case BONDING_MODE_8023AD:
        break;
    /* Promiscuous mode is propagated only to the primary slave */
    case BONDING_MODE_ACTIVE_BACKUP:
    case BONDING_MODE_TLB:
    case BONDING_MODE_ALB:
    default:
        rte_eth_promiscuous_disable(internals->current_primary_port);
    }
}
static void
bond_ethdev_delayed_lsc_propagation(void *arg)
{
    if (arg == NULL)
        return;

    _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
            RTE_ETH_EVENT_INTR_LSC, NULL);
}
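
/* Handle a link status change on a slave port: (de)activate the slave,
 * elect a new primary if needed, and propagate the LSC event to the
 * application, optionally delayed via an EAL alarm. */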
int
bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
        void *param, void *ret_param __rte_unused)
{
    struct rte_eth_dev *bonded_eth_dev;
    struct bond_dev_private *internals;
    struct rte_eth_link link;

    int i, valid_slave = 0;
    uint8_t active_pos;
    uint8_t lsc_flag = 0;

    if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
        return -1;

    bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];

    if (check_for_bonded_ethdev(bonded_eth_dev))
        return -1;

    internals = bonded_eth_dev->data->dev_private;

    /* If the device isn't started don't handle interrupts */
    if (!bonded_eth_dev->data->dev_started)
        return -1;

    /* verify that port_id is a valid slave of bonded port */
    for (i = 0; i < internals->slave_count; i++) {
        if (internals->slaves[i].port_id == port_id) {
            valid_slave = 1;
            break;
        }
    }

    if (!valid_slave)
        return -1;

    /* Search for port in active port list */
    active_pos = find_slave_by_id(internals->active_slaves,
            internals->active_slave_count, port_id);

    rte_eth_link_get_nowait(port_id, &link);
    if (link.link_status) {
        if (active_pos < internals->active_slave_count)
            return 0;

        /* if no active slave ports then set this port to be primary port */
        if (internals->active_slave_count < 1) {
            /* If first active slave, then change link status */
            bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
            internals->current_primary_port = port_id;
            lsc_flag = 1;

            mac_address_slaves_update(bonded_eth_dev);
        }

        activate_slave(bonded_eth_dev, port_id);

        /* If user has defined the primary port then default to using it */
        if (internals->user_defined_primary_port &&
                internals->primary_port == port_id)
            bond_ethdev_primary_set(internals, port_id);
    } else {
        if (active_pos == internals->active_slave_count)
            return 0;

        /* Remove from active slave list */
        deactivate_slave(bonded_eth_dev, port_id);

        if (internals->active_slave_count < 1)
            lsc_flag = 1;

        /* Update primary id, take first active slave from list or if none
         * available set to -1 */
        if (port_id == internals->current_primary_port) {
            if (internals->active_slave_count > 0)
                bond_ethdev_primary_set(internals,
                        internals->active_slaves[0]);
            else
                internals->current_primary_port = internals->primary_port;
        }
    }

    /* Update bonded device link properties after any change to active
     * slaves
     */
    bond_ethdev_link_update(bonded_eth_dev, 0);

    if (lsc_flag) {
        /* Cancel any possible outstanding interrupts if delays are enabled */
        if (internals->link_up_delay_ms > 0 ||
                internals->link_down_delay_ms > 0)
            rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
                    bonded_eth_dev);

        if (bonded_eth_dev->data->dev_link.link_status) {
            if (internals->link_up_delay_ms > 0)
                rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
                        bond_ethdev_delayed_lsc_propagation,
                        (void *)bonded_eth_dev);
            else
                _rte_eth_dev_callback_process(bonded_eth_dev,
                        RTE_ETH_EVENT_INTR_LSC,
                        NULL);
        } else {
            if (internals->link_down_delay_ms > 0)
                rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
                        bond_ethdev_delayed_lsc_propagation,
                        (void *)bonded_eth_dev);
            else
                _rte_eth_dev_callback_process(bonded_eth_dev,
                        RTE_ETH_EVENT_INTR_LSC,
                        NULL);
        }
    }
    return 0;
}
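
/* RSS ops: the bonded device keeps its own copy of the RETA table and hash
 * configuration and mirrors every update onto each slave. */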
static int
bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
        struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
    unsigned int i, j;
    int result = 0;
    int slave_reta_size;
    unsigned int reta_count;
    struct bond_dev_private *internals = dev->data->dev_private;

    if (reta_size != internals->reta_size)
        return -EINVAL;

    /* Copy RETA table */
    reta_count = reta_size / RTE_RETA_GROUP_SIZE;

    for (i = 0; i < reta_count; i++) {
        internals->reta_conf[i].mask = reta_conf[i].mask;
        for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
            if ((reta_conf[i].mask >> j) & 0x01)
                internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
    }

    /* Fill rest of array */
    for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
        memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
                sizeof(internals->reta_conf[0]) * reta_count);

    /* Propagate RETA over slaves */
    for (i = 0; i < internals->slave_count; i++) {
        slave_reta_size = internals->slaves[i].reta_size;
        result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
                &internals->reta_conf[0], slave_reta_size);
        if (result < 0)
            return result;
    }

    return 0;
}
static int
bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
        struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
    int i, j;
    struct bond_dev_private *internals = dev->data->dev_private;

    if (reta_size != internals->reta_size)
        return -EINVAL;

    /* Copy RETA table */
    for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
        for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
            if ((reta_conf[i].mask >> j) & 0x01)
                reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];

    return 0;
}
static int
bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
        struct rte_eth_rss_conf *rss_conf)
{
    int i, result = 0;
    struct bond_dev_private *internals = dev->data->dev_private;
    struct rte_eth_rss_conf bond_rss_conf;

    memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));

    bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;

    if (bond_rss_conf.rss_hf != 0)
        dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;

    if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
            sizeof(internals->rss_key)) {
        if (bond_rss_conf.rss_key_len == 0)
            bond_rss_conf.rss_key_len = 40;
        internals->rss_key_len = bond_rss_conf.rss_key_len;
        memcpy(internals->rss_key, bond_rss_conf.rss_key,
                internals->rss_key_len);
    }

    for (i = 0; i < internals->slave_count; i++) {
        result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
                &bond_rss_conf);
        if (result < 0)
            return result;
    }

    return 0;
}
static int
bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
        struct rte_eth_rss_conf *rss_conf)
{
    struct bond_dev_private *internals = dev->data->dev_private;

    rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
    rss_conf->rss_key_len = internals->rss_key_len;
    if (rss_conf->rss_key)
        memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);

    return 0;
}
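
/* MTU changes are accepted only if every slave supports the mtu_set op,
 * then applied to all slaves under the device lock. */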
static int
bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
    struct rte_eth_dev *slave_eth_dev;
    struct bond_dev_private *internals = dev->data->dev_private;
    int ret, i;

    rte_spinlock_lock(&internals->lock);

    for (i = 0; i < internals->slave_count; i++) {
        slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
        if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
            rte_spinlock_unlock(&internals->lock);
            return -ENOTSUP;
        }
    }
    for (i = 0; i < internals->slave_count; i++) {
        ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
        if (ret < 0) {
            rte_spinlock_unlock(&internals->lock);
            return ret;
        }
    }

    rte_spinlock_unlock(&internals->lock);
    return 0;
}
static void
bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
{
    if (mac_address_set(dev, addr)) {
        RTE_BOND_LOG(ERR, "Failed to update MAC address");
        return;
    }
}
const struct eth_dev_ops default_dev_ops = {
    .dev_start = bond_ethdev_start,
    .dev_stop = bond_ethdev_stop,
    .dev_close = bond_ethdev_close,
    .dev_configure = bond_ethdev_configure,
    .dev_infos_get = bond_ethdev_info,
    .vlan_filter_set = bond_ethdev_vlan_filter_set,
    .rx_queue_setup = bond_ethdev_rx_queue_setup,
    .tx_queue_setup = bond_ethdev_tx_queue_setup,
    .rx_queue_release = bond_ethdev_rx_queue_release,
    .tx_queue_release = bond_ethdev_tx_queue_release,
    .link_update = bond_ethdev_link_update,
    .stats_get = bond_ethdev_stats_get,
    .stats_reset = bond_ethdev_stats_reset,
    .promiscuous_enable = bond_ethdev_promiscuous_enable,
    .promiscuous_disable = bond_ethdev_promiscuous_disable,
    .reta_update = bond_ethdev_rss_reta_update,
    .reta_query = bond_ethdev_rss_reta_query,
    .rss_hash_update = bond_ethdev_rss_hash_update,
    .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get,
    .mtu_set = bond_ethdev_mtu_set,
    .mac_addr_set = bond_ethdev_mac_address_set
};
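
/* Allocate and initialise an ethdev entry, private data and VLAN filter
 * bitmap for a new bonded device; returns the new port id, or -1 on error. */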
static int
bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
{
    const char *name = rte_vdev_device_name(dev);
    uint8_t socket_id = dev->device.numa_node;
    struct bond_dev_private *internals = NULL;
    struct rte_eth_dev *eth_dev = NULL;
    uint32_t vlan_filter_bmp_size;

    /* now do all data allocation - for eth_dev structure, dummy pci driver
     * and internal (private) data
     */

    /* reserve an ethdev entry */
    eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
    if (eth_dev == NULL) {
        RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
        goto err;
    }

    internals = eth_dev->data->dev_private;
    eth_dev->data->nb_rx_queues = (uint16_t)1;
    eth_dev->data->nb_tx_queues = (uint16_t)1;

    eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
            socket_id);
    if (eth_dev->data->mac_addrs == NULL) {
        RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
        goto err;
    }

    eth_dev->dev_ops = &default_dev_ops;
    eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;

    rte_spinlock_init(&internals->lock);

    internals->port_id = eth_dev->data->port_id;
    internals->mode = BONDING_MODE_INVALID;
    internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
    internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
    internals->burst_xmit_hash = burst_xmit_l2_hash;
    internals->user_defined_mac = 0;

    internals->link_status_polling_enabled = 0;

    internals->link_status_polling_interval_ms =
            DEFAULT_POLLING_INTERVAL_10_MS;
    internals->link_down_delay_ms = 0;
    internals->link_up_delay_ms = 0;

    internals->slave_count = 0;
    internals->active_slave_count = 0;
    internals->rx_offload_capa = 0;
    internals->tx_offload_capa = 0;
    internals->rx_queue_offload_capa = 0;
    internals->tx_queue_offload_capa = 0;
    internals->candidate_max_rx_pktlen = 0;
    internals->max_rx_pktlen = 0;

    /* Initially allow to choose any offload type */
    internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;

    memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
    memset(internals->slaves, 0, sizeof(internals->slaves));

    /* Set mode 4 default configuration */
    bond_mode_8023ad_setup(eth_dev, NULL);
    if (bond_ethdev_mode_set(eth_dev, mode)) {
        RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d\n",
                eth_dev->data->port_id, mode);
        goto err;
    }

    vlan_filter_bmp_size =
            rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
    internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
            RTE_CACHE_LINE_SIZE);
    if (internals->vlan_filter_bmpmem == NULL) {
        RTE_BOND_LOG(ERR,
                "Failed to allocate vlan bitmap for bonded device %u\n",
                eth_dev->data->port_id);
        goto err;
    }

    internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
            internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
    if (internals->vlan_filter_bmp == NULL) {
        RTE_BOND_LOG(ERR,
                "Failed to init vlan bitmap for bonded device %u\n",
                eth_dev->data->port_id);
        rte_free(internals->vlan_filter_bmpmem);
        goto err;
    }

    return eth_dev->data->port_id;

err:
    rte_free(internals);
    if (eth_dev != NULL) {
        rte_free(eth_dev->data->mac_addrs);
        rte_eth_dev_release_port(eth_dev);
    }
    return -1;
}
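
/* vdev probe entry point: parse the mode and socket id kvargs, allocate the
 * bonded device, and stash the remaining kvargs for dev_configure. */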
static int
bond_probe(struct rte_vdev_device *dev)
{
    const char *name;
    struct bond_dev_private *internals;
    struct rte_kvargs *kvlist;
    uint8_t bonding_mode, socket_id;
    int arg_count, port_id;
    uint8_t agg_mode;

    if (!dev)
        return -EINVAL;

    name = rte_vdev_device_name(dev);
    RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);

    kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
            pmd_bond_init_valid_arguments);
    if (kvlist == NULL)
        return -1;

    /* Parse link bonding mode */
    if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
        if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
                &bond_ethdev_parse_slave_mode_kvarg,
                &bonding_mode) != 0) {
            RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
                    name);
            goto parse_error;
        }
    } else {
        RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
                "device %s\n", name);
        goto parse_error;
    }

    /* Parse socket id to create bonding device on */
    arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
    if (arg_count == 1) {
        if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
                &bond_ethdev_parse_socket_id_kvarg, &socket_id)
                != 0) {
            RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
                    "bonded device %s\n", name);
            goto parse_error;
        }
    } else if (arg_count > 1) {
        RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
                "bonded device %s\n", name);
        goto parse_error;
    } else {
        socket_id = rte_socket_id();
    }

    dev->device.numa_node = socket_id;

    /* Create link bonding eth device */
    port_id = bond_alloc(dev, bonding_mode);
    if (port_id < 0) {
        RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
                "socket %u.\n", name, bonding_mode, socket_id);
        goto parse_error;
    }
    internals = rte_eth_devices[port_id].data->dev_private;
    internals->kvlist = kvlist;

    if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
        if (rte_kvargs_process(kvlist,
                PMD_BOND_AGG_MODE_KVARG,
                &bond_ethdev_parse_slave_agg_mode_kvarg,
                &agg_mode) != 0) {
            RTE_LOG(ERR, EAL,
                    "Failed to parse agg selection mode for bonded device %s\n",
                    name);
            goto parse_error;
        }
        if (internals->mode == BONDING_MODE_8023AD)
            rte_eth_bond_8023ad_agg_selection_set(port_id,
                    agg_mode);
    } else {
        rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
    }

    RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
            "socket %u.\n", name, port_id, bonding_mode, socket_id);
    return 0;

parse_error:
    rte_kvargs_free(kvlist);
    return -1;
}
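
/* vdev remove entry point: refuses to remove a device that still has slaves
 * attached; otherwise stops, closes and frees the bonded device. */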
static int
bond_remove(struct rte_vdev_device *dev)
{
    struct rte_eth_dev *eth_dev;
    struct bond_dev_private *internals;
    const char *name;

    if (!dev)
        return -EINVAL;

    name = rte_vdev_device_name(dev);
    RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);

    /* now free all data allocation - for eth_dev structure,
     * dummy pci driver and internal (private) data
     */

    /* find an ethdev entry */
    eth_dev = rte_eth_dev_allocated(name);
    if (eth_dev == NULL)
        return -ENODEV;

    RTE_ASSERT(eth_dev->device == &dev->device);

    internals = eth_dev->data->dev_private;
    if (internals->slave_count != 0)
        return -EBUSY;

    if (eth_dev->data->dev_started == 1) {
        bond_ethdev_stop(eth_dev);
        bond_ethdev_close(eth_dev);
    }

    eth_dev->dev_ops = NULL;
    eth_dev->rx_pkt_burst = NULL;
    eth_dev->tx_pkt_burst = NULL;

    /* Try to release the mempool used in mode 6. If the bonded device
     * is not in mode 6, freeing the NULL pointer is harmless.
     */
    rte_mempool_free(internals->mode6.mempool);
    rte_bitmap_free(internals->vlan_filter_bmp);
    rte_free(internals->vlan_filter_bmpmem);
    rte_free(eth_dev->data->dev_private);
    rte_free(eth_dev->data->mac_addrs);

    rte_eth_dev_release_port(eth_dev);

    return 0;
}
/* this part will resolve the slave portids after all the other pdev and vdev
 * have been allocated */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
    const char *name = dev->device->name;
    struct bond_dev_private *internals = dev->data->dev_private;
    struct rte_kvargs *kvlist = internals->kvlist;
    int arg_count;
    uint16_t port_id = dev - rte_eth_devices;
    uint8_t agg_mode;

    static const uint8_t default_rss_key[40] = {
        0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
        0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
        0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
        0xBE, 0xAC, 0x01, 0xFA
    };

    unsigned int i, j;

    /* If RSS is enabled, fill table and key with default values */
    if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
        dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
        dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
        memcpy(internals->rss_key, default_rss_key, 40);

        for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
            internals->reta_conf[i].mask = ~0LL;
            for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
                internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
        }
    }

    /* set the max_rx_pktlen */
    internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

    /* if no kvlist, it means that this bonded device has been created
     * through the bonding api.
     */
    if (!kvlist)
        return 0;

    /* Parse MAC address for bonded device */
    arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
    if (arg_count == 1) {
        struct ether_addr bond_mac;

        if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
                &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
            RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
                    name);
            return -1;
        }

        /* Set MAC address */
        if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
            RTE_LOG(ERR, EAL,
                    "Failed to set mac address on bonded device %s\n",
                    name);
            return -1;
        }
    } else if (arg_count > 1) {
        RTE_LOG(ERR, EAL,
                "MAC address can be specified only once for bonded device %s\n",
                name);
        return -1;
    }

    /* Parse/set balance mode transmit policy */
    arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
    if (arg_count == 1) {
        uint8_t xmit_policy;

        if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
                &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
                        0) {
            RTE_LOG(INFO, EAL,
                    "Invalid xmit policy specified for bonded device %s\n",
                    name);
            return -1;
        }

        /* Set balance mode transmit policy */
        if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
            RTE_LOG(ERR, EAL,
                    "Failed to set balance xmit policy on bonded device %s\n",
                    name);
            return -1;
        }
    } else if (arg_count > 1) {
        RTE_LOG(ERR, EAL,
                "Transmit policy can be specified only once for bonded device"
                " %s\n", name);
        return -1;
    }

    if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
        if (rte_kvargs_process(kvlist,
                PMD_BOND_AGG_MODE_KVARG,
                &bond_ethdev_parse_slave_agg_mode_kvarg,
                &agg_mode) != 0) {
            RTE_LOG(ERR, EAL,
                    "Failed to parse agg selection mode for bonded device %s\n",
                    name);
        }
        if (internals->mode == BONDING_MODE_8023AD)
            rte_eth_bond_8023ad_agg_selection_set(port_id,
                    agg_mode);
    }
    /* Parse/add slave ports to bonded device */
    if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
        struct bond_ethdev_slave_ports slave_ports;
        unsigned int i;

        memset(&slave_ports, 0, sizeof(slave_ports));

        if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
                &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
            RTE_LOG(ERR, EAL,
                    "Failed to parse slave ports for bonded device %s\n",
                    name);
            return -1;
        }

        for (i = 0; i < slave_ports.slave_count; i++) {
            if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
                RTE_LOG(ERR, EAL,
                        "Failed to add port %d as slave to bonded device %s\n",
                        slave_ports.slaves[i], name);
            }
        }
    } else {
        RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
        return -1;
    }

    /* Parse/set primary slave port id */
    arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
    if (arg_count == 1) {
        uint16_t primary_slave_port_id;

        if (rte_kvargs_process(kvlist,
                PMD_BOND_PRIMARY_SLAVE_KVARG,
                &bond_ethdev_parse_primary_slave_port_id_kvarg,
                &primary_slave_port_id) < 0) {
            RTE_LOG(INFO, EAL,
                    "Invalid primary slave port id specified for bonded device"
                    " %s\n", name);
            return -1;
        }

        /* Set primary slave port id */
        if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
                != 0) {
            RTE_LOG(ERR, EAL,
                    "Failed to set primary slave port %d on bonded device %s\n",
                    primary_slave_port_id, name);
            return -1;
        }
    } else if (arg_count > 1) {
        RTE_LOG(INFO, EAL,
                "Primary slave can be specified only once for bonded device"
                " %s\n", name);
        return -1;
    }

    /* Parse link status monitor polling interval */
    arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
    if (arg_count == 1) {
        uint32_t lsc_poll_interval_ms;

        if (rte_kvargs_process(kvlist,
                PMD_BOND_LSC_POLL_PERIOD_KVARG,
                &bond_ethdev_parse_time_ms_kvarg,
                &lsc_poll_interval_ms) < 0) {
            RTE_LOG(INFO, EAL,
                    "Invalid lsc polling interval value specified for bonded"
                    " device %s\n", name);
            return -1;
        }

        if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
                != 0) {
            RTE_LOG(ERR, EAL,
                    "Failed to set lsc monitor polling interval (%u ms) on"
                    " bonded device %s\n", lsc_poll_interval_ms, name);
            return -1;
        }
    } else if (arg_count > 1) {
        RTE_LOG(INFO, EAL,
                "LSC polling interval can be specified only once for bonded"
                " device %s\n", name);
        return -1;
    }

    /* Parse link up interrupt propagation delay */
    arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
    if (arg_count == 1) {
        uint32_t link_up_delay_ms;

        if (rte_kvargs_process(kvlist,
                PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
                &bond_ethdev_parse_time_ms_kvarg,
                &link_up_delay_ms) < 0) {
            RTE_LOG(INFO, EAL,
                    "Invalid link up propagation delay value specified for"
                    " bonded device %s\n", name);
            return -1;
        }

        /* Set link up propagation delay */
        if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
                != 0) {
            RTE_LOG(ERR, EAL,
                    "Failed to set link up propagation delay (%u ms) on bonded"
                    " device %s\n", link_up_delay_ms, name);
            return -1;
        }
    } else if (arg_count > 1) {
        RTE_LOG(INFO, EAL,
                "Link up propagation delay can be specified only once for"
                " bonded device %s\n", name);
        return -1;
    }

    /* Parse link down interrupt propagation delay */
    arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
    if (arg_count == 1) {
        uint32_t link_down_delay_ms;

        if (rte_kvargs_process(kvlist,
                PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
                &bond_ethdev_parse_time_ms_kvarg,
                &link_down_delay_ms) < 0) {
            RTE_LOG(INFO, EAL,
                    "Invalid link down propagation delay value specified for"
                    " bonded device %s\n", name);
            return -1;
        }

        /* Set link down propagation delay */
        if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
                != 0) {
            RTE_LOG(ERR, EAL,
                    "Failed to set link down propagation delay (%u ms) on"
                    " bonded device %s\n", link_down_delay_ms, name);
            return -1;
        }
    } else if (arg_count > 1) {
        RTE_LOG(INFO, EAL,
                "Link down propagation delay can be specified only once for"
                " bonded device %s\n", name);
        return -1;
    }

    return 0;
}
static struct rte_vdev_driver pmd_bond_drv = {
    .probe = bond_probe,
    .remove = bond_remove,
};

RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
    "slave=<ifc> "
    "primary=<ifc> "
    "mode=[0-6] "
    "xmit_policy=[l2 | l23 | l34] "
    "agg_mode=[count | stable | bandwidth] "
    "socket_id=<int> "
    "mac=<mac addr> "
    "lsc_poll_period_ms=<int> "
    "up_delay=<int> "
    "down_delay=<int>");