/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"
#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}
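/*
 * Illustrative note added by the editor (not part of the upstream source):
 * for a single-tagged frame the layout handled above is
 * [ether_hdr][vlan_hdr][payload], so get_vlan_offset() returns
 * sizeof(struct vlan_hdr) and rewrites *proto to the inner EtherType;
 * for a QinQ (double-tagged) frame it returns twice that. Callers add the
 * returned offset to (eth_hdr + 1) before casting to an L3 header.
 */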
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_slave = 0;
	uint16_t num_rx_total = 0;

	int i;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
		if (num_rx_slave) {
			num_rx_total += num_rx_slave;
			nb_pkts -= num_rx_slave;
		}
	}

	return num_rx_total;
}
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	return !vlan_tci && (ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}
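/*
 * Illustrative note added by the editor (not in the upstream driver):
 * is_lacp_packets() treats untagged IEEE 802.3ad "slow protocol" frames
 * (EtherType ETHER_TYPE_SLOW) carrying either a LACPDU or a Marker PDU as
 * control traffic; the mode 4 RX path below uses it to divert such frames
 * to the 802.3ad state machine instead of delivering them to the
 * application.
 */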
/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.ingress = 1,
};
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint8_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities doesn't allow to allocate additional queues",
			__func__, slave_port);
		return -1;
	}
	return 0;
}
int
bond_8023ad_slow_pkt_hw_filter_supported(uint8_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info;
	uint8_t idx;

	/* Verify that all slaves in the bonding device support flow director */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint8_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}
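/*
 * Illustrative note added by the editor (not from the original source):
 * with dedicated queues enabled, each slave gets an rte_flow rule that
 * matches EtherType ETHER_TYPE_SLOW and steers those frames to the extra
 * RX queue reserved for LACP, so the regular data-path queues never see
 * 802.3ad control traffic.
 */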
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint8_t slaves[RTE_MAX_ETHPORTS];
	uint8_t slave_count;

	uint8_t i, idx;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0, idx = internals->active_slave;
			i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
		idx = idx % slave_count;

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);
	}

	internals->active_slave = idx;

	return num_rx_total;
}
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, op_slave_idx;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];

	/* Total amount of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
	/* Slow packets placed in each slave */

	if (unlikely(nb_pkts == 0))
		return 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
			num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];
		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate slaves mbuf with the packets which are to be sent */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i],
					distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave.
			 * Use only slaves that are currently distributing.
			 */
			uint8_t slave_offset =
					distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
					bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		num_tx_total += num_tx_slave;
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
					num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_addr bond_mac;

	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint8_t slaves[RTE_MAX_ETHPORTS];
	uint8_t slave_count, idx;

	uint8_t collecting;  /* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t i, j, k;
	uint8_t subtype;

	rte_eth_macaddr_get(internals->port_id, &bond_mac);
	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
				COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {

			/* If packet is not pure L2 and is known, skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove packet from array if it is slow packet or slave is not
			 * in collecting state or bonding interface is not in promiscuous
			 * mode and packet address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) ||
				!collecting || (!promisc &&
					!is_multicast_ether_addr(&hdr->d_addr) &&
					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
						internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	internals->active_slave = idx;
	return num_rx_total;
}
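/*
 * Illustrative note added by the editor (not from the original source):
 * the loop above starts from internals->active_slave and wraps around, so
 * successive calls poll the slaves in round-robin order; LACP/marker
 * frames and frames received from slaves that are not in COLLECTING state
 * are filtered out of the burst before it is returned to the application.
 */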
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;
#endif

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}
	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;
	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}
#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint8_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
static void
update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX packets number for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update the stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)	\
	RTE_LOG(DEBUG, PMD, \
		"%s " \
		"port:%d " \
		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"DstIP:%s " \
		"%s " \
		"%d\n", \
		info, \
		port, \
		eth_h->s_addr.addr_bytes[0], \
		eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], \
		eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], \
		eth_h->s_addr.addr_bytes[5], \
		src_ip, \
		eth_h->d_addr.addr_bytes[0], \
		eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], \
		eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], \
		eth_h->d_addr.addr_bytes[5], \
		dst_ip, \
		arp_op, \
		burstnumber)
#endif
static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	snprintf(buf, 16, "%s", info);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate the slave mbuf arrays with the packets to be sent on each slave */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}
static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}
static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}
uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint32_t hash = ether_hash(eth_hdr);

	return (hash ^= hash >> 8) % slave_count;
}
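/*
 * Illustrative example added by the editor (not from the original source):
 * for a frame with src 00:11:22:33:44:55 and dst 66:77:88:99:aa:bb the L2
 * policy XORs the three 16-bit words of each address, folds the result
 * with "hash ^= hash >> 8" and takes it modulo slave_count, so all traffic
 * between a given MAC pair always maps to the same slave.
 */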
uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
	uint32_t hash, l3hash = 0;

	hash = ether_hash(eth_hdr);

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv4_hash(ipv4_hdr);

	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);
	}

	hash = hash ^ l3hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}
uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

	struct udp_hdr *udp_hdr = NULL;
	struct tcp_hdr *tcp_hdr = NULL;
	uint32_t hash, l3hash = 0, l4hash = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		size_t ip_hdr_offset;

		l3hash = ipv4_hash(ipv4_hdr);

		/* there is no L4 header in fragmented packet */
		if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
			ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

			if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}
	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);

		if (ipv6_hdr->proto == IPPROTO_TCP) {
			tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(tcp_hdr);
		} else if (ipv6_hdr->proto == IPPROTO_UDP) {
			udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(udp_hdr);
		}
	}

	hash = l3hash ^ l4hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}
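/*
 * Illustrative note added by the editor (not from the original source):
 * the L3+L4 policy combines ipv4_hash()/ipv6_hash() with HASH_L4_PORTS()
 * of the TCP/UDP header, e.g. a flow 10.0.0.1:1234 -> 10.0.0.2:80 hashes
 * src_addr ^ dst_addr together with src_port ^ dst_port; fragmented IPv4
 * packets fall back to the L3 hash only, since their L4 header may be
 * absent.
 */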
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}

static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}
static void
bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
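/*
 * Illustrative note added by the editor (not from the original source):
 * bandwidth_left() converts the negotiated link speed (Mbit/s) into bytes
 * per reorder window and stores the unused share as an integer part plus
 * remainder, so bandwidth_cmp() can order slaves by how much spare
 * transmit capacity they have for the TLB datapath.
 */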
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint8_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint8_t i, slave_id;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint8_t i, j;

	uint8_t num_of_slaves = internals->active_slave_count;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
			sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint8_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}
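/*
 * Illustrative note added by the editor (not from the original source):
 * in mode 6 (ALB) ARP frames are steered per-client by
 * bond_mode_alb_arp_xmit(), generated ARP updates go out on their assigned
 * slaves, and everything else is handed to the TLB transmit path above;
 * packets that could not be transmitted are moved to the tail of bufs so
 * the caller can retry them.
 */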
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

	int i, op_slave_id;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate slaves mbuf with the packets which are to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		/* Select output slave using hash based on xmit policy */
		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

		/* Populate slave mbuf arrays with mbufs for that slave */
		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += slave_tx_fail_count;
				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						slave_tx_fail_count * sizeof(bufs[0]));
			}

			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
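/*
 * Illustrative note added by the editor (not from the original source):
 * in balance mode the configured xmit_hash callback (xmit_l2_hash,
 * xmit_l23_hash or xmit_l34_hash) picks the output slave per packet, which
 * keeps every flow on a single slave and therefore preserves per-flow
 * packet ordering.
 */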
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, j, op_slave_idx;
	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

	/* Allocate additional packets in case 8023AD mode. */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

	/* Total amount of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
	/* Slow packets placed in each slave */
	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];

		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
				NULL);
		slave_nb_pkts[i] = slave_slow_nb_pkts[i];

		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
			slave_bufs[i][j] = slow_pkts[j];

		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate slaves mbuf with the packets which are to be sent on it */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave. Use only
			 * slaves that are currently distributing. */
			uint8_t slave_offset = distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		/* If tx burst fails drop slow packets */
		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}
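/*
 * Illustrative note added by the editor (not from the original source):
 * unlike the "fast queue" variant, this mode 4 TX path first drains each
 * slave's tx_ring of LACP/marker frames and sends them ahead of data
 * packets; slow frames that cannot be transmitted are dropped rather than
 * returned to the caller, so they are excluded from num_tx_total.
 */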
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t tx_failed_flag = 0, num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}
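/*
 * Illustrative note added by the editor (not from the original source):
 * broadcast mode bumps each mbuf's reference count once per extra slave
 * before transmitting, so a packet is only really freed after every slave
 * has finished with it; on partial failure only the count of the most
 * successful slave is reported as sent and the remaining references are
 * released here.
 */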
void
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		/**
		 * If in mode 4 then save the link properties of the first
		 * slave, all subsequent slaves must match these properties
		 */
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		bond_link->link_autoneg = slave_link->link_autoneg;
		bond_link->link_duplex = slave_link->link_duplex;
		bond_link->link_speed = slave_link->link_speed;
	} else {
		/**
		 * In any other mode the link properties are set to default
		 * values of AUTONEG/DUPLEX
		 */
		ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
		ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
	}
}

int
link_properties_valid(struct rte_eth_dev *ethdev,
		struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		if (bond_link->link_duplex != slave_link->link_duplex ||
			bond_link->link_autoneg != slave_link->link_autoneg ||
			bond_link->link_speed != slave_link->link_speed)
			return -1;
	}

	return 0;
}
int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
	struct ether_addr *mac_addr;
	if (eth_dev == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
		return -1;
	}
	if (dst_mac_addr == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
		return -1;
	}
	mac_addr = eth_dev->data->mac_addrs;
	ether_addr_copy(mac_addr, dst_mac_addr);
	return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
	struct ether_addr *mac_addr;
	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}
	if (new_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}
	mac_addr = eth_dev->data->mac_addrs;
	/* If new MAC is different to current MAC then update */
	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
	return 0;
}
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	int i;

	/* Update slave devices MAC addresses */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++) {
			if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (mac_address_set(&rte_eth_devices[internals->primary_port],
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					return -1;
				}
			} else {
				if (mac_address_set(
						&rte_eth_devices[internals->slaves[i].port_id],
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
					return -1;
				}
			}
		}
	}

	return 0;
}
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_8023AD:
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		if (internals->mode4.dedicated_queues.enabled == 0) {
			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
			RTE_LOG(WARNING, PMD,
				"Using mode 4, it is necessary to do TX burst "
				"and RX burst at least every 100ms.\n");
		} else {
			/* Use flow director's optimization */
			eth_dev->rx_pkt_burst =
					bond_ethdev_rx_burst_8023ad_fast_queue;
			eth_dev->tx_pkt_burst =
					bond_ethdev_tx_burst_8023ad_fast_queue;
		}
		break;
	case BONDING_MODE_TLB:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_ALB:
		if (bond_mode_alb_enable(eth_dev) != 0)
			return -1;

		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}
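/*
 * Illustrative note added by the editor (not from the original source):
 * bond_ethdev_mode_set() is where the bonded port swaps its
 * rx_pkt_burst/tx_pkt_burst handlers, so changing the mode immediately
 * changes the datapath functions invoked by rte_eth_rx_burst() and
 * rte_eth_tx_burst() on the bonded port id.
 */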
static int
slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	int errval = 0;
	struct bond_dev_private *internals = (struct bond_dev_private *)
		bonded_eth_dev->data->dev_private;
	struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];

	if (port->slow_pool == NULL) {
		char mem_name[256];
		int slave_id = slave_eth_dev->data->port_id;

		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
				slave_id);
		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
					250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
					slave_eth_dev->data->numa_node);

		/* Any memory allocation failure in initialization is critical because
		 * resources cannot be freed, so reinitialization is impossible. */
		if (port->slow_pool == NULL) {
			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
				slave_id, mem_name, rte_strerror(rte_errno));
		}
	}

	if (internals->mode4.dedicated_queues.enabled == 1) {
		/* Configure slow Rx queue */
		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.rx_qid, 128,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL, port->slow_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.rx_qid,
					errval);
			return errval;
		}

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid, 512,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid,
				errval);
			return errval;
		}
	}
	return 0;
}
static int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;
	uint16_t nb_rx_queues;
	uint16_t nb_tx_queues;

	int errval;
	uint16_t q_id;
	struct rte_flow_error flow_error;

	struct bond_dev_private *internals = (struct bond_dev_private *)
		bonded_eth_dev->data->dev_private;

	/* Stop slave */
	rte_eth_dev_stop(slave_eth_dev->data->port_id);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* If RSS is enabled for bonding, try to enable it for slaves  */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
		if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
				!= 0) {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
		} else {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
		}

		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
	}

	slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
			bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;

	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			nb_rx_queues++;
			nb_tx_queues++;
		}
	}

	/* Configure device */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			nb_rx_queues, nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Setup Rx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
				!= 0)
			return errval;

		if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
				slave_eth_dev->data->port_id) != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id, q_id, errval);
			return -1;
		}

		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
			rte_flow_destroy(slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
					&flow_error);

		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
				slave_eth_dev->data->port_id);
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	/* If RSS is enabled for bonding, synchronize RETA */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		int i;
		struct bond_dev_private *internals;

		internals = bonded_eth_dev->data->dev_private;

		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
				errval = rte_eth_dev_rss_reta_update(
						slave_eth_dev->data->port_id,
						&internals->reta_conf[0],
						internals->slaves[i].reta_size);
				if (errval != 0) {
					RTE_LOG(WARNING, PMD,
						"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
						" RSS Configuration for bonding may be inconsistent.\n",
						slave_eth_dev->data->port_id, errval);
				}
				break;
			}
		}
	}

	/* If lsc interrupt is set, check initial slave's link status */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
			NULL);
	}

	return 0;
}
static void
slave_remove(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	uint8_t i;

	for (i = 0; i < internals->slave_count; i++)
		if (internals->slaves[i].port_id ==
				slave_eth_dev->data->port_id)
			break;

	if (i < (internals->slave_count - 1))
		memmove(&internals->slaves[i], &internals->slaves[i + 1],
				sizeof(internals->slaves[0]) *
				(internals->slave_count - i - 1));

	internals->slave_count--;

	/* force reconfiguration of slave interfaces */
	_rte_eth_dev_reset(slave_eth_dev);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

static void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* Mark slave devices that don't support interrupts so we can
	 * compensate when we start the bond
	 */
	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
		slave_details->link_status_poll_enabled = 1;
	}

	slave_details->link_status_wait_to_complete = 0;
	/* clean tlb_last_obytes when adding port for bonding device */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct ether_addr));
}
void
bond_ethdev_primary_set(struct bond_dev_private *internals,
		uint8_t slave_port_id)
{
	int i;

	if (internals->active_slave_count < 1)
		internals->current_primary_port = slave_port_id;
	else
		/* Search bonded device slave ports for new proposed primary port */
		for (i = 0; i < internals->active_slave_count; i++) {
			if (internals->active_slaves[i] == slave_port_id)
				internals->current_primary_port = slave_port_id;
		}
}
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);

static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (check_for_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		return -1;
	}

	if (internals->user_defined_mac == 0) {
		struct ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			return -1;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			return -1;
		}
	}

	/* Update all slave devices MACs */
	if (mac_address_slaves_update(eth_dev) != 0)
		return -1;

	/* If bonded device is configured in promiscuous mode then re-apply config */
	if (internals->promiscuous_en)
		bond_ethdev_promiscuous_enable(eth_dev);

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			internals->mode4.dedicated_queues.rx_qid =
					eth_dev->data->nb_rx_queues;
			internals->mode4.dedicated_queues.tx_qid =
					eth_dev->data->nb_tx_queues;
		}
	}

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		struct rte_eth_dev *slave_ethdev =
				&(rte_eth_devices[internals->slaves[i].port_id]);
		if (slave_configure(eth_dev, slave_ethdev) != 0) {
			RTE_BOND_LOG(ERR,
				"bonded port (%d) failed to reconfigure slave device (%d)",
				eth_dev->data->port_id,
				internals->slaves[i].port_id);
			return -1;
		}
		/* We will need to poll for link status if any slave doesn't
		 * support interrupts
		 */
		if (internals->slaves[i].link_status_poll_enabled)
			internals->link_status_polling_enabled = 1;
	}

	/* start polling if needed */
	if (internals->link_status_polling_enabled) {
		rte_eal_alarm_set(
			internals->link_status_polling_interval_ms * 1000,
			bond_ethdev_slave_link_status_change_monitor,
			(void *)&rte_eth_devices[internals->port_id]);
	}

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB)
		bond_tlb_enable(internals);

	return 0;
}
static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
	uint8_t i;

	if (dev->data->rx_queues != NULL) {
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			rte_free(dev->data->rx_queues[i]);
			dev->data->rx_queues[i] = NULL;
		}
		dev->data->nb_rx_queues = 0;
	}

	if (dev->data->tx_queues != NULL) {
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			rte_free(dev->data->tx_queues[i]);
			dev->data->tx_queues[i] = NULL;
		}
		dev->data->nb_tx_queues = 0;
	}
}
void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	uint8_t i;

	if (internals->mode == BONDING_MODE_8023AD) {
		struct port *port;
		void *pkt = NULL;

		bond_mode_8023ad_stop(eth_dev);

		/* Discard all messages to/from mode 4 state machines */
		for (i = 0; i < internals->active_slave_count; i++) {
			port = &mode_8023ad_ports[internals->active_slaves[i]];

			RTE_ASSERT(port->rx_ring != NULL);
			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);

			RTE_ASSERT(port->tx_ring != NULL);
			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);
		}
	}

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB) {
		bond_tlb_disable(internals);
		for (i = 0; i < internals->active_slave_count; i++)
			tlb_last_obytets[internals->active_slaves[i]] = 0;
	}

	internals->active_slave_count = 0;
	internals->link_status_polling_enabled = 0;
	for (i = 0; i < internals->slave_count; i++)
		internals->slaves[i].last_link_status = 0;

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 0;
}
2053 bond_ethdev_close(struct rte_eth_dev *dev)
2055 struct bond_dev_private *internals = dev->data->dev_private;
2056 uint8_t bond_port_id = internals->port_id;
2059 RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
2060 while (internals->slave_count != skipped) {
2061 uint8_t port_id = internals->slaves[skipped].port_id;
2063 rte_eth_dev_stop(port_id);
2065 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2066 RTE_LOG(ERR, EAL,
2067 "Failed to remove port %d from bonded device "
2068 "%s\n", port_id, dev->device->name);
2069 skipped++;
2070 }
2071 }
2072 bond_ethdev_free_queues(dev);
2073 rte_bitmap_reset(internals->vlan_filter_bmp);
2074 }
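/*
 * Usage sketch (illustrative only; the bonded port id is an assumed example
 * value): an application tears a bonded port down with the generic ethdev
 * calls, which land in bond_ethdev_stop() and bond_ethdev_close() above.
 *
 *     uint8_t bond_port = 2;          // assumed example bonded port id
 *     rte_eth_dev_stop(bond_port);    // stops slaves, mode 4 state machines
 *     rte_eth_dev_close(bond_port);   // removes slaves, frees queues
 */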
2076 /* forward declaration */
2077 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2080 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2082 struct bond_dev_private *internals = dev->data->dev_private;
2084 uint16_t max_nb_rx_queues = UINT16_MAX;
2085 uint16_t max_nb_tx_queues = UINT16_MAX;
2087 dev_info->max_mac_addrs = 1;
2089 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2090 internals->candidate_max_rx_pktlen :
2091 ETHER_MAX_JUMBO_FRAME_LEN;
2093 /* Max number of tx/rx queues that the bonded device can support is the
2094 * minimum value across all of the bonded slaves, as every slave must be
2095 * capable of supporting the same number of tx/rx queues.
2096 */
2097 if (internals->slave_count > 0) {
2098 struct rte_eth_dev_info slave_info;
2101 for (idx = 0; idx < internals->slave_count; idx++) {
2102 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2105 if (slave_info.max_rx_queues < max_nb_rx_queues)
2106 max_nb_rx_queues = slave_info.max_rx_queues;
2108 if (slave_info.max_tx_queues < max_nb_tx_queues)
2109 max_nb_tx_queues = slave_info.max_tx_queues;
2110 }
2111 }
2113 dev_info->max_rx_queues = max_nb_rx_queues;
2114 dev_info->max_tx_queues = max_nb_tx_queues;
2116 /*
2117 * If dedicated hw queues are enabled for the bonded device in LACP mode,
2118 * then we need to reduce the maximum number of data path queues by 1.
2119 */
2120 if (internals->mode == BONDING_MODE_8023AD &&
2121 internals->mode4.dedicated_queues.enabled == 1) {
2122 dev_info->max_rx_queues--;
2123 dev_info->max_tx_queues--;
2124 }
2126 dev_info->min_rx_bufsize = 0;
2128 dev_info->rx_offload_capa = internals->rx_offload_capa;
2129 dev_info->tx_offload_capa = internals->tx_offload_capa;
2130 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2132 dev_info->reta_size = internals->reta_size;
2133 }
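/*
 * Worked example (illustrative figures only): with two slaves reporting
 * max_rx_queues of 16 and 8, the bonded device reports max_rx_queues = 8;
 * with 802.3AD dedicated queues enabled that drops to 7, since one rx/tx
 * queue pair is reserved for LACP control traffic.
 */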
2136 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2140 struct bond_dev_private *internals = dev->data->dev_private;
2142 /* don't do this while a slave is being added */
2143 rte_spinlock_lock(&internals->lock);
2146 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2148 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2150 for (i = 0; i < internals->slave_count; i++) {
2151 uint8_t port_id = internals->slaves[i].port_id;
2153 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2154 if (res == ENOTSUP)
2155 RTE_LOG(WARNING, PMD,
2156 "Setting VLAN filter on slave port %u not supported.\n",
2157 port_id);
2158 }
2160 rte_spinlock_unlock(&internals->lock);
2161 return 0;
2162 }
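/*
 * Usage sketch (illustrative only; bond_port and the VLAN id are assumed
 * example values): enabling a VLAN filter on the bonded port records the id
 * in the bitmap above and propagates the filter to every slave.
 *
 *     rte_eth_dev_vlan_filter(bond_port, 100, 1);   // add VLAN 100
 *     rte_eth_dev_vlan_filter(bond_port, 100, 0);   // remove it again
 */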
2165 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2166 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2167 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2169 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2170 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2171 0, dev->data->numa_node);
2172 if (bd_rx_q == NULL)
2173 return -1;
2175 bd_rx_q->queue_id = rx_queue_id;
2176 bd_rx_q->dev_private = dev->data->dev_private;
2178 bd_rx_q->nb_rx_desc = nb_rx_desc;
2180 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2181 bd_rx_q->mb_pool = mb_pool;
2183 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2185 return 0;
2186 }
2189 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2190 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2191 const struct rte_eth_txconf *tx_conf)
2193 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2194 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2195 0, dev->data->numa_node);
2197 if (bd_tx_q == NULL)
2198 return -1;
2200 bd_tx_q->queue_id = tx_queue_id;
2201 bd_tx_q->dev_private = dev->data->dev_private;
2203 bd_tx_q->nb_tx_desc = nb_tx_desc;
2204 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2206 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2208 return 0;
2209 }
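/*
 * Usage sketch (illustrative only): the bonded port is configured like any
 * other ethdev; bond_port, the descriptor counts and mbuf_pool below are
 * assumed example values, and NULL selects the default rx/tx configuration.
 *
 *     struct rte_eth_conf conf = { .rxmode = { .mq_mode = ETH_MQ_RX_NONE } };
 *
 *     rte_eth_dev_configure(bond_port, 1, 1, &conf);
 *     rte_eth_rx_queue_setup(bond_port, 0, 128, rte_socket_id(), NULL,
 *                            mbuf_pool);
 *     rte_eth_tx_queue_setup(bond_port, 0, 512, rte_socket_id(), NULL);
 *     rte_eth_dev_start(bond_port);   // reconfigures and starts every slave
 */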
2212 bond_ethdev_rx_queue_release(void *queue)
2221 bond_ethdev_tx_queue_release(void *queue)
2230 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2232 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2233 struct bond_dev_private *internals;
2235 /* Default value for polling slave found is true as we don't want to
2236 * disable the polling thread if we cannot get the lock */
2237 int i, polling_slave_found = 1;
2242 bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2243 internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2245 if (!bonded_ethdev->data->dev_started ||
2246 !internals->link_status_polling_enabled)
2249 /* If the device is currently being configured then don't check slaves'
2250 * link status, wait until the next period */
2251 if (rte_spinlock_trylock(&internals->lock)) {
2252 if (internals->slave_count > 0)
2253 polling_slave_found = 0;
2255 for (i = 0; i < internals->slave_count; i++) {
2256 if (!internals->slaves[i].link_status_poll_enabled)
2259 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2260 polling_slave_found = 1;
2262 /* Update slave link status */
2263 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2264 internals->slaves[i].link_status_wait_to_complete);
2266 /* if link status has changed since last checked then call lsc
2267 * notification callback */
2268 if (slave_ethdev->data->dev_link.link_status !=
2269 internals->slaves[i].last_link_status) {
2270 internals->slaves[i].last_link_status =
2271 slave_ethdev->data->dev_link.link_status;
2273 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2274 RTE_ETH_EVENT_INTR_LSC,
2275 &bonded_eth_dev->data->port_id,
2276 NULL);
2277 }
2278 }
2279 rte_spinlock_unlock(&internals->lock);
2280 }
2282 if (polling_slave_found)
2283 /* Set alarm to continue monitoring link status of slave ethdev's */
2284 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2285 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2289 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2291 void (*link_update)(uint8_t port_id, struct rte_eth_link *eth_link);
2293 struct bond_dev_private *bond_ctx;
2294 struct rte_eth_link slave_link;
2298 bond_ctx = ethdev->data->dev_private;
2300 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2302 if (ethdev->data->dev_started == 0 ||
2303 bond_ctx->active_slave_count == 0) {
2304 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2308 ethdev->data->dev_link.link_status = ETH_LINK_UP;
2310 if (wait_to_complete)
2311 link_update = rte_eth_link_get;
2313 link_update = rte_eth_link_get_nowait;
2315 switch (bond_ctx->mode) {
2316 case BONDING_MODE_BROADCAST:
2317 /*
2318 * Setting link speed to UINT32_MAX to ensure we pick up the
2319 * value of the first active slave
2320 */
2321 ethdev->data->dev_link.link_speed = UINT32_MAX;
2323 /*
2324 * link speed is the minimum value of all the slaves' link speeds,
2325 * as packet loss will occur on a slave if transmission at rates
2326 * greater than this is attempted
2327 */
2328 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2329 link_update(bond_ctx->active_slaves[idx], &slave_link);
2331 if (slave_link.link_speed <
2332 ethdev->data->dev_link.link_speed)
2333 ethdev->data->dev_link.link_speed =
2334 slave_link.link_speed;
2335 }
2336 break;
2337 case BONDING_MODE_ACTIVE_BACKUP:
2338 /* Current primary slave */
2339 link_update(bond_ctx->current_primary_port, &slave_link);
2341 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2342 break;
2343 case BONDING_MODE_8023AD:
2344 ethdev->data->dev_link.link_autoneg =
2345 bond_ctx->mode4.slave_link.link_autoneg;
2346 ethdev->data->dev_link.link_duplex =
2347 bond_ctx->mode4.slave_link.link_duplex;
2348 /* fall through to update link speed */
2349 case BONDING_MODE_ROUND_ROBIN:
2350 case BONDING_MODE_BALANCE:
2351 case BONDING_MODE_TLB:
2352 case BONDING_MODE_ALB:
2353 default:
2354 /*
2355 * In these modes the maximum theoretical link speed is the sum
2356 * of all the slaves' link speeds
2357 */
2358 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2360 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2361 link_update(bond_ctx->active_slaves[idx], &slave_link);
2363 ethdev->data->dev_link.link_speed +=
2364 slave_link.link_speed;
2365 }
2366 }
2368 return 0;
2369 }
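/*
 * Worked example (illustrative figures only): with two active 10 Gbps
 * slaves, the reported bonded link speed is 20 Gbps in round-robin, balance,
 * TLB, ALB and 802.3AD modes (the theoretical aggregate), 10 Gbps in
 * broadcast mode (the minimum, since every packet is sent on every slave)
 * and the primary slave's 10 Gbps in active-backup mode.
 */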
2374 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2376 struct bond_dev_private *internals = dev->data->dev_private;
2377 struct rte_eth_stats slave_stats;
2380 for (i = 0; i < internals->slave_count; i++) {
2381 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2383 stats->ipackets += slave_stats.ipackets;
2384 stats->opackets += slave_stats.opackets;
2385 stats->ibytes += slave_stats.ibytes;
2386 stats->obytes += slave_stats.obytes;
2387 stats->imissed += slave_stats.imissed;
2388 stats->ierrors += slave_stats.ierrors;
2389 stats->oerrors += slave_stats.oerrors;
2390 stats->rx_nombuf += slave_stats.rx_nombuf;
2392 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2393 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2394 stats->q_opackets[j] += slave_stats.q_opackets[j];
2395 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2396 stats->q_obytes[j] += slave_stats.q_obytes[j];
2397 stats->q_errors[j] += slave_stats.q_errors[j];
2398 }
2399 }
2400 }
2404 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2406 struct bond_dev_private *internals = dev->data->dev_private;
2409 for (i = 0; i < internals->slave_count; i++)
2410 rte_eth_stats_reset(internals->slaves[i].port_id);
2414 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2416 struct bond_dev_private *internals = eth_dev->data->dev_private;
2419 internals->promiscuous_en = 1;
2421 switch (internals->mode) {
2422 /* Promiscuous mode is propagated to all slaves */
2423 case BONDING_MODE_ROUND_ROBIN:
2424 case BONDING_MODE_BALANCE:
2425 case BONDING_MODE_BROADCAST:
2426 for (i = 0; i < internals->slave_count; i++)
2427 rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2428 break;
2429 /* In mode 4, promiscuous mode is managed when a slave is added or removed */
2430 case BONDING_MODE_8023AD:
2431 break;
2432 /* Promiscuous mode is propagated only to primary slave */
2433 case BONDING_MODE_ACTIVE_BACKUP:
2434 case BONDING_MODE_TLB:
2435 case BONDING_MODE_ALB:
2436 default:
2437 rte_eth_promiscuous_enable(internals->current_primary_port);
2438 }
2439 }
2442 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2444 struct bond_dev_private *internals = dev->data->dev_private;
2447 internals->promiscuous_en = 0;
2449 switch (internals->mode) {
2450 /* Promiscuous mode is propagated to all slaves */
2451 case BONDING_MODE_ROUND_ROBIN:
2452 case BONDING_MODE_BALANCE:
2453 case BONDING_MODE_BROADCAST:
2454 for (i = 0; i < internals->slave_count; i++)
2455 rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2456 break;
2457 /* In mode 4, promiscuous mode is managed when a slave is added or removed */
2458 case BONDING_MODE_8023AD:
2459 break;
2460 /* Promiscuous mode is propagated only to primary slave */
2461 case BONDING_MODE_ACTIVE_BACKUP:
2462 case BONDING_MODE_TLB:
2463 case BONDING_MODE_ALB:
2464 default:
2465 rte_eth_promiscuous_disable(internals->current_primary_port);
2466 }
2467 }
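/*
 * Usage sketch (bond_port is an assumed example id): the application toggles
 * promiscuous mode on the bonded port only; propagation to all slaves or to
 * the primary slave then follows the per-mode rules above.
 *
 *     rte_eth_promiscuous_enable(bond_port);
 *     rte_eth_promiscuous_disable(bond_port);
 */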
2470 bond_ethdev_delayed_lsc_propagation(void *arg)
2475 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2476 RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
2480 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
2481 void *param, void *ret_param __rte_unused)
2483 struct rte_eth_dev *bonded_eth_dev;
2484 struct bond_dev_private *internals;
2485 struct rte_eth_link link;
2488 int i, valid_slave = 0;
2490 uint8_t lsc_flag = 0;
2492 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2495 bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
2497 if (check_for_bonded_ethdev(bonded_eth_dev))
2500 internals = bonded_eth_dev->data->dev_private;
2502 /* If the device isn't started don't handle interrupts */
2503 if (!bonded_eth_dev->data->dev_started)
2506 /* verify that port_id is a valid slave of bonded port */
2507 for (i = 0; i < internals->slave_count; i++) {
2508 if (internals->slaves[i].port_id == port_id) {
2517 /* Search for port in active port list */
2518 active_pos = find_slave_by_id(internals->active_slaves,
2519 internals->active_slave_count, port_id);
2521 rte_eth_link_get_nowait(port_id, &link);
2522 if (link.link_status) {
2523 if (active_pos < internals->active_slave_count)
2524 return 0;
2526 /* if no active slave ports then set this port to be primary port */
2527 if (internals->active_slave_count < 1) {
2528 /* If first active slave, then change link status */
2529 bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2530 internals->current_primary_port = port_id;
2531 lsc_flag = 1;
2533 mac_address_slaves_update(bonded_eth_dev);
2534 }
2536 activate_slave(bonded_eth_dev, port_id);
2538 /* If user has defined the primary port then default to using it */
2539 if (internals->user_defined_primary_port &&
2540 internals->primary_port == port_id)
2541 bond_ethdev_primary_set(internals, port_id);
2542 } else {
2543 if (active_pos == internals->active_slave_count)
2544 return 0;
2546 /* Remove from active slave list */
2547 deactivate_slave(bonded_eth_dev, port_id);
2549 if (internals->active_slave_count < 1)
2550 lsc_flag = 1;
2552 /* Update primary id, take first active slave from list or if none
2553 * available set to -1 */
2554 if (port_id == internals->current_primary_port) {
2555 if (internals->active_slave_count > 0)
2556 bond_ethdev_primary_set(internals,
2557 internals->active_slaves[0]);
2558 else
2559 internals->current_primary_port = internals->primary_port;
2560 }
2561 }
2563 /*
2564 * Update bonded device link properties after any change to active
2565 * slaves
2566 */
2567 bond_ethdev_link_update(bonded_eth_dev, 0);
2569 if (lsc_flag) {
2570 /* Cancel any possible outstanding interrupts if delays are enabled */
2571 if (internals->link_up_delay_ms > 0 ||
2572 internals->link_down_delay_ms > 0)
2573 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2574 bonded_eth_dev);
2576 if (bonded_eth_dev->data->dev_link.link_status) {
2577 if (internals->link_up_delay_ms > 0)
2578 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2579 bond_ethdev_delayed_lsc_propagation,
2580 (void *)bonded_eth_dev);
2581 else
2582 _rte_eth_dev_callback_process(bonded_eth_dev,
2583 RTE_ETH_EVENT_INTR_LSC,
2584 NULL, NULL);
2586 } else {
2587 if (internals->link_down_delay_ms > 0)
2588 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2589 bond_ethdev_delayed_lsc_propagation,
2590 (void *)bonded_eth_dev);
2591 else
2592 _rte_eth_dev_callback_process(bonded_eth_dev,
2593 RTE_ETH_EVENT_INTR_LSC,
2594 NULL, NULL);
2595 }
2596 }
2598 return 0;
2599 }
2601 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2602 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2606 int slave_reta_size;
2607 unsigned reta_count;
2608 struct bond_dev_private *internals = dev->data->dev_private;
2610 if (reta_size != internals->reta_size)
2611 return -EINVAL;
2613 /* Copy RETA table */
2614 reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2616 for (i = 0; i < reta_count; i++) {
2617 internals->reta_conf[i].mask = reta_conf[i].mask;
2618 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2619 if ((reta_conf[i].mask >> j) & 0x01)
2620 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2623 /* Fill rest of array */
2624 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2625 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2626 sizeof(internals->reta_conf[0]) * reta_count);
2628 /* Propagate RETA over slaves */
2629 for (i = 0; i < internals->slave_count; i++) {
2630 slave_reta_size = internals->slaves[i].reta_size;
2631 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2632 &internals->reta_conf[0], slave_reta_size);
2633 if (result < 0)
2634 return result;
2635 }
2637 return 0;
2638 }
2641 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2642 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2645 struct bond_dev_private *internals = dev->data->dev_private;
2647 if (reta_size != internals->reta_size)
2648 return -EINVAL;
2650 /* Copy RETA table */
2651 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2652 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2653 if ((reta_conf[i].mask >> j) & 0x01)
2654 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2656 return 0;
2657 }
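/*
 * Usage sketch (illustrative only; bond_port, the RETA size of 128 and the
 * two-queue spread are assumed example values, and the bonded port is
 * assumed to report reta_size == 128): an application updates the
 * redirection table of the bonded port and the change is propagated to every
 * slave by bond_ethdev_rss_reta_update() above.
 *
 *     struct rte_eth_rss_reta_entry64 reta[128 / RTE_RETA_GROUP_SIZE];
 *     unsigned int k;
 *
 *     memset(reta, 0, sizeof(reta));
 *     for (k = 0; k < 128; k++) {
 *         reta[k / RTE_RETA_GROUP_SIZE].mask |=
 *                 1ULL << (k % RTE_RETA_GROUP_SIZE);
 *         reta[k / RTE_RETA_GROUP_SIZE].reta[k % RTE_RETA_GROUP_SIZE] = k % 2;
 *     }
 *     rte_eth_dev_rss_reta_update(bond_port, reta, 128);
 */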
2660 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2661 struct rte_eth_rss_conf *rss_conf)
2664 struct bond_dev_private *internals = dev->data->dev_private;
2665 struct rte_eth_rss_conf bond_rss_conf;
2667 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2669 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2671 if (bond_rss_conf.rss_hf != 0)
2672 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2674 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2675 sizeof(internals->rss_key)) {
2676 if (bond_rss_conf.rss_key_len == 0)
2677 bond_rss_conf.rss_key_len = 40;
2678 internals->rss_key_len = bond_rss_conf.rss_key_len;
2679 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2680 internals->rss_key_len);
2681 }
2683 for (i = 0; i < internals->slave_count; i++) {
2684 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2685 &bond_rss_conf);
2686 if (result < 0)
2687 return result;
2688 }
2690 return 0;
2691 }
2694 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2695 struct rte_eth_rss_conf *rss_conf)
2697 struct bond_dev_private *internals = dev->data->dev_private;
2699 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2700 rss_conf->rss_key_len = internals->rss_key_len;
2701 if (rss_conf->rss_key)
2702 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2704 return 0;
2705 }
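/*
 * Usage sketch (illustrative only; bond_port, the key contents and the hash
 * types are assumed example values): updating the RSS configuration on the
 * bonded port masks the requested hash types against the slaves' common RSS
 * capabilities and forwards the result to every slave. A supplied key must
 * be shorter than the driver's internal key buffer; a zero key length
 * selects the default 40-byte length.
 *
 *     uint8_t key[40];   // filled with a 40-byte Toeplitz key by the caller
 *     struct rte_eth_rss_conf conf = {
 *         .rss_key = key,
 *         .rss_key_len = sizeof(key),
 *         .rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *     };
 *     rte_eth_dev_rss_hash_update(bond_port, &conf);
 */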
2707 const struct eth_dev_ops default_dev_ops = {
2708 .dev_start = bond_ethdev_start,
2709 .dev_stop = bond_ethdev_stop,
2710 .dev_close = bond_ethdev_close,
2711 .dev_configure = bond_ethdev_configure,
2712 .dev_infos_get = bond_ethdev_info,
2713 .vlan_filter_set = bond_ethdev_vlan_filter_set,
2714 .rx_queue_setup = bond_ethdev_rx_queue_setup,
2715 .tx_queue_setup = bond_ethdev_tx_queue_setup,
2716 .rx_queue_release = bond_ethdev_rx_queue_release,
2717 .tx_queue_release = bond_ethdev_tx_queue_release,
2718 .link_update = bond_ethdev_link_update,
2719 .stats_get = bond_ethdev_stats_get,
2720 .stats_reset = bond_ethdev_stats_reset,
2721 .promiscuous_enable = bond_ethdev_promiscuous_enable,
2722 .promiscuous_disable = bond_ethdev_promiscuous_disable,
2723 .reta_update = bond_ethdev_rss_reta_update,
2724 .reta_query = bond_ethdev_rss_reta_query,
2725 .rss_hash_update = bond_ethdev_rss_hash_update,
2726 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get
2727 };
2730 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2732 const char *name = rte_vdev_device_name(dev);
2733 uint8_t socket_id = dev->device.numa_node;
2734 struct bond_dev_private *internals = NULL;
2735 struct rte_eth_dev *eth_dev = NULL;
2736 uint32_t vlan_filter_bmp_size;
2738 /* now do all data allocation - for eth_dev structure, dummy pci driver
2739 * and internal (private) data
2742 /* reserve an ethdev entry */
2743 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2744 if (eth_dev == NULL) {
2745 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2746 goto err;
2747 }
2749 internals = eth_dev->data->dev_private;
2750 eth_dev->data->nb_rx_queues = (uint16_t)1;
2751 eth_dev->data->nb_tx_queues = (uint16_t)1;
2753 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2755 if (eth_dev->data->mac_addrs == NULL) {
2756 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2757 goto err;
2758 }
2760 eth_dev->dev_ops = &default_dev_ops;
2761 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
2762 RTE_ETH_DEV_DETACHABLE;
2764 rte_spinlock_init(&internals->lock);
2766 internals->port_id = eth_dev->data->port_id;
2767 internals->mode = BONDING_MODE_INVALID;
2768 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2769 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2770 internals->xmit_hash = xmit_l2_hash;
2771 internals->user_defined_mac = 0;
2773 internals->link_status_polling_enabled = 0;
2775 internals->link_status_polling_interval_ms =
2776 DEFAULT_POLLING_INTERVAL_10_MS;
2777 internals->link_down_delay_ms = 0;
2778 internals->link_up_delay_ms = 0;
2780 internals->slave_count = 0;
2781 internals->active_slave_count = 0;
2782 internals->rx_offload_capa = 0;
2783 internals->tx_offload_capa = 0;
2784 internals->candidate_max_rx_pktlen = 0;
2785 internals->max_rx_pktlen = 0;
2787 /* Initially allow to choose any offload type */
2788 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2790 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2791 memset(internals->slaves, 0, sizeof(internals->slaves));
2793 /* Set mode 4 default configuration */
2794 bond_mode_8023ad_setup(eth_dev, NULL);
2795 if (bond_ethdev_mode_set(eth_dev, mode)) {
2796 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
2797 eth_dev->data->port_id, mode);
2798 goto err;
2799 }
2801 vlan_filter_bmp_size =
2802 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2803 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2804 RTE_CACHE_LINE_SIZE);
2805 if (internals->vlan_filter_bmpmem == NULL) {
2806 RTE_BOND_LOG(ERR,
2807 "Failed to allocate vlan bitmap for bonded device %u\n",
2808 eth_dev->data->port_id);
2809 goto err;
2810 }
2812 internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2813 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2814 if (internals->vlan_filter_bmp == NULL) {
2815 RTE_BOND_LOG(ERR,
2816 "Failed to init vlan bitmap for bonded device %u\n",
2817 eth_dev->data->port_id);
2818 rte_free(internals->vlan_filter_bmpmem);
2819 goto err;
2820 }
2822 return eth_dev->data->port_id;
2824 err:
2825 rte_free(internals);
2826 if (eth_dev != NULL) {
2827 rte_free(eth_dev->data->mac_addrs);
2828 rte_eth_dev_release_port(eth_dev);
2829 }
2830 return -1;
2831 }
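/*
 * Usage sketch (illustrative only; the slave port ids 0 and 1 are assumed
 * example values): a bonded device can also be created at run time through
 * the public bonding API rather than a --vdev argument, which reaches the
 * same allocation path above.
 *
 *     int bond_port = rte_eth_bond_create("net_bonding0",
 *                                         BONDING_MODE_ACTIVE_BACKUP,
 *                                         rte_socket_id());
 *     if (bond_port >= 0) {
 *         rte_eth_bond_slave_add(bond_port, 0);
 *         rte_eth_bond_slave_add(bond_port, 1);
 *         rte_eth_bond_primary_set(bond_port, 0);
 *     }
 */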
2834 bond_probe(struct rte_vdev_device *dev)
2837 struct bond_dev_private *internals;
2838 struct rte_kvargs *kvlist;
2839 uint8_t bonding_mode, socket_id/*, agg_mode*/;
2840 int arg_count, port_id;
2845 name = rte_vdev_device_name(dev);
2846 RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2848 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
2849 pmd_bond_init_valid_arguments);
2850 if (kvlist == NULL)
2851 return -1;
2853 /* Parse link bonding mode */
2854 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2855 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2856 &bond_ethdev_parse_slave_mode_kvarg,
2857 &bonding_mode) != 0) {
2858 RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2859 name);
2860 goto parse_error;
2861 }
2862 } else {
2863 RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
2864 "device %s\n", name);
2865 goto parse_error;
2866 }
2868 /* Parse socket id to create bonding device on */
2869 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2870 if (arg_count == 1) {
2871 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2872 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2873 != 0) {
2874 RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
2875 "bonded device %s\n", name);
2876 goto parse_error;
2877 }
2878 } else if (arg_count > 1) {
2879 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
2880 "bonded device %s\n", name);
2881 goto parse_error;
2882 } else {
2883 socket_id = rte_socket_id();
2884 }
2886 dev->device.numa_node = socket_id;
2888 /* Create link bonding eth device */
2889 port_id = bond_alloc(dev, bonding_mode);
2890 if (port_id < 0) {
2891 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2892 "socket %u.\n", name, bonding_mode, socket_id);
2893 goto parse_error;
2894 }
2895 internals = rte_eth_devices[port_id].data->dev_private;
2896 internals->kvlist = kvlist;
2898 RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2899 "socket %u.\n", name, port_id, bonding_mode, socket_id);
2900 return 0;
2902 parse_error:
2903 rte_kvargs_free(kvlist);
2904 return -1;
2905 }
2909 bond_remove(struct rte_vdev_device *dev)
2911 struct rte_eth_dev *eth_dev;
2912 struct bond_dev_private *internals;
2918 name = rte_vdev_device_name(dev);
2919 RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2921 /* now free all data allocation - for eth_dev structure,
2922 * dummy pci driver and internal (private) data
2925 /* find an ethdev entry */
2926 eth_dev = rte_eth_dev_allocated(name);
2927 if (eth_dev == NULL)
2928 return -ENODEV;
2930 RTE_ASSERT(eth_dev->device == &dev->device);
2932 internals = eth_dev->data->dev_private;
2933 if (internals->slave_count != 0)
2934 return -EBUSY;
2936 if (eth_dev->data->dev_started == 1) {
2937 bond_ethdev_stop(eth_dev);
2938 bond_ethdev_close(eth_dev);
2941 eth_dev->dev_ops = NULL;
2942 eth_dev->rx_pkt_burst = NULL;
2943 eth_dev->tx_pkt_burst = NULL;
2945 internals = eth_dev->data->dev_private;
2946 rte_bitmap_free(internals->vlan_filter_bmp);
2947 rte_free(internals->vlan_filter_bmpmem);
2948 rte_free(eth_dev->data->dev_private);
2949 rte_free(eth_dev->data->mac_addrs);
2951 rte_eth_dev_release_port(eth_dev);
2953 return 0;
2954 }
2956 /* this part will resolve the slave port ids after all the other pdevs
2957 * and vdevs have been allocated */
2959 bond_ethdev_configure(struct rte_eth_dev *dev)
2961 const char *name = dev->device->name;
2962 struct bond_dev_private *internals = dev->data->dev_private;
2963 struct rte_kvargs *kvlist = internals->kvlist;
2965 uint8_t port_id = dev - rte_eth_devices;
2968 static const uint8_t default_rss_key[40] = {
2969 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2970 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2971 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2972 0xBE, 0xAC, 0x01, 0xFA
2977 /* If RSS is enabled, fill table and key with default values */
2978 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2979 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2980 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2981 memcpy(internals->rss_key, default_rss_key, 40);
2983 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2984 internals->reta_conf[i].mask = ~0LL;
2985 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2986 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2987 }
2988 }
2990 /* set the max_rx_pktlen */
2991 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
2993 /*
2994 * if no kvlist, it means that this bonded device has been created
2995 * through the bonding API.
2996 */
2997 if (!kvlist)
2998 return 0;
3000 /* Parse MAC address for bonded device */
3001 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3002 if (arg_count == 1) {
3003 struct ether_addr bond_mac;
3005 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3006 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3007 RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
3008 name);
3009 return -1;
3010 }
3012 /* Set MAC address */
3013 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3014 RTE_LOG(ERR, EAL,
3015 "Failed to set mac address on bonded device %s\n",
3016 name);
3017 return -1;
3018 }
3019 } else if (arg_count > 1) {
3021 "MAC address can be specified only once for bonded device %s\n",
3026 /* Parse/set balance mode transmit policy */
3027 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3028 if (arg_count == 1) {
3029 uint8_t xmit_policy;
3031 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3032 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3035 "Invalid xmit policy specified for bonded device %s\n",
3040 /* Set balance mode transmit policy*/
3041 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3043 "Failed to set balance xmit policy on bonded device %s\n",
3047 } else if (arg_count > 1) {
3049 "Transmit policy can be specified only once for bonded device"
3054 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3055 if (rte_kvargs_process(kvlist,
3056 PMD_BOND_AGG_MODE_KVARG,
3057 &bond_ethdev_parse_slave_agg_mode_kvarg,
3060 "Failed to parse agg selection mode for bonded device %s\n",
3063 if (internals->mode == BONDING_MODE_8023AD)
3065 rte_eth_bond_8023ad_agg_selection_set(port_id,
3069 /* Parse/add slave ports to bonded device */
3070 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3071 struct bond_ethdev_slave_ports slave_ports;
3074 memset(&slave_ports, 0, sizeof(slave_ports));
3076 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3077 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3079 "Failed to parse slave ports for bonded device %s\n",
3084 for (i = 0; i < slave_ports.slave_count; i++) {
3085 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3086 RTE_LOG(ERR, EAL,
3087 "Failed to add port %d as slave to bonded device %s\n",
3088 slave_ports.slaves[i], name);
3089 }
3090 }
3092 } else {
3093 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3094 return -1;
3095 }
3097 /* Parse/set primary slave port id*/
3098 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3099 if (arg_count == 1) {
3100 uint8_t primary_slave_port_id;
3102 if (rte_kvargs_process(kvlist,
3103 PMD_BOND_PRIMARY_SLAVE_KVARG,
3104 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3105 &primary_slave_port_id) < 0) {
3107 "Invalid primary slave port id specified for bonded device"
3112 /* Set balance mode transmit policy*/
3113 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
3116 "Failed to set primary slave port %d on bonded device %s\n",
3117 primary_slave_port_id, name);
3120 } else if (arg_count > 1) {
3122 "Primary slave can be specified only once for bonded device"
3127 /* Parse link status monitor polling interval */
3128 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3129 if (arg_count == 1) {
3130 uint32_t lsc_poll_interval_ms;
3132 if (rte_kvargs_process(kvlist,
3133 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3134 &bond_ethdev_parse_time_ms_kvarg,
3135 &lsc_poll_interval_ms) < 0) {
3137 "Invalid lsc polling interval value specified for bonded"
3138 " device %s\n", name);
3142 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3145 "Failed to set lsc monitor polling interval (%u ms) on"
3146 " bonded device %s\n", lsc_poll_interval_ms, name);
3149 } else if (arg_count > 1) {
3151 "LSC polling interval can be specified only once for bonded"
3152 " device %s\n", name);
3156 /* Parse link up interrupt propagation delay */
3157 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3158 if (arg_count == 1) {
3159 uint32_t link_up_delay_ms;
3161 if (rte_kvargs_process(kvlist,
3162 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3163 &bond_ethdev_parse_time_ms_kvarg,
3164 &link_up_delay_ms) < 0) {
3166 "Invalid link up propagation delay value specified for"
3167 " bonded device %s\n", name);
3171 /* Set balance mode transmit policy*/
3172 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3175 "Failed to set link up propagation delay (%u ms) on bonded"
3176 " device %s\n", link_up_delay_ms, name);
3179 } else if (arg_count > 1) {
3181 "Link up propagation delay can be specified only once for"
3182 " bonded device %s\n", name);
3186 /* Parse link down interrupt propagation delay */
3187 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3188 if (arg_count == 1) {
3189 uint32_t link_down_delay_ms;
3191 if (rte_kvargs_process(kvlist,
3192 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3193 &bond_ethdev_parse_time_ms_kvarg,
3194 &link_down_delay_ms) < 0) {
3196 "Invalid link down propagation delay value specified for"
3197 " bonded device %s\n", name);
3201 /* Set balance mode transmit policy*/
3202 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3205 "Failed to set link down propagation delay (%u ms) on"
3206 " bonded device %s\n", link_down_delay_ms, name);
3209 } else if (arg_count > 1) {
3210 RTE_LOG(ERR, EAL,
3211 "Link down propagation delay can be specified only once for"
3212 " bonded device %s\n", name);
3213 return -1;
3214 }
3216 return 0;
3217 }
3219 struct rte_vdev_driver pmd_bond_drv = {
3220 .probe = bond_probe,
3221 .remove = bond_remove,
3222 };
3224 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3225 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3227 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3228 "slave=<ifc> "
3229 "primary=<ifc> "
3230 "mode=[0-6] "
3231 "xmit_policy=[l2 | l23 | l34] "
3232 "agg_mode=[count | stable | bandwidth] "
3233 "socket_id=<int> "
3234 "mac=<mac addr> "
3235 "lsc_poll_period_ms=<int> "
3236 "up_delay=<int> "
3237 "down_delay=<int>");
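/*
 * Example invocation (illustrative only; the PCI addresses are assumed
 * example values): creating a two-slave 802.3AD bond (mode 4) from the EAL
 * command line with the parameters registered above.
 *
 *     testpmd -l 0-3 -n 4 \
 *         --vdev 'net_bonding0,mode=4,slave=0000:04:00.0,slave=0000:04:00.1,xmit_policy=l34'
 */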