4 * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <netinet/in.h>
37 #include <rte_malloc.h>
38 #include <rte_ethdev.h>
39 #include <rte_ethdev_vdev.h>
43 #include <rte_ip_frag.h>
44 #include <rte_devargs.h>
45 #include <rte_kvargs.h>
47 #include <rte_alarm.h>
48 #include <rte_cycles.h>
50 #include "rte_eth_bond.h"
51 #include "rte_eth_bond_private.h"
52 #include "rte_eth_bond_8023ad_private.h"
/* Interval (ms) at which the TLB (mode 5) slave ordering is recomputed by
 * bond_ethdev_update_tlb_slave_cb(). */
54 #define REORDER_PERIOD_MS 10
55 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
/* XOR-fold the L4 src/dst ports of a TCP/UDP header; used by xmit_l34_hash. */
57 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
59 /* Table for statistics in mode 5 TLB */
/* Last seen tx byte counter per slave port id; lets the TLB callback compute
 * per-interval transmit-load deltas. */
60 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
63 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
65 size_t vlan_offset = 0;
67 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
68 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
70 vlan_offset = sizeof(struct vlan_hdr);
71 *proto = vlan_hdr->eth_proto;
73 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
74 vlan_hdr = vlan_hdr + 1;
75 *proto = vlan_hdr->eth_proto;
76 vlan_offset += sizeof(struct vlan_hdr);
83 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
85 struct bond_dev_private *internals;
87 uint16_t num_rx_slave = 0;
88 uint16_t num_rx_total = 0;
92 /* Cast to structure, containing bonded device's port id and queue id */
93 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
95 internals = bd_rx_q->dev_private;
98 for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
99 /* Offset of pointer to *bufs increases as packets are received
100 * from other slaves */
101 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
102 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
104 num_rx_total += num_rx_slave;
105 nb_pkts -= num_rx_slave;
113 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
116 struct bond_dev_private *internals;
118 /* Cast to structure, containing bonded device's port id and queue id */
119 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
121 internals = bd_rx_q->dev_private;
123 return rte_eth_rx_burst(internals->current_primary_port,
124 bd_rx_q->queue_id, bufs, nb_pkts);
127 static inline uint8_t
128 is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci)
130 const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
132 return !vlan_tci && (ethertype == ether_type_slow_be &&
133 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
136 /*****************************************************************************
137 * Flow director's setup for mode 4 optimization
/* rte_flow ETH item matching the slow-protocols ethertype (0x8809); MACs are
 * wildcarded (zero spec bytes). */
140 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
141 .dst.addr_bytes = { 0 },
142 .src.addr_bytes = { 0 },
143 .type = RTE_BE16(ETHER_TYPE_SLOW),
/* Mask for the item above: MAC bytes are zeroed (don't care); the ethertype
 * mask lines are outside this view. */
146 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
147 .dst.addr_bytes = { 0 },
148 .src.addr_bytes = { 0 },
/* Pattern: a single ETH item (ethertype == slow protocols) then END. */
152 static struct rte_flow_item flow_item_8023ad[] = {
154 .type = RTE_FLOW_ITEM_TYPE_ETH,
155 .spec = &flow_item_eth_type_8023ad,
157 .mask = &flow_item_eth_mask_type_8023ad,
160 .type = RTE_FLOW_ITEM_TYPE_END,
/* Attributes shared by the validate/create calls below (ingress rule). */
167 const struct rte_flow_attr flow_attr_8023ad = {
/* Check (without creating) whether @slave_port can install the rte_flow rule
 * that steers LACP slow frames into the bonded device's dedicated mode-4 Rx
 * queue.  Uses the shared flow_attr_8023ad / flow_item_8023ad pattern. */
176 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
177 uint8_t slave_port) {
178 struct rte_flow_error error;
179 struct bond_dev_private *internals = (struct bond_dev_private *)
180 (bond_dev->data->dev_private);
/* Action: redirect matched frames to the dedicated LACP Rx queue. */
182 struct rte_flow_action_queue lacp_queue_conf = {
183 .index = internals->mode4.dedicated_queues.rx_qid,
186 const struct rte_flow_action actions[] = {
188 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
189 .conf = &lacp_queue_conf
192 .type = RTE_FLOW_ACTION_TYPE_END,
/* Dry-run validation against the slave's driver; result propagated to
 * caller (tail of the function is outside this view). */
196 int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
197 flow_item_8023ad, actions, &error);
/* Return whether every current slave supports the rte_flow rule needed for
 * hardware filtering of LACP frames (dedicated-queue mode 4 optimization).
 * Also records the prospective dedicated queue ids: one extra queue is
 * appended after the bonded device's currently configured Rx/Tx queues. */
205 bond_8023ad_slow_pkt_hw_filter_supported(uint8_t port_id) {
206 struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
207 struct bond_dev_private *internals = (struct bond_dev_private *)
208 (bond_dev->data->dev_private);
209 struct rte_eth_dev_info bond_info, slave_info;
212 /* Verify if all slaves in bonding supports flow director and */
213 if (internals->slave_count > 0) {
214 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
/* Dedicated queues take the first index past the configured ones. */
216 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
217 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
219 for (idx = 0; idx < internals->slave_count; idx++) {
220 rte_eth_dev_info_get(internals->slaves[idx].port_id,
/* A single slave failing flow validation disqualifies the feature. */
223 if (bond_ethdev_8023ad_flow_verify(bond_dev,
224 internals->slaves[idx].port_id) != 0)
/* Install on @slave_port the rte_flow rule that steers LACP slow frames into
 * the dedicated mode-4 Rx queue.  The created handle is stored per slave in
 * internals->mode4.dedicated_queues.flow[] so it can be destroyed later. */
233 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint8_t slave_port) {
235 struct rte_flow_error error;
236 struct bond_dev_private *internals = (struct bond_dev_private *)
237 (bond_dev->data->dev_private);
/* Same queue action validated earlier by bond_ethdev_8023ad_flow_verify(). */
239 struct rte_flow_action_queue lacp_queue_conf = {
240 .index = internals->mode4.dedicated_queues.rx_qid,
243 const struct rte_flow_action actions[] = {
245 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
246 .conf = &lacp_queue_conf
249 .type = RTE_FLOW_ACTION_TYPE_END,
253 internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
254 &flow_attr_8023ad, flow_item_8023ad, actions, &error);
/* NULL handle means the driver rejected the rule; log driver's message. */
255 if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
256 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
257 "(slave_port=%d queue_id=%d)",
258 error.message, slave_port,
259 internals->mode4.dedicated_queues.rx_qid);
/* Rx burst for mode 4 when LACP frames are hardware-filtered to a dedicated
 * queue: no software slow-packet inspection is needed, so this just polls the
 * active slaves round-robin, resuming from the slave where the previous burst
 * stopped (internals->active_slave) for fairness. */
267 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
270 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
271 struct bond_dev_private *internals = bd_rx_q->dev_private;
272 uint16_t num_rx_total = 0; /* Total number of received packets */
273 uint8_t slaves[RTE_MAX_ETHPORTS];
278 /* Copy slave list to protect against slave up/down changes during tx
280 slave_count = internals->active_slave_count;
281 memcpy(slaves, internals->active_slaves,
282 sizeof(internals->active_slaves[0]) * slave_count);
/* NOTE(review): idx % slave_count divides by zero if slave_count == 0 on
 * loop entry — confirm callers guarantee at least one active slave. */
284 for (i = 0, idx = internals->active_slave;
285 i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
286 idx = idx % slave_count;
288 /* Read packets from this slave */
289 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
290 &bufs[num_rx_total], nb_pkts - num_rx_total);
/* Remember where we stopped so the next burst starts at the next slave. */
293 internals->active_slave = idx;
/* Tx burst for mode 4 with dedicated LACP queues: hash each packet onto one
 * of the slaves currently in DISTRIBUTING state, transmit per-slave bursts,
 * and move any untransmitted packets to the tail of bufs so the caller can
 * retry them.  Returns the number of packets actually sent (tail truncated
 * from this view). */
299 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
302 struct bond_dev_private *internals;
303 struct bond_tx_queue *bd_tx_q;
305 uint8_t num_of_slaves;
306 uint8_t slaves[RTE_MAX_ETHPORTS];
307 /* positions in slaves, not ID */
308 uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
309 uint8_t distributing_count;
311 uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
312 uint16_t i, op_slave_idx;
/* Per-slave staging arrays; VLA sized by the burst size. */
314 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
316 /* Total amount of packets in slave_bufs */
317 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
318 /* Slow packets placed in each slave */
320 if (unlikely(nb_pkts == 0))
323 bd_tx_q = (struct bond_tx_queue *)queue;
324 internals = bd_tx_q->dev_private;
326 /* Copy slave list to protect against slave up/down changes during tx
328 num_of_slaves = internals->active_slave_count;
329 if (num_of_slaves < 1)
332 memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
/* Collect the offsets (into slaves[]) of slaves allowed to carry data
 * traffic, i.e. those whose 802.3ad actor state is DISTRIBUTING. */
335 distributing_count = 0;
336 for (i = 0; i < num_of_slaves; i++) {
337 struct port *port = &mode_8023ad_ports[slaves[i]];
338 if (ACTOR_STATE(port, DISTRIBUTING))
339 distributing_offsets[distributing_count++] = i;
342 if (likely(distributing_count > 0)) {
343 /* Populate slaves mbuf with the packets which are to be sent */
344 for (i = 0; i < nb_pkts; i++) {
345 /* Select output slave using hash based on xmit policy */
346 op_slave_idx = internals->xmit_hash(bufs[i],
349 /* Populate slave mbuf arrays with mbufs for that slave.
350 * Use only slaves that are currently distributing.
352 uint8_t slave_offset =
353 distributing_offsets[op_slave_idx];
354 slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
356 slave_nb_pkts[slave_offset]++;
360 /* Send packet burst on each slave device */
361 for (i = 0; i < num_of_slaves; i++) {
362 if (slave_nb_pkts[i] == 0)
365 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
366 slave_bufs[i], slave_nb_pkts[i]);
368 num_tx_total += num_tx_slave;
369 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
371 /* If tx burst fails move packets to end of bufs */
372 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
373 uint16_t j = nb_pkts - num_tx_fail_total;
374 for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
376 bufs[j] = slave_bufs[i][num_tx_slave];
/* Rx burst for mode 4 (software slow-packet handling): poll active slaves
 * round-robin, then scan the received packets and strip out (a) LACP/marker
 * PDUs, which are handed to bond_mode_8023ad_handle_slow_pkt(), and
 * (b) packets from slaves not in COLLECTING state or, when not promiscuous,
 * unicast frames not addressed to the bond MAC.  Removed packets are either
 * consumed by mode 4 or freed, and the bufs array is compacted in place. */
385 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
388 /* Cast to structure, containing bonded device's port id and queue id */
389 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
390 struct bond_dev_private *internals = bd_rx_q->dev_private;
391 struct ether_addr bond_mac;
393 struct ether_hdr *hdr;
395 const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
396 uint16_t num_rx_total = 0; /* Total number of received packets */
397 uint8_t slaves[RTE_MAX_ETHPORTS];
398 uint8_t slave_count, idx;
400 uint8_t collecting; /* current slave collecting status */
401 const uint8_t promisc = internals->promiscuous_en;
405 rte_eth_macaddr_get(internals->port_id, &bond_mac);
406 /* Copy slave list to protect against slave up/down changes during tx
408 slave_count = internals->active_slave_count;
409 memcpy(slaves, internals->active_slaves,
410 sizeof(internals->active_slaves[0]) * slave_count);
/* Resume fair polling at the slave we stopped at last burst; reset the
 * cursor if slaves were removed since then. */
412 idx = internals->active_slave;
413 if (idx >= slave_count) {
414 internals->active_slave = 0;
417 for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
419 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
422 /* Read packets from this slave */
423 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
424 &bufs[num_rx_total], nb_pkts - num_rx_total);
/* Warm the cache for the first couple of headers before the scan. */
426 for (k = j; k < 2 && k < num_rx_total; k++)
427 rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
429 /* Handle slow protocol packets. */
430 while (j < num_rx_total) {
432 /* If packet is not pure L2 and is known, skip it */
433 if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
438 if (j + 3 < num_rx_total)
439 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
441 hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
442 subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
444 /* Remove packet from array if it is slow packet or slave is not
445 * in collecting state or bonding interface is not in promiscuous
446 * mode and packet address does not match. */
447 if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) ||
448 !collecting || (!promisc &&
449 !is_multicast_ether_addr(&hdr->d_addr) &&
450 !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
/* Slow frames are handed to the 802.3ad state machine (which takes
 * ownership of the mbuf); everything else removed here is freed. */
452 if (hdr->ether_type == ether_type_slow_be) {
453 bond_mode_8023ad_handle_slow_pkt(
454 internals, slaves[idx], bufs[j]);
456 rte_pktmbuf_free(bufs[j]);
458 /* Packet is managed by mode 4 or dropped, shift the array */
460 if (j < num_rx_total) {
461 memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
467 if (unlikely(++idx == slave_count))
/* Persist the polling cursor for the next burst. */
471 internals->active_slave = idx;
475 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
/* Global Rx/Tx burst counters used only by mode-6 (ALB) debug builds; their
 * addresses also act as Rx-vs-Tx discriminators in update_client_stats(). */
476 uint32_t burstnumberRX;
477 uint32_t burstnumberTX;
479 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
/* Write a human-readable name for ARP opcode @arp_op into @buf (debug only).
 * NOTE(review): each snprintf is bounded by sizeof of its own literal, so the
 * caller's buffer must be at least as large as the longest name
 * ("Peer Identify Request", 22 bytes incl. NUL) — confirm at call sites. */
482 arp_op_name(uint16_t arp_op, char *buf)
486 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
489 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
491 case ARP_OP_REVREQUEST:
492 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
493 "Reverse ARP Request");
495 case ARP_OP_REVREPLY:
496 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
497 "Reverse ARP Reply");
499 case ARP_OP_INVREQUEST:
500 snprintf(buf, sizeof("Peer Identify Request"), "%s",
501 "Peer Identify Request");
503 case ARP_OP_INVREPLY:
504 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
505 "Peer Identify Reply");
/* Fallback for unrecognized opcodes. */
510 snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
/* Maximum length of a dotted-quad IPv4 string incl. NUL ("255.255.255.255"). */
#define MaxIPv4String	16

/*
 * Format a big-endian IPv4 address as dotted-quad text.
 *
 * @be_ipv4_addr  Address in network byte order.
 * @buf           Output buffer.
 * @buf_size      Capacity of buf; should be >= MaxIPv4String
 *                (snprintf truncates safely otherwise).
 */
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}
526 #define MAX_CLIENTS_NUMBER 128
/* Number of entries currently in use in client_stats[]. */
527 uint8_t active_clients;
/* Per-client Rx/Tx packet counters for mode-6 (ALB) debug statistics,
 * keyed by (ipv4_addr, port). */
528 struct client_stats_t {
531 uint32_t ipv4_rx_packets;
532 uint32_t ipv4_tx_packets;
534 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
/* Bump the Rx or Tx packet counter for the client identified by
 * (@addr, @port), inserting a new entry if the client is unknown.  The
 * direction is inferred from the counter pointer: &burstnumberRX means Rx,
 * anything else means Tx.
 * NOTE(review): no bound check against MAX_CLIENTS_NUMBER before inserting
 * at active_clients — confirm overflow is impossible or add a guard. */
537 update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
541 for (; i < MAX_CLIENTS_NUMBER; i++) {
542 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
543 /* Just update RX packets number for this client */
544 if (TXorRXindicator == &burstnumberRX)
545 client_stats[i].ipv4_rx_packets++;
547 client_stats[i].ipv4_tx_packets++;
551 /* We have a new client. Insert him to the table, and increment stats */
552 if (TXorRXindicator == &burstnumberRX)
553 client_stats[active_clients].ipv4_rx_packets++;
555 client_stats[active_clients].ipv4_tx_packets++;
556 client_stats[active_clients].ipv4_addr = addr;
557 client_stats[active_clients].port = port;
562 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
/* Debug-only logging macro for mode 6 (ALB): prints an info tag, src/dst IP
 * strings, the frame's source and destination MACs, an ARP op name, the port
 * id and a burst counter via RTE_LOG(DEBUG).  The lines below are backslash
 * continuations of one RTE_LOG call — do not insert comments among them. */
563 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
564 RTE_LOG(DEBUG, PMD, \
567 "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
569 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
575 eth_h->s_addr.addr_bytes[0], \
576 eth_h->s_addr.addr_bytes[1], \
577 eth_h->s_addr.addr_bytes[2], \
578 eth_h->s_addr.addr_bytes[3], \
579 eth_h->s_addr.addr_bytes[4], \
580 eth_h->s_addr.addr_bytes[5], \
582 eth_h->d_addr.addr_bytes[0], \
583 eth_h->d_addr.addr_bytes[1], \
584 eth_h->d_addr.addr_bytes[2], \
585 eth_h->d_addr.addr_bytes[3], \
586 eth_h->d_addr.addr_bytes[4], \
587 eth_h->d_addr.addr_bytes[5], \
594 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
595 uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
597 struct ipv4_hdr *ipv4_h;
598 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
599 struct arp_hdr *arp_h;
606 uint16_t ether_type = eth_h->ether_type;
607 uint16_t offset = get_vlan_offset(eth_h, ðer_type);
609 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
610 snprintf(buf, 16, "%s", info);
613 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
614 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
615 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
616 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
617 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
618 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
620 update_client_stats(ipv4_h->src_addr, port, burstnumber);
622 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
623 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
624 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
625 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
626 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
627 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
628 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
635 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
637 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
638 struct bond_dev_private *internals = bd_tx_q->dev_private;
639 struct ether_hdr *eth_h;
640 uint16_t ether_type, offset;
641 uint16_t nb_recv_pkts;
644 nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
646 for (i = 0; i < nb_recv_pkts; i++) {
647 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
648 ether_type = eth_h->ether_type;
649 offset = get_vlan_offset(eth_h, ðer_type);
651 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
652 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
653 mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
655 bond_mode_alb_arp_recv(eth_h, offset, internals);
657 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
658 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
659 mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
/* Tx burst for mode 0 (round robin): spread the burst across the active
 * slaves one packet at a time, starting from where the previous burst left
 * off, then transmit the per-slave groups.  Failed packets are moved to the
 * end of bufs for the caller to retry.
 * NOTE(review): slave_idx is function-static, i.e. shared by all Tx queues
 * and threads — confirm single-threaded Tx per device or accept approximate
 * round-robin under concurrency. */
667 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
670 struct bond_dev_private *internals;
671 struct bond_tx_queue *bd_tx_q;
673 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
674 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
676 uint8_t num_of_slaves;
677 uint8_t slaves[RTE_MAX_ETHPORTS];
679 uint16_t num_tx_total = 0, num_tx_slave;
681 static int slave_idx = 0;
682 int i, cslave_idx = 0, tx_fail_total = 0;
684 bd_tx_q = (struct bond_tx_queue *)queue;
685 internals = bd_tx_q->dev_private;
687 /* Copy slave list to protect against slave up/down changes during tx
689 num_of_slaves = internals->active_slave_count;
690 memcpy(slaves, internals->active_slaves,
691 sizeof(internals->active_slaves[0]) * num_of_slaves);
693 if (num_of_slaves < 1)
696 /* Populate slaves mbuf with which packets are to be sent on it */
697 for (i = 0; i < nb_pkts; i++) {
698 cslave_idx = (slave_idx + i) % num_of_slaves;
699 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
702 /* increment current slave index so the next call to tx burst starts on the
704 slave_idx = ++cslave_idx;
706 /* Send packet burst on each slave device */
707 for (i = 0; i < num_of_slaves; i++) {
708 if (slave_nb_pkts[i] > 0) {
709 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
710 slave_bufs[i], slave_nb_pkts[i]);
712 /* if tx burst fails move packets to end of bufs */
713 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
714 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
716 tx_fail_total += tx_fail_slave;
718 memcpy(&bufs[nb_pkts - tx_fail_total],
719 &slave_bufs[i][num_tx_slave],
720 tx_fail_slave * sizeof(bufs[0]));
722 num_tx_total += num_tx_slave;
730 bond_ethdev_tx_burst_active_backup(void *queue,
731 struct rte_mbuf **bufs, uint16_t nb_pkts)
733 struct bond_dev_private *internals;
734 struct bond_tx_queue *bd_tx_q;
736 bd_tx_q = (struct bond_tx_queue *)queue;
737 internals = bd_tx_q->dev_private;
739 if (internals->active_slave_count < 1)
742 return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
746 static inline uint16_t
747 ether_hash(struct ether_hdr *eth_hdr)
749 unaligned_uint16_t *word_src_addr =
750 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
751 unaligned_uint16_t *word_dst_addr =
752 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
754 return (word_src_addr[0] ^ word_dst_addr[0]) ^
755 (word_src_addr[1] ^ word_dst_addr[1]) ^
756 (word_src_addr[2] ^ word_dst_addr[2]);
759 static inline uint32_t
760 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
762 return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
765 static inline uint32_t
766 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
768 unaligned_uint32_t *word_src_addr =
769 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
770 unaligned_uint32_t *word_dst_addr =
771 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
773 return (word_src_addr[0] ^ word_dst_addr[0]) ^
774 (word_src_addr[1] ^ word_dst_addr[1]) ^
775 (word_src_addr[2] ^ word_dst_addr[2]) ^
776 (word_src_addr[3] ^ word_dst_addr[3]);
780 xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
782 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
784 uint32_t hash = ether_hash(eth_hdr);
786 return (hash ^= hash >> 8) % slave_count;
790 xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
792 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
793 uint16_t proto = eth_hdr->ether_type;
794 size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
795 uint32_t hash, l3hash = 0;
797 hash = ether_hash(eth_hdr);
799 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
800 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
801 ((char *)(eth_hdr + 1) + vlan_offset);
802 l3hash = ipv4_hash(ipv4_hdr);
804 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
805 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
806 ((char *)(eth_hdr + 1) + vlan_offset);
807 l3hash = ipv6_hash(ipv6_hdr);
810 hash = hash ^ l3hash;
814 return hash % slave_count;
818 xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
820 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
821 uint16_t proto = eth_hdr->ether_type;
822 size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
824 struct udp_hdr *udp_hdr = NULL;
825 struct tcp_hdr *tcp_hdr = NULL;
826 uint32_t hash, l3hash = 0, l4hash = 0;
828 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
829 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
830 ((char *)(eth_hdr + 1) + vlan_offset);
831 size_t ip_hdr_offset;
833 l3hash = ipv4_hash(ipv4_hdr);
835 /* there is no L4 header in fragmented packet */
836 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
837 ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
840 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
841 tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
843 l4hash = HASH_L4_PORTS(tcp_hdr);
844 } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
845 udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
847 l4hash = HASH_L4_PORTS(udp_hdr);
850 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
851 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
852 ((char *)(eth_hdr + 1) + vlan_offset);
853 l3hash = ipv6_hash(ipv6_hdr);
855 if (ipv6_hdr->proto == IPPROTO_TCP) {
856 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
857 l4hash = HASH_L4_PORTS(tcp_hdr);
858 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
859 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
860 l4hash = HASH_L4_PORTS(udp_hdr);
864 hash = l3hash ^ l4hash;
868 return hash % slave_count;
/* Remaining-bandwidth figure for one slave, split into integer part and
 * remainder so bandwidth_cmp() can break ties (part of struct bwg_slave;
 * the struct header is outside this view). */
872 uint64_t bwg_left_int;
873 uint64_t bwg_left_remainder;
/* Reset the TLB tx-byte baselines for all active slaves so load deltas
 * start from zero after (re)activation. */
878 bond_tlb_activate_slave(struct bond_dev_private *internals) {
881 for (i = 0; i < internals->active_slave_count; i++) {
882 tlb_last_obytets[internals->active_slaves[i]] = 0;
/* qsort comparator ordering struct bwg_slave entries by remaining bandwidth,
 * descending (b - a): integer part first, remainder as tie-break.  The final
 * return logic is outside this view. */
887 bandwidth_cmp(const void *a, const void *b)
889 const struct bwg_slave *bwg_a = a;
890 const struct bwg_slave *bwg_b = b;
891 int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
892 int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
893 (int64_t)bwg_a->bwg_left_remainder;
/* Compute the remaining bandwidth fraction of @port_id given @load bytes
 * transmitted over the last measurement window, scaled by @update_idx, and
 * store quotient/remainder into @bwg_slave for sorting.
 * NOTE(review): if link_status.link_speed is 0 (link down), link_bwg is 0 and
 * the divisions below divide by zero — confirm callers only run this on
 * active slaves with a reported speed. */
907 bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
908 struct bwg_slave *bwg_slave)
910 struct rte_eth_link link_status;
912 rte_eth_link_get_nowait(port_id, &link_status);
/* link_speed is in Mbit/s; convert to bytes per second. */
913 uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
916 link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
917 bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
918 bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
/* Periodic EAL alarm callback for TLB (mode 5): sample each active slave's
 * tx byte counter, compute its remaining bandwidth, sort the slaves by it
 * (least loaded first in tlb_slaves_order) and re-arm the alarm every
 * REORDER_PERIOD_MS milliseconds. */
922 bond_ethdev_update_tlb_slave_cb(void *arg)
924 struct bond_dev_private *internals = arg;
925 struct rte_eth_stats slave_stats;
926 struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
930 uint8_t update_stats = 0;
933 internals->slave_update_idx++;
/* Refresh the stored byte baselines only once per full period. */
936 if (internals->slave_update_idx >= REORDER_PERIOD_MS)
939 for (i = 0; i < internals->active_slave_count; i++) {
940 slave_id = internals->active_slaves[i];
941 rte_eth_stats_get(slave_id, &slave_stats);
/* Delta of tx bytes since the last recorded baseline. */
942 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
943 bandwidth_left(slave_id, tx_bytes,
944 internals->slave_update_idx, &bwg_array[i]);
945 bwg_array[i].slave = slave_id;
948 tlb_last_obytets[slave_id] = slave_stats.obytes;
952 if (update_stats == 1)
953 internals->slave_update_idx = 0;
/* Publish the new slave ordering for bond_ethdev_tx_burst_tlb(). */
956 qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
957 for (i = 0; i < slave_count; i++)
958 internals->tlb_slaves_order[i] = bwg_array[i].slave;
960 rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
961 (struct bond_dev_private *)internals);
965 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
967 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
968 struct bond_dev_private *internals = bd_tx_q->dev_private;
970 struct rte_eth_dev *primary_port =
971 &rte_eth_devices[internals->primary_port];
972 uint16_t num_tx_total = 0;
975 uint8_t num_of_slaves = internals->active_slave_count;
976 uint8_t slaves[RTE_MAX_ETHPORTS];
978 struct ether_hdr *ether_hdr;
979 struct ether_addr primary_slave_addr;
980 struct ether_addr active_slave_addr;
982 if (num_of_slaves < 1)
985 memcpy(slaves, internals->tlb_slaves_order,
986 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
989 ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
992 for (i = 0; i < 3; i++)
993 rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
996 for (i = 0; i < num_of_slaves; i++) {
997 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
998 for (j = num_tx_total; j < nb_pkts; j++) {
1000 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1002 ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1003 if (is_same_ether_addr(ðer_hdr->s_addr, &primary_slave_addr))
1004 ether_addr_copy(&active_slave_addr, ðer_hdr->s_addr);
1005 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1006 mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1010 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1011 bufs + num_tx_total, nb_pkts - num_tx_total);
1013 if (num_tx_total == nb_pkts)
1017 return num_tx_total;
1021 bond_tlb_disable(struct bond_dev_private *internals)
1023 rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1027 bond_tlb_enable(struct bond_dev_private *internals)
1029 bond_ethdev_update_tlb_slave_cb(internals);
1033 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1035 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1036 struct bond_dev_private *internals = bd_tx_q->dev_private;
1038 struct ether_hdr *eth_h;
1039 uint16_t ether_type, offset;
1041 struct client_data *client_info;
1044 * We create transmit buffers for every slave and one additional to send
1045 * through tlb. In worst case every packet will be send on one port.
1047 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1048 uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1051 * We create separate transmit buffers for update packets as they won't
1052 * be counted in num_tx_total.
1054 struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1055 uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1057 struct rte_mbuf *upd_pkt;
1060 uint16_t num_send, num_not_send = 0;
1061 uint16_t num_tx_total = 0;
1066 /* Search tx buffer for ARP packets and forward them to alb */
1067 for (i = 0; i < nb_pkts; i++) {
1068 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1069 ether_type = eth_h->ether_type;
1070 offset = get_vlan_offset(eth_h, ðer_type);
1072 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1073 slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1075 /* Change src mac in eth header */
1076 rte_eth_macaddr_get(slave_idx, ð_h->s_addr);
1078 /* Add packet to slave tx buffer */
1079 slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1080 slave_bufs_pkts[slave_idx]++;
1082 /* If packet is not ARP, send it with TLB policy */
1083 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1085 slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1089 /* Update connected client ARP tables */
1090 if (internals->mode6.ntt) {
1091 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1092 client_info = &internals->mode6.client_table[i];
1094 if (client_info->in_use) {
1095 /* Allocate new packet to send ARP update on current slave */
1096 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1097 if (upd_pkt == NULL) {
1098 RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
1101 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1102 + client_info->vlan_count * sizeof(struct vlan_hdr);
1103 upd_pkt->data_len = pkt_size;
1104 upd_pkt->pkt_len = pkt_size;
1106 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1109 /* Add packet to update tx buffer */
1110 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1111 update_bufs_pkts[slave_idx]++;
1114 internals->mode6.ntt = 0;
1117 /* Send ARP packets on proper slaves */
1118 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1119 if (slave_bufs_pkts[i] > 0) {
1120 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1121 slave_bufs[i], slave_bufs_pkts[i]);
1122 for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1123 bufs[nb_pkts - 1 - num_not_send - j] =
1124 slave_bufs[i][nb_pkts - 1 - j];
1127 num_tx_total += num_send;
1128 num_not_send += slave_bufs_pkts[i] - num_send;
1130 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1131 /* Print TX stats including update packets */
1132 for (j = 0; j < slave_bufs_pkts[i]; j++) {
1133 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1134 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1140 /* Send update packets on proper slaves */
1141 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1142 if (update_bufs_pkts[i] > 0) {
1143 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1144 update_bufs_pkts[i]);
1145 for (j = num_send; j < update_bufs_pkts[i]; j++) {
1146 rte_pktmbuf_free(update_bufs[i][j]);
1148 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1149 for (j = 0; j < update_bufs_pkts[i]; j++) {
1150 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1151 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1157 /* Send non-ARP packets using tlb policy */
1158 if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1159 num_send = bond_ethdev_tx_burst_tlb(queue,
1160 slave_bufs[RTE_MAX_ETHPORTS],
1161 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1163 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1164 bufs[nb_pkts - 1 - num_not_send - j] =
1165 slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1168 num_tx_total += num_send;
1171 return num_tx_total;
/* Tx burst for mode 3 (balance): hash every packet onto an active slave with
 * the configured xmit policy (internals->xmit_hash), transmit per-slave
 * groups, and move untransmitted packets to the tail of bufs for retry. */
1175 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1178 struct bond_dev_private *internals;
1179 struct bond_tx_queue *bd_tx_q;
1181 uint8_t num_of_slaves;
1182 uint8_t slaves[RTE_MAX_ETHPORTS];
1184 uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
/* Per-slave staging arrays; VLA sized by the burst size. */
1188 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
1189 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1191 bd_tx_q = (struct bond_tx_queue *)queue;
1192 internals = bd_tx_q->dev_private;
1194 /* Copy slave list to protect against slave up/down changes during tx
1196 num_of_slaves = internals->active_slave_count;
1197 memcpy(slaves, internals->active_slaves,
1198 sizeof(internals->active_slaves[0]) * num_of_slaves);
1200 if (num_of_slaves < 1)
1201 return num_tx_total;
1203 /* Populate slaves mbuf with the packets which are to be sent on it */
1204 for (i = 0; i < nb_pkts; i++) {
1205 /* Select output slave using hash based on xmit policy */
1206 op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
1208 /* Populate slave mbuf arrays with mbufs for that slave */
1209 slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
1212 /* Send packet burst on each slave device */
1213 for (i = 0; i < num_of_slaves; i++) {
1214 if (slave_nb_pkts[i] > 0) {
1215 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1216 slave_bufs[i], slave_nb_pkts[i]);
1218 /* if tx burst fails move packets to end of bufs */
1219 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1220 int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
1222 tx_fail_total += slave_tx_fail_count;
1223 memcpy(&bufs[nb_pkts - tx_fail_total],
1224 &slave_bufs[i][num_tx_slave],
1225 slave_tx_fail_count * sizeof(bufs[0]));
1228 num_tx_total += num_tx_slave;
1232 return num_tx_total;
/*
 * TX burst for 802.3ad (LACP / mode 4).
 *
 * For every active slave, first drain up to BOND_MODE_8023AX_SLAVE_TX_PKTS
 * "slow" packets (LACPDUs/marker frames queued by the mode-4 state machine
 * on port->tx_ring) and place them at the front of that slave's buffer so
 * they are transmitted ahead of data traffic.  Data packets are then hashed
 * only across slaves whose actor state is DISTRIBUTING.  On partial TX,
 * unsent slow packets are dropped (freed) while unsent data packets are
 * moved to the tail of bufs[] for the caller.  The return value counts
 * only transmitted data packets (slow packets are excluded).
 *
 * NOTE(review): several original source lines are missing from this
 * excerpt; comments cover only the visible code.
 */
1236 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1239 struct bond_dev_private *internals;
1240 struct bond_tx_queue *bd_tx_q;
1242 uint8_t num_of_slaves;
1243 uint8_t slaves[RTE_MAX_ETHPORTS];
1244 /* positions in slaves, not ID */
1245 uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
1246 uint8_t distributing_count;
1248 uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
1249 uint16_t i, j, op_slave_idx;
/* Room for the data burst plus the maximum number of slow packets. */
1250 const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
1252 /* Allocate additional packets in case 8023AD mode. */
1253 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
1254 void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
1256 /* Total amount of packets in slave_bufs */
1257 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1258 /* Slow packets placed in each slave */
1259 uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1261 bd_tx_q = (struct bond_tx_queue *)queue;
1262 internals = bd_tx_q->dev_private;
1264 /* Copy slave list to protect against slave up/down changes during tx
1266 num_of_slaves = internals->active_slave_count;
1267 if (num_of_slaves < 1)
1268 return num_tx_total;
1270 memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
1272 distributing_count = 0;
1273 for (i = 0; i < num_of_slaves; i++) {
1274 struct port *port = &mode_8023ad_ports[slaves[i]];
/* Slow (control) packets go first in each slave's buffer. */
1276 slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
1277 slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
1279 slave_nb_pkts[i] = slave_slow_nb_pkts[i];
1281 for (j = 0; j < slave_slow_nb_pkts[i]; j++)
1282 slave_bufs[i][j] = slow_pkts[j];
/* Record slaves eligible for data traffic (actor DISTRIBUTING). */
1284 if (ACTOR_STATE(port, DISTRIBUTING))
1285 distributing_offsets[distributing_count++] = i;
1288 if (likely(distributing_count > 0)) {
1289 /* Populate slaves mbuf with the packets which are to be sent on it */
1290 for (i = 0; i < nb_pkts; i++) {
1291 /* Select output slave using hash based on xmit policy */
1292 op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
1294 /* Populate slave mbuf arrays with mbufs for that slave. Use only
1295 * slaves that are currently distributing. */
1296 uint8_t slave_offset = distributing_offsets[op_slave_idx];
1297 slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
1298 slave_nb_pkts[slave_offset]++;
1302 /* Send packet burst on each slave device */
1303 for (i = 0; i < num_of_slaves; i++) {
1304 if (slave_nb_pkts[i] == 0)
1307 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1308 slave_bufs[i], slave_nb_pkts[i]);
1310 /* If tx burst fails drop slow packets */
1311 for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
1312 rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
/* Count only data packets in the totals reported to the caller. */
1314 num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
1315 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
1317 /* If tx burst fails move packets to end of bufs */
1318 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1319 uint16_t j = nb_pkts - num_tx_fail_total;
1320 for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
1321 bufs[j] = slave_bufs[i][num_tx_slave];
1325 return num_tx_total;
1329 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1332 struct bond_dev_private *internals;
1333 struct bond_tx_queue *bd_tx_q;
1335 uint8_t tx_failed_flag = 0, num_of_slaves;
1336 uint8_t slaves[RTE_MAX_ETHPORTS];
1338 uint16_t max_nb_of_tx_pkts = 0;
1340 int slave_tx_total[RTE_MAX_ETHPORTS];
1341 int i, most_successful_tx_slave = -1;
1343 bd_tx_q = (struct bond_tx_queue *)queue;
1344 internals = bd_tx_q->dev_private;
1346 /* Copy slave list to protect against slave up/down changes during tx
1348 num_of_slaves = internals->active_slave_count;
1349 memcpy(slaves, internals->active_slaves,
1350 sizeof(internals->active_slaves[0]) * num_of_slaves);
1352 if (num_of_slaves < 1)
1355 /* Increment reference count on mbufs */
1356 for (i = 0; i < nb_pkts; i++)
1357 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1359 /* Transmit burst on each active slave */
1360 for (i = 0; i < num_of_slaves; i++) {
1361 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1364 if (unlikely(slave_tx_total[i] < nb_pkts))
1367 /* record the value and slave index for the slave which transmits the
1368 * maximum number of packets */
1369 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1370 max_nb_of_tx_pkts = slave_tx_total[i];
1371 most_successful_tx_slave = i;
1375 /* if slaves fail to transmit packets from burst, the calling application
1376 * is not expected to know about multiple references to packets so we must
1377 * handle failures of all packets except those of the most successful slave
1379 if (unlikely(tx_failed_flag))
1380 for (i = 0; i < num_of_slaves; i++)
1381 if (i != most_successful_tx_slave)
1382 while (slave_tx_total[i] < nb_pkts)
1383 rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1385 return max_nb_of_tx_pkts;
/*
 * Record link properties for the bonded device.
 *
 * In 802.3ad mode the first slave's autoneg/duplex/speed are saved in
 * mode4.slave_link so later slaves can be validated against them
 * (see link_properties_valid()).  In all other modes the bonded device
 * link is simply set to the default AUTONEG / FULL_DUPLEX values.
 */
1389 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1391 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1393 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1395 * If in mode 4 then save the link properties of the first
1396 * slave, all subsequent slaves must match these properties
1398 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1400 bond_link->link_autoneg = slave_link->link_autoneg;
1401 bond_link->link_duplex = slave_link->link_duplex;
1402 bond_link->link_speed = slave_link->link_speed;
1405 * In any other mode the link properties are set to default
1406 * values of AUTONEG/DUPLEX
1408 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1409 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
/*
 * Check whether a slave's link properties are compatible with the bond.
 *
 * Only meaningful in 802.3ad mode, where the slave's duplex, autoneg and
 * speed must exactly match the values captured from the first slave in
 * link_properties_set().  (Return statements fall outside this excerpt;
 * presumably non-matching properties yield a failure code — TODO confirm.)
 */
1414 link_properties_valid(struct rte_eth_dev *ethdev,
1415 struct rte_eth_link *slave_link)
1417 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1419 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1420 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1422 if (bond_link->link_duplex != slave_link->link_duplex ||
1423 bond_link->link_autoneg != slave_link->link_autoneg ||
1424 bond_link->link_speed != slave_link->link_speed)
/*
 * Copy a device's primary MAC address into dst_mac_addr.
 *
 * Both pointers are validated; on a NULL argument an error is logged
 * (the early-return lines fall outside this excerpt).
 */
1432 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1434 struct ether_addr *mac_addr;
1436 if (eth_dev == NULL) {
1437 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1441 if (dst_mac_addr == NULL) {
1442 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
/* First entry of the device's MAC table is the primary address. */
1446 mac_addr = eth_dev->data->mac_addrs;
1448 ether_addr_copy(mac_addr, dst_mac_addr);
/*
 * Set a device's primary MAC address in its mac_addrs table.
 *
 * NULL arguments are logged as errors (early-return lines not visible in
 * this excerpt).  The copy is skipped when the new address equals the
 * current one, avoiding needless writes.
 */
1453 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1455 struct ether_addr *mac_addr;
1457 if (eth_dev == NULL) {
1458 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1462 if (new_mac_addr == NULL) {
1463 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1467 mac_addr = eth_dev->data->mac_addrs;
1469 /* If new MAC is different to current MAC then update */
1470 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1471 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
/*
 * Propagate MAC addresses from the bonded device to its slaves, with
 * per-mode policy:
 *  - ROUND_ROBIN / BALANCE / BROADCAST: every slave gets the bond's MAC.
 *  - 8023AD: delegated to bond_mode_8023ad_mac_address_update().
 *  - ACTIVE_BACKUP / TLB / ALB (and default): only the current primary
 *    gets the bond's MAC; every other slave is restored to the MAC it
 *    had when it was added (persisted_mac_addr).
 *
 * NOTE(review): break/return lines are missing from this excerpt.
 */
1477 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1479 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1482 /* Update slave devices MAC addresses */
1483 if (internals->slave_count < 1)
1486 switch (internals->mode) {
1487 case BONDING_MODE_ROUND_ROBIN:
1488 case BONDING_MODE_BALANCE:
1489 case BONDING_MODE_BROADCAST:
1490 for (i = 0; i < internals->slave_count; i++) {
1491 if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
1492 bonded_eth_dev->data->mac_addrs)) {
1493 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1494 internals->slaves[i].port_id);
1499 case BONDING_MODE_8023AD:
1500 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1502 case BONDING_MODE_ACTIVE_BACKUP:
1503 case BONDING_MODE_TLB:
1504 case BONDING_MODE_ALB:
1506 for (i = 0; i < internals->slave_count; i++) {
1507 if (internals->slaves[i].port_id ==
1508 internals->current_primary_port) {
/* Primary slave carries the bonded device's MAC. */
1509 if (mac_address_set(&rte_eth_devices[internals->primary_port],
1510 bonded_eth_dev->data->mac_addrs)) {
1511 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1512 internals->current_primary_port);
/* Non-primary slaves are restored to their original MAC. */
1516 if (mac_address_set(
1517 &rte_eth_devices[internals->slaves[i].port_id],
1518 &internals->slaves[i].persisted_mac_addr)) {
1519 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1520 internals->slaves[i].port_id);
/*
 * Install the rx/tx burst function pointers matching the requested bonding
 * mode, enable mode-specific machinery (8023ad state machine, ALB), and
 * record the mode in internals->mode.
 *
 * In 802.3ad mode: the normal burst handlers service the LACP state
 * machine in-line, hence the warning that the application must call
 * rx/tx burst at least every 100ms; when dedicated control queues are
 * enabled the "fast queue" variants are used instead.
 *
 * NOTE(review): switch/case break lines and the error-return paths are
 * missing from this excerpt.
 */
1531 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1533 struct bond_dev_private *internals;
1535 internals = eth_dev->data->dev_private;
1538 case BONDING_MODE_ROUND_ROBIN:
1539 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1540 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1542 case BONDING_MODE_ACTIVE_BACKUP:
1543 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1544 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1546 case BONDING_MODE_BALANCE:
1547 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1548 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1550 case BONDING_MODE_BROADCAST:
1551 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1552 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1554 case BONDING_MODE_8023AD:
1555 if (bond_mode_8023ad_enable(eth_dev) != 0)
1558 if (internals->mode4.dedicated_queues.enabled == 0) {
1559 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1560 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1561 RTE_LOG(WARNING, PMD,
1562 "Using mode 4, it is necessary to do TX burst "
1563 "and RX burst at least every 100ms.\n");
1565 /* Use flow director's optimization */
1566 eth_dev->rx_pkt_burst =
1567 bond_ethdev_rx_burst_8023ad_fast_queue;
1568 eth_dev->tx_pkt_burst =
1569 bond_ethdev_tx_burst_8023ad_fast_queue;
1572 case BONDING_MODE_TLB:
1573 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1574 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1576 case BONDING_MODE_ALB:
/* ALB needs its ARP-rewrite machinery enabled before use. */
1577 if (bond_mode_alb_enable(eth_dev) != 0)
1580 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1581 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1587 internals->mode = mode;
/*
 * Set up the dedicated "slow" (LACP control traffic) queues on a slave.
 *
 * Lazily creates a per-slave mbuf pool for slow packets (rte_panic on
 * failure — allocation failure here is unrecoverable because resources
 * cannot be freed for reinitialization), then, when dedicated queues are
 * enabled, configures the reserved Rx queue (128 descs, fed from the slow
 * pool) and Tx queue (512 descs) at the mode4 qids.
 *
 * NOTE(review): error-return lines and some statements are missing from
 * this excerpt.
 */
1594 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1595 struct rte_eth_dev *slave_eth_dev)
1598 struct bond_dev_private *internals = (struct bond_dev_private *)
1599 bonded_eth_dev->data->dev_private;
1600 struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1602 if (port->slow_pool == NULL) {
1604 int slave_id = slave_eth_dev->data->port_id;
/* Pool name is unique per slave port id. */
1606 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1608 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1609 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1610 slave_eth_dev->data->numa_node);
1612 /* Any memory allocation failure in initialization is critical because
1613 * resources can't be free, so reinitialization is impossible. */
1614 if (port->slow_pool == NULL) {
1615 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1616 slave_id, mem_name, rte_strerror(rte_errno));
1620 if (internals->mode4.dedicated_queues.enabled == 1) {
1621 /* Configure slow Rx queue */
1623 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1624 internals->mode4.dedicated_queues.rx_qid, 128,
1625 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1626 NULL, port->slow_pool);
1629 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1630 slave_eth_dev->data->port_id,
1631 internals->mode4.dedicated_queues.rx_qid,
/* Slow Tx queue for transmitting LACPDUs. */
1636 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1637 internals->mode4.dedicated_queues.tx_qid, 512,
1638 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1642 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1643 slave_eth_dev->data->port_id,
1644 internals->mode4.dedicated_queues.tx_qid,
/*
 * (Re)configure a slave port to mirror the bonded device's configuration
 * and start it.  Steps, in order:
 *  1. stop the slave;
 *  2. enable LSC interrupts if the slave supports them;
 *  3. mirror RSS key/hash-functions/mq_mode and VLAN-filter settings from
 *     the bonded device;
 *  4. rte_eth_dev_configure() with the bond's rx/tx queue counts (mode 4
 *     with dedicated queues adjusts counts — lines not visible here);
 *  5. set up every Rx and Tx queue using the bond queues' parameters;
 *  6. in mode 4 with dedicated queues: configure slow queues and
 *     (re)install the LACP flow rule steering control traffic;
 *  7. start the slave;
 *  8. if RSS is active, push the bond's RETA to the slave;
 *  9. if LSC is active, force an initial link_update and fire the LSC
 *     callback so the bond notices the slave's current link state.
 *
 * NOTE(review): many error-return lines are missing from this excerpt;
 * each failed setup call presumably returns a negative errval — confirm
 * against the full source.
 */
1653 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1654 struct rte_eth_dev *slave_eth_dev)
1656 struct bond_rx_queue *bd_rx_q;
1657 struct bond_tx_queue *bd_tx_q;
1658 uint16_t nb_rx_queues;
1659 uint16_t nb_tx_queues;
1663 struct rte_flow_error flow_error;
1665 struct bond_dev_private *internals = (struct bond_dev_private *)
1666 bonded_eth_dev->data->dev_private;
/* Stop slave before reconfiguring it. */
1669 rte_eth_dev_stop(slave_eth_dev->data->port_id);
1671 /* Enable interrupts on slave device if supported */
1672 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1673 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1675 /* If RSS is enabled for bonding, try to enable it for slaves */
1676 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1677 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1679 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1680 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1681 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1682 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
/* No user key: let the slave PMD pick its default RSS key. */
1684 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1687 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1688 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1689 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1690 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1693 slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1694 bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1696 nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1697 nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1699 if (internals->mode == BONDING_MODE_8023AD) {
1700 if (internals->mode4.dedicated_queues.enabled == 1) {
1706 /* Configure device */
1707 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1708 nb_rx_queues, nb_tx_queues,
1709 &(slave_eth_dev->data->dev_conf));
1711 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1712 slave_eth_dev->data->port_id, errval);
1716 /* Setup Rx Queues */
1717 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1718 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1720 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1721 bd_rx_q->nb_rx_desc,
1722 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1723 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1726 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1727 slave_eth_dev->data->port_id, q_id, errval);
1732 /* Setup Tx Queues */
1733 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1734 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1736 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1737 bd_tx_q->nb_tx_desc,
1738 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1742 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1743 slave_eth_dev->data->port_id, q_id, errval);
1748 if (internals->mode == BONDING_MODE_8023AD &&
1749 internals->mode4.dedicated_queues.enabled == 1) {
1750 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
1754 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1755 slave_eth_dev->data->port_id) != 0) {
1757 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1758 slave_eth_dev->data->port_id, q_id, errval);
/* Replace any stale LACP steering flow before re-creating it. */
1762 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1763 rte_flow_destroy(slave_eth_dev->data->port_id,
1764 internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1767 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1768 slave_eth_dev->data->port_id);
/* Start device */
1772 errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1774 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1775 slave_eth_dev->data->port_id, errval);
1779 /* If RSS is enabled for bonding, synchronize RETA */
1780 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1782 struct bond_dev_private *internals;
1784 internals = bonded_eth_dev->data->dev_private;
1786 for (i = 0; i < internals->slave_count; i++) {
1787 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1788 errval = rte_eth_dev_rss_reta_update(
1789 slave_eth_dev->data->port_id,
1790 &internals->reta_conf[0],
1791 internals->slaves[i].reta_size);
/* RETA mismatch is logged as a warning only, not fatal. */
1793 RTE_LOG(WARNING, PMD,
1794 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1795 " RSS Configuration for bonding may be inconsistent.\n",
1796 slave_eth_dev->data->port_id, errval);
1803 /* If lsc interrupt is set, check initial slave's link status */
1804 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1805 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1806 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1807 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
/*
 * Remove a slave from the bond's slave table.
 *
 * Finds the slave's index, compacts the slaves[] array with memmove to
 * close the gap, decrements slave_count, and resets the slave ethdev so
 * it must be fully reconfigured before reuse.
 */
1815 slave_remove(struct bond_dev_private *internals,
1816 struct rte_eth_dev *slave_eth_dev)
1820 for (i = 0; i < internals->slave_count; i++)
1821 if (internals->slaves[i].port_id ==
1822 slave_eth_dev->data->port_id)
/* If the slave was not last, shift the tail of the array down one slot. */
1825 if (i < (internals->slave_count - 1))
1826 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1827 sizeof(internals->slaves[0]) *
1828 (internals->slave_count - i - 1));
1830 internals->slave_count--;
1832 /* force reconfiguration of slave interfaces */
1833 _rte_eth_dev_reset(slave_eth_dev);
1837 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
/*
 * Append a slave to the bond's slave table (at index slave_count).
 *
 * Records the slave's port id, marks it for link-status polling when it
 * cannot raise LSC interrupts, and persists its current MAC address so it
 * can be restored if the slave is later demoted from primary
 * (see mac_address_slaves_update()).
 */
1840 slave_add(struct bond_dev_private *internals,
1841 struct rte_eth_dev *slave_eth_dev)
1843 struct bond_slave_details *slave_details =
1844 &internals->slaves[internals->slave_count];
1846 slave_details->port_id = slave_eth_dev->data->port_id;
1847 slave_details->last_link_status = 0;
1849 /* Mark slave devices that don't support interrupts so we can
1850 * compensate when we start the bond
1852 if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1853 slave_details->link_status_poll_enabled = 1;
1856 slave_details->link_status_wait_to_complete = 0;
1857 /* clean tlb_last_obytes when adding port for bonding device */
1858 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1859 sizeof(struct ether_addr));
/*
 * Set the bond's current primary slave port.
 *
 * With no active slaves the requested port is accepted unconditionally;
 * otherwise it becomes primary only if it appears in the active-slave
 * list (ports that are configured but down are not promoted).
 */
1863 bond_ethdev_primary_set(struct bond_dev_private *internals,
1864 uint8_t slave_port_id)
1868 if (internals->active_slave_count < 1)
1869 internals->current_primary_port = slave_port_id;
1871 /* Search bonded device slave ports for new proposed primary port */
1872 for (i = 0; i < internals->active_slave_count; i++) {
1873 if (internals->active_slaves[i] == slave_port_id)
1874 internals->current_primary_port = slave_port_id;
1879 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
/*
 * dev_start operation for the bonded device.
 *
 * Rejects being called on a non-bonded (slave) device, requires at least
 * one slave, then: adopts the primary slave's persisted MAC if the user
 * did not set one, pushes MACs to all slaves, re-applies promiscuous
 * mode, reserves the mode-4 dedicated queue ids (one past the data
 * queues), reconfigures and starts every slave via slave_configure(),
 * arms the link-status polling alarm when any slave lacks LSC interrupts,
 * restores the user-chosen primary, and kicks off mode-specific machinery
 * (8023ad state machine, TLB/ALB callbacks).
 *
 * NOTE(review): error-return lines are missing from this excerpt.
 */
1882 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1884 struct bond_dev_private *internals;
1887 /* slave eth dev will be started by bonded device */
1888 if (check_for_bonded_ethdev(eth_dev)) {
1889 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1890 eth_dev->data->port_id);
/* Link starts DOWN until a slave reports UP. */
1894 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1895 eth_dev->data->dev_started = 1;
1897 internals = eth_dev->data->dev_private;
1899 if (internals->slave_count == 0) {
1900 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1904 if (internals->user_defined_mac == 0) {
1905 struct ether_addr *new_mac_addr = NULL;
/* Without a user-set MAC, inherit the primary slave's original MAC. */
1907 for (i = 0; i < internals->slave_count; i++)
1908 if (internals->slaves[i].port_id == internals->primary_port)
1909 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1911 if (new_mac_addr == NULL)
1914 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1915 RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1916 eth_dev->data->port_id);
1921 /* Update all slave devices MACs*/
1922 if (mac_address_slaves_update(eth_dev) != 0)
1925 /* If bonded device is configure in promiscuous mode then re-apply config */
1926 if (internals->promiscuous_en)
1927 bond_ethdev_promiscuous_enable(eth_dev);
1929 if (internals->mode == BONDING_MODE_8023AD) {
1930 if (internals->mode4.dedicated_queues.enabled == 1) {
/* Dedicated control queues sit just past the data queues. */
1931 internals->mode4.dedicated_queues.rx_qid =
1932 eth_dev->data->nb_rx_queues;
1933 internals->mode4.dedicated_queues.tx_qid =
1934 eth_dev->data->nb_tx_queues;
1939 /* Reconfigure each slave device if starting bonded device */
1940 for (i = 0; i < internals->slave_count; i++) {
1941 struct rte_eth_dev *slave_ethdev =
1942 &(rte_eth_devices[internals->slaves[i].port_id]);
1943 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1945 "bonded port (%d) failed to reconfigure slave device (%d)",
1946 eth_dev->data->port_id,
1947 internals->slaves[i].port_id);
1950 /* We will need to poll for link status if any slave doesn't
1951 * support interrupts
1953 if (internals->slaves[i].link_status_poll_enabled)
1954 internals->link_status_polling_enabled = 1;
1956 /* start polling if needed */
1957 if (internals->link_status_polling_enabled) {
/* presumably rte_eal_alarm_set(...) — the call line is not visible here */
1959 internals->link_status_polling_interval_ms * 1000,
1960 bond_ethdev_slave_link_status_change_monitor,
1961 (void *)&rte_eth_devices[internals->port_id]);
1964 if (internals->user_defined_primary_port)
1965 bond_ethdev_primary_set(internals, internals->primary_port);
1967 if (internals->mode == BONDING_MODE_8023AD)
1968 bond_mode_8023ad_start(eth_dev);
1970 if (internals->mode == BONDING_MODE_TLB ||
1971 internals->mode == BONDING_MODE_ALB)
1972 bond_tlb_enable(internals);
/*
 * Free all Rx and Tx queue structures of the bonded device and zero the
 * queue counts.  Each queue pointer is NULLed after rte_free to prevent
 * double-free on a subsequent close/teardown.
 */
1978 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1982 if (dev->data->rx_queues != NULL) {
1983 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1984 rte_free(dev->data->rx_queues[i]);
1985 dev->data->rx_queues[i] = NULL;
1987 dev->data->nb_rx_queues = 0;
1990 if (dev->data->tx_queues != NULL) {
1991 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1992 rte_free(dev->data->tx_queues[i]);
1993 dev->data->tx_queues[i] = NULL;
1995 dev->data->nb_tx_queues = 0;
/*
 * dev_stop operation for the bonded device.
 *
 * Mode 4: stops the LACP state machine and drains/frees every pending
 * packet on each active slave's rx_ring and tx_ring so no control traffic
 * survives the stop.  TLB/ALB: disables the TLB callback and clears the
 * per-slave byte counters.  Finally resets the active-slave list, link
 * polling, per-slave link status, and marks the device link DOWN/stopped.
 */
2000 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2002 struct bond_dev_private *internals = eth_dev->data->dev_private;
2005 if (internals->mode == BONDING_MODE_8023AD) {
2009 bond_mode_8023ad_stop(eth_dev);
2011 /* Discard all messages to/from mode 4 state machines */
2012 for (i = 0; i < internals->active_slave_count; i++) {
2013 port = &mode_8023ad_ports[internals->active_slaves[i]];
2015 RTE_ASSERT(port->rx_ring != NULL);
2016 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2017 rte_pktmbuf_free(pkt);
2019 RTE_ASSERT(port->tx_ring != NULL);
2020 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2021 rte_pktmbuf_free(pkt);
2025 if (internals->mode == BONDING_MODE_TLB ||
2026 internals->mode == BONDING_MODE_ALB) {
2027 bond_tlb_disable(internals);
2028 for (i = 0; i < internals->active_slave_count; i++)
2029 tlb_last_obytets[internals->active_slaves[i]] = 0;
2032 internals->active_slave_count = 0;
2033 internals->link_status_polling_enabled = 0;
2034 for (i = 0; i < internals->slave_count; i++)
2035 internals->slaves[i].last_link_status = 0;
2037 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2038 eth_dev->data->dev_started = 0;
/*
 * dev_close operation: stop and detach every slave, then release queues
 * and the VLAN-filter bitmap.
 *
 * The loop uses `skipped` as the index so that a slave whose removal
 * fails is skipped (index advances — increment line not visible here)
 * instead of looping forever on it.
 */
2042 bond_ethdev_close(struct rte_eth_dev *dev)
2044 struct bond_dev_private *internals = dev->data->dev_private;
2045 uint8_t bond_port_id = internals->port_id;
2048 RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
2049 while (internals->slave_count != skipped) {
2050 uint8_t port_id = internals->slaves[skipped].port_id;
2052 rte_eth_dev_stop(port_id);
2054 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2056 "Failed to remove port %d from bonded device "
2057 "%s\n", port_id, dev->device->name);
2061 bond_ethdev_free_queues(dev);
2062 rte_bitmap_reset(internals->vlan_filter_bmp);
2065 /* forward declaration */
2066 static int bond_ethdev_configure(struct rte_eth_dev *dev);
/*
 * dev_infos_get operation for the bonded device.
 *
 * Advertises one MAC address, the candidate max Rx packet length (or the
 * jumbo-frame maximum), and queue limits computed as the minimum of all
 * slaves' max_rx/tx_queues — every slave must be able to match the bond's
 * queue count.  In 802.3ad mode with dedicated control queues one queue in
 * each direction is reserved, so the advertised maxima are reduced by 1.
 * Offload capabilities and RETA size are mirrored from internals.
 */
2069 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2071 struct bond_dev_private *internals = dev->data->dev_private;
/* Start from "unlimited" and narrow using each slave's capabilities. */
2073 uint16_t max_nb_rx_queues = UINT16_MAX;
2074 uint16_t max_nb_tx_queues = UINT16_MAX;
2076 dev_info->max_mac_addrs = 1;
2078 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2079 internals->candidate_max_rx_pktlen :
2080 ETHER_MAX_JUMBO_FRAME_LEN;
2082 /* Max number of tx/rx queues that the bonded device can support is the
2083 * minimum values of the bonded slaves, as all slaves must be capable
2084 * of supporting the same number of tx/rx queues.
2086 if (internals->slave_count > 0) {
2087 struct rte_eth_dev_info slave_info;
2090 for (idx = 0; idx < internals->slave_count; idx++) {
2091 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2094 if (slave_info.max_rx_queues < max_nb_rx_queues)
2095 max_nb_rx_queues = slave_info.max_rx_queues;
2097 if (slave_info.max_tx_queues < max_nb_tx_queues)
2098 max_nb_tx_queues = slave_info.max_tx_queues;
2102 dev_info->max_rx_queues = max_nb_rx_queues;
2103 dev_info->max_tx_queues = max_nb_tx_queues;
2106 * If dedicated hw queues enabled for link bonding device in LACP mode
2107 * then we need to reduce the maximum number of data path queues by 1.
2109 if (internals->mode == BONDING_MODE_8023AD &&
2110 internals->mode4.dedicated_queues.enabled == 1) {
2111 dev_info->max_rx_queues--;
2112 dev_info->max_tx_queues--;
2115 dev_info->min_rx_bufsize = 0;
2117 dev_info->rx_offload_capa = internals->rx_offload_capa;
2118 dev_info->tx_offload_capa = internals->tx_offload_capa;
2119 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2121 dev_info->reta_size = internals->reta_size;
/*
 * vlan_filter_set operation: record the VLAN id in the bond's bitmap
 * (set when `on`, cleared otherwise) and propagate the filter to every
 * slave.  The internals lock is held so this cannot race with slave
 * add/remove.  Slaves that do not support VLAN filtering only produce a
 * warning — the operation is best-effort across the bond.
 */
2125 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2129 struct bond_dev_private *internals = dev->data->dev_private;
2131 /* don't do this while a slave is being added */
2132 rte_spinlock_lock(&internals->lock);
2135 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2137 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2139 for (i = 0; i < internals->slave_count; i++) {
2140 uint8_t port_id = internals->slaves[i].port_id;
2142 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2144 RTE_LOG(WARNING, PMD,
2145 "Setting VLAN filter on slave port %u not supported.\n",
2149 rte_spinlock_unlock(&internals->lock);
/*
 * rx_queue_setup operation: allocate a bond_rx_queue on the device's NUMA
 * node, stash the queue id, descriptor count, rxconf and mempool (these are
 * later replayed onto each slave in slave_configure()), and register it in
 * dev->data->rx_queues.  The socket_id parameter is unused — allocation
 * follows the device's own numa_node.
 */
2154 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2155 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2156 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2158 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2159 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2160 0, dev->data->numa_node);
2161 if (bd_rx_q == NULL)
2164 bd_rx_q->queue_id = rx_queue_id;
2165 bd_rx_q->dev_private = dev->data->dev_private;
2167 bd_rx_q->nb_rx_desc = nb_rx_desc;
2169 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2170 bd_rx_q->mb_pool = mb_pool;
2172 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
/*
 * tx_queue_setup operation: mirror of bond_ethdev_rx_queue_setup for the
 * Tx side — allocate a bond_tx_queue on the device's NUMA node, record the
 * queue id, descriptor count and txconf for later replay onto slaves, and
 * register it in dev->data->tx_queues.
 */
2178 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2179 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2180 const struct rte_eth_txconf *tx_conf)
2182 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2183 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2184 0, dev->data->numa_node);
2186 if (bd_tx_q == NULL)
2189 bd_tx_q->queue_id = tx_queue_id;
2190 bd_tx_q->dev_private = dev->data->dev_private;
2192 bd_tx_q->nb_tx_desc = nb_tx_desc;
2193 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2195 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
/* rx_queue_release operation; body not visible in this excerpt —
 * presumably frees the bond_rx_queue allocated in rx_queue_setup. */
2201 bond_ethdev_rx_queue_release(void *queue)
/* tx_queue_release operation; body not visible in this excerpt —
 * presumably frees the bond_tx_queue allocated in tx_queue_setup. */
2210 bond_ethdev_tx_queue_release(void *queue)
/*
 * EAL alarm callback that polls link status of slaves that cannot raise
 * LSC interrupts.
 *
 * Skips entirely if the bond is not started or polling is disabled.  Uses
 * trylock on internals->lock so it never blocks a concurrent
 * configuration — when the lock is busy, polling_slave_found stays 1 so
 * the alarm is simply re-armed for the next period.  For each polled
 * slave, the link is refreshed via the slave's link_update op and the
 * LSC callback is fired whenever the status changed since last check.
 */
2219 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2221 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2222 struct bond_dev_private *internals;
2224 /* Default value for polling slave found is true as we don't want to
2225 * disable the polling thread if we cannot get the lock */
2226 int i, polling_slave_found = 1;
2231 bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2232 internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2234 if (!bonded_ethdev->data->dev_started ||
2235 !internals->link_status_polling_enabled)
2238 /* If device is currently being configured then don't check slaves link
2239 * status, wait until next period */
2240 if (rte_spinlock_trylock(&internals->lock)) {
2241 if (internals->slave_count > 0)
2242 polling_slave_found = 0;
2244 for (i = 0; i < internals->slave_count; i++) {
2245 if (!internals->slaves[i].link_status_poll_enabled)
2248 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2249 polling_slave_found = 1;
2251 /* Update slave link status */
2252 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2253 internals->slaves[i].link_status_wait_to_complete);
2255 /* if link status has changed since last checked then call lsc
2257 if (slave_ethdev->data->dev_link.link_status !=
2258 internals->slaves[i].last_link_status) {
2259 internals->slaves[i].last_link_status =
2260 slave_ethdev->data->dev_link.link_status;
2262 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2263 RTE_ETH_EVENT_INTR_LSC,
2264 &bonded_ethdev->data->port_id,
2268 rte_spinlock_unlock(&internals->lock);
2271 if (polling_slave_found)
2272 /* Set alarm to continue monitoring link status of slave ethdev's */
2273 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2274 bond_ethdev_slave_link_status_change_monitor, cb_arg);
/*
 * link_update operation for the bonded device.
 *
 * Link is DOWN unless the device is started and has at least one active
 * slave.  Aggregate link speed depends on the mode:
 *  - BROADCAST: the minimum of all active slaves' speeds (transmitting
 *    faster than the slowest slave would drop packets on it);
 *  - ACTIVE_BACKUP: the current primary slave's speed;
 *  - 8023AD: also copies autoneg/duplex from mode4.slave_link, then falls
 *    through to the summing case;
 *  - ROUND_ROBIN / BALANCE / TLB / ALB: sum of active slaves' speeds
 *    (theoretical maximum).
 */
2278 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2280 void (*link_update)(uint8_t port_id, struct rte_eth_link *eth_link);
2282 struct bond_dev_private *bond_ctx;
2283 struct rte_eth_link slave_link;
2287 bond_ctx = ethdev->data->dev_private;
2289 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2291 if (ethdev->data->dev_started == 0 ||
2292 bond_ctx->active_slave_count == 0) {
2293 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2297 ethdev->data->dev_link.link_status = ETH_LINK_UP;
2299 if (wait_to_complete)
2300 link_update = rte_eth_link_get;
2302 link_update = rte_eth_link_get_nowait;
2304 switch (bond_ctx->mode) {
2305 case BONDING_MODE_BROADCAST:
2307 * Setting link speed to UINT32_MAX to ensure we pick up the
2308 * value of the first active slave
2310 ethdev->data->dev_link.link_speed = UINT32_MAX;
2313 * link speed is minimum value of all the slaves link speed as
2314 * packet loss will occur on this slave if transmission at rates
2315 * greater than this are attempted
2317 for (idx = 1; idx < bond_ctx->active_slave_count; idx++) {
/* BUG(review): queries active_slaves[0] every iteration; should
 * almost certainly be active_slaves[idx] (and idx start at 0) so
 * every active slave's speed is considered — fixed in later DPDK. */
2318 link_update(bond_ctx->active_slaves[0], &slave_link);
2320 if (slave_link.link_speed <
2321 ethdev->data->dev_link.link_speed)
2322 ethdev->data->dev_link.link_speed =
2323 slave_link.link_speed;
2326 case BONDING_MODE_ACTIVE_BACKUP:
2327 /* Current primary slave */
2328 link_update(bond_ctx->current_primary_port, &slave_link);
2330 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2332 case BONDING_MODE_8023AD:
2333 ethdev->data->dev_link.link_autoneg =
2334 bond_ctx->mode4.slave_link.link_autoneg;
2335 ethdev->data->dev_link.link_duplex =
2336 bond_ctx->mode4.slave_link.link_duplex;
2337 /* fall through to update link speed */
2338 case BONDING_MODE_ROUND_ROBIN:
2339 case BONDING_MODE_BALANCE:
2340 case BONDING_MODE_TLB:
2341 case BONDING_MODE_ALB:
2344 * In theses mode the maximum theoretical link speed is the sum
2347 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2349 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2350 link_update(bond_ctx->active_slaves[idx], &slave_link);
2352 ethdev->data->dev_link.link_speed +=
2353 slave_link.link_speed;
2363 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2365 struct bond_dev_private *internals = dev->data->dev_private;
2366 struct rte_eth_stats slave_stats;
2369 for (i = 0; i < internals->slave_count; i++) {
2370 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2372 stats->ipackets += slave_stats.ipackets;
2373 stats->opackets += slave_stats.opackets;
2374 stats->ibytes += slave_stats.ibytes;
2375 stats->obytes += slave_stats.obytes;
2376 stats->imissed += slave_stats.imissed;
2377 stats->ierrors += slave_stats.ierrors;
2378 stats->oerrors += slave_stats.oerrors;
2379 stats->rx_nombuf += slave_stats.rx_nombuf;
2381 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2382 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2383 stats->q_opackets[j] += slave_stats.q_opackets[j];
2384 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2385 stats->q_obytes[j] += slave_stats.q_obytes[j];
2386 stats->q_errors[j] += slave_stats.q_errors[j];
2393 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2395 struct bond_dev_private *internals = dev->data->dev_private;
2398 for (i = 0; i < internals->slave_count; i++)
2399 rte_eth_stats_reset(internals->slaves[i].port_id);
2403 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2405 struct bond_dev_private *internals = eth_dev->data->dev_private;
2408 internals->promiscuous_en = 1;
2410 switch (internals->mode) {
2411 /* Promiscuous mode is propagated to all slaves */
2412 case BONDING_MODE_ROUND_ROBIN:
2413 case BONDING_MODE_BALANCE:
2414 case BONDING_MODE_BROADCAST:
2415 for (i = 0; i < internals->slave_count; i++)
2416 rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2418 /* In mode4 promiscus mode is managed when slave is added/removed */
2419 case BONDING_MODE_8023AD:
2421 /* Promiscuous mode is propagated only to primary slave */
2422 case BONDING_MODE_ACTIVE_BACKUP:
2423 case BONDING_MODE_TLB:
2424 case BONDING_MODE_ALB:
2426 rte_eth_promiscuous_enable(internals->current_primary_port);
2431 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2433 struct bond_dev_private *internals = dev->data->dev_private;
2436 internals->promiscuous_en = 0;
2438 switch (internals->mode) {
2439 /* Promiscuous mode is propagated to all slaves */
2440 case BONDING_MODE_ROUND_ROBIN:
2441 case BONDING_MODE_BALANCE:
2442 case BONDING_MODE_BROADCAST:
2443 for (i = 0; i < internals->slave_count; i++)
2444 rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2446 /* In mode4 promiscus mode is set managed when slave is added/removed */
2447 case BONDING_MODE_8023AD:
2449 /* Promiscuous mode is propagated only to primary slave */
2450 case BONDING_MODE_ACTIVE_BACKUP:
2451 case BONDING_MODE_TLB:
2452 case BONDING_MODE_ALB:
2454 rte_eth_promiscuous_disable(internals->current_primary_port);
2459 bond_ethdev_delayed_lsc_propagation(void *arg)
2464 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2465 RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
/**
 * LSC event callback registered on each slave port. Reacts to a slave's link
 * going up or down: maintains the active-slave list, elects/updates the
 * primary port, refreshes the bonded device's link properties, and propagates
 * (possibly delayed) an LSC event for the bonded device itself.
 *
 * @param port_id   slave port whose link state changed
 * @param type      event type; only RTE_ETH_EVENT_INTR_LSC is handled
 * @param param     pointer to the bonded device's port id (uint8_t)
 * @param ret_param unused
 */
2469 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
2470 		void *param, void *ret_param __rte_unused)
2472 struct rte_eth_dev *bonded_eth_dev;
2473 struct bond_dev_private *internals;
2474 struct rte_eth_link link;
2477 int i, valid_slave = 0;
/* lsc_flag is set when the bond's own link state flips (first slave up /
 * last slave down) and gates the event-propagation section at the end */
2479 uint8_t lsc_flag = 0;
2481 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
/* param carries the bonded device's port id, not the slave's */
2484 bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
2486 if (check_for_bonded_ethdev(bonded_eth_dev))
2489 internals = bonded_eth_dev->data->dev_private;
2491 /* If the device isn't started don't handle interrupts */
2492 if (!bonded_eth_dev->data->dev_started)
2495 /* verify that port_id is a valid slave of bonded port */
2496 for (i = 0; i < internals->slave_count; i++) {
2497 if (internals->slaves[i].port_id == port_id) {
2506 /* Search for port in active port list */
2507 active_pos = find_slave_by_id(internals->active_slaves,
2508 internals->active_slave_count, port_id);
2510 rte_eth_link_get_nowait(port_id, &link);
2511 if (link.link_status) {
/* Link up: ignore if the slave is already in the active list */
2512 if (active_pos < internals->active_slave_count)
2515 /* if no active slave ports then set this port to be primary port */
2516 if (internals->active_slave_count < 1) {
2517 /* If first active slave, then change link status */
2518 bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2519 internals->current_primary_port = port_id;
/* First active slave: refresh slave MAC addressing for the bond */
2522 mac_address_slaves_update(bonded_eth_dev);
2525 activate_slave(bonded_eth_dev, port_id);
2527 /* If user has defined the primary port then default to using it */
2528 if (internals->user_defined_primary_port &&
2529 internals->primary_port == port_id)
2530 bond_ethdev_primary_set(internals, port_id);
/* Link down: ignore if the slave was not in the active list */
2532 if (active_pos == internals->active_slave_count)
2535 /* Remove from active slave list */
2536 deactivate_slave(bonded_eth_dev, port_id);
2538 if (internals->active_slave_count < 1)
2541 /* Update primary id, take first active slave from list or if none
2542 * available set to -1 */
2543 if (port_id == internals->current_primary_port) {
2544 if (internals->active_slave_count > 0)
2545 bond_ethdev_primary_set(internals,
2546 internals->active_slaves[0]);
2548 internals->current_primary_port = internals->primary_port;
/**
2553 * Update bonded device link properties after any change to active
 * slaves.
 */
2556 bond_ethdev_link_update(bonded_eth_dev, 0);
2559 /* Cancel any possible outstanding interrupts if delays are enabled */
2560 if (internals->link_up_delay_ms > 0 ||
2561 internals->link_down_delay_ms > 0)
2562 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
/* Bond link came up: propagate immediately, or via alarm if a
 * link-up delay is configured */
2565 if (bonded_eth_dev->data->dev_link.link_status) {
2566 if (internals->link_up_delay_ms > 0)
2567 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2568 bond_ethdev_delayed_lsc_propagation,
2569 (void *)bonded_eth_dev);
2571 _rte_eth_dev_callback_process(bonded_eth_dev,
2572 RTE_ETH_EVENT_INTR_LSC,
/* Bond link went down: same propagation, using the down delay */
2576 if (internals->link_down_delay_ms > 0)
2577 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2578 bond_ethdev_delayed_lsc_propagation,
2579 (void *)bonded_eth_dev);
2581 _rte_eth_dev_callback_process(bonded_eth_dev,
2582 RTE_ETH_EVENT_INTR_LSC,
2590 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2591 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2595 int slave_reta_size;
2596 unsigned reta_count;
2597 struct bond_dev_private *internals = dev->data->dev_private;
2599 if (reta_size != internals->reta_size)
2602 /* Copy RETA table */
2603 reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2605 for (i = 0; i < reta_count; i++) {
2606 internals->reta_conf[i].mask = reta_conf[i].mask;
2607 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2608 if ((reta_conf[i].mask >> j) & 0x01)
2609 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2612 /* Fill rest of array */
2613 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2614 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2615 sizeof(internals->reta_conf[0]) * reta_count);
2617 /* Propagate RETA over slaves */
2618 for (i = 0; i < internals->slave_count; i++) {
2619 slave_reta_size = internals->slaves[i].reta_size;
2620 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2621 &internals->reta_conf[0], slave_reta_size);
2630 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2631 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2634 struct bond_dev_private *internals = dev->data->dev_private;
2636 if (reta_size != internals->reta_size)
2639 /* Copy RETA table */
2640 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2641 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2642 if ((reta_conf[i].mask >> j) & 0x01)
2643 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2649 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2650 struct rte_eth_rss_conf *rss_conf)
2653 struct bond_dev_private *internals = dev->data->dev_private;
2654 struct rte_eth_rss_conf bond_rss_conf;
2656 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2658 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2660 if (bond_rss_conf.rss_hf != 0)
2661 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2663 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2664 sizeof(internals->rss_key)) {
2665 if (bond_rss_conf.rss_key_len == 0)
2666 bond_rss_conf.rss_key_len = 40;
2667 internals->rss_key_len = bond_rss_conf.rss_key_len;
2668 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2669 internals->rss_key_len);
2672 for (i = 0; i < internals->slave_count; i++) {
2673 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2683 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2684 struct rte_eth_rss_conf *rss_conf)
2686 struct bond_dev_private *internals = dev->data->dev_private;
2688 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2689 rss_conf->rss_key_len = internals->rss_key_len;
2690 if (rss_conf->rss_key)
2691 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
/*
 * eth_dev_ops vtable for the bonded virtual device: wires the generic ethdev
 * API (start/stop/configure, queue setup, link, stats, promiscuous, RSS)
 * to the bond_ethdev_* implementations in this file.
 */
2696 const struct eth_dev_ops default_dev_ops = {
2697 .dev_start = bond_ethdev_start,
2698 .dev_stop = bond_ethdev_stop,
2699 .dev_close = bond_ethdev_close,
2700 .dev_configure = bond_ethdev_configure,
2701 .dev_infos_get = bond_ethdev_info,
2702 .vlan_filter_set = bond_ethdev_vlan_filter_set,
2703 .rx_queue_setup = bond_ethdev_rx_queue_setup,
2704 .tx_queue_setup = bond_ethdev_tx_queue_setup,
2705 .rx_queue_release = bond_ethdev_rx_queue_release,
2706 .tx_queue_release = bond_ethdev_tx_queue_release,
2707 .link_update = bond_ethdev_link_update,
2708 .stats_get = bond_ethdev_stats_get,
2709 .stats_reset = bond_ethdev_stats_reset,
2710 .promiscuous_enable = bond_ethdev_promiscuous_enable,
2711 .promiscuous_disable = bond_ethdev_promiscuous_disable,
2712 .reta_update = bond_ethdev_rss_reta_update,
2713 .reta_query = bond_ethdev_rss_reta_query,
2714 .rss_hash_update = bond_ethdev_rss_hash_update,
2715 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get
/**
 * Allocate and initialise a bonded ethdev for the given vdev: reserves the
 * ethdev entry, allocates the MAC address storage, initialises the private
 * bond_dev_private state to defaults, sets the bonding mode, and creates the
 * VLAN filter bitmap.
 *
 * @param dev  vdev backing the bonded device (name and NUMA node taken
 *             from it)
 * @param mode initial bonding mode, applied via bond_ethdev_mode_set()
 *
 * @return the new bonded device's port id on success; on failure the error
 *         path frees whatever was allocated and releases the ethdev entry
 */
2719 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2721 const char *name = rte_vdev_device_name(dev);
2722 uint8_t socket_id = dev->device.numa_node;
2723 struct bond_dev_private *internals = NULL;
2724 struct rte_eth_dev *eth_dev = NULL;
2725 uint32_t vlan_filter_bmp_size;
2727 /* now do all data allocation - for eth_dev structure, dummy pci driver
2728 * and internal (private) data
2731 /* reserve an ethdev entry */
2732 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2733 if (eth_dev == NULL) {
2734 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2738 internals = eth_dev->data->dev_private;
/* A bond exposes a single rx/tx queue pair to the application */
2739 eth_dev->data->nb_rx_queues = (uint16_t)1;
2740 eth_dev->data->nb_tx_queues = (uint16_t)1;
/* MAC storage is allocated on the device's NUMA socket */
2742 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2744 if (eth_dev->data->mac_addrs == NULL) {
2745 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2749 eth_dev->dev_ops = &default_dev_ops;
2750 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
2751 RTE_ETH_DEV_DETACHABLE;
2753 rte_spinlock_init(&internals->lock);
/* Default private state: no mode, no primary, L2 balance policy */
2755 internals->port_id = eth_dev->data->port_id;
2756 internals->mode = BONDING_MODE_INVALID;
/* RTE_MAX_ETHPORTS + 1 acts as an "unset primary" sentinel */
2757 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2758 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2759 internals->xmit_hash = xmit_l2_hash;
2760 internals->user_defined_mac = 0;
2762 internals->link_status_polling_enabled = 0;
2764 internals->link_status_polling_interval_ms =
2765 DEFAULT_POLLING_INTERVAL_10_MS;
2766 internals->link_down_delay_ms = 0;
2767 internals->link_up_delay_ms = 0;
2769 internals->slave_count = 0;
2770 internals->active_slave_count = 0;
2771 internals->rx_offload_capa = 0;
2772 internals->tx_offload_capa = 0;
2773 internals->candidate_max_rx_pktlen = 0;
2774 internals->max_rx_pktlen = 0;
2776 /* Initially allow to choose any offload type */
2777 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2779 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2780 memset(internals->slaves, 0, sizeof(internals->slaves));
2782 /* Set mode 4 default configuration */
2783 bond_mode_8023ad_setup(eth_dev, NULL);
2784 if (bond_ethdev_mode_set(eth_dev, mode)) {
2785 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
2786 eth_dev->data->port_id, mode);
/* VLAN filter bitmap covers all 4096 possible VLAN ids */
2790 vlan_filter_bmp_size =
2791 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2792 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2793 RTE_CACHE_LINE_SIZE);
2794 if (internals->vlan_filter_bmpmem == NULL) {
2796 "Failed to allocate vlan bitmap for bonded device %u\n",
2797 eth_dev->data->port_id);
2801 internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2802 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2803 if (internals->vlan_filter_bmp == NULL) {
2805 "Failed to init vlan bitmap for bonded device %u\n",
2806 eth_dev->data->port_id);
2807 rte_free(internals->vlan_filter_bmpmem);
2811 return eth_dev->data->port_id;
/* Error path: undo partial allocations before failing */
2814 rte_free(internals);
2815 if (eth_dev != NULL) {
2816 rte_free(eth_dev->data->mac_addrs);
2817 rte_eth_dev_release_port(eth_dev);
/**
 * vdev probe entry point: parses the device arguments (mode is mandatory,
 * socket id optional — defaults to the caller's socket), creates the bonded
 * ethdev via bond_alloc(), and stashes the kvargs list in the private data
 * so bond_ethdev_configure() can finish parsing the remaining options later.
 *
 * @return 0 on success, negative on parse or allocation failure (kvlist is
 *         freed on the error path)
 */
2823 bond_probe(struct rte_vdev_device *dev)
2826 struct bond_dev_private *internals;
2827 struct rte_kvargs *kvlist;
2828 uint8_t bonding_mode, socket_id/*, agg_mode*/;
2829 int arg_count, port_id;
2834 name = rte_vdev_device_name(dev);
2835 RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2837 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
2838 pmd_bond_init_valid_arguments);
2842 /* Parse link bonding mode */
2843 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2844 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2845 &bond_ethdev_parse_slave_mode_kvarg,
2846 &bonding_mode) != 0) {
2847 RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
/* mode= given zero times or more than once is an error */
2852 RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
2853 "device %s\n", name);
2857 /* Parse socket id to create bonding device on */
2858 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2859 if (arg_count == 1) {
2860 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2861 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2863 RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
2864 "bonded device %s\n", name);
2867 } else if (arg_count > 1) {
2868 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
2869 "bonded device %s\n", name);
/* No socket id argument: default to the probing lcore's socket */
2872 socket_id = rte_socket_id();
2875 dev->device.numa_node = socket_id;
2877 /* Create link bonding eth device */
2878 port_id = bond_alloc(dev, bonding_mode);
2880 RTE_LOG(ERR, EAL, "Failed to create socket %s in mode %u on "
2881 "socket %u.\n", name, bonding_mode, socket_id);
/* Keep kvlist: bond_ethdev_configure() parses the rest of it later */
2884 internals = rte_eth_devices[port_id].data->dev_private;
2885 internals->kvlist = kvlist;
2887 RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2888 "socket %u.\n", name, port_id, bonding_mode, socket_id);
/* Error path: release the parsed argument list */
2892 rte_kvargs_free(kvlist);
/**
 * vdev remove entry point: tears down the bonded ethdev created by
 * bond_probe(). Refuses removal while slaves are still attached, stops and
 * closes a running device, then frees the VLAN bitmap, private data, and MAC
 * storage before releasing the ethdev entry.
 */
2898 bond_remove(struct rte_vdev_device *dev)
2900 struct rte_eth_dev *eth_dev;
2901 struct bond_dev_private *internals;
2907 name = rte_vdev_device_name(dev);
2908 RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2910 /* now free all data allocation - for eth_dev structure,
2911 * dummy pci driver and internal (private) data
2914 /* find an ethdev entry */
2915 eth_dev = rte_eth_dev_allocated(name);
2916 if (eth_dev == NULL)
2919 RTE_ASSERT(eth_dev->device == &dev->device);
/* Cannot remove a bond that still has slaves attached */
2921 internals = eth_dev->data->dev_private;
2922 if (internals->slave_count != 0)
2925 if (eth_dev->data->dev_started == 1) {
2926 bond_ethdev_stop(eth_dev);
2927 bond_ethdev_close(eth_dev);
/* Detach the ops/burst functions before freeing backing storage */
2930 eth_dev->dev_ops = NULL;
2931 eth_dev->rx_pkt_burst = NULL;
2932 eth_dev->tx_pkt_burst = NULL;
2934 internals = eth_dev->data->dev_private;
2935 rte_bitmap_free(internals->vlan_filter_bmp);
2936 rte_free(internals->vlan_filter_bmpmem);
2937 rte_free(eth_dev->data->dev_private);
2938 rte_free(eth_dev->data->mac_addrs);
2940 rte_eth_dev_release_port(eth_dev);
2945 /* this part will resolve the slave portids after all the other pdev and vdev
2946 * have been allocated */
/**
 * dev_configure op for the bonded device. Seeds default RSS state (key and
 * redirection table) when RSS is enabled, fixes max_rx_pktlen, then — only
 * when the device was created via devargs (internals->kvlist != NULL) —
 * finishes parsing and applying the remaining kvargs: MAC address, transmit
 * policy, aggregation mode, slave ports, primary slave, LSC polling interval,
 * and link up/down propagation delays. Each option is rejected when given
 * more than once.
 */
2948 bond_ethdev_configure(struct rte_eth_dev *dev)
2950 const char *name = dev->device->name;
2951 struct bond_dev_private *internals = dev->data->dev_private;
2952 struct rte_kvargs *kvlist = internals->kvlist;
/* Recover this device's port id from its position in the global array */
2954 uint8_t port_id = dev - rte_eth_devices;
/* Well-known 40-byte default RSS key (same constant used by other PMDs) */
2957 static const uint8_t default_rss_key[40] = {
2958 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2959 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2960 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2961 0xBE, 0xAC, 0x01, 0xFA
2966 /* If RSS is enabled, fill table and key with default values */
2967 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2968 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2969 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2970 memcpy(internals->rss_key, default_rss_key, 40);
/* Spread RETA entries round-robin over the configured rx queues */
2972 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2973 internals->reta_conf[i].mask = ~0LL;
2974 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2975 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2979 /* set the max_rx_pktlen */
2980 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
/*
2983 * if no kvlist, it means that this bonded device has been created
2984 * through the bonding api.
 */
2989 /* Parse MAC address for bonded device */
2990 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2991 if (arg_count == 1) {
2992 struct ether_addr bond_mac;
2994 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2995 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2996 RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
3001 /* Set MAC address */
3002 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3004 "Failed to set mac address on bonded device %s\n",
3008 } else if (arg_count > 1) {
3010 "MAC address can be specified only once for bonded device %s\n",
3015 /* Parse/set balance mode transmit policy */
3016 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3017 if (arg_count == 1) {
3018 uint8_t xmit_policy;
3020 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3021 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3024 "Invalid xmit policy specified for bonded device %s\n",
3029 /* Set balance mode transmit policy*/
3030 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3032 "Failed to set balance xmit policy on bonded device %s\n",
3036 } else if (arg_count > 1) {
3038 "Transmit policy can be specified only once for bonded device"
/* Parse/apply 802.3ad aggregator selection mode (mode 4 only) */
3043 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3044 if (rte_kvargs_process(kvlist,
3045 PMD_BOND_AGG_MODE_KVARG,
3046 &bond_ethdev_parse_slave_agg_mode_kvarg,
3049 "Failed to parse agg selection mode for bonded device %s\n",
3052 if (internals->mode == BONDING_MODE_8023AD)
3054 rte_eth_bond_8023ad_agg_selection_set(port_id,
3058 /* Parse/add slave ports to bonded device */
3059 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3060 struct bond_ethdev_slave_ports slave_ports;
3063 memset(&slave_ports, 0, sizeof(slave_ports));
3065 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3066 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3068 "Failed to parse slave ports for bonded device %s\n",
3073 for (i = 0; i < slave_ports.slave_count; i++) {
3074 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3076 "Failed to add port %d as slave to bonded device %s\n",
3077 slave_ports.slaves[i], name);
/* A bond with no slave= arguments is valid; slaves may be added later */
3082 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3086 /* Parse/set primary slave port id*/
3087 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3088 if (arg_count == 1) {
3089 uint8_t primary_slave_port_id;
3091 if (rte_kvargs_process(kvlist,
3092 PMD_BOND_PRIMARY_SLAVE_KVARG,
3093 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3094 &primary_slave_port_id) < 0) {
3096 "Invalid primary slave port id specified for bonded device"
3101 /* Set balance mode transmit policy*/
3102 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
3105 "Failed to set primary slave port %d on bonded device %s\n",
3106 primary_slave_port_id, name);
3109 } else if (arg_count > 1) {
3111 "Primary slave can be specified only once for bonded device"
3116 /* Parse link status monitor polling interval */
3117 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3118 if (arg_count == 1) {
3119 uint32_t lsc_poll_interval_ms;
3121 if (rte_kvargs_process(kvlist,
3122 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3123 &bond_ethdev_parse_time_ms_kvarg,
3124 &lsc_poll_interval_ms) < 0) {
3126 "Invalid lsc polling interval value specified for bonded"
3127 " device %s\n", name);
3131 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3134 "Failed to set lsc monitor polling interval (%u ms) on"
3135 " bonded device %s\n", lsc_poll_interval_ms, name);
3138 } else if (arg_count > 1) {
3140 "LSC polling interval can be specified only once for bonded"
3141 " device %s\n", name);
3145 /* Parse link up interrupt propagation delay */
3146 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3147 if (arg_count == 1) {
3148 uint32_t link_up_delay_ms;
3150 if (rte_kvargs_process(kvlist,
3151 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3152 &bond_ethdev_parse_time_ms_kvarg,
3153 &link_up_delay_ms) < 0) {
3155 "Invalid link up propagation delay value specified for"
3156 " bonded device %s\n", name);
3160 /* Set balance mode transmit policy*/
3161 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3164 "Failed to set link up propagation delay (%u ms) on bonded"
3165 " device %s\n", link_up_delay_ms, name);
3168 } else if (arg_count > 1) {
3170 "Link up propagation delay can be specified only once for"
3171 " bonded device %s\n", name);
3175 /* Parse link down interrupt propagation delay */
3176 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3177 if (arg_count == 1) {
3178 uint32_t link_down_delay_ms;
3180 if (rte_kvargs_process(kvlist,
3181 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3182 &bond_ethdev_parse_time_ms_kvarg,
3183 &link_down_delay_ms) < 0) {
3185 "Invalid link down propagation delay value specified for"
3186 " bonded device %s\n", name);
3190 /* Set balance mode transmit policy*/
3191 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3194 "Failed to set link down propagation delay (%u ms) on"
3195 " bonded device %s\n", link_down_delay_ms, name);
3198 } else if (arg_count > 1) {
3200 "Link down propagation delay can be specified only once for"
3201 " bonded device %s\n", name);
/* vdev driver descriptor: hooks bond_probe/bond_remove into the EAL
 * virtual-device framework. */
3208 struct rte_vdev_driver pmd_bond_drv = {
3209 .probe = bond_probe,
3210 .remove = bond_remove,
/* Register the driver as "net_bonding" with legacy alias "eth_bond" */
3213 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3214 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
/* Advertise the accepted devargs keys for --vdev usage */
3216 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3220 "xmit_policy=[l2 | l23 | l34] "
3221 "agg_mode=[count | stable | bandwidth] "
3224 "lsc_poll_period_ms=<int> "
3226 "down_delay=<int>");