/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"
#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
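/* HASH_L4_PORTS works for both struct tcp_hdr and struct udp_hdr because the
 * 16-bit source and destination ports are the first two fields of each
 * header. The XOR is direction-agnostic, so both directions of a flow fold
 * to the same value and land on the same slave.
 */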
/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
    size_t vlan_offset = 0;

    if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
        struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

        vlan_offset = sizeof(struct vlan_hdr);
        *proto = vlan_hdr->eth_proto;

        /* Handle a second, stacked tag (QinQ) */
        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
            vlan_hdr = vlan_hdr + 1;
            *proto = vlan_hdr->eth_proto;
            vlan_offset += sizeof(struct vlan_hdr);
        }
    }
    return vlan_offset;
}
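/*
 * Usage sketch (illustrative only): callers pass the frame's ether_type and
 * receive the number of bytes to skip past up to two VLAN tags, e.g.
 *
 *     uint16_t proto = eth_hdr->ether_type;
 *     size_t off = get_vlan_offset(eth_hdr, &proto);
 *     struct ipv4_hdr *ip = (struct ipv4_hdr *)((char *)(eth_hdr + 1) + off);
 *
 * On return *proto holds the inner (post-VLAN) EtherType in network order.
 */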
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
    struct bond_dev_private *internals;

    uint16_t num_rx_slave = 0;
    uint16_t num_rx_total = 0;
    int i;

    /* Cast to structure containing the bonded device's port id and queue id */
    struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

    internals = bd_rx_q->dev_private;

    for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
        /* Offset of pointer to *bufs increases as packets are received
         * from other slaves */
        num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
        num_rx_total += num_rx_slave;
        nb_pkts -= num_rx_slave;
    }

    return num_rx_total;
}
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
        uint16_t nb_pkts)
{
    struct bond_dev_private *internals;

    /* Cast to structure containing the bonded device's port id and queue id */
    struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

    internals = bd_rx_q->dev_private;

    return rte_eth_rx_burst(internals->current_primary_port,
            bd_rx_q->queue_id, bufs, nb_pkts);
}
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci)
{
    const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

    return !vlan_tci && (ethertype == ether_type_slow_be &&
            (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}
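/* Slow-protocol frames (LACPDUs and marker PDUs) are sent untagged, so a
 * non-zero vlan_tci disqualifies a frame here even when its EtherType
 * matches ETHER_TYPE_SLOW.
 */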
/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
    .dst.addr_bytes = { 0 },
    .src.addr_bytes = { 0 },
    .type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
    .dst.addr_bytes = { 0 },
    .src.addr_bytes = { 0 },
    .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
    {
        .type = RTE_FLOW_ITEM_TYPE_ETH,
        .spec = &flow_item_eth_type_8023ad,
        .last = NULL,
        .mask = &flow_item_eth_mask_type_8023ad,
    },
    {
        .type = RTE_FLOW_ITEM_TYPE_END,
        .spec = NULL,
        .last = NULL,
        .mask = NULL,
    }
};

const struct rte_flow_attr flow_attr_8023ad = {
    .group = 0,
    .priority = 0,
    .ingress = 1,
    .egress = 0,
    .reserved = 0,
};
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
        uint8_t slave_port) {
    struct rte_flow_error error;
    struct bond_dev_private *internals = (struct bond_dev_private *)
            (bond_dev->data->dev_private);

    struct rte_flow_action_queue lacp_queue_conf = {
        .index = internals->mode4.dedicated_queues.rx_qid,
    };

    const struct rte_flow_action actions[] = {
        {
            .type = RTE_FLOW_ACTION_TYPE_QUEUE,
            .conf = &lacp_queue_conf
        },
        {
            .type = RTE_FLOW_ACTION_TYPE_END,
        }
    };

    int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
            flow_item_8023ad, actions, &error);
    if (ret < 0)
        return -1;

    return 0;
}
int
bond_8023ad_slow_pkt_hw_filter_supported(uint8_t port_id) {
    struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
    struct bond_dev_private *internals = (struct bond_dev_private *)
            (bond_dev->data->dev_private);
    struct rte_eth_dev_info bond_info, slave_info;
    uint8_t idx;

    /* Verify that all slaves in the bonding device support flow director */
    if (internals->slave_count > 0) {
        rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

        /* The dedicated slow queues are placed right after the data queues */
        internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
        internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

        for (idx = 0; idx < internals->slave_count; idx++) {
            rte_eth_dev_info_get(internals->slaves[idx].port_id,
                    &slave_info);

            if (bond_ethdev_8023ad_flow_verify(bond_dev,
                    internals->slaves[idx].port_id) != 0)
                return -1;
        }
    }

    return 0;
}
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint8_t slave_port) {

    struct rte_flow_error error;
    struct bond_dev_private *internals = (struct bond_dev_private *)
            (bond_dev->data->dev_private);

    struct rte_flow_action_queue lacp_queue_conf = {
        .index = internals->mode4.dedicated_queues.rx_qid,
    };

    const struct rte_flow_action actions[] = {
        {
            .type = RTE_FLOW_ACTION_TYPE_QUEUE,
            .conf = &lacp_queue_conf
        },
        {
            .type = RTE_FLOW_ACTION_TYPE_END,
        }
    };

    internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
            &flow_attr_8023ad, flow_item_8023ad, actions, &error);
    if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
        RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                "(slave_port=%d queue_id=%d)",
                error.message, slave_port,
                internals->mode4.dedicated_queues.rx_qid);
        return -1;
    }

    return 0;
}
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
        uint16_t nb_pkts)
{
    struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
    struct bond_dev_private *internals = bd_rx_q->dev_private;
    uint16_t num_rx_total = 0;  /* Total number of received packets */
    uint8_t slaves[RTE_MAX_ETHPORTS];
    uint8_t slave_count;
    uint8_t i, idx;

    /* Copy slave list to protect against slave up/down changes during tx
     * bursting */
    slave_count = internals->active_slave_count;
    memcpy(slaves, internals->active_slaves,
            sizeof(internals->active_slaves[0]) * slave_count);

    for (i = 0, idx = internals->active_slave;
            i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
        idx = idx % slave_count;

        /* Read packets from this slave */
        num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                &bufs[num_rx_total], nb_pkts - num_rx_total);
    }

    internals->active_slave = idx;

    return num_rx_total;
}
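/* The receive loop above starts from internals->active_slave rather than
 * slave 0 and stores the final index back, so successive bursts rotate the
 * first slave polled; otherwise low-numbered slaves would fill the burst
 * first and starve the rest.
 */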
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
        uint16_t nb_pkts)
{
    struct bond_dev_private *internals;
    struct bond_tx_queue *bd_tx_q;

    uint8_t num_of_slaves;
    uint8_t slaves[RTE_MAX_ETHPORTS];
    /* positions in slaves, not ID */
    uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
    uint8_t distributing_count;

    uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
    uint16_t i, op_slave_idx;

    struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];

    /* Total amount of packets in slave_bufs */
    uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

    if (unlikely(nb_pkts == 0))
        return 0;

    bd_tx_q = (struct bond_tx_queue *)queue;
    internals = bd_tx_q->dev_private;

    /* Copy slave list to protect against slave up/down changes during tx
     * bursting */
    num_of_slaves = internals->active_slave_count;
    if (num_of_slaves < 1)
        return num_tx_total;

    memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
            num_of_slaves);

    distributing_count = 0;
    for (i = 0; i < num_of_slaves; i++) {
        struct port *port = &mode_8023ad_ports[slaves[i]];
        if (ACTOR_STATE(port, DISTRIBUTING))
            distributing_offsets[distributing_count++] = i;
    }

    if (likely(distributing_count > 0)) {
        /* Populate slaves mbuf with the packets which are to be sent */
        for (i = 0; i < nb_pkts; i++) {
            /* Select output slave using hash based on xmit policy */
            op_slave_idx = internals->xmit_hash(bufs[i],
                    distributing_count);

            /* Populate slave mbuf arrays with mbufs for that slave.
             * Use only slaves that are currently distributing.
             */
            uint8_t slave_offset =
                    distributing_offsets[op_slave_idx];
            slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
                    bufs[i];
            slave_nb_pkts[slave_offset]++;
        }
    }

    /* Send packet burst on each slave device */
    for (i = 0; i < num_of_slaves; i++) {
        if (slave_nb_pkts[i] == 0)
            continue;

        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                slave_bufs[i], slave_nb_pkts[i]);

        num_tx_total += num_tx_slave;
        num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

        /* If tx burst fails move packets to end of bufs */
        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
            uint16_t j = nb_pkts - num_tx_fail_total;
            for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
                    num_tx_slave++)
                bufs[j] = slave_bufs[i][num_tx_slave];
        }
    }

    return num_tx_total;
}
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
        uint16_t nb_pkts)
{
    /* Cast to structure containing the bonded device's port id and queue id */
    struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
    struct bond_dev_private *internals = bd_rx_q->dev_private;
    struct ether_addr bond_mac;

    struct ether_hdr *hdr;

    const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
    uint16_t num_rx_total = 0;  /* Total number of received packets */
    uint8_t slaves[RTE_MAX_ETHPORTS];
    uint8_t slave_count, idx;

    uint8_t collecting;  /* current slave collecting status */
    const uint8_t promisc = internals->promiscuous_en;
    uint8_t i, j, k;
    uint8_t subtype;

    rte_eth_macaddr_get(internals->port_id, &bond_mac);
    /* Copy slave list to protect against slave up/down changes during tx
     * bursting */
    slave_count = internals->active_slave_count;
    memcpy(slaves, internals->active_slaves,
            sizeof(internals->active_slaves[0]) * slave_count);

    idx = internals->active_slave;
    if (idx >= slave_count) {
        internals->active_slave = 0;
        idx = 0;
    }
    for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
        j = num_rx_total;
        collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
                COLLECTING);

        /* Read packets from this slave */
        num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                &bufs[num_rx_total], nb_pkts - num_rx_total);

        for (k = j; k < 2 && k < num_rx_total; k++)
            rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

        /* Handle slow protocol packets. */
        while (j < num_rx_total) {

            /* If packet is not pure L2 it cannot be a slow frame, skip it */
            if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                j++;
                continue;
            }

            if (j + 3 < num_rx_total)
                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

            hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
            subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

            /* Remove packet from array if it is slow packet or slave is not
             * in collecting state or bonding interface is not in promiscuous
             * mode and packet address does not match. */
            if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) ||
                    !collecting || (!promisc &&
                        !is_multicast_ether_addr(&hdr->d_addr) &&
                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                if (hdr->ether_type == ether_type_slow_be) {
                    bond_mode_8023ad_handle_slow_pkt(
                            internals, slaves[idx], bufs[j]);
                } else
                    rte_pktmbuf_free(bufs[j]);

                /* Packet is managed by mode 4 or dropped, shift the array */
                num_rx_total--;
                if (j < num_rx_total) {
                    memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                            (num_rx_total - j));
                }
            } else
                j++;
        }
        if (unlikely(++idx == slave_count))
            idx = 0;
    }

    internals->active_slave = idx;
    return num_rx_total;
}
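/* Note that LACPDUs and marker frames never reach the application: they are
 * handed to the mode 4 state machine via bond_mode_8023ad_handle_slow_pkt()
 * and the rx array is compacted in place around them.
 */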
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
    switch (arp_op) {
    case ARP_OP_REQUEST:
        snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
        return;
    case ARP_OP_REPLY:
        snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
        return;
    case ARP_OP_REVREQUEST:
        snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                "Reverse ARP Request");
        return;
    case ARP_OP_REVREPLY:
        snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                "Reverse ARP Reply");
        return;
    case ARP_OP_INVREQUEST:
        snprintf(buf, sizeof("Peer Identify Request"), "%s",
                "Peer Identify Request");
        return;
    case ARP_OP_INVREPLY:
        snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                "Peer Identify Reply");
        return;
    default:
        break;
    }

    snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
    return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
    uint32_t ipv4_addr;

    ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
    snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
            (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
            ipv4_addr & 0xFF);
}
#define MAX_CLIENTS_NUMBER  128
uint8_t active_clients;
struct client_stats_t {
    uint8_t port;
    uint32_t ipv4_addr;
    uint32_t ipv4_rx_packets;
    uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
{
    int i = 0;

    for (; i < MAX_CLIENTS_NUMBER; i++) {
        if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
            /* Just update RX packets number for this client */
            if (TXorRXindicator == &burstnumberRX)
                client_stats[i].ipv4_rx_packets++;
            else
                client_stats[i].ipv4_tx_packets++;
            return;
        }
    }
    /* We have a new client. Insert it into the table and update the stats */
    if (TXorRXindicator == &burstnumberRX)
        client_stats[active_clients].ipv4_rx_packets++;
    else
        client_stats[active_clients].ipv4_tx_packets++;
    client_stats[active_clients].ipv4_addr = addr;
    client_stats[active_clients].port = port;
    active_clients++;
}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
    RTE_LOG(DEBUG, PMD, \
        "%s " \
        "port:%d " \
        "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
        "SrcIP:%s " \
        "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
        "DstIP:%s " \
        "%s " \
        "%d\n", \
        info, \
        port, \
        eth_h->s_addr.addr_bytes[0], \
        eth_h->s_addr.addr_bytes[1], \
        eth_h->s_addr.addr_bytes[2], \
        eth_h->s_addr.addr_bytes[3], \
        eth_h->s_addr.addr_bytes[4], \
        eth_h->s_addr.addr_bytes[5], \
        src_ip, \
        eth_h->d_addr.addr_bytes[0], \
        eth_h->d_addr.addr_bytes[1], \
        eth_h->d_addr.addr_bytes[2], \
        eth_h->d_addr.addr_bytes[3], \
        eth_h->d_addr.addr_bytes[4], \
        eth_h->d_addr.addr_bytes[5], \
        dst_ip, \
        arp_op, \
        ++burstnumber)
#endif
static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
        uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
{
    struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
    struct arp_hdr *arp_h;
    char dst_ip[16];
    char ArpOp[24];
    char buf[16];
#endif
    char src_ip[16];

    uint16_t ether_type = eth_h->ether_type;
    uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
    snprintf(buf, 16, "%s", info);
#endif

    if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
        ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
        ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
        MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
        update_client_stats(ipv4_h->src_addr, port, burstnumber);
    }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
    else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
        arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
        ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
        ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
        arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
        MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
    }
#endif
}
#endif
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
    /* This is a receive path, so the queue is a bond_rx_queue */
    struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
    struct bond_dev_private *internals = bd_rx_q->dev_private;
    struct ether_hdr *eth_h;
    uint16_t ether_type, offset;
    uint16_t nb_recv_pkts;
    int i;

    nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

    for (i = 0; i < nb_recv_pkts; i++) {
        eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
        ether_type = eth_h->ether_type;
        offset = get_vlan_offset(eth_h, &ether_type);

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
            mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
            bond_mode_alb_arp_recv(eth_h, offset, internals);
        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
            mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
    }

    return nb_recv_pkts;
}
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
        uint16_t nb_pkts)
{
    struct bond_dev_private *internals;
    struct bond_tx_queue *bd_tx_q;

    struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
    uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

    uint8_t num_of_slaves;
    uint8_t slaves[RTE_MAX_ETHPORTS];

    uint16_t num_tx_total = 0, num_tx_slave;

    static int slave_idx = 0;
    int i, cslave_idx = 0, tx_fail_total = 0;

    bd_tx_q = (struct bond_tx_queue *)queue;
    internals = bd_tx_q->dev_private;

    /* Copy slave list to protect against slave up/down changes during tx
     * bursting */
    num_of_slaves = internals->active_slave_count;
    memcpy(slaves, internals->active_slaves,
            sizeof(internals->active_slaves[0]) * num_of_slaves);

    if (num_of_slaves < 1)
        return num_tx_total;

    /* Populate slaves mbuf with which packets are to be sent on it */
    for (i = 0; i < nb_pkts; i++) {
        cslave_idx = (slave_idx + i) % num_of_slaves;
        slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
    }

    /* increment current slave index so the next call to tx burst starts on the
     * next slave */
    slave_idx = ++cslave_idx;

    /* Send packet burst on each slave device */
    for (i = 0; i < num_of_slaves; i++) {
        if (slave_nb_pkts[i] > 0) {
            num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                    slave_bufs[i], slave_nb_pkts[i]);

            /* if tx burst fails move packets to end of bufs */
            if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                tx_fail_total += tx_fail_slave;

                memcpy(&bufs[nb_pkts - tx_fail_total],
                        &slave_bufs[i][num_tx_slave],
                        tx_fail_slave * sizeof(bufs[0]));
            }
            num_tx_total += num_tx_slave;
        }
    }

    return num_tx_total;
}
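/* Note: slave_idx is function-local static state, shared by every tx queue
 * (and every bonded device) using this burst function; the round-robin
 * starting point is therefore global rather than per-queue.
 */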
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
        struct rte_mbuf **bufs, uint16_t nb_pkts)
{
    struct bond_dev_private *internals;
    struct bond_tx_queue *bd_tx_q;

    bd_tx_q = (struct bond_tx_queue *)queue;
    internals = bd_tx_q->dev_private;

    if (internals->active_slave_count < 1)
        return 0;

    return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
            bufs, nb_pkts);
}
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
    unaligned_uint16_t *word_src_addr =
            (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
    unaligned_uint16_t *word_dst_addr =
            (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

    return (word_src_addr[0] ^ word_dst_addr[0]) ^
            (word_src_addr[1] ^ word_dst_addr[1]) ^
            (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
    return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
    unaligned_uint32_t *word_src_addr =
            (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
    unaligned_uint32_t *word_dst_addr =
            (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

    return (word_src_addr[0] ^ word_dst_addr[0]) ^
            (word_src_addr[1] ^ word_dst_addr[1]) ^
            (word_src_addr[2] ^ word_dst_addr[2]) ^
            (word_src_addr[3] ^ word_dst_addr[3]);
}
uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
    struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

    uint32_t hash = ether_hash(eth_hdr);

    return (hash ^= hash >> 8) % slave_count;
}
uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
    struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
    uint16_t proto = eth_hdr->ether_type;
    size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
    uint32_t hash, l3hash = 0;

    hash = ether_hash(eth_hdr);

    if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                ((char *)(eth_hdr + 1) + vlan_offset);
        l3hash = ipv4_hash(ipv4_hdr);

    } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                ((char *)(eth_hdr + 1) + vlan_offset);
        l3hash = ipv6_hash(ipv6_hdr);
    }

    hash = hash ^ l3hash;
    hash ^= hash >> 16;
    hash ^= hash >> 8;

    return hash % slave_count;
}
uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
    struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
    uint16_t proto = eth_hdr->ether_type;
    size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

    struct udp_hdr *udp_hdr = NULL;
    struct tcp_hdr *tcp_hdr = NULL;
    uint32_t hash, l3hash = 0, l4hash = 0;

    if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                ((char *)(eth_hdr + 1) + vlan_offset);
        size_t ip_hdr_offset;

        l3hash = ipv4_hash(ipv4_hdr);

        /* there is no L4 header in fragmented packet */
        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
            ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
                    IPV4_IHL_MULTIPLIER;

            if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
                        ip_hdr_offset);
                l4hash = HASH_L4_PORTS(tcp_hdr);
            } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
                udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
                        ip_hdr_offset);
                l4hash = HASH_L4_PORTS(udp_hdr);
            }
        }
    } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                ((char *)(eth_hdr + 1) + vlan_offset);
        l3hash = ipv6_hash(ipv6_hdr);

        if (ipv6_hdr->proto == IPPROTO_TCP) {
            tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
            l4hash = HASH_L4_PORTS(tcp_hdr);
        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
            udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
            l4hash = HASH_L4_PORTS(udp_hdr);
        }
    }

    hash = l3hash ^ l4hash;
    hash ^= hash >> 16;
    hash ^= hash >> 8;

    return hash % slave_count;
}
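/*
 * Worked example (illustrative): for an IPv4/TCP flow 10.0.0.1:1000 ->
 * 10.0.0.2:2000, l3hash = src_addr ^ dst_addr and l4hash = 1000 ^ 2000
 * (both in network byte order). Each XOR is symmetric under swapping source
 * and destination, so the reverse direction of the flow folds to the same
 * value and is pinned to the same slave by the final "hash % slave_count".
 */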
struct bwg_slave {
    uint64_t bwg_left_int;
    uint64_t bwg_left_remainder;
    uint8_t slave;
};
void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
    int i;

    for (i = 0; i < internals->active_slave_count; i++) {
        tlb_last_obytets[internals->active_slaves[i]] = 0;
    }
}
static int
bandwidth_cmp(const void *a, const void *b)
{
    const struct bwg_slave *bwg_a = a;
    const struct bwg_slave *bwg_b = b;
    int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
    int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
            (int64_t)bwg_a->bwg_left_remainder;

    if (diff > 0)
        return 1;
    else if (diff < 0)
        return -1;
    else if (diff2 > 0)
        return 1;
    else if (diff2 < 0)
        return -1;
    else
        return 0;
}
static void
bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
        struct bwg_slave *bwg_slave)
{
    struct rte_eth_link link_status;

    rte_eth_link_get_nowait(port_id, &link_status);
    uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
    if (link_bwg == 0)
        return;
    link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
    bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
    bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
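/* bandwidth_left() estimates how much of a slave's link budget is still
 * unused: link_bwg is the byte budget of the measurement window (link speed
 * in bytes/s scaled by the elapsed REORDER_PERIOD_MS intervals) and load is
 * the byte count transmitted in that window. The quotient/remainder pair is
 * kept separate so bandwidth_cmp() can order slaves without floating point.
 */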
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
    struct bond_dev_private *internals = arg;
    struct rte_eth_stats slave_stats;
    struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
    uint8_t slave_count;
    uint64_t tx_bytes;
    uint8_t update_stats = 0;
    uint8_t i, slave_id;

    internals->slave_update_idx++;


    if (internals->slave_update_idx >= REORDER_PERIOD_MS)
        update_stats = 1;

    for (i = 0; i < internals->active_slave_count; i++) {
        slave_id = internals->active_slaves[i];
        rte_eth_stats_get(slave_id, &slave_stats);
        tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
        bandwidth_left(slave_id, tx_bytes,
                internals->slave_update_idx, &bwg_array[i]);
        bwg_array[i].slave = slave_id;

        if (update_stats)
            tlb_last_obytets[slave_id] = slave_stats.obytes;
    }

    if (update_stats == 1)
        internals->slave_update_idx = 0;

    slave_count = i;
    qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
    for (i = 0; i < slave_count; i++)
        internals->tlb_slaves_order[i] = bwg_array[i].slave;

    rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
            (struct bond_dev_private *)internals);
}
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
    struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
    struct bond_dev_private *internals = bd_tx_q->dev_private;

    struct rte_eth_dev *primary_port =
            &rte_eth_devices[internals->primary_port];
    uint16_t num_tx_total = 0;
    uint8_t i, j;

    uint8_t num_of_slaves = internals->active_slave_count;
    uint8_t slaves[RTE_MAX_ETHPORTS];

    struct ether_hdr *ether_hdr;
    struct ether_addr primary_slave_addr;
    struct ether_addr active_slave_addr;

    if (num_of_slaves < 1)
        return num_tx_total;

    memcpy(slaves, internals->tlb_slaves_order,
            sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);


    ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

    if (nb_pkts > 3) {
        for (i = 0; i < 3; i++)
            rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
    }

    for (i = 0; i < num_of_slaves; i++) {
        rte_eth_macaddr_get(slaves[i], &active_slave_addr);
        for (j = num_tx_total; j < nb_pkts; j++) {
            if (j + 3 < nb_pkts)
                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

            ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
            if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
            mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
        }

        num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                bufs + num_tx_total, nb_pkts - num_tx_total);

        if (num_tx_total == nb_pkts)
            break;
    }

    return num_tx_total;
}
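/* TLB transmit rewrites the source MAC of outgoing frames: packets carrying
 * the primary's MAC are re-stamped with the MAC of the slave actually
 * sending them, so the attached switch does not see one source address
 * flapping between ports. Slaves are tried in tlb_slaves_order, i.e. most
 * spare bandwidth first.
 */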
void
bond_tlb_disable(struct bond_dev_private *internals)
{
    rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
    bond_ethdev_update_tlb_slave_cb(internals);
}
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
    struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
    struct bond_dev_private *internals = bd_tx_q->dev_private;

    struct ether_hdr *eth_h;
    uint16_t ether_type, offset;

    struct client_data *client_info;

    /*
     * We create transmit buffers for every slave and one additional to send
     * through tlb. In worst case every packet will be sent on one port.
     */
    struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
    uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

    /*
     * We create separate transmit buffers for update packets as they won't
     * be counted in num_tx_total.
     */
    struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
    uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

    struct rte_mbuf *upd_pkt;
    size_t pkt_size;

    uint16_t num_send, num_not_send = 0;
    uint16_t num_tx_total = 0;
    uint8_t slave_idx;

    int i, j;

    /* Search tx buffer for ARP packets and forward them to alb */
    for (i = 0; i < nb_pkts; i++) {
        eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
        ether_type = eth_h->ether_type;
        offset = get_vlan_offset(eth_h, &ether_type);

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
            slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

            /* Change src mac in eth header */
            rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

            /* Add packet to slave tx buffer */
            slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
            slave_bufs_pkts[slave_idx]++;
        } else {
            /* If packet is not ARP, send it with TLB policy */
            slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                    bufs[i];
            slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
        }
    }

    /* Update connected client ARP tables */
    if (internals->mode6.ntt) {
        for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
            client_info = &internals->mode6.client_table[i];

            if (client_info->in_use) {
                /* Allocate new packet to send ARP update on current slave */
                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                if (upd_pkt == NULL) {
                    RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
                    continue;
                }
                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                        + client_info->vlan_count * sizeof(struct vlan_hdr);
                upd_pkt->data_len = pkt_size;
                upd_pkt->pkt_len = pkt_size;

                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                        internals);

                /* Add packet to update tx buffer */
                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                update_bufs_pkts[slave_idx]++;
            }
        }
        internals->mode6.ntt = 0;
    }

    /* Send ARP packets on proper slaves */
    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
        if (slave_bufs_pkts[i] > 0) {
            num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                    slave_bufs[i], slave_bufs_pkts[i]);
            for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                bufs[nb_pkts - 1 - num_not_send - j] =
                        slave_bufs[i][nb_pkts - 1 - j];
            }

            num_tx_total += num_send;
            num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
            /* Print TX stats including update packets */
            for (j = 0; j < slave_bufs_pkts[i]; j++) {
                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
            }
#endif
        }
    }

    /* Send update packets on proper slaves */
    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
        if (update_bufs_pkts[i] > 0) {
            num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                    update_bufs_pkts[i]);
            for (j = num_send; j < update_bufs_pkts[i]; j++) {
                rte_pktmbuf_free(update_bufs[i][j]);
            }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
            for (j = 0; j < update_bufs_pkts[i]; j++) {
                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
            }
#endif
        }
    }

    /* Send non-ARP packets using tlb policy */
    if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
        num_send = bond_ethdev_tx_burst_tlb(queue,
                slave_bufs[RTE_MAX_ETHPORTS],
                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

        for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
            bufs[nb_pkts - 1 - num_not_send - j] =
                    slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
        }

        num_tx_total += num_send;
    }

    return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
        uint16_t nb_pkts)
{
    struct bond_dev_private *internals;
    struct bond_tx_queue *bd_tx_q;

    uint8_t num_of_slaves;
    uint8_t slaves[RTE_MAX_ETHPORTS];

    uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

    int i, op_slave_id;

    struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
    uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

    bd_tx_q = (struct bond_tx_queue *)queue;
    internals = bd_tx_q->dev_private;

    /* Copy slave list to protect against slave up/down changes during tx
     * bursting */
    num_of_slaves = internals->active_slave_count;
    memcpy(slaves, internals->active_slaves,
            sizeof(internals->active_slaves[0]) * num_of_slaves);

    if (num_of_slaves < 1)
        return num_tx_total;

    /* Populate slaves mbuf with the packets which are to be sent on it */
    for (i = 0; i < nb_pkts; i++) {
        /* Select output slave using hash based on xmit policy */
        op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

        /* Populate slave mbuf arrays with mbufs for that slave */
        slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
    }

    /* Send packet burst on each slave device */
    for (i = 0; i < num_of_slaves; i++) {
        if (slave_nb_pkts[i] > 0) {
            num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                    slave_bufs[i], slave_nb_pkts[i]);

            /* if tx burst fails move packets to end of bufs */
            if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

                tx_fail_total += slave_tx_fail_count;
                memcpy(&bufs[nb_pkts - tx_fail_total],
                        &slave_bufs[i][num_tx_slave],
                        slave_tx_fail_count * sizeof(bufs[0]));
            }

            num_tx_total += num_tx_slave;
        }
    }

    return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
        uint16_t nb_pkts)
{
    struct bond_dev_private *internals;
    struct bond_tx_queue *bd_tx_q;

    uint8_t num_of_slaves;
    uint8_t slaves[RTE_MAX_ETHPORTS];
    /* positions in slaves, not ID */
    uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
    uint8_t distributing_count;

    uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
    uint16_t i, j, op_slave_idx;
    const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

    /* Allocate additional packets in case 8023AD mode. */
    struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
    void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

    /* Total amount of packets in slave_bufs */
    uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
    /* Slow packets placed in each slave */
    uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

    bd_tx_q = (struct bond_tx_queue *)queue;
    internals = bd_tx_q->dev_private;

    /* Copy slave list to protect against slave up/down changes during tx
     * bursting */
    num_of_slaves = internals->active_slave_count;
    if (num_of_slaves < 1)
        return num_tx_total;

    memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

    distributing_count = 0;
    for (i = 0; i < num_of_slaves; i++) {
        struct port *port = &mode_8023ad_ports[slaves[i]];

        slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
                slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
                NULL);
        slave_nb_pkts[i] = slave_slow_nb_pkts[i];

        for (j = 0; j < slave_slow_nb_pkts[i]; j++)
            slave_bufs[i][j] = slow_pkts[j];

        if (ACTOR_STATE(port, DISTRIBUTING))
            distributing_offsets[distributing_count++] = i;
    }

    if (likely(distributing_count > 0)) {
        /* Populate slaves mbuf with the packets which are to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
            /* Select output slave using hash based on xmit policy */
            op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

            /* Populate slave mbuf arrays with mbufs for that slave. Use only
             * slaves that are currently distributing. */
            uint8_t slave_offset = distributing_offsets[op_slave_idx];
            slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
            slave_nb_pkts[slave_offset]++;
        }
    }

    /* Send packet burst on each slave device */
    for (i = 0; i < num_of_slaves; i++) {
        if (slave_nb_pkts[i] == 0)
            continue;

        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                slave_bufs[i], slave_nb_pkts[i]);

        /* If tx burst fails drop slow packets */
        for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
            rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

        num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
        num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

        /* If tx burst fails move packets to end of bufs */
        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
            uint16_t j = nb_pkts - num_tx_fail_total;
            for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
                bufs[j] = slave_bufs[i][num_tx_slave];
        }
    }

    return num_tx_total;
}
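/* The return value counts only data packets handed in through bufs; slow
 * packets dequeued from each port's tx_ring are transmitted (or dropped)
 * here but deliberately excluded from num_tx_total.
 */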
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
        uint16_t nb_pkts)
{
    struct bond_dev_private *internals;
    struct bond_tx_queue *bd_tx_q;

    uint8_t tx_failed_flag = 0, num_of_slaves;
    uint8_t slaves[RTE_MAX_ETHPORTS];

    uint16_t max_nb_of_tx_pkts = 0;

    int slave_tx_total[RTE_MAX_ETHPORTS];
    int i, most_successful_tx_slave = -1;

    bd_tx_q = (struct bond_tx_queue *)queue;
    internals = bd_tx_q->dev_private;

    /* Copy slave list to protect against slave up/down changes during tx
     * bursting */
    num_of_slaves = internals->active_slave_count;
    memcpy(slaves, internals->active_slaves,
            sizeof(internals->active_slaves[0]) * num_of_slaves);

    if (num_of_slaves < 1)
        return 0;

    /* Increment reference count on mbufs */
    for (i = 0; i < nb_pkts; i++)
        rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

    /* Transmit burst on each active slave */
    for (i = 0; i < num_of_slaves; i++) {
        slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                bufs, nb_pkts);

        if (unlikely(slave_tx_total[i] < nb_pkts))
            tx_failed_flag = 1;

        /* record the value and slave index for the slave which transmits the
         * maximum number of packets */
        if (slave_tx_total[i] > max_nb_of_tx_pkts) {
            max_nb_of_tx_pkts = slave_tx_total[i];
            most_successful_tx_slave = i;
        }
    }

    /* if slaves fail to transmit packets from burst, the calling application
     * is not expected to know about multiple references to packets so we must
     * handle failures of all packets except those of the most successful slave
     */
    if (unlikely(tx_failed_flag))
        for (i = 0; i < num_of_slaves; i++)
            if (i != most_successful_tx_slave)
                while (slave_tx_total[i] < nb_pkts)
                    rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

    return max_nb_of_tx_pkts;
}
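/*
 * Reference counting example (illustrative): with 3 active slaves each mbuf
 * enters with refcnt 1 and is bumped by num_of_slaves - 1 = 2, so the three
 * per-slave transmits each release one reference. If a slave accepts only
 * part of the burst, the loop above frees the surplus references, and only
 * the most successful slave's count is reported back to the caller.
 */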
static void
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
{
    struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

    if (bond_ctx->mode == BONDING_MODE_8023AD) {
        /**
         * If in mode 4 then save the link properties of the first
         * slave; all subsequent slaves must match these properties
         */
        struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

        bond_link->link_autoneg = slave_link->link_autoneg;
        bond_link->link_duplex = slave_link->link_duplex;
        bond_link->link_speed = slave_link->link_speed;
    } else {
        /**
         * In any other mode the link properties are set to default
         * values of AUTONEG/DUPLEX
         */
        ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
        ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
    }
}
static int
link_properties_valid(struct rte_eth_dev *ethdev,
        struct rte_eth_link *slave_link)
{
    struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

    if (bond_ctx->mode == BONDING_MODE_8023AD) {
        struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

        if (bond_link->link_duplex != slave_link->link_duplex ||
                bond_link->link_autoneg != slave_link->link_autoneg ||
                bond_link->link_speed != slave_link->link_speed)
            return -1;
    }

    return 0;
}
int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
    struct ether_addr *mac_addr;

    if (eth_dev == NULL) {
        RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
        return -1;
    }

    if (dst_mac_addr == NULL) {
        RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
        return -1;
    }

    mac_addr = eth_dev->data->mac_addrs;

    ether_addr_copy(mac_addr, dst_mac_addr);
    return 0;
}
int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
    struct ether_addr *mac_addr;

    if (eth_dev == NULL) {
        RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
        return -1;
    }

    if (new_mac_addr == NULL) {
        RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
        return -1;
    }

    mac_addr = eth_dev->data->mac_addrs;

    /* If new MAC is different to current MAC then update */
    if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
        memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

    return 0;
}
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
    struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
    int i;

    /* Update slave devices MAC addresses */
    if (internals->slave_count < 1)
        return -1;

    switch (internals->mode) {
    case BONDING_MODE_ROUND_ROBIN:
    case BONDING_MODE_BALANCE:
    case BONDING_MODE_BROADCAST:
        for (i = 0; i < internals->slave_count; i++) {
            if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
                    bonded_eth_dev->data->mac_addrs)) {
                RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                        internals->slaves[i].port_id);
                return -1;
            }
        }
        break;
    case BONDING_MODE_8023AD:
        bond_mode_8023ad_mac_address_update(bonded_eth_dev);
        break;
    case BONDING_MODE_ACTIVE_BACKUP:
    case BONDING_MODE_TLB:
    case BONDING_MODE_ALB:
    default:
        for (i = 0; i < internals->slave_count; i++) {
            if (internals->slaves[i].port_id ==
                    internals->current_primary_port) {
                if (mac_address_set(&rte_eth_devices[internals->primary_port],
                        bonded_eth_dev->data->mac_addrs)) {
                    RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                            internals->current_primary_port);
                    return -1;
                }
            } else {
                if (mac_address_set(
                        &rte_eth_devices[internals->slaves[i].port_id],
                        &internals->slaves[i].persisted_mac_addr)) {
                    RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                            internals->slaves[i].port_id);
                    return -1;
                }
            }
        }
    }

    return 0;
}
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
    struct bond_dev_private *internals;

    internals = eth_dev->data->dev_private;

    switch (mode) {
    case BONDING_MODE_ROUND_ROBIN:
        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
        break;
    case BONDING_MODE_ACTIVE_BACKUP:
        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
        break;
    case BONDING_MODE_BALANCE:
        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
        break;
    case BONDING_MODE_BROADCAST:
        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
        break;
    case BONDING_MODE_8023AD:
        if (bond_mode_8023ad_enable(eth_dev) != 0)
            return -1;

        if (internals->mode4.dedicated_queues.enabled == 0) {
            eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
            eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
            RTE_LOG(WARNING, PMD,
                    "Using mode 4, it is necessary to do TX burst "
                    "and RX burst at least every 100ms.\n");
        } else {
            /* Use flow director's optimization */
            eth_dev->rx_pkt_burst =
                    bond_ethdev_rx_burst_8023ad_fast_queue;
            eth_dev->tx_pkt_burst =
                    bond_ethdev_tx_burst_8023ad_fast_queue;
        }
        break;
    case BONDING_MODE_TLB:
        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
        break;
    case BONDING_MODE_ALB:
        if (bond_mode_alb_enable(eth_dev) != 0)
            return -1;

        eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
        eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
        break;
    default:
        return -1;
    }

    internals->mode = mode;

    return 0;
}
static int
slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
        struct rte_eth_dev *slave_eth_dev)
{
    int errval = 0;
    struct bond_dev_private *internals = (struct bond_dev_private *)
            bonded_eth_dev->data->dev_private;
    struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];

    if (port->slow_pool == NULL) {
        char mem_name[256];
        int slave_id = slave_eth_dev->data->port_id;

        snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
                slave_id);
        port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
                250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
                slave_eth_dev->data->numa_node);

        /* Any memory allocation failure in initialization is critical because
         * resources can't be freed, so reinitialization is impossible. */
        if (port->slow_pool == NULL) {
            rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
                    slave_id, mem_name, rte_strerror(rte_errno));
        }
    }

    if (internals->mode4.dedicated_queues.enabled == 1) {
        /* Configure slow Rx queue */

        errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
                internals->mode4.dedicated_queues.rx_qid, 128,
                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                NULL, port->slow_pool);
        if (errval != 0) {
            RTE_BOND_LOG(ERR,
                    "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
                    slave_eth_dev->data->port_id,
                    internals->mode4.dedicated_queues.rx_qid,
                    errval);
            return errval;
        }

        /* Configure slow Tx queue */
        errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
                internals->mode4.dedicated_queues.tx_qid, 512,
                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                NULL);
        if (errval != 0) {
            RTE_BOND_LOG(ERR,
                    "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
                    slave_eth_dev->data->port_id,
                    internals->mode4.dedicated_queues.tx_qid,
                    errval);
            return errval;
        }
    }
    return 0;
}
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
        struct rte_eth_dev *slave_eth_dev)
{
    struct bond_rx_queue *bd_rx_q;
    struct bond_tx_queue *bd_tx_q;
    uint16_t nb_rx_queues;
    uint16_t nb_tx_queues;

    int errval;
    uint16_t q_id;
    struct rte_flow_error flow_error;

    struct bond_dev_private *internals = (struct bond_dev_private *)
            bonded_eth_dev->data->dev_private;

    /* Stop slave */
    rte_eth_dev_stop(slave_eth_dev->data->port_id);

    /* Enable interrupts on slave device if supported */
    if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
        slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

    /* If RSS is enabled for bonding, try to enable it for slaves */
    if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
        if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
                != 0) {
            slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
                    bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
            slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
                    bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
        } else {
            slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
        }

        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
                bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
        slave_eth_dev->data->dev_conf.rxmode.mq_mode =
                bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
    }

    slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
            bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;

    nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
    nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;

    if (internals->mode == BONDING_MODE_8023AD) {
        if (internals->mode4.dedicated_queues.enabled == 1) {
            nb_rx_queues++;
            nb_tx_queues++;
        }
    }

    /* Configure device */
    errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
            nb_rx_queues, nb_tx_queues,
            &(slave_eth_dev->data->dev_conf));
    if (errval != 0) {
        RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
                slave_eth_dev->data->port_id, errval);
        return errval;
    }

    /* Setup Rx Queues */
    for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
        bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

        errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
                bd_rx_q->nb_rx_desc,
                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
        if (errval != 0) {
            RTE_BOND_LOG(ERR,
                    "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
                    slave_eth_dev->data->port_id, q_id, errval);
            return errval;
        }
    }

    /* Setup Tx Queues */
    for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
        bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

        errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
                bd_tx_q->nb_tx_desc,
                rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
                &bd_tx_q->tx_conf);
        if (errval != 0) {
            RTE_BOND_LOG(ERR,
                    "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
                    slave_eth_dev->data->port_id, q_id, errval);
            return errval;
        }
    }

    if (internals->mode == BONDING_MODE_8023AD &&
            internals->mode4.dedicated_queues.enabled == 1) {
        if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
                != 0)
            return errval;

        if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
                slave_eth_dev->data->port_id) != 0) {
            RTE_BOND_LOG(ERR,
                    "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
                    slave_eth_dev->data->port_id, errval);
            return -1;
        }

        if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
            rte_flow_destroy(slave_eth_dev->data->port_id,
                    internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
                    &flow_error);

        bond_ethdev_8023ad_flow_set(bonded_eth_dev,
                slave_eth_dev->data->port_id);
    }

    /* Start device */
    errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
    if (errval != 0) {
        RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
                slave_eth_dev->data->port_id, errval);
        return -1;
    }

    /* If RSS is enabled for bonding, synchronize RETA */
    if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
        int i;
        struct bond_dev_private *internals;

        internals = bonded_eth_dev->data->dev_private;

        for (i = 0; i < internals->slave_count; i++) {
            if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
                errval = rte_eth_dev_rss_reta_update(
                        slave_eth_dev->data->port_id,
                        &internals->reta_conf[0],
                        internals->slaves[i].reta_size);
                if (errval != 0) {
                    RTE_LOG(WARNING, PMD,
                            "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
                            " RSS Configuration for bonding may be inconsistent.\n",
                            slave_eth_dev->data->port_id, errval);
                }
                break;
            }
        }
    }

    /* If lsc interrupt is set, check initial slave's link status */
    if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
        slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
        bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
                RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
                NULL);
    }

    return 0;
}
static void
slave_remove(struct bond_dev_private *internals,
        struct rte_eth_dev *slave_eth_dev)
{
    uint8_t i;

    for (i = 0; i < internals->slave_count; i++)
        if (internals->slaves[i].port_id ==
                slave_eth_dev->data->port_id)
            break;

    if (i < (internals->slave_count - 1))
        memmove(&internals->slaves[i], &internals->slaves[i + 1],
                sizeof(internals->slaves[0]) *
                (internals->slave_count - i - 1));

    internals->slave_count--;

    /* force reconfiguration of slave interfaces */
    _rte_eth_dev_reset(slave_eth_dev);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

static void
slave_add(struct bond_dev_private *internals,
        struct rte_eth_dev *slave_eth_dev)
{
    struct bond_slave_details *slave_details =
            &internals->slaves[internals->slave_count];

    slave_details->port_id = slave_eth_dev->data->port_id;
    slave_details->last_link_status = 0;

    /* Mark slave devices that don't support interrupts so we can
     * compensate when we start the bond
     */
    if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
        slave_details->link_status_poll_enabled = 1;
    }

    slave_details->link_status_wait_to_complete = 0;
    /* Save the slave's current MAC so it can be restored on removal */
    memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
            sizeof(struct ether_addr));
}
static void
bond_ethdev_primary_set(struct bond_dev_private *internals,
        uint8_t slave_port_id)
{
    int i;

    if (internals->active_slave_count < 1)
        internals->current_primary_port = slave_port_id;
    else
        /* Search bonded device slave ports for new proposed primary port */
        for (i = 0; i < internals->active_slave_count; i++) {
            if (internals->active_slaves[i] == slave_port_id)
                internals->current_primary_port = slave_port_id;
        }
}
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);

static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
    struct bond_dev_private *internals;
    int i;

    /* slave eth dev will be started by bonded device */
    if (check_for_bonded_ethdev(eth_dev)) {
        RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
                eth_dev->data->port_id);
        return -1;
    }

    eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
    eth_dev->data->dev_started = 1;

    internals = eth_dev->data->dev_private;

    if (internals->slave_count == 0) {
        RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
        return -1;
    }

    if (internals->user_defined_mac == 0) {
        struct ether_addr *new_mac_addr = NULL;

        for (i = 0; i < internals->slave_count; i++)
            if (internals->slaves[i].port_id == internals->primary_port)
                new_mac_addr = &internals->slaves[i].persisted_mac_addr;

        if (new_mac_addr == NULL)
            return -1;

        if (mac_address_set(eth_dev, new_mac_addr) != 0) {
            RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
                    eth_dev->data->port_id);
            return -1;
        }
    }

    /* Update all slave devices' MACs */
    if (mac_address_slaves_update(eth_dev) != 0)
        return -1;

    /* If bonded device is configured in promiscuous mode then re-apply config */
    if (internals->promiscuous_en)
        bond_ethdev_promiscuous_enable(eth_dev);

    if (internals->mode == BONDING_MODE_8023AD) {
        if (internals->mode4.dedicated_queues.enabled == 1) {
            internals->mode4.dedicated_queues.rx_qid =
                    eth_dev->data->nb_rx_queues;
            internals->mode4.dedicated_queues.tx_qid =
                    eth_dev->data->nb_tx_queues;
        }
    }

    /* Reconfigure each slave device if starting bonded device */
    for (i = 0; i < internals->slave_count; i++) {
        struct rte_eth_dev *slave_ethdev =
                &(rte_eth_devices[internals->slaves[i].port_id]);
        if (slave_configure(eth_dev, slave_ethdev) != 0) {
            RTE_BOND_LOG(ERR,
                    "bonded port (%d) failed to reconfigure slave device (%d)",
                    eth_dev->data->port_id,
                    internals->slaves[i].port_id);
            return -1;
        }
        /* We will need to poll for link status if any slave doesn't
         * support interrupts
         */
        if (internals->slaves[i].link_status_poll_enabled)
            internals->link_status_polling_enabled = 1;
    }

    /* start polling if needed */
    if (internals->link_status_polling_enabled) {
        rte_eal_alarm_set(
                internals->link_status_polling_interval_ms * 1000,
                bond_ethdev_slave_link_status_change_monitor,
                (void *)&rte_eth_devices[internals->port_id]);
    }

    if (internals->user_defined_primary_port)
        bond_ethdev_primary_set(internals, internals->primary_port);

    if (internals->mode == BONDING_MODE_8023AD)
        bond_mode_8023ad_start(eth_dev);

    if (internals->mode == BONDING_MODE_TLB ||
            internals->mode == BONDING_MODE_ALB)
        bond_tlb_enable(internals);

    return 0;
}
static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
    uint8_t i;

    if (dev->data->rx_queues != NULL) {
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
            rte_free(dev->data->rx_queues[i]);
            dev->data->rx_queues[i] = NULL;
        }
        dev->data->nb_rx_queues = 0;
    }

    if (dev->data->tx_queues != NULL) {
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
            rte_free(dev->data->tx_queues[i]);
            dev->data->tx_queues[i] = NULL;
        }
        dev->data->nb_tx_queues = 0;
    }
}
void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
    struct bond_dev_private *internals = eth_dev->data->dev_private;
    uint8_t i;

    if (internals->mode == BONDING_MODE_8023AD) {
        struct port *port;
        void *pkt = NULL;

        bond_mode_8023ad_stop(eth_dev);

        /* Discard all messages to/from mode 4 state machines */
        for (i = 0; i < internals->active_slave_count; i++) {
            port = &mode_8023ad_ports[internals->active_slaves[i]];

            RTE_ASSERT(port->rx_ring != NULL);
            while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
                rte_pktmbuf_free(pkt);

            RTE_ASSERT(port->tx_ring != NULL);
            while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
                rte_pktmbuf_free(pkt);
        }
    }

    if (internals->mode == BONDING_MODE_TLB ||
            internals->mode == BONDING_MODE_ALB) {
        bond_tlb_disable(internals);
        for (i = 0; i < internals->active_slave_count; i++)
            tlb_last_obytets[internals->active_slaves[i]] = 0;
    }

    internals->active_slave_count = 0;
    internals->link_status_polling_enabled = 0;
    for (i = 0; i < internals->slave_count; i++)
        internals->slaves[i].last_link_status = 0;

    eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
    eth_dev->data->dev_started = 0;
}
void
bond_ethdev_close(struct rte_eth_dev *dev)
{
    struct bond_dev_private *internals = dev->data->dev_private;
    uint8_t bond_port_id = internals->port_id;
    int skipped = 0;

    RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
    while (internals->slave_count != skipped) {
        uint8_t port_id = internals->slaves[skipped].port_id;

        rte_eth_dev_stop(port_id);

        if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
            RTE_LOG(ERR, EAL,
                    "Failed to remove port %d from bonded device "
                    "%s\n", port_id, dev->device->name);
            skipped++;
        }
    }
    bond_ethdev_free_queues(dev);
    rte_bitmap_reset(internals->vlan_filter_bmp);
}
2065 /* forward declaration */
2066 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2069 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2071 struct bond_dev_private *internals = dev->data->dev_private;
2073 uint16_t max_nb_rx_queues = UINT16_MAX;
2074 uint16_t max_nb_tx_queues = UINT16_MAX;
2076 dev_info->max_mac_addrs = 1;
2078 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2079 internals->candidate_max_rx_pktlen :
2080 ETHER_MAX_JUMBO_FRAME_LEN;
2082 /* Max number of tx/rx queues that the bonded device can support is the
2083 * minimum of the values reported by the bonded slaves, as all slaves must
2084 * be capable of supporting the same number of tx/rx queues.
2085 */
2086 if (internals->slave_count > 0) {
2087 struct rte_eth_dev_info slave_info;
2088 uint8_t idx;
2090 for (idx = 0; idx < internals->slave_count; idx++) {
2091 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2092 &slave_info);
2094 if (slave_info.max_rx_queues < max_nb_rx_queues)
2095 max_nb_rx_queues = slave_info.max_rx_queues;
2097 if (slave_info.max_tx_queues < max_nb_tx_queues)
2098 max_nb_tx_queues = slave_info.max_tx_queues;
2099 }
2100 }
2102 dev_info->max_rx_queues = max_nb_rx_queues;
2103 dev_info->max_tx_queues = max_nb_tx_queues;
2105 /*
2106 * If dedicated hw queues enabled for link bonding device in LACP mode
2107 * then we need to reduce the maximum number of data path queues by 1.
2108 */
2109 if (internals->mode == BONDING_MODE_8023AD &&
2110 internals->mode4.dedicated_queues.enabled == 1) {
2111 dev_info->max_rx_queues--;
2112 dev_info->max_tx_queues--;
2115 dev_info->min_rx_bufsize = 0;
2117 dev_info->rx_offload_capa = internals->rx_offload_capa;
2118 dev_info->tx_offload_capa = internals->tx_offload_capa;
2119 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2121 dev_info->reta_size = internals->reta_size;
2122 }
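/*
 * Worked example (editorial note, not part of the original source): if one
 * slave reports max_rx_queues = 16 and another reports max_rx_queues = 8,
 * the bonded device advertises 8, since every slave must be able to mirror
 * the bonded device's queue configuration. With dedicated 802.3AD control
 * queues enabled, one queue is reserved and 7 would be advertised instead.
 */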
2125 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2129 struct bond_dev_private *internals = dev->data->dev_private;
2130 int i, res;
2131 /* don't do this while a slave is being added */
2132 rte_spinlock_lock(&internals->lock);
2134 if (on)
2135 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2136 else
2137 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2139 for (i = 0; i < internals->slave_count; i++) {
2140 uint8_t port_id = internals->slaves[i].port_id;
2142 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2143 if (res == ENOTSUP)
2144 RTE_LOG(WARNING, PMD,
2145 "Setting VLAN filter on slave port %u not supported.\n",
2146 port_id);
2147 }
2149 rte_spinlock_unlock(&internals->lock);
2151 return 0;
2152 }
2153 static int
2154 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2155 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2156 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2158 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2159 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2160 0, dev->data->numa_node);
2161 if (bd_rx_q == NULL)
2162 return -1;
2164 bd_rx_q->queue_id = rx_queue_id;
2165 bd_rx_q->dev_private = dev->data->dev_private;
2167 bd_rx_q->nb_rx_desc = nb_rx_desc;
2169 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2170 bd_rx_q->mb_pool = mb_pool;
2172 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2174 return 0;
2175 }
2177 static int
2178 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2179 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2180 const struct rte_eth_txconf *tx_conf)
2182 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2183 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2184 0, dev->data->numa_node);
2186 if (bd_tx_q == NULL)
2187 return -1;
2189 bd_tx_q->queue_id = tx_queue_id;
2190 bd_tx_q->dev_private = dev->data->dev_private;
2192 bd_tx_q->nb_tx_desc = nb_tx_desc;
2193 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2195 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2197 return 0;
2198 }
2200 static void
2201 bond_ethdev_rx_queue_release(void *queue)
2202 {
2203 if (queue == NULL)
2204 return;
2206 rte_free(queue);
2207 }
2209 static void
2210 bond_ethdev_tx_queue_release(void *queue)
2211 {
2212 if (queue == NULL)
2213 return;
2215 rte_free(queue);
2216 }
2218 static void
2219 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2221 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2222 struct bond_dev_private *internals;
2224 /* Default value for polling slave found is true as we don't want to
2225 * disable the polling thread if we cannot get the lock */
2226 int i, polling_slave_found = 1;
2231 bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2232 internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2234 if (!bonded_ethdev->data->dev_started ||
2235 !internals->link_status_polling_enabled)
2236 return;
2238 /* If the device is currently being configured then don't check the
2239 * slaves' link status; wait until the next period */
2240 if (rte_spinlock_trylock(&internals->lock)) {
2241 if (internals->slave_count > 0)
2242 polling_slave_found = 0;
2244 for (i = 0; i < internals->slave_count; i++) {
2245 if (!internals->slaves[i].link_status_poll_enabled)
2246 continue;
2248 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2249 polling_slave_found = 1;
2251 /* Update slave link status */
2252 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2253 internals->slaves[i].link_status_wait_to_complete);
2255 /* if link status has changed since last checked then call lsc
2256 * notification eth_dev callback */
2257 if (slave_ethdev->data->dev_link.link_status !=
2258 internals->slaves[i].last_link_status) {
2259 internals->slaves[i].last_link_status =
2260 slave_ethdev->data->dev_link.link_status;
2262 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2263 RTE_ETH_EVENT_INTR_LSC,
2264 &bonded_ethdev->data->port_id,
2265 NULL);
2268 rte_spinlock_unlock(&internals->lock);
2269 }
2271 if (polling_slave_found)
2272 /* Set alarm to continue monitoring link status of slave ethdev's */
2273 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2274 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2275 }
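/*
 * Editorial note: the monitor re-arms itself via rte_eal_alarm_set() for as
 * long as at least one slave still requires polling, so with the default
 * 10 ms interval (DEFAULT_POLLING_INTERVAL_10_MS) each polled slave's link
 * state is refreshed roughly 100 times per second until polling is disabled
 * or the device is stopped.
 */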
2278 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2280 void (*link_update)(uint8_t port_id, struct rte_eth_link *eth_link);
2282 struct bond_dev_private *bond_ctx;
2283 struct rte_eth_link slave_link;
2285 uint8_t idx;
2287 bond_ctx = ethdev->data->dev_private;
2289 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2291 if (ethdev->data->dev_started == 0 ||
2292 bond_ctx->active_slave_count == 0) {
2293 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2294 return 0;
2295 }
2297 ethdev->data->dev_link.link_status = ETH_LINK_UP;
2299 if (wait_to_complete)
2300 link_update = rte_eth_link_get;
2301 else
2302 link_update = rte_eth_link_get_nowait;
2304 switch (bond_ctx->mode) {
2305 case BONDING_MODE_BROADCAST:
2306 /*
2307 * Setting link speed to UINT32_MAX to ensure we pick up the
2308 * value of the first active slave
2309 */
2310 ethdev->data->dev_link.link_speed = UINT32_MAX;
2312 /*
2313 * link speed is the minimum value of all the slaves' link speeds, as
2314 * packet loss will occur on this slave if transmission at rates
2315 * greater than this are attempted
2316 */
2317 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2318 link_update(bond_ctx->active_slaves[idx], &slave_link);
2320 if (slave_link.link_speed <
2321 ethdev->data->dev_link.link_speed)
2322 ethdev->data->dev_link.link_speed =
2323 slave_link.link_speed;
2324 }
2325 break;
2326 case BONDING_MODE_ACTIVE_BACKUP:
2327 /* Current primary slave */
2328 link_update(bond_ctx->current_primary_port, &slave_link);
2330 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2331 break;
2332 case BONDING_MODE_8023AD:
2333 ethdev->data->dev_link.link_autoneg =
2334 bond_ctx->mode4.slave_link.link_autoneg;
2335 ethdev->data->dev_link.link_duplex =
2336 bond_ctx->mode4.slave_link.link_duplex;
2337 /* fall through to update link speed */
2338 case BONDING_MODE_ROUND_ROBIN:
2339 case BONDING_MODE_BALANCE:
2340 case BONDING_MODE_TLB:
2341 case BONDING_MODE_ALB:
2342 default:
2343 /*
2344 * In these modes the maximum theoretical link speed is the sum
2345 * of all the slaves
2346 */
2347 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2349 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2350 link_update(bond_ctx->active_slaves[idx], &slave_link);
2352 ethdev->data->dev_link.link_speed +=
2353 slave_link.link_speed;
2354 }
2355 }
2357 return 0;
2358 }
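/*
 * Worked example (editorial): with two active slaves at 10G and 1G,
 * BONDING_MODE_BROADCAST reports min(10G, 1G) = 1G, because every frame is
 * transmitted on every slave and the slowest port bounds the usable rate;
 * the round-robin/balance/TLB/ALB/802.3AD cases above instead report the
 * sum, 10G + 1G = 11G, as the theoretical aggregate bandwidth.
 */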
2363 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2365 struct bond_dev_private *internals = dev->data->dev_private;
2366 struct rte_eth_stats slave_stats;
2367 int i, j;
2369 for (i = 0; i < internals->slave_count; i++) {
2370 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2372 stats->ipackets += slave_stats.ipackets;
2373 stats->opackets += slave_stats.opackets;
2374 stats->ibytes += slave_stats.ibytes;
2375 stats->obytes += slave_stats.obytes;
2376 stats->imissed += slave_stats.imissed;
2377 stats->ierrors += slave_stats.ierrors;
2378 stats->oerrors += slave_stats.oerrors;
2379 stats->rx_nombuf += slave_stats.rx_nombuf;
2381 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2382 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2383 stats->q_opackets[j] += slave_stats.q_opackets[j];
2384 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2385 stats->q_obytes[j] += slave_stats.q_obytes[j];
2386 stats->q_errors[j] += slave_stats.q_errors[j];
2387 }
2388 }
2389 }
2392 static void
2393 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2395 struct bond_dev_private *internals = dev->data->dev_private;
2396 int i;
2398 for (i = 0; i < internals->slave_count; i++)
2399 rte_eth_stats_reset(internals->slaves[i].port_id);
2400 }
2402 static void
2403 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2405 struct bond_dev_private *internals = eth_dev->data->dev_private;
2406 int i;
2408 internals->promiscuous_en = 1;
2410 switch (internals->mode) {
2411 /* Promiscuous mode is propagated to all slaves */
2412 case BONDING_MODE_ROUND_ROBIN:
2413 case BONDING_MODE_BALANCE:
2414 case BONDING_MODE_BROADCAST:
2415 for (i = 0; i < internals->slave_count; i++)
2416 rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2417 break;
2418 /* In mode4 promiscuous mode is managed when a slave is added/removed */
2419 case BONDING_MODE_8023AD:
2420 break;
2421 /* Promiscuous mode is propagated only to primary slave */
2422 case BONDING_MODE_ACTIVE_BACKUP:
2423 case BONDING_MODE_TLB:
2424 case BONDING_MODE_ALB:
2425 default:
2426 rte_eth_promiscuous_enable(internals->current_primary_port);
2427 }
2428 }
2430 static void
2431 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2433 struct bond_dev_private *internals = dev->data->dev_private;
2434 int i;
2436 internals->promiscuous_en = 0;
2438 switch (internals->mode) {
2439 /* Promiscuous mode is propagated to all slaves */
2440 case BONDING_MODE_ROUND_ROBIN:
2441 case BONDING_MODE_BALANCE:
2442 case BONDING_MODE_BROADCAST:
2443 for (i = 0; i < internals->slave_count; i++)
2444 rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2445 break;
2446 /* In mode4 promiscuous mode is managed when a slave is added/removed */
2447 case BONDING_MODE_8023AD:
2448 break;
2449 /* Promiscuous mode is propagated only to primary slave */
2450 case BONDING_MODE_ACTIVE_BACKUP:
2451 case BONDING_MODE_TLB:
2452 case BONDING_MODE_ALB:
2453 default:
2454 rte_eth_promiscuous_disable(internals->current_primary_port);
2455 }
2456 }
2458 static void
2459 bond_ethdev_delayed_lsc_propagation(void *arg)
2460 {
2461 if (arg == NULL)
2462 return;
2464 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2465 RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
2466 }
2468 static int
2469 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
2470 void *param, void *ret_param __rte_unused)
2472 struct rte_eth_dev *bonded_eth_dev;
2473 struct bond_dev_private *internals;
2474 struct rte_eth_link link;
2477 int i, valid_slave = 0;
2478 uint8_t active_pos;
2479 uint8_t lsc_flag = 0;
2481 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2482 return 0;
2484 bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
2486 if (check_for_bonded_ethdev(bonded_eth_dev))
2487 return 0;
2489 internals = bonded_eth_dev->data->dev_private;
2491 /* If the device isn't started don't handle interrupts */
2492 if (!bonded_eth_dev->data->dev_started)
2493 return 0;
2495 /* verify that port_id is a valid slave of bonded port */
2496 for (i = 0; i < internals->slave_count; i++) {
2497 if (internals->slaves[i].port_id == port_id) {
2498 valid_slave = 1;
2499 break;
2500 }
2501 }
2503 if (!valid_slave)
2504 return 0;
2506 /* Search for port in active port list */
2507 active_pos = find_slave_by_id(internals->active_slaves,
2508 internals->active_slave_count, port_id);
2510 rte_eth_link_get_nowait(port_id, &link);
2511 if (link.link_status) {
2512 if (active_pos < internals->active_slave_count)
2513 return 0;
2515 /* if no active slave ports then set this port to be primary port */
2516 if (internals->active_slave_count < 1) {
2517 /* If first active slave, then change link status */
2518 bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2519 internals->current_primary_port = port_id;
2520 lsc_flag = 1;
2522 mac_address_slaves_update(bonded_eth_dev);
2523 }
2525 activate_slave(bonded_eth_dev, port_id);
2527 /* If user has defined the primary port then default to using it */
2528 if (internals->user_defined_primary_port &&
2529 internals->primary_port == port_id)
2530 bond_ethdev_primary_set(internals, port_id);
2531 } else {
2532 if (active_pos == internals->active_slave_count)
2533 return 0;
2535 /* Remove from active slave list */
2536 deactivate_slave(bonded_eth_dev, port_id);
2538 /* Update primary id, take first active slave from list or if none
2539 * available set to -1 */
2540 if (port_id == internals->current_primary_port) {
2541 if (internals->active_slave_count > 0)
2542 bond_ethdev_primary_set(internals,
2543 internals->active_slaves[0]);
2544 else
2545 internals->current_primary_port = internals->primary_port;
2546 }
2547 }
2549 /*
2550 * Update bonded device link properties after any change to active
2551 * slaves
2552 */
2553 bond_ethdev_link_update(bonded_eth_dev, 0);
2555 if (lsc_flag) {
2556 /* Cancel any possible outstanding interrupts if delays are enabled */
2557 if (internals->link_up_delay_ms > 0 ||
2558 internals->link_down_delay_ms > 0)
2559 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2560 bonded_eth_dev);
2562 if (bonded_eth_dev->data->dev_link.link_status) {
2563 if (internals->link_up_delay_ms > 0)
2564 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2565 bond_ethdev_delayed_lsc_propagation,
2566 (void *)bonded_eth_dev);
2567 else
2568 _rte_eth_dev_callback_process(bonded_eth_dev,
2569 RTE_ETH_EVENT_INTR_LSC,
2570 NULL, NULL);
2572 } else {
2573 if (internals->link_down_delay_ms > 0)
2574 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2575 bond_ethdev_delayed_lsc_propagation,
2576 (void *)bonded_eth_dev);
2577 else
2578 _rte_eth_dev_callback_process(bonded_eth_dev,
2579 RTE_ETH_EVENT_INTR_LSC,
2580 NULL, NULL);
2581 }
2582 }
2584 return 0;
2585 }
2586 static int
2587 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2588 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2589 {
2590 unsigned int i, j;
2591 int result = 0;
2592 int slave_reta_size;
2593 unsigned reta_count;
2594 struct bond_dev_private *internals = dev->data->dev_private;
2596 if (reta_size != internals->reta_size)
2597 return -EINVAL;
2599 /* Copy RETA table */
2600 reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2602 for (i = 0; i < reta_count; i++) {
2603 internals->reta_conf[i].mask = reta_conf[i].mask;
2604 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2605 if ((reta_conf[i].mask >> j) & 0x01)
2606 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2609 /* Fill rest of array */
2610 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2611 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2612 sizeof(internals->reta_conf[0]) * reta_count);
2614 /* Propagate RETA over slaves */
2615 for (i = 0; i < internals->slave_count; i++) {
2616 slave_reta_size = internals->slaves[i].reta_size;
2617 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2618 &internals->reta_conf[0], slave_reta_size);
2619 if (result < 0)
2620 return result;
2621 }
2623 return 0;
2624 }
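/*
 * Worked example (editorial): with RTE_RETA_GROUP_SIZE of 64 and a bonded
 * reta_size of 128, reta_count above is 128 / 64 = 2, so entries [0] and [1]
 * are taken from the caller and then replicated across the rest of
 * internals->reta_conf before being pushed to each slave, truncated to that
 * slave's own reta_size.
 */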
2627 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2628 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2629 {
2630 int i, j;
2631 struct bond_dev_private *internals = dev->data->dev_private;
2633 if (reta_size != internals->reta_size)
2634 return -EINVAL;
2636 /* Copy RETA table */
2637 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2638 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2639 if ((reta_conf[i].mask >> j) & 0x01)
2640 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2642 return 0;
2643 }
2645 static int
2646 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2647 struct rte_eth_rss_conf *rss_conf)
2648 unsigned int i;
2649 int result = 0;
2650 struct bond_dev_private *internals = dev->data->dev_private;
2651 struct rte_eth_rss_conf bond_rss_conf;
2653 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2655 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2657 if (bond_rss_conf.rss_hf != 0)
2658 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2660 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2661 sizeof(internals->rss_key)) {
2662 if (bond_rss_conf.rss_key_len == 0)
2663 bond_rss_conf.rss_key_len = 40;
2664 internals->rss_key_len = bond_rss_conf.rss_key_len;
2665 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2666 internals->rss_key_len);
2669 for (i = 0; i < internals->slave_count; i++) {
2670 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2671 &bond_rss_conf);
2672 if (result < 0)
2673 return result;
2674 }
2676 return 0;
2677 }
2679 static int
2680 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2681 struct rte_eth_rss_conf *rss_conf)
2683 struct bond_dev_private *internals = dev->data->dev_private;
2685 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2686 rss_conf->rss_key_len = internals->rss_key_len;
2687 if (rss_conf->rss_key)
2688 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2690 return 0;
2691 }
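/*
 * Illustrative usage (editorial sketch, not part of this driver): an
 * application updates the RSS hash functions on the bonded port exactly as
 * on a physical port, and the handlers above fan the change out to every
 * slave. bond_port_id and handle_error() are hypothetical names:
 *
 *   struct rte_eth_rss_conf conf = {
 *       .rss_key = NULL,       // keep the current key
 *       .rss_hf = ETH_RSS_IP,  // hash on IPv4/IPv6 headers only
 *   };
 *   if (rte_eth_dev_rss_hash_update(bond_port_id, &conf) != 0)
 *       handle_error();
 */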
2693 const struct eth_dev_ops default_dev_ops = {
2694 .dev_start = bond_ethdev_start,
2695 .dev_stop = bond_ethdev_stop,
2696 .dev_close = bond_ethdev_close,
2697 .dev_configure = bond_ethdev_configure,
2698 .dev_infos_get = bond_ethdev_info,
2699 .vlan_filter_set = bond_ethdev_vlan_filter_set,
2700 .rx_queue_setup = bond_ethdev_rx_queue_setup,
2701 .tx_queue_setup = bond_ethdev_tx_queue_setup,
2702 .rx_queue_release = bond_ethdev_rx_queue_release,
2703 .tx_queue_release = bond_ethdev_tx_queue_release,
2704 .link_update = bond_ethdev_link_update,
2705 .stats_get = bond_ethdev_stats_get,
2706 .stats_reset = bond_ethdev_stats_reset,
2707 .promiscuous_enable = bond_ethdev_promiscuous_enable,
2708 .promiscuous_disable = bond_ethdev_promiscuous_disable,
2709 .reta_update = bond_ethdev_rss_reta_update,
2710 .reta_query = bond_ethdev_rss_reta_query,
2711 .rss_hash_update = bond_ethdev_rss_hash_update,
2712 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get
2713 };
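/*
 * Editorial note: this table is what hooks the bonded device into the
 * generic ethdev API; e.g. a call such as rte_eth_promiscuous_enable(port)
 * on the bonded port resolves to bond_ethdev_promiscuous_enable() above and
 * is then propagated to the slaves according to the bonding mode.
 */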
2716 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2718 const char *name = rte_vdev_device_name(dev);
2719 uint8_t socket_id = dev->device.numa_node;
2720 struct bond_dev_private *internals = NULL;
2721 struct rte_eth_dev *eth_dev = NULL;
2722 uint32_t vlan_filter_bmp_size;
2724 /* now do all data allocation - for eth_dev structure, dummy pci driver
2725 * and internal (private) data
2728 /* reserve an ethdev entry */
2729 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2730 if (eth_dev == NULL) {
2731 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2732 goto err;
2733 }
2735 internals = eth_dev->data->dev_private;
2736 eth_dev->data->nb_rx_queues = (uint16_t)1;
2737 eth_dev->data->nb_tx_queues = (uint16_t)1;
2739 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2740 socket_id);
2741 if (eth_dev->data->mac_addrs == NULL) {
2742 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2743 goto err;
2744 }
2746 eth_dev->dev_ops = &default_dev_ops;
2747 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
2748 RTE_ETH_DEV_DETACHABLE;
2750 rte_spinlock_init(&internals->lock);
2752 internals->port_id = eth_dev->data->port_id;
2753 internals->mode = BONDING_MODE_INVALID;
2754 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2755 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2756 internals->xmit_hash = xmit_l2_hash;
2757 internals->user_defined_mac = 0;
2759 internals->link_status_polling_enabled = 0;
2761 internals->link_status_polling_interval_ms =
2762 DEFAULT_POLLING_INTERVAL_10_MS;
2763 internals->link_down_delay_ms = 0;
2764 internals->link_up_delay_ms = 0;
2766 internals->slave_count = 0;
2767 internals->active_slave_count = 0;
2768 internals->rx_offload_capa = 0;
2769 internals->tx_offload_capa = 0;
2770 internals->candidate_max_rx_pktlen = 0;
2771 internals->max_rx_pktlen = 0;
2773 /* Initially allow to choose any offload type */
2774 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2776 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2777 memset(internals->slaves, 0, sizeof(internals->slaves));
2779 /* Set mode 4 default configuration */
2780 bond_mode_8023ad_setup(eth_dev, NULL);
2781 if (bond_ethdev_mode_set(eth_dev, mode)) {
2782 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
2783 eth_dev->data->port_id, mode);
2784 goto err;
2785 }
2787 vlan_filter_bmp_size =
2788 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2789 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2790 RTE_CACHE_LINE_SIZE);
2791 if (internals->vlan_filter_bmpmem == NULL) {
2792 RTE_BOND_LOG(ERR,
2793 "Failed to allocate vlan bitmap for bonded device %u\n",
2794 eth_dev->data->port_id);
2795 goto err;
2796 }
2798 internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2799 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2800 if (internals->vlan_filter_bmp == NULL) {
2801 RTE_BOND_LOG(ERR,
2802 "Failed to init vlan bitmap for bonded device %u\n",
2803 eth_dev->data->port_id);
2804 rte_free(internals->vlan_filter_bmpmem);
2805 goto err;
2806 }
2808 return eth_dev->data->port_id;
2810 err:
2811 rte_free(internals);
2812 if (eth_dev != NULL) {
2813 rte_free(eth_dev->data->mac_addrs);
2814 rte_eth_dev_release_port(eth_dev);
2815 }
2817 return -1;
2818 }
2819 static int
2820 bond_probe(struct rte_vdev_device *dev)
2822 const char *name;
2823 struct bond_dev_private *internals;
2824 struct rte_kvargs *kvlist;
2825 uint8_t bonding_mode, socket_id;
2826 int arg_count, port_id;
2831 name = rte_vdev_device_name(dev);
2832 RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2834 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
2835 pmd_bond_init_valid_arguments);
2836 if (kvlist == NULL)
2837 return -1;
2839 /* Parse link bonding mode */
2840 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2841 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2842 &bond_ethdev_parse_slave_mode_kvarg,
2843 &bonding_mode) != 0) {
2844 RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2845 name);
2846 goto parse_error;
2847 }
2848 } else {
2849 RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
2850 "device %s\n", name);
2851 goto parse_error;
2852 }
2854 /* Parse socket id to create bonding device on */
2855 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2856 if (arg_count == 1) {
2857 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2858 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2859 != 0) {
2860 RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
2861 "bonded device %s\n", name);
2862 goto parse_error;
2863 }
2864 } else if (arg_count > 1) {
2865 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
2866 "bonded device %s\n", name);
2867 goto parse_error;
2868 } else {
2869 socket_id = rte_socket_id();
2870 }
2872 dev->device.numa_node = socket_id;
2874 /* Create link bonding eth device */
2875 port_id = bond_alloc(dev, bonding_mode);
2876 if (port_id < 0) {
2877 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2878 "socket %u.\n", name, bonding_mode, socket_id);
2879 goto parse_error;
2880 }
2881 internals = rte_eth_devices[port_id].data->dev_private;
2882 internals->kvlist = kvlist;
2884 RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2885 "socket %u.\n", name, port_id, bonding_mode, socket_id);
2887 return 0;
2888 parse_error:
2889 rte_kvargs_free(kvlist);
2890 return -1;
2891 }
2894 static int
2895 bond_remove(struct rte_vdev_device *dev)
2897 struct rte_eth_dev *eth_dev;
2898 struct bond_dev_private *internals;
2899 const char *name;
2904 name = rte_vdev_device_name(dev);
2905 RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2907 /* now free all data allocation - for eth_dev structure,
2908 * dummy pci driver and internal (private) data
2909 */
2911 /* find an ethdev entry */
2912 eth_dev = rte_eth_dev_allocated(name);
2913 if (eth_dev == NULL)
2914 return -ENODEV;
2916 RTE_ASSERT(eth_dev->device == &dev->device);
2918 internals = eth_dev->data->dev_private;
2919 if (internals->slave_count != 0)
2920 return -EBUSY;
2922 if (eth_dev->data->dev_started == 1) {
2923 bond_ethdev_stop(eth_dev);
2924 bond_ethdev_close(eth_dev);
2927 eth_dev->dev_ops = NULL;
2928 eth_dev->rx_pkt_burst = NULL;
2929 eth_dev->tx_pkt_burst = NULL;
2931 internals = eth_dev->data->dev_private;
2932 rte_bitmap_free(internals->vlan_filter_bmp);
2933 rte_free(internals->vlan_filter_bmpmem);
2934 rte_free(eth_dev->data->dev_private);
2935 rte_free(eth_dev->data->mac_addrs);
2937 rte_eth_dev_release_port(eth_dev);
2939 return 0;
2940 }
2942 /* this part will resolve the slave portids after all the other pdev and vdev
2943 * have been allocated */
2945 bond_ethdev_configure(struct rte_eth_dev *dev)
2947 const char *name = dev->device->name;
2948 struct bond_dev_private *internals = dev->data->dev_private;
2949 struct rte_kvargs *kvlist = internals->kvlist;
2950 int arg_count;
2951 uint8_t port_id = dev - rte_eth_devices;
2953 static const uint8_t default_rss_key[40] = {
2954 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2955 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2956 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2957 0xBE, 0xAC, 0x01, 0xFA
2958 };
2960 unsigned int i, j;
2962 /* If RSS is enabled, fill table and key with default values */
2963 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2964 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2965 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2966 memcpy(internals->rss_key, default_rss_key, 40);
2968 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2969 internals->reta_conf[i].mask = ~0LL;
2970 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2971 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2972 }
2973 }
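/*
 * Worked example (editorial): with nb_rx_queues = 4, the j % nb_rx_queues
 * fill above initialises the default redirection table to the repeating
 * pattern 0, 1, 2, 3, 0, 1, ... so RSS traffic is spread evenly across the
 * configured receive queues until the application programs its own RETA.
 */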
2975 /* set the max_rx_pktlen */
2976 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
2978 /*
2979 * if no kvlist, it means that this bonded device has been created
2980 * through the bonding api.
2981 */
2982 if (!kvlist)
2983 return 0;
2985 /* Parse MAC address for bonded device */
2986 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2987 if (arg_count == 1) {
2988 struct ether_addr bond_mac;
2990 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2991 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2992 RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
2993 name);
2994 return -1;
2995 }
2997 /* Set MAC address */
2998 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
2999 RTE_LOG(ERR, EAL,
3000 "Failed to set mac address on bonded device %s\n",
3001 name);
3002 return -1;
3003 }
3004 } else if (arg_count > 1) {
3005 RTE_LOG(ERR, EAL,
3006 "MAC address can be specified only once for bonded device %s\n",
3007 name);
3008 return -1;
3009 }
3011 /* Parse/set balance mode transmit policy */
3012 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3013 if (arg_count == 1) {
3014 uint8_t xmit_policy;
3016 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3017 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3018 0) {
3019 RTE_LOG(ERR, EAL,
3020 "Invalid xmit policy specified for bonded device %s\n",
3021 name);
3022 return -1;
3023 }
3025 /* Set balance mode transmit policy */
3026 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3027 RTE_LOG(ERR, EAL,
3028 "Failed to set balance xmit policy on bonded device %s\n",
3029 name);
3030 return -1;
3031 }
3032 } else if (arg_count > 1) {
3033 RTE_LOG(ERR, EAL,
3034 "Transmit policy can be specified only once for bonded device"
3035 " %s\n", name);
3036 return -1;
3037 }
3039 /* Parse/add slave ports to bonded device */
3040 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3041 struct bond_ethdev_slave_ports slave_ports;
3044 memset(&slave_ports, 0, sizeof(slave_ports));
3046 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3047 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3048 RTE_LOG(ERR, EAL,
3049 "Failed to parse slave ports for bonded device %s\n",
3050 name);
3051 return -1;
3052 }
3054 for (i = 0; i < slave_ports.slave_count; i++) {
3055 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3056 RTE_LOG(ERR, EAL,
3057 "Failed to add port %d as slave to bonded device %s\n",
3058 slave_ports.slaves[i], name);
3059 }
3060 }
3062 } else {
3063 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3064 return -1;
3065 }
3067 /* Parse/set primary slave port id*/
3068 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3069 if (arg_count == 1) {
3070 uint8_t primary_slave_port_id;
3072 if (rte_kvargs_process(kvlist,
3073 PMD_BOND_PRIMARY_SLAVE_KVARG,
3074 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3075 &primary_slave_port_id) < 0) {
3076 RTE_LOG(ERR, EAL,
3077 "Invalid primary slave port id specified for bonded device"
3078 " %s\n", name);
3079 return -1;
3080 }
3082 /* Set primary slave port id */
3083 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
3084 != 0) {
3085 RTE_LOG(ERR, EAL,
3086 "Failed to set primary slave port %d on bonded device %s\n",
3087 primary_slave_port_id, name);
3088 return -1;
3089 }
3090 } else if (arg_count > 1) {
3091 RTE_LOG(ERR, EAL,
3092 "Primary slave can be specified only once for bonded device"
3093 " %s\n", name);
3094 return -1;
3095 }
3097 /* Parse link status monitor polling interval */
3098 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3099 if (arg_count == 1) {
3100 uint32_t lsc_poll_interval_ms;
3102 if (rte_kvargs_process(kvlist,
3103 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3104 &bond_ethdev_parse_time_ms_kvarg,
3105 &lsc_poll_interval_ms) < 0) {
3106 RTE_LOG(ERR, EAL,
3107 "Invalid lsc polling interval value specified for bonded"
3108 " device %s\n", name);
3109 return -1;
3110 }
3112 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3113 != 0) {
3114 RTE_LOG(ERR, EAL,
3115 "Failed to set lsc monitor polling interval (%u ms) on"
3116 " bonded device %s\n", lsc_poll_interval_ms, name);
3117 return -1;
3118 }
3119 } else if (arg_count > 1) {
3120 RTE_LOG(ERR, EAL,
3121 "LSC polling interval can be specified only once for bonded"
3122 " device %s\n", name);
3123 return -1;
3124 }
3126 /* Parse link up interrupt propagation delay */
3127 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3128 if (arg_count == 1) {
3129 uint32_t link_up_delay_ms;
3131 if (rte_kvargs_process(kvlist,
3132 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3133 &bond_ethdev_parse_time_ms_kvarg,
3134 &link_up_delay_ms) < 0) {
3135 RTE_LOG(ERR, EAL,
3136 "Invalid link up propagation delay value specified for"
3137 " bonded device %s\n", name);
3138 return -1;
3139 }
3141 /* Set link up propagation delay */
3142 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3143 != 0) {
3144 RTE_LOG(ERR, EAL,
3145 "Failed to set link up propagation delay (%u ms) on bonded"
3146 " device %s\n", link_up_delay_ms, name);
3147 return -1;
3148 }
3149 } else if (arg_count > 1) {
3150 RTE_LOG(ERR, EAL,
3151 "Link up propagation delay can be specified only once for"
3152 " bonded device %s\n", name);
3153 return -1;
3154 }
3156 /* Parse link down interrupt propagation delay */
3157 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3158 if (arg_count == 1) {
3159 uint32_t link_down_delay_ms;
3161 if (rte_kvargs_process(kvlist,
3162 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3163 &bond_ethdev_parse_time_ms_kvarg,
3164 &link_down_delay_ms) < 0) {
3165 RTE_LOG(ERR, EAL,
3166 "Invalid link down propagation delay value specified for"
3167 " bonded device %s\n", name);
3168 return -1;
3169 }
3171 /* Set link down propagation delay */
3172 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3173 != 0) {
3174 RTE_LOG(ERR, EAL,
3175 "Failed to set link down propagation delay (%u ms) on"
3176 " bonded device %s\n", link_down_delay_ms, name);
3177 return -1;
3178 }
3179 } else if (arg_count > 1) {
3180 RTE_LOG(ERR, EAL,
3181 "Link down propagation delay can be specified only once for"
3182 " bonded device %s\n", name);
3183 return -1;
3184 }
3186 return 0;
3187 }
3189 struct rte_vdev_driver pmd_bond_drv = {
3190 .probe = bond_probe,
3191 .remove = bond_remove,
3192 };
3194 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3195 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3197 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3198 "slave=<ifc> "
3199 "primary=<ifc> "
3200 "mode=[0-6] "
3201 "xmit_policy=[l2 | l23 | l34] "
3202 "socket_id=<int> "
3203 "mac=<mac addr> "
3204 "lsc_poll_period_ms=<int> "
3205 "up_delay=<int> "
3206 "down_delay=<int>");
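/*
 * Example invocation (editorial sketch; the slave PCI addresses are
 * placeholders): creating a two-slave active-backup (mode 1) bonded device
 * from the EAL command line using the arguments registered above:
 *
 *   testpmd -l 0-3 -n 4 \
 *       --vdev 'net_bonding0,mode=1,slave=0000:02:00.0,slave=0000:02:00.1,primary=0000:02:00.0,up_delay=10,down_delay=50'
 */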