/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"
#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
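/*
 * XOR of the L4 ports is symmetric: a flow hashes to the same slave in both
 * directions, e.g. 1024 ^ 80 == 80 ^ 1024 for a TCP connection between
 * ports 1024 and 80.
 */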
/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}
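/*
 * Note: at most two stacked VLAN tags (QinQ) are skipped above; any deeper
 * tag nesting is left for the caller to handle.
 */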
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_slave = 0;
	uint16_t num_rx_total = 0;

	int i;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
		if (num_rx_slave) {
			num_rx_total += num_rx_slave;
			nb_pkts -= num_rx_slave;
		}
	}

	return num_rx_total;
}
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	return !vlan_tci && (ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}
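/*
 * Slow protocol frames (IEEE 802.3ad, ethertype 0x8809) carry LACPDUs and
 * marker PDUs; they are never VLAN tagged, hence the !vlan_tci check.
 */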
/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint8_t slave_port) {
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0)
		return -1;

	return 0;
}
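/*
 * Note: rte_flow_validate() above only asks the slave PMD whether it could
 * accept the rule; no hardware state is programmed until rte_flow_create()
 * is called in bond_ethdev_8023ad_flow_set() below.
 */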
int
bond_8023ad_slow_pkt_hw_filter_supported(uint8_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info, slave_info;
	uint8_t idx;

	/* Verify that all slaves in the bonding device support flow director */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			rte_eth_dev_info_get(internals->slaves[idx].port_id,
					&slave_info);

			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint8_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint8_t slaves[RTE_MAX_ETHPORTS];
	uint8_t slave_count;

	uint8_t i, idx;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0, idx = internals->active_slave;
			i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
		idx = idx % slave_count;

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);
	}

	internals->active_slave = idx;

	return num_rx_total;
}
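/*
 * The active_slave index saved above rotates which slave is polled first on
 * the next call, so a single slave cannot monopolise the head of the
 * receive array whenever the burst fills up early.
 */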
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, op_slave_idx;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];

	/* Total amount of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	if (unlikely(nb_pkts == 0))
		return 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
			num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];
		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate slaves mbuf with the packets which are to be sent */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i],
					distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave.
			 * Use only slaves that are currently distributing.
			 */
			uint8_t slave_offset =
					distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
					bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		num_tx_total += num_tx_slave;
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
					num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}
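/*
 * On a short transmit count the unsent mbufs are compacted to the tail of
 * bufs[]; they stay owned by the caller, matching the rte_eth_tx_burst
 * contract that only the first num_tx_total packets were consumed.
 */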
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_addr bond_mac;

	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint8_t slaves[RTE_MAX_ETHPORTS];
	uint8_t slave_count, idx;

	uint8_t collecting;	/* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t i, j, k;
	uint8_t subtype;

	rte_eth_macaddr_get(internals->port_id, &bond_mac);
	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
				COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {

			/* If packet is not pure L2 and is known, skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove packet from array if it is a slow packet, or if the
			 * slave is not in collecting state, or if the bonding
			 * interface is not in promiscuous mode and the packet
			 * address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) ||
				!collecting || (!promisc &&
					!is_multicast_ether_addr(&hdr->d_addr) &&
					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
					    internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	internals->active_slave = idx;
	return num_rx_total;
}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}

	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
	return;
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}
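/*
 * Example: an address whose wire bytes are 0x0A 0x00 0x00 0x01 is rendered
 * as "10.0.0.1"; MaxIPv4String (16) covers the worst case
 * "255.255.255.255" plus the terminating NUL.
 */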
#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint8_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
static void
update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX packets number for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table, and increment stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	RTE_LOG(DEBUG, PMD, \
		"%s port:%d " \
		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"DstIP:%s " \
		"%s " \
		"%d\n", \
		info, \
		port, \
		eth_h->s_addr.addr_bytes[0], \
		eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], \
		eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], \
		eth_h->s_addr.addr_bytes[5], \
		src_ip, \
		eth_h->d_addr.addr_bytes[0], \
		eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], \
		eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], \
		eth_h->d_addr.addr_bytes[5], \
		dst_ip, \
		arp_op, \
		++burstnumber)
#endif
static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	snprintf(buf, 16, "%s", info);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate slaves mbuf with which packets are to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
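/*
 * Note: slave_idx is static, so the round-robin position is shared by all
 * tx queues of all bonded devices that use this burst function, rather than
 * being tracked per queue.
 */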
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}
static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}
static uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

	uint32_t hash = ether_hash(eth_hdr);

	return (hash ^= hash >> 8) % slave_count;
}
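/*
 * Folding the upper bits into the low byte before the modulo spreads MAC
 * pairs that differ only in their high bytes across the slaves.
 */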
static uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
	uint32_t hash, l3hash = 0;

	hash = ether_hash(eth_hdr);

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv4_hash(ipv4_hdr);

	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);
	}

	hash = hash ^ l3hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}
static uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

	struct udp_hdr *udp_hdr = NULL;
	struct tcp_hdr *tcp_hdr = NULL;
	uint32_t hash, l3hash = 0, l4hash = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		size_t ip_hdr_offset;

		l3hash = ipv4_hash(ipv4_hdr);

		/* there is no L4 header in fragmented packet */
		if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
			ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

			if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}
	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);

		if (ipv6_hdr->proto == IPPROTO_TCP) {
			tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(tcp_hdr);
		} else if (ipv6_hdr->proto == IPPROTO_UDP) {
			udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(udp_hdr);
		}
	}

	hash = l3hash ^ l4hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}
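/*
 * One of the three xmit_l*_hash() helpers above is installed in
 * internals->xmit_hash according to the balance policy configured through
 * rte_eth_bond_xmit_policy_set() (LAYER2, LAYER23 or LAYER34).
 */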
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint8_t slave;
};
void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}
static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}
static void
bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
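/*
 * link_speed is reported in Mbps, so link_speed * 1000000 / 8 is the link
 * capacity in bytes per second; the integer part and remainder of the
 * headroom are kept separately so bandwidth_cmp() can order slaves without
 * losing precision.
 */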
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint8_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint8_t i, slave_id;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint8_t i, j;

	uint8_t num_of_slaves = internals->active_slave_count;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
			sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}
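/*
 * Rewriting the source MAC to the transmitting slave's own address keeps
 * the upstream switch from seeing the bonded MAC flapping between ports,
 * which is what makes transmit-only balancing (TLB) safe on a plain switch.
 */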
void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint8_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
	/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}
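/*
 * ALB (mode 6) extends TLB with receive load balancing: the ARP traffic
 * rewritten per client above makes each peer learn a different slave MAC,
 * spreading inbound traffic across the slaves as well.
 */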
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

	int i, op_slave_id;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate slaves mbuf with the packets which are to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		/* Select output slave using hash based on xmit policy */
		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

		/* Populate slave mbuf arrays with mbufs for that slave */
		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += slave_tx_fail_count;
				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						slave_tx_fail_count * sizeof(bufs[0]));
			}

			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, j, op_slave_idx;
	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

	/* Allocate additional packets in case 8023AD mode. */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

	/* Total amount of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
	/* Slow packets placed in each slave */
	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];

		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
				NULL);
		slave_nb_pkts[i] = slave_slow_nb_pkts[i];

		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
			slave_bufs[i][j] = slow_pkts[j];

		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate slaves mbuf with the packets which are to be sent on it */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave. Use only
			 * slaves that are currently distributing. */
			uint8_t slave_offset = distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		/* If tx burst fails drop slow packets */
		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}
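/*
 * Note: LACPDUs dequeued from port->tx_ring are placed at the head of each
 * slave burst and excluded from num_tx_total, so the return value counts
 * only the caller's data packets.
 */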
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t tx_failed_flag = 0, num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}
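/*
 * Each mbuf's reference count was raised by num_of_slaves - 1 above, so
 * every slave's tx path releases one reference; a packet is only truly
 * freed once the last slave (or the cleanup loop) drops its reference.
 */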
void
link_properties_set(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_link *slave_dev_link)
{
	struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	if (slave_dev_link->link_status &&
		bonded_eth_dev->data->dev_started) {
		bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
		bonded_dev_link->link_speed = slave_dev_link->link_speed;

		internals->link_props_set = 1;
	}
}

void
link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	memset(&(bonded_eth_dev->data->dev_link), 0,
			sizeof(bonded_eth_dev->data->dev_link));

	internals->link_props_set = 0;
}

int
link_properties_valid(struct rte_eth_link *bonded_dev_link,
		struct rte_eth_link *slave_dev_link)
{
	if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
		bonded_dev_link->link_speed != slave_dev_link->link_speed)
		return -1;

	return 0;
}
int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
		return -1;
	}

	if (dst_mac_addr == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	ether_addr_copy(mac_addr, dst_mac_addr);
	return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (new_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	/* If new MAC is different to current MAC then update */
	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

	return 0;
}
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	int i;

	/* Update slave devices MAC addresses */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++) {
			if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (mac_address_set(&rte_eth_devices[internals->primary_port],
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					return -1;
				}
			} else {
				if (mac_address_set(
						&rte_eth_devices[internals->slaves[i].port_id],
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
					return -1;
				}
			}
		}
	}

	return 0;
}
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_8023AD:
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		if (internals->mode4.dedicated_queues.enabled == 0) {
			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
			RTE_LOG(WARNING, PMD,
				"Using mode 4, it is necessary to do TX burst "
				"and RX burst at least every 100ms.\n");
		} else {
			/* Use flow director's optimization */
			eth_dev->rx_pkt_burst =
					bond_ethdev_rx_burst_8023ad_fast_queue;
			eth_dev->tx_pkt_burst =
					bond_ethdev_tx_burst_8023ad_fast_queue;
		}
		break;
	case BONDING_MODE_TLB:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_ALB:
		if (bond_mode_alb_enable(eth_dev) != 0)
			return -1;

		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}
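/*
 * For reference, a minimal application-side sketch (not part of this file):
 * the mode handled above is normally chosen when the bonded port is created,
 * or changed later through the public API, e.g.
 *
 *	int port = rte_eth_bond_create("net_bonding0", BONDING_MODE_8023AD, 0);
 *	if (port >= 0)
 *		rte_eth_bond_mode_set(port, BONDING_MODE_ACTIVE_BACKUP);
 */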
static int
slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	int errval = 0;
	struct bond_dev_private *internals = (struct bond_dev_private *)
		bonded_eth_dev->data->dev_private;
	struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];

	if (port->slow_pool == NULL) {
		char mem_name[256];
		int slave_id = slave_eth_dev->data->port_id;

		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
				slave_id);
		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			slave_eth_dev->data->numa_node);

		/* Any memory allocation failure in initialization is critical because
		 * resources can't be freed, so reinitialization is impossible. */
		if (port->slow_pool == NULL) {
			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
				slave_id, mem_name, rte_strerror(rte_errno));
		}
	}

	if (internals->mode4.dedicated_queues.enabled == 1) {
		/* Configure slow Rx queue */

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.rx_qid, 128,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL, port->slow_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.rx_qid,
					errval);
			return errval;
		}

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid, 512,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid,
				errval);
			return errval;
		}
	}
	return 0;
}
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;

	uint16_t nb_rx_queues;
	uint16_t nb_tx_queues;

	int errval;
	uint16_t q_id;
	struct rte_flow_error flow_error;

	struct bond_dev_private *internals = (struct bond_dev_private *)
		bonded_eth_dev->data->dev_private;

	/* Stop slave */
	rte_eth_dev_stop(slave_eth_dev->data->port_id);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* If RSS is enabled for bonding, try to enable it for slaves */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
		if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
				!= 0) {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
		} else {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
		}

		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
	}

	slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
			bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;

	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			nb_rx_queues++;
			nb_tx_queues++;
		}
	}

	/* Configure device */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			nb_rx_queues, nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Setup Rx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		errval = slave_configure_slow_queue(bonded_eth_dev,
				slave_eth_dev);
		if (errval != 0)
			return errval;

		if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
				slave_eth_dev->data->port_id) != 0) {
			RTE_BOND_LOG(ERR,
				"bond_ethdev_8023ad_flow_verify: port=%d",
				slave_eth_dev->data->port_id);
			return -1;
		}

		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
			rte_flow_destroy(slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
					&flow_error);

		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
				slave_eth_dev->data->port_id);
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	/* If RSS is enabled for bonding, synchronize RETA */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		int i;
		struct bond_dev_private *internals;

		internals = bonded_eth_dev->data->dev_private;

		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
				errval = rte_eth_dev_rss_reta_update(
						slave_eth_dev->data->port_id,
						&internals->reta_conf[0],
						internals->slaves[i].reta_size);
				if (errval != 0) {
					RTE_LOG(WARNING, PMD,
							"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
							" RSS Configuration for bonding may be inconsistent.\n",
							slave_eth_dev->data->port_id, errval);
				}
				break;
			}
		}
	}

	/* If lsc interrupt is set, check initial slave's link status */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
			NULL);
	}

	return 0;
}
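/*
 * The manual link_update/lsc callback invocation above seeds the bonded
 * device with the slave's current link state; without it, a slave that is
 * already up would not be activated until its next link-state interrupt.
 */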
static void
slave_remove(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	uint8_t i;

	for (i = 0; i < internals->slave_count; i++)
		if (internals->slaves[i].port_id ==
				slave_eth_dev->data->port_id)
			break;

	if (i < (internals->slave_count - 1))
		memmove(&internals->slaves[i], &internals->slaves[i + 1],
				sizeof(internals->slaves[0]) *
				(internals->slave_count - i - 1));

	internals->slave_count--;

	/* force reconfiguration of slave interfaces */
	_rte_eth_dev_reset(slave_eth_dev);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

static void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* Mark slave devices that don't support interrupts so we can
	 * compensate when we start the bond
	 */
	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
		slave_details->link_status_poll_enabled = 1;
	}

	slave_details->link_status_wait_to_complete = 0;
	/* clean tlb_last_obytes when adding port for bonding device */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct ether_addr));
}
void
bond_ethdev_primary_set(struct bond_dev_private *internals,
		uint8_t slave_port_id)
{
	int i;

	if (internals->active_slave_count < 1)
		internals->current_primary_port = slave_port_id;
	else
		/* Search bonded device slave ports for new proposed primary port */
		for (i = 0; i < internals->active_slave_count; i++) {
			if (internals->active_slaves[i] == slave_port_id)
				internals->current_primary_port = slave_port_id;
		}
}
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);

static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (check_for_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		return -1;
	}

	if (internals->user_defined_mac == 0) {
		struct ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			return -1;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			return -1;
		}
	}

	/* Update all slave devices MACs */
	if (mac_address_slaves_update(eth_dev) != 0)
		return -1;

	/* If bonded device is configured in promiscuous mode then re-apply config */
	if (internals->promiscuous_en)
		bond_ethdev_promiscuous_enable(eth_dev);

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			internals->mode4.dedicated_queues.rx_qid =
					eth_dev->data->nb_rx_queues;
			internals->mode4.dedicated_queues.tx_qid =
					eth_dev->data->nb_tx_queues;
		}
	}

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		struct rte_eth_dev *slave_ethdev =
				&(rte_eth_devices[internals->slaves[i].port_id]);
		if (slave_configure(eth_dev, slave_ethdev) != 0) {
			RTE_BOND_LOG(ERR,
				"bonded port (%d) failed to reconfigure slave device (%d)",
				eth_dev->data->port_id,
				internals->slaves[i].port_id);
			return -1;
		}
		/* We will need to poll for link status if any slave doesn't
		 * support interrupts
		 */
		if (internals->slaves[i].link_status_poll_enabled)
			internals->link_status_polling_enabled = 1;
	}

	/* start polling if needed */
	if (internals->link_status_polling_enabled) {
		rte_eal_alarm_set(
			internals->link_status_polling_interval_ms * 1000,
			bond_ethdev_slave_link_status_change_monitor,
			(void *)&rte_eth_devices[internals->port_id]);
	}

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB)
		bond_tlb_enable(internals);

	return 0;
}
static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
	uint8_t i;

	if (dev->data->rx_queues != NULL) {
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			rte_free(dev->data->rx_queues[i]);
			dev->data->rx_queues[i] = NULL;
		}
		dev->data->nb_rx_queues = 0;
	}

	if (dev->data->tx_queues != NULL) {
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			rte_free(dev->data->tx_queues[i]);
			dev->data->tx_queues[i] = NULL;
		}
		dev->data->nb_tx_queues = 0;
	}
}
void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	uint8_t i;

	if (internals->mode == BONDING_MODE_8023AD) {
		struct port *port;
		void *pkt = NULL;

		bond_mode_8023ad_stop(eth_dev);

		/* Discard all messages to/from mode 4 state machines */
		for (i = 0; i < internals->active_slave_count; i++) {
			port = &mode_8023ad_ports[internals->active_slaves[i]];

			RTE_ASSERT(port->rx_ring != NULL);
			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);

			RTE_ASSERT(port->tx_ring != NULL);
			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);
		}
	}

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB) {
		bond_tlb_disable(internals);
		for (i = 0; i < internals->active_slave_count; i++)
			tlb_last_obytets[internals->active_slaves[i]] = 0;
	}

	internals->active_slave_count = 0;
	internals->link_status_polling_enabled = 0;
	for (i = 0; i < internals->slave_count; i++)
		internals->slaves[i].last_link_status = 0;

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 0;
}
void
bond_ethdev_close(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	uint8_t bond_port_id = internals->port_id;
	int skipped = 0;

	RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
	while (internals->slave_count != skipped) {
		uint8_t port_id = internals->slaves[skipped].port_id;

		rte_eth_dev_stop(port_id);

		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
			RTE_LOG(ERR, EAL,
				"Failed to remove port %d from bonded device "
				"%s\n", port_id, dev->device->name);
			skipped++;
		}
	}
	bond_ethdev_free_queues(dev);
	rte_bitmap_reset(internals->vlan_filter_bmp);
}
2060 /* forward declaration */
2061 static int bond_ethdev_configure(struct rte_eth_dev *dev);
static void
bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	uint16_t max_nb_rx_queues = UINT16_MAX;
	uint16_t max_nb_tx_queues = UINT16_MAX;

	dev_info->max_mac_addrs = 1;

	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
			internals->candidate_max_rx_pktlen :
			ETHER_MAX_JUMBO_FRAME_LEN;

	/* The maximum number of tx/rx queues that the bonded device can
	 * support is the minimum of the values reported by the bonded slaves,
	 * as all slaves must be capable of supporting the same number of
	 * tx/rx queues.
	 */
	if (internals->slave_count > 0) {
		struct rte_eth_dev_info slave_info;
		uint8_t idx;

		for (idx = 0; idx < internals->slave_count; idx++) {
			rte_eth_dev_info_get(internals->slaves[idx].port_id,
					&slave_info);

			if (slave_info.max_rx_queues < max_nb_rx_queues)
				max_nb_rx_queues = slave_info.max_rx_queues;

			if (slave_info.max_tx_queues < max_nb_tx_queues)
				max_nb_tx_queues = slave_info.max_tx_queues;
		}
	}

	dev_info->max_rx_queues = max_nb_rx_queues;
	dev_info->max_tx_queues = max_nb_tx_queues;

	/*
	 * If dedicated HW queues are enabled for the link bonding device in
	 * LACP mode then we need to reduce the maximum number of data path
	 * queues by 1.
	 */
	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		dev_info->max_rx_queues--;
		dev_info->max_tx_queues--;
	}

	dev_info->min_rx_bufsize = 0;

	dev_info->rx_offload_capa = internals->rx_offload_capa;
	dev_info->tx_offload_capa = internals->tx_offload_capa;
	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;

	dev_info->reta_size = internals->reta_size;
}
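/*
 * Illustrative sketch (not part of the driver): an application can size its
 * queue setup from the limits reported above, which are the minima across
 * all slaves. The variable names below are hypothetical.
 *
 *	struct rte_eth_dev_info dev_info;
 *
 *	rte_eth_dev_info_get(bond_port_id, &dev_info);
 *	// never ask for more queues than every slave can supply
 *	nb_rx_q = RTE_MIN(nb_rx_q, dev_info.max_rx_queues);
 *	nb_tx_q = RTE_MIN(nb_tx_q, dev_info.max_tx_queues);
 */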
static int
bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
	int res;
	uint8_t i;
	struct bond_dev_private *internals = dev->data->dev_private;

	/* don't do this while a slave is being added */
	rte_spinlock_lock(&internals->lock);

	if (on)
		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
	else
		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);

	for (i = 0; i < internals->slave_count; i++) {
		uint8_t port_id = internals->slaves[i].port_id;

		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
		if (res == ENOTSUP)
			RTE_LOG(WARNING, PMD,
				"Setting VLAN filter on slave port %u not supported.\n",
				port_id);
	}

	rte_spinlock_unlock(&internals->lock);
	return 0;
}
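/*
 * Illustrative sketch (not part of the driver): a VLAN filter set on the
 * bonded port is recorded in the bitmap and propagated to every slave that
 * supports it. The port id and VLAN id below are hypothetical.
 *
 *	// accept VLAN 100 on the bonded port (and thus on all slaves)
 *	rte_eth_dev_vlan_filter(bond_port_id, 100, 1);
 */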
static int
bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
					0, dev->data->numa_node);
	if (bd_rx_q == NULL)
		return -1;

	bd_rx_q->queue_id = rx_queue_id;
	bd_rx_q->dev_private = dev->data->dev_private;

	bd_rx_q->nb_rx_desc = nb_rx_desc;

	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
	bd_rx_q->mb_pool = mb_pool;

	dev->data->rx_queues[rx_queue_id] = bd_rx_q;

	return 0;
}
static int
bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_txconf *tx_conf)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
					0, dev->data->numa_node);

	if (bd_tx_q == NULL)
		return -1;

	bd_tx_q->queue_id = tx_queue_id;
	bd_tx_q->dev_private = dev->data->dev_private;

	bd_tx_q->nb_tx_desc = nb_tx_desc;
	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));

	dev->data->tx_queues[tx_queue_id] = bd_tx_q;

	return 0;
}
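/*
 * Illustrative sketch (not part of the driver): queue setup on the bonded
 * port only records the parameters in the bond_rx_queue/bond_tx_queue
 * structures above; they are replayed on each slave when the device starts.
 * The descriptor counts and mempool below are hypothetical.
 *
 *	rte_eth_rx_queue_setup(bond_port_id, 0, 128, rte_socket_id(),
 *			NULL, mbuf_pool);
 *	rte_eth_tx_queue_setup(bond_port_id, 0, 512, rte_socket_id(), NULL);
 */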
static void
bond_ethdev_rx_queue_release(void *queue)
{
	rte_free(queue);
}

static void
bond_ethdev_tx_queue_release(void *queue)
{
	rte_free(queue);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
	struct bond_dev_private *internals;

	/* Default value for polling slave found is true as we don't want to
	 * disable the polling thread if we cannot get the lock */
	int i, polling_slave_found = 1;

	if (cb_arg == NULL)
		return;

	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;

	if (!bonded_ethdev->data->dev_started ||
		!internals->link_status_polling_enabled)
		return;

	/* If device is currently being configured then don't check slaves link
	 * status, wait until next period */
	if (rte_spinlock_trylock(&internals->lock)) {
		if (internals->slave_count > 0)
			polling_slave_found = 0;

		for (i = 0; i < internals->slave_count; i++) {
			if (!internals->slaves[i].link_status_poll_enabled)
				continue;

			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
			polling_slave_found = 1;

			/* Update slave link status */
			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
					internals->slaves[i].link_status_wait_to_complete);

			/* if link status has changed since last checked then call lsc
			 * event callback */
			if (slave_ethdev->data->dev_link.link_status !=
					internals->slaves[i].last_link_status) {
				internals->slaves[i].last_link_status =
						slave_ethdev->data->dev_link.link_status;

				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
						RTE_ETH_EVENT_INTR_LSC,
						&bonded_ethdev->data->port_id,
						NULL);
			}
		}
		rte_spinlock_unlock(&internals->lock);
	}

	if (polling_slave_found)
		/* Set alarm to continue monitoring link status of slave ethdevs */
		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
				bond_ethdev_slave_link_status_change_monitor, cb_arg);
}
static int
bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
		int wait_to_complete)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	if (!bonded_eth_dev->data->dev_started ||
			internals->active_slave_count == 0) {
		bonded_eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
		return 0;
	} else {
		struct rte_eth_dev *slave_eth_dev;
		int i, link_up = 0;

		/* the bonded port is up if any active slave is up */
		for (i = 0; i < internals->active_slave_count; i++) {
			slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];

			(*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
					wait_to_complete);
			if (slave_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
				link_up = 1;
				break;
			}
		}

		bonded_eth_dev->data->dev_link.link_status = link_up;
	}

	return 0;
}
static void
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_stats slave_stats;
	int i, j;

	for (i = 0; i < internals->slave_count; i++) {
		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);

		stats->ipackets += slave_stats.ipackets;
		stats->opackets += slave_stats.opackets;
		stats->ibytes += slave_stats.ibytes;
		stats->obytes += slave_stats.obytes;
		stats->imissed += slave_stats.imissed;
		stats->ierrors += slave_stats.ierrors;
		stats->oerrors += slave_stats.oerrors;
		stats->rx_nombuf += slave_stats.rx_nombuf;

		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
			stats->q_opackets[j] += slave_stats.q_opackets[j];
			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
			stats->q_obytes[j] += slave_stats.q_obytes[j];
			stats->q_errors[j] += slave_stats.q_errors[j];
		}
	}
}
static void
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	for (i = 0; i < internals->slave_count; i++)
		rte_eth_stats_reset(internals->slaves[i].port_id);
}
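/*
 * Illustrative sketch (not part of the driver): stats read on the bonded
 * port are the sum of all slaves' counters, so an application polls the
 * bonded port id only. The port id below is hypothetical.
 *
 *	struct rte_eth_stats stats;
 *
 *	rte_eth_stats_get(bond_port_id, &stats);
 *	printf("rx=%" PRIu64 " tx=%" PRIu64 "\n",
 *			stats.ipackets, stats.opackets);
 */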
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	int i;

	internals->promiscuous_en = 1;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_enable(internals->slaves[i].port_id);
		break;
	/* In mode 4 promiscuous mode is managed when a slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to the primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_enable(internals->current_primary_port);
	}
}
static void
bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	internals->promiscuous_en = 0;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_disable(internals->slaves[i].port_id);
		break;
	/* In mode 4 promiscuous mode is managed when a slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to the primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_disable(internals->current_primary_port);
	}
}
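/*
 * Illustrative sketch (not part of the driver): toggling promiscuous mode on
 * the bonded port fans out according to the mode-dependent rules above, so
 * an application only ever touches the bonded port id (hypothetical here).
 *
 *	rte_eth_promiscuous_enable(bond_port_id);
 *	// later, when sniffing is no longer needed:
 *	rte_eth_promiscuous_disable(bond_port_id);
 */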
static void
bond_ethdev_delayed_lsc_propagation(void *arg)
{
	if (arg == NULL)
		return;

	_rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
			RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
}
int
bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
		void *param, void *ret_param __rte_unused)
{
	struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
	struct bond_dev_private *internals;
	struct rte_eth_link link;

	int i, valid_slave = 0;
	uint8_t active_pos;
	uint8_t lsc_flag = 0;

	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
		return -1;

	bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
	slave_eth_dev = &rte_eth_devices[port_id];

	if (check_for_bonded_ethdev(bonded_eth_dev))
		return -1;

	internals = bonded_eth_dev->data->dev_private;

	/* If the device isn't started don't handle interrupts */
	if (!bonded_eth_dev->data->dev_started)
		return -1;

	/* verify that port_id is a valid slave of bonded port */
	for (i = 0; i < internals->slave_count; i++) {
		if (internals->slaves[i].port_id == port_id) {
			valid_slave = 1;
			break;
		}
	}

	if (!valid_slave)
		return -1;

	/* Search for port in active port list */
	active_pos = find_slave_by_id(internals->active_slaves,
			internals->active_slave_count, port_id);

	rte_eth_link_get_nowait(port_id, &link);
	if (link.link_status) {
		if (active_pos < internals->active_slave_count)
			return 0;

		/* if no active slave ports then set this port to be primary port */
		if (internals->active_slave_count < 1) {
			/* If first active slave, then change link status */
			bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
			internals->current_primary_port = port_id;
			lsc_flag = 1;

			mac_address_slaves_update(bonded_eth_dev);

			/* Inherit eth dev link properties from first active slave */
			link_properties_set(bonded_eth_dev,
					&(slave_eth_dev->data->dev_link));
		} else {
			if (link_properties_valid(
					&bonded_eth_dev->data->dev_link, &link) != 0) {
				slave_eth_dev->data->dev_flags &=
						(~RTE_ETH_DEV_BONDED_SLAVE);
				RTE_LOG(ERR, PMD,
						"port %u invalid speed/duplex\n",
						port_id);
				return -1;
			}
		}

		activate_slave(bonded_eth_dev, port_id);

		/* If user has defined the primary port then default to using it */
		if (internals->user_defined_primary_port &&
				internals->primary_port == port_id)
			bond_ethdev_primary_set(internals, port_id);
	} else {
		if (active_pos == internals->active_slave_count)
			return 0;

		/* Remove from active slave list */
		deactivate_slave(bonded_eth_dev, port_id);

		/* No active slaves, change link status to down and reset other
		 * link properties */
		if (internals->active_slave_count < 1) {
			lsc_flag = 1;
			bonded_eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;

			link_properties_reset(bonded_eth_dev);
		}

		/* Update primary id, take first active slave from list or if none
		 * available set to -1 */
		if (port_id == internals->current_primary_port) {
			if (internals->active_slave_count > 0)
				bond_ethdev_primary_set(internals,
						internals->active_slaves[0]);
			else
				internals->current_primary_port = internals->primary_port;
		}
	}

	if (lsc_flag) {
		/* Cancel any possible outstanding interrupts if delays are enabled */
		if (internals->link_up_delay_ms > 0 ||
				internals->link_down_delay_ms > 0)
			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
					bonded_eth_dev);

		if (bonded_eth_dev->data->dev_link.link_status) {
			if (internals->link_up_delay_ms > 0)
				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL, NULL);
		} else {
			if (internals->link_down_delay_ms > 0)
				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL, NULL);
		}
	}
	return 0;
}
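/*
 * Illustrative sketch (not part of the driver): an application can observe
 * the bonded port's (possibly delayed) link state changes by registering its
 * own LSC callback on the bonded port id. The callback name and port id are
 * hypothetical.
 *
 *	static int
 *	app_lsc_cb(uint8_t port_id, enum rte_eth_event_type type,
 *			void *cb_arg, void *ret_param)
 *	{
 *		struct rte_eth_link link;
 *
 *		RTE_SET_USED(type);
 *		RTE_SET_USED(cb_arg);
 *		RTE_SET_USED(ret_param);
 *
 *		rte_eth_link_get_nowait(port_id, &link);
 *		printf("port %u link %s\n", port_id,
 *				link.link_status ? "up" : "down");
 *		return 0;
 *	}
 *
 *	rte_eth_dev_callback_register(bond_port_id, RTE_ETH_EVENT_INTR_LSC,
 *			app_lsc_cb, NULL);
 */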
static int
bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	unsigned i, j;
	int result = 0;
	int slave_reta_size;
	unsigned reta_count;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	reta_count = reta_size / RTE_RETA_GROUP_SIZE;

	for (i = 0; i < reta_count; i++) {
		internals->reta_conf[i].mask = reta_conf[i].mask;
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
	}

	/* Fill rest of array */
	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
				sizeof(internals->reta_conf[0]) * reta_count);

	/* Propagate RETA over slaves */
	for (i = 0; i < internals->slave_count; i++) {
		slave_reta_size = internals->slaves[i].reta_size;
		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
				&internals->reta_conf[0], slave_reta_size);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	int i, j;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];

	return 0;
}
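/*
 * Illustrative sketch (not part of the driver): updating the RETA on the
 * bonded port rewrites the cached table above and pushes it to every slave
 * at that slave's own table size. The 128-entry table size and variable
 * names below are hypothetical.
 *
 *	struct rte_eth_rss_reta_entry64 reta_conf[2];	// 2 x 64 = 128 entries
 *	unsigned i, j;
 *
 *	for (i = 0; i < 2; i++) {
 *		reta_conf[i].mask = ~0ULL;
 *		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
 *			reta_conf[i].reta[j] =
 *				(i * RTE_RETA_GROUP_SIZE + j) % nb_rx_queues;
 *	}
 *	rte_eth_dev_rss_reta_update(bond_port_id, reta_conf, 128);
 */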
static int
bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	int i, result = 0;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_rss_conf bond_rss_conf;

	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));

	/* only keep hash functions that every slave supports */
	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;

	if (bond_rss_conf.rss_hf != 0)
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;

	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
			sizeof(internals->rss_key)) {
		if (bond_rss_conf.rss_key_len == 0)
			bond_rss_conf.rss_key_len = 40; /* default RSS key length */
		internals->rss_key_len = bond_rss_conf.rss_key_len;
		memcpy(internals->rss_key, bond_rss_conf.rss_key,
				internals->rss_key_len);
	}

	for (i = 0; i < internals->slave_count; i++) {
		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
				&bond_rss_conf);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	rss_conf->rss_key_len = internals->rss_key_len;
	if (rss_conf->rss_key)
		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);

	return 0;
}
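/*
 * Illustrative sketch (not part of the driver): an RSS hash update on the
 * bonded port is masked against the offloads every slave supports before
 * being propagated. The port id below is hypothetical.
 *
 *	struct rte_eth_rss_conf rss_conf = {
 *		.rss_key = NULL,	// keep the current key
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_UDP,
 *	};
 *
 *	rte_eth_dev_rss_hash_update(bond_port_id, &rss_conf);
 */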
const struct eth_dev_ops default_dev_ops = {
	.dev_start            = bond_ethdev_start,
	.dev_stop             = bond_ethdev_stop,
	.dev_close            = bond_ethdev_close,
	.dev_configure        = bond_ethdev_configure,
	.dev_infos_get        = bond_ethdev_info,
	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
	.rx_queue_release     = bond_ethdev_rx_queue_release,
	.tx_queue_release     = bond_ethdev_tx_queue_release,
	.link_update          = bond_ethdev_link_update,
	.stats_get            = bond_ethdev_stats_get,
	.stats_reset          = bond_ethdev_stats_reset,
	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
	.reta_update          = bond_ethdev_rss_reta_update,
	.reta_query           = bond_ethdev_rss_reta_query,
	.rss_hash_update      = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
};
static int
bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
{
	const char *name = rte_vdev_device_name(dev);
	uint8_t socket_id = dev->device.numa_node;
	struct bond_dev_private *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	uint32_t vlan_filter_bmp_size;

	/* now do all data allocation - for eth_dev structure, dummy pci driver
	 * and internal (private) data
	 */

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
		goto err;
	}

	internals = eth_dev->data->dev_private;
	eth_dev->data->nb_rx_queues = (uint16_t)1;
	eth_dev->data->nb_tx_queues = (uint16_t)1;

	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
			socket_id);
	if (eth_dev->data->mac_addrs == NULL) {
		RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
		goto err;
	}

	eth_dev->dev_ops = &default_dev_ops;
	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
		RTE_ETH_DEV_DETACHABLE;

	rte_spinlock_init(&internals->lock);

	internals->port_id = eth_dev->data->port_id;
	internals->mode = BONDING_MODE_INVALID;
	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
	internals->xmit_hash = xmit_l2_hash;
	internals->user_defined_mac = 0;
	internals->link_props_set = 0;

	internals->link_status_polling_enabled = 0;

	internals->link_status_polling_interval_ms =
		DEFAULT_POLLING_INTERVAL_10_MS;
	internals->link_down_delay_ms = 0;
	internals->link_up_delay_ms = 0;

	internals->slave_count = 0;
	internals->active_slave_count = 0;
	internals->rx_offload_capa = 0;
	internals->tx_offload_capa = 0;
	internals->candidate_max_rx_pktlen = 0;
	internals->max_rx_pktlen = 0;

	/* Initially allow to choose any offload type */
	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;

	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
	memset(internals->slaves, 0, sizeof(internals->slaves));

	/* Set mode 4 default configuration */
	bond_mode_8023ad_setup(eth_dev, NULL);
	if (bond_ethdev_mode_set(eth_dev, mode)) {
		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
				eth_dev->data->port_id, mode);
		goto err;
	}

	vlan_filter_bmp_size =
		rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
						   RTE_CACHE_LINE_SIZE);
	if (internals->vlan_filter_bmpmem == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to allocate vlan bitmap for bonded device %u\n",
			     eth_dev->data->port_id);
		goto err;
	}

	internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
	if (internals->vlan_filter_bmp == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to init vlan bitmap for bonded device %u\n",
			     eth_dev->data->port_id);
		rte_free(internals->vlan_filter_bmpmem);
		goto err;
	}

	return eth_dev->data->port_id;

err:
	rte_free(internals);
	if (eth_dev != NULL) {
		rte_free(eth_dev->data->mac_addrs);
		rte_eth_dev_release_port(eth_dev);
	}
	return -1;
}
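/*
 * Illustrative sketch (not part of the driver): the same allocation path is
 * reachable through the bonding API, with no --vdev argument at all. Mode 0
 * is round-robin; the device name and slave port id are hypothetical.
 *
 *	int bond_port_id = rte_eth_bond_create("net_bonding0", 0,
 *			rte_socket_id());
 *
 *	if (bond_port_id < 0)
 *		rte_exit(EXIT_FAILURE, "Failed to create bonded device\n");
 *
 *	rte_eth_bond_slave_add(bond_port_id, slave_port_id);
 */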
static int
bond_probe(struct rte_vdev_device *dev)
{
	const char *name;
	struct bond_dev_private *internals;
	struct rte_kvargs *kvlist;
	uint8_t bonding_mode, socket_id;
	int arg_count, port_id;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
		pmd_bond_init_valid_arguments);
	if (kvlist == NULL)
		return -1;

	/* Parse link bonding mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
				&bond_ethdev_parse_slave_mode_kvarg,
				&bonding_mode) != 0) {
			RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
					name);
			goto parse_error;
		}
	} else {
		RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
				"device %s\n", name);
		goto parse_error;
	}

	/* Parse socket id to create bonding device on */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
	if (arg_count == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
				!= 0) {
			RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
					"bonded device %s\n", name);
			goto parse_error;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
				"bonded device %s\n", name);
		goto parse_error;
	} else {
		socket_id = rte_socket_id();
	}

	dev->device.numa_node = socket_id;

	/* Create link bonding eth device */
	port_id = bond_alloc(dev, bonding_mode);
	if (port_id < 0) {
		RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
				"socket %u.\n", name, bonding_mode, socket_id);
		goto parse_error;
	}
	internals = rte_eth_devices[port_id].data->dev_private;
	internals->kvlist = kvlist;

	RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
			"socket %u.\n", name, port_id, bonding_mode, socket_id);
	return 0;

parse_error:
	rte_kvargs_free(kvlist);
	return -1;
}
static int
bond_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;
	struct bond_dev_private *internals;
	const char *name;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);

	/* now free all data allocation - for eth_dev structure,
	 * dummy pci driver and internal (private) data
	 */

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return -ENODEV;

	RTE_ASSERT(eth_dev->device == &dev->device);

	internals = eth_dev->data->dev_private;
	if (internals->slave_count != 0)
		return -EBUSY;

	if (eth_dev->data->dev_started == 1) {
		bond_ethdev_stop(eth_dev);
		bond_ethdev_close(eth_dev);
	}

	eth_dev->dev_ops = NULL;
	eth_dev->rx_pkt_burst = NULL;
	eth_dev->tx_pkt_burst = NULL;

	rte_bitmap_free(internals->vlan_filter_bmp);
	rte_free(internals->vlan_filter_bmpmem);
	rte_free(eth_dev->data->dev_private);
	rte_free(eth_dev->data->mac_addrs);

	rte_eth_dev_release_port(eth_dev);

	return 0;
}
/* this part will resolve the slave portids after all the other pdev and vdev
 * have been allocated */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	const char *name = dev->device->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	int arg_count;
	uint8_t port_id = dev - rte_eth_devices;

	static const uint8_t default_rss_key[40] = {
		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
		0xBE, 0xAC, 0x01, 0xFA
	};

	unsigned i, j;

	/* If RSS is enabled, fill table and key with default values */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
		memcpy(internals->rss_key, default_rss_key, 40);

		/* each RETA group is filled round-robin over the rx queues */
		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
		}
	}

	/* set the max_rx_pktlen */
	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

	/*
	 * if no kvlist, it means that this bonded device has been created
	 * through the bonding api.
	 */
	if (!kvlist)
		return 0;
	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
			RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
					name);
			return -1;
		}

		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set mac address on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"MAC address can be specified only once for bonded device %s\n",
				name);
		return -1;
	}
	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
						0) {
			RTE_LOG(INFO, EAL,
					"Invalid xmit policy specified for bonded device %s\n",
					name);
			return -1;
		}

		/* Set balance mode transmit policy */
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set balance xmit policy on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"Transmit policy can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}
	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse slave ports for bonded device %s\n",
					name);
			return -1;
		}

		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
				RTE_LOG(ERR, EAL,
						"Failed to add port %d as slave to bonded device %s\n",
						slave_ports.slaves[i], name);
			}
		}
	} else {
		RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
		return -1;
	}
	/* Parse/set primary slave port id */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint8_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid primary slave port id specified for bonded device"
					" %s\n", name);
			return -1;
		}

		/* Set the primary slave port */
		if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set primary slave port %d on bonded device %s\n",
					primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Primary slave can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}
	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid lsc polling interval value specified for bonded"
					" device %s\n", name);
			return -1;
		}

		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set lsc monitor polling interval (%u ms) on"
					" bonded device %s\n", lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"LSC polling interval can be specified only once for bonded"
				" device %s\n", name);
		return -1;
	}
	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link up propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link up propagation delay (%u ms) on bonded"
					" device %s\n", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link up propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}
	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link down propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link down propagation delay (%u ms) on"
					" bonded device %s\n", link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link down propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	return 0;
}
static struct rte_vdev_driver pmd_bond_drv = {
	.probe = bond_probe,
	.remove = bond_remove,
};

RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
	"slave=<ifc> "
	"primary=<ifc> "
	"mode=[0-6] "
	"xmit_policy=[l2 | l23 | l34] "
	"socket_id=<int> "
	"mac=<mac addr> "
	"lsc_poll_period_ms=<int> "
	"up_delay=<int> "
	"down_delay=<int>");