/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <netinet/in.h>

#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"
#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		/* Handle a second (QinQ) tag */
		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}
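/*
 * Usage sketch (illustrative only): callers below use this helper to step
 * past any VLAN/QinQ tags and reach the L3 header, e.g.:
 *
 *	uint16_t proto = eth_hdr->ether_type;
 *	size_t off = get_vlan_offset(eth_hdr, &proto);
 *	struct ipv4_hdr *ip4 =
 *		(struct ipv4_hdr *)((char *)(eth_hdr + 1) + off);
 */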
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_slave = 0;
	uint16_t num_rx_total = 0;
	uint16_t i;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
		/* Offset into *bufs increases as packets are received
		 * from the other slaves */
		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
	}

	return num_rx_total;
}
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}
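/*
 * Mode 4 helper: a frame counts as a slow protocol (LACP/marker) control
 * frame only when it is untagged and its ethertype is ETHER_TYPE_SLOW with
 * an LACP or marker subtype.
 */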
static bool
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}
/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 ****************************************************************************/
static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	}, {
		.type = RTE_FLOW_ITEM_TYPE_END,
	}
};
const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		}, {
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret != 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: slave %d capabilities don't allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}
int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify that all slaves in the bonding support flow director */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}
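/*
 * Install the rte_flow rule that steers ETHER_TYPE_SLOW (LACP) frames
 * arriving on a slave into the dedicated mode 4 RX queue, keeping control
 * traffic off the data path queues.
 */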
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		}, {
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint16_t i, idx;

	/* Copy slave list to protect against slave up/down changes during tx
	 * burst */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0, idx = internals->active_slave;
			i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
		idx = idx % slave_count;

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);
	}

	/* Remember where we stopped so the next burst resumes at the
	 * following slave, keeping the polling rotation fair. */
	internals->active_slave = idx;

	return num_rx_total;
}
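/*
 * Mode 4 fast-queue TX below hashes each packet onto one of the slaves that
 * is currently in the DISTRIBUTING state; LACP control frames travel via
 * the dedicated queues instead, so none are injected here.
 */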
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, op_slave_idx;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];

	/* Total amount of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
	/* Slow packets placed in each slave */

	if (unlikely(nb_pkts == 0))
		return 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * burst */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
			num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];
		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate the slaves' mbuf arrays with the packets to be sent */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i],
					distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave.
			 * Use only slaves that are currently distributing. */
			uint8_t slave_offset =
					distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
					bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		num_tx_total += num_tx_slave;
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
					num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}
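/*
 * Packets a slave failed to transmit are compacted to the tail of bufs[];
 * the caller sees num_tx_total < nb_pkts and keeps ownership of the
 * untransmitted tail, per the usual rte_eth_tx_burst() contract.
 */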
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_addr bond_mac;

	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;	/* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t i, j, k;
	uint8_t subtype;

	rte_eth_macaddr_get(internals->port_id, &bond_mac);
	/* Copy slave list to protect against slave up/down changes during tx
	 * burst */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
				COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {
			/* If packet is not pure L2 and is known, skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove packet from array if it is a slow packet, or the
			 * slave is not in collecting state, or the bonding
			 * interface is not in promiscuous mode and the packet's
			 * destination address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
					!collecting || (!promisc &&
						!is_multicast_ether_addr(&hdr->d_addr) &&
						!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
							internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
							(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	internals->active_slave = idx;
	return num_rx_total;
}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}

	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}
#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX packets number for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update its stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	RTE_LOG(DEBUG, PMD, \
		"%s port:%d " \
		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"DstIP:%s " \
		"%s " \
		"%d\n", \
		info, \
		port, \
		eth_h->s_addr.addr_bytes[0], \
		eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], \
		eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], \
		eth_h->s_addr.addr_bytes[5], \
		src_ip, \
		eth_h->d_addr.addr_bytes[0], \
		eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], \
		eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], \
		eth_h->d_addr.addr_bytes[5], \
		dst_ip, \
		arp_op, ++burstnumber)
#endif
static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	snprintf(buf, 16, "%s", info);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * burst */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate each slave's mbuf array with the packets to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on
	 * the next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}
uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

	uint32_t hash = ether_hash(eth_hdr);

	return (hash ^= hash >> 8) % slave_count;
}
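/*
 * Folding the upper byte into the lower bits (hash ^= hash >> 8) mixes all
 * six XORed address bytes before the modulo, so slave selection does not
 * depend only on the low-order byte of the MAC addresses.
 */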
uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
	uint32_t hash, l3hash = 0;

	hash = ether_hash(eth_hdr);

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv4_hash(ipv4_hdr);

	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);
	}

	hash = hash ^ l3hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}
uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

	struct udp_hdr *udp_hdr = NULL;
	struct tcp_hdr *tcp_hdr = NULL;
	uint32_t hash, l3hash = 0, l4hash = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		size_t ip_hdr_offset;

		l3hash = ipv4_hash(ipv4_hdr);

		/* there is no L4 header in a fragmented packet */
		if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
			ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

			if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}
	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);

		if (ipv6_hdr->proto == IPPROTO_TCP) {
			tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(tcp_hdr);
		} else if (ipv6_hdr->proto == IPPROTO_UDP) {
			udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(udp_hdr);
		}
	}

	hash = l3hash ^ l4hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}
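/*
 * Layer 3+4 policy: the L3 address hash is XORed with the TCP/UDP port
 * hash, so every packet of a given L4 flow leaves through the same slave
 * while distinct flows spread across the slaves.
 */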
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}
static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;

	/* Sort in descending order of remaining bandwidth */
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;

	if (link_bwg == 0)
		return;

	link_bwg = link_bwg * (update_idx + 1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000 * load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000 * load) % link_bwg;
}
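/*
 * Note on units (as the code stands): link_speed is in Mbit/s, so
 * link_speed * 1000000 / 8 is the line rate in bytes/s. The quotient and
 * remainder pair approximates the share of capacity left unused over the
 * sampling window; bandwidth_cmp() sorts slaves so the least-loaded one
 * appears first in tlb_slaves_order.
 */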
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint8_t slave_count;
	uint64_t tx_bytes;
	uint8_t update_stats = 0;
	uint8_t i, slave_id;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}
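/*
 * Mode 5 (TLB) TX: slaves are tried in tlb_slaves_order, i.e. by estimated
 * spare bandwidth (computed by the alarm callback above). Packets whose
 * source MAC is still the primary slave's address get it rewritten to the
 * transmitting slave's MAC, so each slave sends with its own address.
 */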
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
			sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}
void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}
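/*
 * Mode 6 (ALB) TX: ARP packets are pinned to slaves via the ALB client
 * table (bond_mode_alb_arp_xmit), ARP update packets are generated whenever
 * the table is marked dirty (mode6.ntt), and all non-ARP traffic falls
 * through to the TLB transmit path above.
 */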
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

	int i, op_slave_id;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * burst */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate each slave's mbuf array with the packets to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		/* Select output slave using hash based on xmit policy */
		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

		/* Populate slave mbuf arrays with mbufs for that slave */
		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += slave_tx_fail_count;
				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						slave_tx_fail_count * sizeof(bufs[0]));
			}

			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
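/*
 * Mode 4 TX without dedicated queues: each slave's tx_ring is drained first
 * so pending LACPDUs and marker responses go out in the same burst; those
 * slow packets are queued ahead of data packets in slave_bufs and are freed
 * rather than reported back on TX failure.
 */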
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, j, op_slave_idx;
	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

	/* Allocate space for additional slow protocol packets in 802.3AD mode. */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

	/* Total amount of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
	/* Slow packets placed in each slave */
	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * burst */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];

		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
				NULL);
		slave_nb_pkts[i] = slave_slow_nb_pkts[i];

		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
			slave_bufs[i][j] = slow_pkts[j];

		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate each slave's mbuf array with the packets to be sent on it */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave. Use only
			 * slaves that are currently distributing. */
			uint8_t slave_offset = distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		/* If tx burst fails drop slow packets */
		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}
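/*
 * Broadcast TX: every packet goes out on every active slave, so each mbuf's
 * reference count is first bumped by (num_of_slaves - 1). The return value
 * is the count from the most successful slave; extra references held for
 * packets other slaves failed to send are freed here.
 */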
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t tx_failed_flag = 0, num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * burst */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}
static void
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		/**
		 * If in mode 4 then save the link properties of the first
		 * slave; all subsequent slaves must match these properties.
		 */
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		bond_link->link_autoneg = slave_link->link_autoneg;
		bond_link->link_duplex = slave_link->link_duplex;
		bond_link->link_speed = slave_link->link_speed;
	} else {
		/**
		 * In any other mode the link properties are set to default
		 * values of AUTONEG/DUPLEX.
		 */
		ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
		ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
	}
}
static int
link_properties_valid(struct rte_eth_dev *ethdev,
		struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		if (bond_link->link_duplex != slave_link->link_duplex ||
			bond_link->link_autoneg != slave_link->link_autoneg ||
			bond_link->link_speed != slave_link->link_speed)
			return -1;
	}

	return 0;
}
int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
		return -1;
	}

	if (dst_mac_addr == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	ether_addr_copy(mac_addr, dst_mac_addr);
	return 0;
}
int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (new_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	/* If the new MAC is different from the current MAC then update */
	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

	return 0;
}
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	int i;

	/* Update slave devices MAC addresses */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++) {
			if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (mac_address_set(&rte_eth_devices[internals->primary_port],
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					return -1;
				}
			} else {
				if (mac_address_set(
						&rte_eth_devices[internals->slaves[i].port_id],
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
					return -1;
				}
			}
		}
	}

	return 0;
}
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_8023AD:
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		if (internals->mode4.dedicated_queues.enabled == 0) {
			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
			RTE_LOG(WARNING, PMD,
				"Using mode 4, it is necessary to do TX burst "
				"and RX burst at least every 100ms.\n");
		} else {
			/* Use flow director's optimization */
			eth_dev->rx_pkt_burst =
					bond_ethdev_rx_burst_8023ad_fast_queue;
			eth_dev->tx_pkt_burst =
					bond_ethdev_tx_burst_8023ad_fast_queue;
		}
		break;
	case BONDING_MODE_TLB:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_ALB:
		if (bond_mode_alb_enable(eth_dev) != 0)
			return -1;

		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}
static int
slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	int errval = 0;
	struct bond_dev_private *internals = (struct bond_dev_private *)
		bonded_eth_dev->data->dev_private;
	struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];

	if (port->slow_pool == NULL) {
		char mem_name[256];
		int slave_id = slave_eth_dev->data->port_id;

		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
				slave_id);
		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			slave_eth_dev->data->numa_node);

		/* Any memory allocation failure in initialization is critical because
		 * resources can't be freed, so reinitialization is impossible. */
		if (port->slow_pool == NULL) {
			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
				slave_id, mem_name, rte_strerror(rte_errno));
		}
	}

	if (internals->mode4.dedicated_queues.enabled == 1) {
		/* Configure slow Rx queue */
		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.rx_qid, 128,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL, port->slow_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.rx_qid,
				errval);
			return errval;
		}

		/* Configure slow Tx queue */
		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid, 512,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid,
				errval);
			return errval;
		}
	}
	return 0;
}
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;
	uint16_t nb_rx_queues;
	uint16_t nb_tx_queues;

	int errval;
	uint16_t q_id;
	struct rte_flow_error flow_error;

	struct bond_dev_private *internals = (struct bond_dev_private *)
		bonded_eth_dev->data->dev_private;

	/* Stop slave */
	rte_eth_dev_stop(slave_eth_dev->data->port_id);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* If RSS is enabled for bonding, try to enable it for slaves */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
		if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
				!= 0) {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
		} else {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
		}

		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
	}

	slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
			bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;

	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			nb_rx_queues++;
			nb_tx_queues++;
		}
	}

	/* Configure device */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			nb_rx_queues, nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Setup Rx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
				!= 0)
			return errval;

		if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
				slave_eth_dev->data->port_id) != 0) {
			RTE_BOND_LOG(ERR,
				"bond_ethdev_8023ad_flow_verify failed: port=%d",
				slave_eth_dev->data->port_id);
			return -1;
		}

		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
			rte_flow_destroy(slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
					&flow_error);

		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
				slave_eth_dev->data->port_id);
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	/* If RSS is enabled for bonding, synchronize RETA */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		int i;
		struct bond_dev_private *internals;

		internals = bonded_eth_dev->data->dev_private;

		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
				errval = rte_eth_dev_rss_reta_update(
						slave_eth_dev->data->port_id,
						&internals->reta_conf[0],
						internals->slaves[i].reta_size);
				if (errval != 0) {
					RTE_LOG(WARNING, PMD,
							"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
							" RSS Configuration for bonding may be inconsistent.\n",
							slave_eth_dev->data->port_id, errval);
				}
				break;
			}
		}
	}

	/* If lsc interrupt is set, check initial slave's link status */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
			NULL);
	}

	return 0;
}
static void
slave_remove(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	uint16_t i;

	for (i = 0; i < internals->slave_count; i++)
		if (internals->slaves[i].port_id ==
				slave_eth_dev->data->port_id)
			break;

	if (i < (internals->slave_count - 1))
		memmove(&internals->slaves[i], &internals->slaves[i + 1],
				sizeof(internals->slaves[0]) *
				(internals->slave_count - i - 1));

	internals->slave_count--;

	/* force reconfiguration of slave interfaces */
	_rte_eth_dev_reset(slave_eth_dev);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

static void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* Mark slave devices that don't support interrupts so we can
	 * compensate when we start the bond
	 */
	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
		slave_details->link_status_poll_enabled = 1;
	}

	slave_details->link_status_wait_to_complete = 0;
	/* clean tlb_last_obytets when adding port for bonding device */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct ether_addr));
}
void
bond_ethdev_primary_set(struct bond_dev_private *internals,
		uint16_t slave_port_id)
{
	int i;

	if (internals->active_slave_count < 1)
		internals->current_primary_port = slave_port_id;
	else
		/* Search bonded device slave ports for new proposed primary port */
		for (i = 0; i < internals->active_slave_count; i++) {
			if (internals->active_slaves[i] == slave_port_id)
				internals->current_primary_port = slave_port_id;
		}
}

static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (check_for_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		return -1;
	}

	if (internals->user_defined_mac == 0) {
		struct ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			return -1;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			return -1;
		}
	}

	/* Update all slave devices MACs */
	if (mac_address_slaves_update(eth_dev) != 0)
		return -1;

	/* If bonded device is configured in promiscuous mode then re-apply config */
	if (internals->promiscuous_en)
		bond_ethdev_promiscuous_enable(eth_dev);

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			internals->mode4.dedicated_queues.rx_qid =
					eth_dev->data->nb_rx_queues;
			internals->mode4.dedicated_queues.tx_qid =
					eth_dev->data->nb_tx_queues;
		}
	}

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		struct rte_eth_dev *slave_ethdev =
				&(rte_eth_devices[internals->slaves[i].port_id]);
		if (slave_configure(eth_dev, slave_ethdev) != 0) {
			RTE_BOND_LOG(ERR,
				"bonded port (%d) failed to reconfigure slave device (%d)",
				eth_dev->data->port_id,
				internals->slaves[i].port_id);
			return -1;
		}
		/* We will need to poll for link status if any slave doesn't
		 * support interrupts
		 */
		if (internals->slaves[i].link_status_poll_enabled)
			internals->link_status_polling_enabled = 1;
	}

	/* start polling if needed */
	if (internals->link_status_polling_enabled) {
		rte_eal_alarm_set(
			internals->link_status_polling_interval_ms * 1000,
			bond_ethdev_slave_link_status_change_monitor,
			(void *)&rte_eth_devices[internals->port_id]);
	}

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB)
		bond_tlb_enable(internals);

	return 0;
}
static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
	int i;

	if (dev->data->rx_queues != NULL) {
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			rte_free(dev->data->rx_queues[i]);
			dev->data->rx_queues[i] = NULL;
		}
		dev->data->nb_rx_queues = 0;
	}

	if (dev->data->tx_queues != NULL) {
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			rte_free(dev->data->tx_queues[i]);
			dev->data->tx_queues[i] = NULL;
		}
		dev->data->nb_tx_queues = 0;
	}
}
void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	uint8_t i;

	if (internals->mode == BONDING_MODE_8023AD) {
		struct port *port;
		void *pkt = NULL;

		bond_mode_8023ad_stop(eth_dev);

		/* Discard all messages to/from mode 4 state machines */
		for (i = 0; i < internals->active_slave_count; i++) {
			port = &mode_8023ad_ports[internals->active_slaves[i]];

			RTE_ASSERT(port->rx_ring != NULL);
			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);

			RTE_ASSERT(port->tx_ring != NULL);
			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);
		}
	}

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB) {
		bond_tlb_disable(internals);
		for (i = 0; i < internals->active_slave_count; i++)
			tlb_last_obytets[internals->active_slaves[i]] = 0;
	}

	internals->active_slave_count = 0;
	internals->link_status_polling_enabled = 0;
	for (i = 0; i < internals->slave_count; i++)
		internals->slaves[i].last_link_status = 0;

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 0;
}
void
bond_ethdev_close(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	uint8_t bond_port_id = internals->port_id;
	int skipped = 0;

	RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
	while (internals->slave_count != skipped) {
		uint16_t port_id = internals->slaves[skipped].port_id;

		rte_eth_dev_stop(port_id);

		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to remove port %d from bonded device "
					"%s\n", port_id, dev->device->name);
			skipped++;
		}
	}
	bond_ethdev_free_queues(dev);
	rte_bitmap_reset(internals->vlan_filter_bmp);
}
/* forward declaration */
static int bond_ethdev_configure(struct rte_eth_dev *dev);
2052 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2054 struct bond_dev_private *internals = dev->data->dev_private;
2056 uint16_t max_nb_rx_queues = UINT16_MAX;
2057 uint16_t max_nb_tx_queues = UINT16_MAX;
2059 dev_info->max_mac_addrs = 1;
2061 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2062 internals->candidate_max_rx_pktlen :
2063 ETHER_MAX_JUMBO_FRAME_LEN;
2065 /* Max number of tx/rx queues that the bonded device can support is the
2066 * minimum values of the bonded slaves, as all slaves must be capable
2067 * of supporting the same number of tx/rx queues.
2069 if (internals->slave_count > 0) {
2070 struct rte_eth_dev_info slave_info;
2073 for (idx = 0; idx < internals->slave_count; idx++) {
2074 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2077 if (slave_info.max_rx_queues < max_nb_rx_queues)
2078 max_nb_rx_queues = slave_info.max_rx_queues;
2080 if (slave_info.max_tx_queues < max_nb_tx_queues)
2081 max_nb_tx_queues = slave_info.max_tx_queues;
2085 dev_info->max_rx_queues = max_nb_rx_queues;
2086 dev_info->max_tx_queues = max_nb_tx_queues;
2089 * If dedicated hw queues enabled for link bonding device in LACP mode
2090 * then we need to reduce the maximum number of data path queues by 1.
2092 if (internals->mode == BONDING_MODE_8023AD &&
2093 internals->mode4.dedicated_queues.enabled == 1) {
2094 dev_info->max_rx_queues--;
2095 dev_info->max_tx_queues--;
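	/*
	 * For reference, a minimal sketch of how an application opts in to the
	 * dedicated 802.3AD control queues accounted for above, before setting
	 * up its queues (bond_port_id is an assumed variable holding the
	 * bonded port id):
	 */
#if 0
	if (rte_eth_bond_8023ad_dedicated_queues_enable(bond_port_id) != 0)
		rte_exit(EXIT_FAILURE, "Cannot enable dedicated LACP queues\n");
	/* The highest-numbered rx/tx queue pair is then reserved for LACP
	 * control traffic, so configure one extra queue beyond the data path
	 * ones. */
#endif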
	dev_info->min_rx_bufsize = 0;

	dev_info->rx_offload_capa = internals->rx_offload_capa;
	dev_info->tx_offload_capa = internals->tx_offload_capa;
	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;

	dev_info->reta_size = internals->reta_size;

bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
	int i, res;
	struct bond_dev_private *internals = dev->data->dev_private;

	/* don't do this while a slave is being added */
	rte_spinlock_lock(&internals->lock);

	if (on)
		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
	else
		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);

	for (i = 0; i < internals->slave_count; i++) {
		uint16_t port_id = internals->slaves[i].port_id;

		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
		if (res == -ENOTSUP)
			RTE_LOG(WARNING, PMD,
					"Setting VLAN filter on slave port %u not supported.\n",
					port_id);
	}

	rte_spinlock_unlock(&internals->lock);

bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
					0, dev->data->numa_node);
	if (bd_rx_q == NULL)
		return -1;

	bd_rx_q->queue_id = rx_queue_id;
	bd_rx_q->dev_private = dev->data->dev_private;

	bd_rx_q->nb_rx_desc = nb_rx_desc;

	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
	bd_rx_q->mb_pool = mb_pool;

	dev->data->rx_queues[rx_queue_id] = bd_rx_q;

bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_txconf *tx_conf)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
					0, dev->data->numa_node);

	if (bd_tx_q == NULL)
		return -1;

	bd_tx_q->queue_id = tx_queue_id;
	bd_tx_q->dev_private = dev->data->dev_private;

	bd_tx_q->nb_tx_desc = nb_tx_desc;
	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));

	dev->data->tx_queues[tx_queue_id] = bd_tx_q;

bond_ethdev_rx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}

bond_ethdev_tx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}

bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
	struct bond_dev_private *internals;

	/* Default to "polling slave found" so that the polling thread is not
	 * disabled if we cannot get the lock */
	int i, polling_slave_found = 1;

	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;

	if (!bonded_ethdev->data->dev_started ||
			!internals->link_status_polling_enabled)
		return;

	/* If the device is currently being configured then don't check the
	 * slaves' link status; wait until the next period */
	if (rte_spinlock_trylock(&internals->lock)) {
		if (internals->slave_count > 0)
			polling_slave_found = 0;

		for (i = 0; i < internals->slave_count; i++) {
			if (!internals->slaves[i].link_status_poll_enabled)
				continue;

			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
			polling_slave_found = 1;

			/* Update slave link status */
			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
					internals->slaves[i].link_status_wait_to_complete);

			/* If the link status has changed since last checked
			 * then call the lsc callback */
			if (slave_ethdev->data->dev_link.link_status !=
					internals->slaves[i].last_link_status) {
				internals->slaves[i].last_link_status =
						slave_ethdev->data->dev_link.link_status;

				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
						RTE_ETH_EVENT_INTR_LSC,
						&bonded_ethdev->data->port_id,
						NULL);
			}
		}
		rte_spinlock_unlock(&internals->lock);
	}

	if (polling_slave_found)
		/* Set alarm to continue monitoring link status of slave ethdevs */
		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
				bond_ethdev_slave_link_status_change_monitor, cb_arg);
}
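/*
 * The monitor above is a general alarm-based pattern: a callback that does
 * its work and then re-arms itself gives a periodic timer without a
 * dedicated thread. A minimal sketch of the pattern in isolation (the
 * callback and period names are illustrative):
 */
#if 0
#define PERIOD_MS 100

static void
periodic_cb(void *arg)
{
	/* ... periodic work ... */

	/* Re-arm; rte_eal_alarm_set() takes the delay in microseconds */
	rte_eal_alarm_set(PERIOD_MS * 1000, periodic_cb, arg);
}
#endif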
bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
{
	void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);

	struct bond_dev_private *bond_ctx;
	struct rte_eth_link slave_link;

	bond_ctx = ethdev->data->dev_private;

	ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

	if (ethdev->data->dev_started == 0 ||
			bond_ctx->active_slave_count == 0) {
		ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
		return 0;
	}

	ethdev->data->dev_link.link_status = ETH_LINK_UP;

	if (wait_to_complete)
		link_update = rte_eth_link_get;
	else
		link_update = rte_eth_link_get_nowait;

	switch (bond_ctx->mode) {
	case BONDING_MODE_BROADCAST:
		/*
		 * Setting link speed to UINT32_MAX to ensure we pick up the
		 * value of the first active slave
		 */
		ethdev->data->dev_link.link_speed = UINT32_MAX;

		/*
		 * The link speed is the minimum of all the slaves' link
		 * speeds, as packet loss will occur on a slave if
		 * transmission at rates greater than this is attempted
		 */
		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
			link_update(bond_ctx->active_slaves[idx], &slave_link);

			if (slave_link.link_speed <
					ethdev->data->dev_link.link_speed)
				ethdev->data->dev_link.link_speed =
						slave_link.link_speed;
		}
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		/* Current primary slave */
		link_update(bond_ctx->current_primary_port, &slave_link);

		ethdev->data->dev_link.link_speed = slave_link.link_speed;
		break;
	case BONDING_MODE_8023AD:
		ethdev->data->dev_link.link_autoneg =
				bond_ctx->mode4.slave_link.link_autoneg;
		ethdev->data->dev_link.link_duplex =
				bond_ctx->mode4.slave_link.link_duplex;
		/* fall through to update link speed */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
		/*
		 * In these modes the maximum theoretical link speed is the
		 * sum of the active slaves' link speeds
		 */
		ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
			link_update(bond_ctx->active_slaves[idx], &slave_link);

			ethdev->data->dev_link.link_speed +=
					slave_link.link_speed;
		}
	}

	return 0;
}
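/*
 * The aggregate link computed above is visible to applications through the
 * normal ethdev link API. A sketch of reading it (bond_port_id is an assumed
 * variable holding the bonded port id):
 */
#if 0
	struct rte_eth_link bond_link;

	rte_eth_link_get_nowait(bond_port_id, &bond_link);
	if (bond_link.link_status == ETH_LINK_UP)
		printf("bonded link up at %u Mbps\n", bond_link.link_speed);
#endif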
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_stats slave_stats;
	int i, j;

	for (i = 0; i < internals->slave_count; i++) {
		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);

		stats->ipackets += slave_stats.ipackets;
		stats->opackets += slave_stats.opackets;
		stats->ibytes += slave_stats.ibytes;
		stats->obytes += slave_stats.obytes;
		stats->imissed += slave_stats.imissed;
		stats->ierrors += slave_stats.ierrors;
		stats->oerrors += slave_stats.oerrors;
		stats->rx_nombuf += slave_stats.rx_nombuf;

		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
			stats->q_opackets[j] += slave_stats.q_opackets[j];
			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
			stats->q_obytes[j] += slave_stats.q_obytes[j];
			stats->q_errors[j] += slave_stats.q_errors[j];
		}
	}

	return 0;
}
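/*
 * Because the counters above are sums over all slaves, callers read them
 * exactly as for any physical port. A sketch (bond_port_id is an assumed
 * variable holding the bonded port id):
 */
#if 0
	struct rte_eth_stats stats;

	if (rte_eth_stats_get(bond_port_id, &stats) == 0)
		printf("rx %" PRIu64 " pkts, tx %" PRIu64 " pkts\n",
				stats.ipackets, stats.opackets);
#endif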
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	for (i = 0; i < internals->slave_count; i++)
		rte_eth_stats_reset(internals->slaves[i].port_id);

bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	int i;

	internals->promiscuous_en = 1;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_enable(internals->slaves[i].port_id);
		break;
	/* In mode4 promiscuous mode is managed when a slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to the primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_enable(internals->current_primary_port);
	}
}

bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	internals->promiscuous_en = 0;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_disable(internals->slaves[i].port_id);
		break;
	/* In mode4 promiscuous mode is managed when a slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to the primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_disable(internals->current_primary_port);
	}
}
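/*
 * Seen from the application, a single call on the bonded port fans out
 * according to the switches above. A sketch (bond_port_id is an assumed
 * variable holding the bonded port id):
 */
#if 0
	rte_eth_promiscuous_enable(bond_port_id);  /* all slaves or primary only */
	/* ... */
	rte_eth_promiscuous_disable(bond_port_id);
#endif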
bond_ethdev_delayed_lsc_propagation(void *arg)
{

	_rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
			RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
}

bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
		void *param, void *ret_param __rte_unused)
{
	struct rte_eth_dev *bonded_eth_dev;
	struct bond_dev_private *internals;
	struct rte_eth_link link;

	int i, valid_slave = 0;
	uint8_t active_pos;
	uint8_t lsc_flag = 0;

	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
		return 0;

	bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];

	if (check_for_bonded_ethdev(bonded_eth_dev))
		return 0;

	internals = bonded_eth_dev->data->dev_private;

	/* If the device isn't started don't handle interrupts */
	if (!bonded_eth_dev->data->dev_started)
		return 0;

	/* verify that port_id is a valid slave of the bonded port */
	for (i = 0; i < internals->slave_count; i++) {
		if (internals->slaves[i].port_id == port_id) {
			valid_slave = 1;
			break;
		}
	}

	/* Search for port in active port list */
	active_pos = find_slave_by_id(internals->active_slaves,
			internals->active_slave_count, port_id);

	rte_eth_link_get_nowait(port_id, &link);
	if (link.link_status) {
		if (active_pos < internals->active_slave_count)
			return 0;

		/* if no active slave ports then set this port to be the primary port */
		if (internals->active_slave_count < 1) {
			/* If first active slave, then change link status */
			bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
			internals->current_primary_port = port_id;
			lsc_flag = 1;

			mac_address_slaves_update(bonded_eth_dev);
		}

		activate_slave(bonded_eth_dev, port_id);

		/* If user has defined the primary port then default to using it */
		if (internals->user_defined_primary_port &&
				internals->primary_port == port_id)
			bond_ethdev_primary_set(internals, port_id);
	} else {
		if (active_pos == internals->active_slave_count)
			return 0;

		/* Remove from active slave list */
		deactivate_slave(bonded_eth_dev, port_id);

		if (internals->active_slave_count < 1)
			lsc_flag = 1;

		/* Update primary id, take first active slave from list or if none
		 * available fall back to the configured primary port */
		if (port_id == internals->current_primary_port) {
			if (internals->active_slave_count > 0)
				bond_ethdev_primary_set(internals,
						internals->active_slaves[0]);
			else
				internals->current_primary_port = internals->primary_port;
		}
	}

	/*
	 * Update bonded device link properties after any change to the active
	 * slaves
	 */
	bond_ethdev_link_update(bonded_eth_dev, 0);

	if (lsc_flag) {
		/* Cancel any possible outstanding interrupts if delays are enabled */
		if (internals->link_up_delay_ms > 0 ||
				internals->link_down_delay_ms > 0)
			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
					(void *)bonded_eth_dev);

		if (bonded_eth_dev->data->dev_link.link_status) {
			if (internals->link_up_delay_ms > 0)
				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL, NULL);
		} else {
			if (internals->link_down_delay_ms > 0)
				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL, NULL);
		}
	}
bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	unsigned i, j;
	int result = 0;
	int slave_reta_size;
	unsigned reta_count;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	reta_count = reta_size / RTE_RETA_GROUP_SIZE;

	for (i = 0; i < reta_count; i++) {
		internals->reta_conf[i].mask = reta_conf[i].mask;
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
	}

	/* Fill the rest of the array */
	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
				sizeof(internals->reta_conf[0]) * reta_count);

	/* Propagate RETA over slaves */
	for (i = 0; i < internals->slave_count; i++) {
		slave_reta_size = internals->slaves[i].reta_size;
		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
				&internals->reta_conf[0], slave_reta_size);
		if (result < 0)
			return result;
	}

	return 0;
}
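/*
 * A sketch of a caller driving this path: build a RETA that spreads entries
 * round-robin over the rx queues and apply it to the bonded port
 * (bond_port_id, reta_size and nb_rx_queues are assumed values; reta_size
 * must match what dev_infos_get reports for the bond):
 */
#if 0
	struct rte_eth_rss_reta_entry64 reta_conf[512 / RTE_RETA_GROUP_SIZE];
	uint16_t reta_size = 512, nb_rx_queues = 4;
	unsigned int k, m;

	for (k = 0; k < reta_size / RTE_RETA_GROUP_SIZE; k++) {
		reta_conf[k].mask = UINT64_MAX;
		for (m = 0; m < RTE_RETA_GROUP_SIZE; m++)
			reta_conf[k].reta[m] =
				(k * RTE_RETA_GROUP_SIZE + m) % nb_rx_queues;
	}
	rte_eth_dev_rss_reta_update(bond_port_id, reta_conf, reta_size);
#endif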
bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	int i, j;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];

	return 0;
}

bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_rss_conf bond_rss_conf;

	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));

	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;

	if (bond_rss_conf.rss_hf != 0)
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;

	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
			sizeof(internals->rss_key)) {
		if (bond_rss_conf.rss_key_len == 0)
			bond_rss_conf.rss_key_len = 40; /* default RSS key length */
		internals->rss_key_len = bond_rss_conf.rss_key_len;
		memcpy(internals->rss_key, bond_rss_conf.rss_key,
				internals->rss_key_len);
	}

	for (i = 0; i < internals->slave_count; i++) {
		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
				&bond_rss_conf);
		if (result != 0)
			return result;
	}

	return 0;
}

bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	rss_conf->rss_key_len = internals->rss_key_len;
	if (rss_conf->rss_key)
		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
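/*
 * A sketch of updating RSS through the bonded port; per the code above,
 * rss_hf is masked to what every slave supports and the result is
 * propagated (bond_port_id and the key value are assumptions):
 */
#if 0
	uint8_t key[40] = { 0 };  /* application-chosen 40-byte key */
	struct rte_eth_rss_conf conf = {
		.rss_key = key,
		.rss_key_len = sizeof(key),
		.rss_hf = ETH_RSS_IP | ETH_RSS_UDP,
	};

	rte_eth_dev_rss_hash_update(bond_port_id, &conf);
#endif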
const struct eth_dev_ops default_dev_ops = {
	.dev_start           = bond_ethdev_start,
	.dev_stop            = bond_ethdev_stop,
	.dev_close           = bond_ethdev_close,
	.dev_configure       = bond_ethdev_configure,
	.dev_infos_get       = bond_ethdev_info,
	.vlan_filter_set     = bond_ethdev_vlan_filter_set,
	.rx_queue_setup      = bond_ethdev_rx_queue_setup,
	.tx_queue_setup      = bond_ethdev_tx_queue_setup,
	.rx_queue_release    = bond_ethdev_rx_queue_release,
	.tx_queue_release    = bond_ethdev_tx_queue_release,
	.link_update         = bond_ethdev_link_update,
	.stats_get           = bond_ethdev_stats_get,
	.stats_reset         = bond_ethdev_stats_reset,
	.promiscuous_enable  = bond_ethdev_promiscuous_enable,
	.promiscuous_disable = bond_ethdev_promiscuous_disable,
	.reta_update         = bond_ethdev_rss_reta_update,
	.reta_query          = bond_ethdev_rss_reta_query,
	.rss_hash_update     = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get   = bond_ethdev_rss_hash_conf_get
};
bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
{
	const char *name = rte_vdev_device_name(dev);
	uint8_t socket_id = dev->device.numa_node;
	struct bond_dev_private *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	uint32_t vlan_filter_bmp_size;

	/* now do all data allocation - for eth_dev structure, dummy pci driver
	 * and internal (private) data
	 */

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
		goto err;
	}

	internals = eth_dev->data->dev_private;
	eth_dev->data->nb_rx_queues = (uint16_t)1;
	eth_dev->data->nb_tx_queues = (uint16_t)1;

	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
			socket_id);
	if (eth_dev->data->mac_addrs == NULL) {
		RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
		goto err;
	}

	eth_dev->dev_ops = &default_dev_ops;
	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;

	rte_spinlock_init(&internals->lock);

	internals->port_id = eth_dev->data->port_id;
	internals->mode = BONDING_MODE_INVALID;
	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
	internals->xmit_hash = xmit_l2_hash;
	internals->user_defined_mac = 0;

	internals->link_status_polling_enabled = 0;

	internals->link_status_polling_interval_ms =
			DEFAULT_POLLING_INTERVAL_10_MS;
	internals->link_down_delay_ms = 0;
	internals->link_up_delay_ms = 0;

	internals->slave_count = 0;
	internals->active_slave_count = 0;
	internals->rx_offload_capa = 0;
	internals->tx_offload_capa = 0;
	internals->candidate_max_rx_pktlen = 0;
	internals->max_rx_pktlen = 0;

	/* Initially allow to choose any offload type */
	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;

	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
	memset(internals->slaves, 0, sizeof(internals->slaves));

	/* Set mode 4 default configuration */
	bond_mode_8023ad_setup(eth_dev, NULL);
	if (bond_ethdev_mode_set(eth_dev, mode)) {
		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
				eth_dev->data->port_id, mode);
		goto err;
	}

	vlan_filter_bmp_size =
			rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
			RTE_CACHE_LINE_SIZE);
	if (internals->vlan_filter_bmpmem == NULL) {
		RTE_BOND_LOG(ERR,
				"Failed to allocate vlan bitmap for bonded device %u\n",
				eth_dev->data->port_id);
		goto err;
	}

	internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
	if (internals->vlan_filter_bmp == NULL) {
		RTE_BOND_LOG(ERR,
				"Failed to init vlan bitmap for bonded device %u\n",
				eth_dev->data->port_id);
		rte_free(internals->vlan_filter_bmpmem);
		goto err;
	}

	return eth_dev->data->port_id;

err:
	rte_free(internals);
	if (eth_dev != NULL) {
		rte_free(eth_dev->data->mac_addrs);
		rte_eth_dev_release_port(eth_dev);
	}
	return -1;
}
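/*
 * bond_alloc() is normally reached through vdev probing, but the same device
 * can be created programmatically with the public API. A minimal sketch
 * (slave_port_id is an assumed, already-probed port):
 */
#if 0
	int bond_port = rte_eth_bond_create("net_bonding0",
			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());

	if (bond_port < 0)
		rte_exit(EXIT_FAILURE, "Failed to create bonded device\n");
	if (rte_eth_bond_slave_add(bond_port, slave_port_id) != 0)
		rte_exit(EXIT_FAILURE, "Failed to add slave\n");
#endif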
bond_probe(struct rte_vdev_device *dev)
{
	const char *name;
	struct bond_dev_private *internals;
	struct rte_kvargs *kvlist;
	uint8_t bonding_mode, socket_id;
	int arg_count, port_id;
	uint8_t agg_mode;

	name = rte_vdev_device_name(dev);
	RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
			pmd_bond_init_valid_arguments);
	if (kvlist == NULL)
		return -1;

	/* Parse link bonding mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
				&bond_ethdev_parse_slave_mode_kvarg,
				&bonding_mode) != 0) {
			RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
					name);
			goto parse_error;
		}
	} else {
		RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
				"device %s\n", name);
		goto parse_error;
	}

	/* Parse socket id to create bonding device on */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
	if (arg_count == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
				!= 0) {
			RTE_LOG(ERR, EAL, "Invalid socket id specified for "
					"bonded device %s\n", name);
			goto parse_error;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
				"bonded device %s\n", name);
		goto parse_error;
	} else {
		socket_id = rte_socket_id();
	}

	dev->device.numa_node = socket_id;

	/* Create link bonding eth device */
	port_id = bond_alloc(dev, bonding_mode);
	if (port_id < 0) {
		RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
				"socket %u.\n", name, bonding_mode, socket_id);
		goto parse_error;
	}
	internals = rte_eth_devices[port_id].data->dev_private;
	internals->kvlist = kvlist;

	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse agg selection mode for bonded device %s\n",
					name);
			goto parse_error;
		}
		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
	} else {
		rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
	}

	RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
			"socket %u.\n", name, port_id, bonding_mode, socket_id);
	return 0;

parse_error:
	rte_kvargs_free(kvlist);
	return -1;
}
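/*
 * The kvargs parsed above arrive via the --vdev device string; for example,
 * an EAL command line like the following would exercise this probe path
 * (PCI addresses are illustrative):
 *
 *   ./app --vdev 'net_bonding0,mode=1,slave=0000:00:04.0,\
 *       slave=0000:00:05.0,primary=0000:00:04.0,socket_id=0'
 */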
bond_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;
	struct bond_dev_private *internals;
	const char *name;

	if (dev == NULL)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);

	/* now free all data allocation - for eth_dev structure,
	 * dummy pci driver and internal (private) data
	 */

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return -ENODEV;

	RTE_ASSERT(eth_dev->device == &dev->device);

	internals = eth_dev->data->dev_private;
	if (internals->slave_count != 0)
		return -EBUSY;

	if (eth_dev->data->dev_started == 1) {
		bond_ethdev_stop(eth_dev);
		bond_ethdev_close(eth_dev);
	}

	eth_dev->dev_ops = NULL;
	eth_dev->rx_pkt_burst = NULL;
	eth_dev->tx_pkt_burst = NULL;

	internals = eth_dev->data->dev_private;
	rte_bitmap_free(internals->vlan_filter_bmp);
	rte_free(internals->vlan_filter_bmpmem);
	rte_free(eth_dev->data->dev_private);
	rte_free(eth_dev->data->mac_addrs);

	rte_eth_dev_release_port(eth_dev);

	return 0;
}
/* this part will resolve the slave port ids after all the other pdevs and
 * vdevs have been allocated */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	const char *name = dev->device->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	int arg_count;
	uint16_t port_id = dev - rte_eth_devices;
	uint8_t agg_mode;

	static const uint8_t default_rss_key[40] = {
		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
		0xBE, 0xAC, 0x01, 0xFA
	};

	unsigned i, j;

	/* If RSS is enabled, fill table and key with default values */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
		memcpy(internals->rss_key, default_rss_key, 40);

		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
		}
	}

	/* set the max_rx_pktlen */
	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

	/*
	 * if no kvlist, it means that this bonded device has been created
	 * through the bonding API.
	 */
	if (kvlist == NULL)
		return 0;
	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
			RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
					name);
			return -1;
		}

		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set mac address on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"MAC address can be specified only once for bonded device %s\n",
				name);
		return -1;
	}

	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
						0) {
			RTE_LOG(INFO, EAL,
					"Invalid xmit policy specified for bonded device %s\n",
					name);
			return -1;
		}

		/* Set balance mode transmit policy */
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set balance xmit policy on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"Transmit policy can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}

	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse agg selection mode for bonded device %s\n",
					name);
		}
		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
	}
	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;
		unsigned i;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse slave ports for bonded device %s\n",
					name);
			return -1;
		}

		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
				RTE_LOG(ERR, EAL,
						"Failed to add port %d as slave to bonded device %s\n",
						slave_ports.slaves[i], name);
			}
		}

	} else {
		RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
		return -1;
	}

	/* Parse/set primary slave port id */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint16_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid primary slave port id specified for bonded device"
					" %s\n", name);
			return -1;
		}

		/* Set the primary slave port id */
		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set primary slave port %d on bonded device %s\n",
					primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Primary slave can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}
	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid lsc polling interval value specified for bonded"
					" device %s\n", name);
			return -1;
		}

		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set lsc monitor polling interval (%u ms) on"
					" bonded device %s\n", lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"LSC polling interval can be specified only once for bonded"
				" device %s\n", name);
		return -1;
	}
	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link up propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link up propagation delay (%u ms) on bonded"
					" device %s\n", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link up propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}
	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link down propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link down propagation delay (%u ms) on"
					" bonded device %s\n", link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link down propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	return 0;
}
struct rte_vdev_driver pmd_bond_drv = {
	.probe = bond_probe,
	.remove = bond_remove,
};

RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
	"slave=<ifc> "
	"primary=<ifc> "
	"mode=[0-6] "
	"xmit_policy=[l2 | l23 | l34] "
	"agg_mode=[count | stable | bandwidth] "
	"socket_id=<int> "
	"mac=<mac addr> "
	"lsc_poll_period_ms=<int> "
	"up_delay=<int> "
	"down_delay=<int>");