/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */

#include <netinet/in.h>

#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		/* A second, QinQ-stacked VLAN tag may follow the first */
		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}

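/*
 * Illustrative sketch (not part of the driver): callers combine
 * get_vlan_offset() with the Ethernet header to locate the L3 header of
 * a possibly VLAN/QinQ-tagged frame; the mbuf `m` below is hypothetical.
 *
 *	struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
 *	uint16_t proto = eth->ether_type;
 *	size_t off = get_vlan_offset(eth, &proto);
 *	// proto now holds the inner ethertype; the L3 header follows the
 *	// Ethernet header plus any VLAN tags:
 *	struct ipv4_hdr *ip = (struct ipv4_hdr *)((char *)(eth + 1) + off);
 */
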
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_slave = 0;
	uint16_t num_rx_total = 0;

	int i;

	/* Cast to structure containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
		if (num_rx_slave) {
			num_rx_total += num_rx_slave;
			nb_pkts -= num_rx_slave;
		}
	}

	return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	/* In active-backup mode packets are only received on the current
	 * primary slave */
	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}

static bool
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

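/*
 * Note: ETHER_TYPE_SLOW (0x8809) is the IEEE 802.3 slow-protocols
 * ethertype; subtype 1 is LACP and subtype 2 is the LACP marker
 * protocol. Slow-protocol frames are required to be untagged, hence the
 * VLAN check above.
 */
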
/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};

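/*
 * Together, flow_attr_8023ad and flow_item_8023ad describe an ingress
 * rule matching every frame whose ethertype is ETHER_TYPE_SLOW (0x8809).
 * The verify/set helpers below attach a QUEUE action so matching LACP
 * traffic is steered to the dedicated control-plane Rx queue.
 */
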
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret != 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities don't allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify that all slaves in the bond support flow director */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t i, idx;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0, idx = internals->active_slave;
			i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
		idx = idx % slave_count;

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);
	}

	internals->active_slave = idx;

	return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	/* 2-D array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves;
	 * sized per packet, one entry for each mbuf in the burst */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i, j;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (unlikely(dist_slave_count < 1))
		return 0;

	/*
	 * Populate each slave's buffer with the packets to be sent on it,
	 * selecting the output slave using a hash based on the xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < dist_slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			slave_tx_fail_count[i] = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count[i];

			/*
			 * Shift the unsent packets (which start at index
			 * slave_tx_count) to the beginning of the slave's
			 * array so they can be collected below
			 */
			for (j = 0; j < slave_tx_fail_count[i]; j++) {
				slave_bufs[i][j] =
					slave_bufs[i][slave_tx_count + j];
			}
		}
	}

	/*
	 * If there are tx burst failures we move packets to end of bufs to
	 * preserve expected PMD behaviour of all failed packets being at the
	 * end of the input mbuf array
	 */
	if (unlikely(total_tx_fail_count > 0)) {
		int bufs_idx = nb_bufs - total_tx_fail_count;

		for (i = 0; i < slave_count; i++) {
			if (slave_tx_fail_count[i] > 0) {
				for (j = 0; j < slave_tx_fail_count[i]; j++)
					bufs[bufs_idx++] = slave_bufs[i][j];
			}
		}
	}

	return total_tx_count;
}

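/*
 * Illustrative sketch (not part of the driver): the reordering above
 * preserves the usual tx_burst contract (unsent packets end up at the
 * tail of bufs, still owned by the caller), so a caller may retry the
 * remainder; `port`, `q`, `pkts` and `n` are hypothetical.
 *
 *	uint16_t sent = rte_eth_tx_burst(port, q, pkts, n);
 *	while (sent < n)
 *		sent += rte_eth_tx_burst(port, q, pkts + sent, n - sent);
 */
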
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_addr bond_mac;

	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;	/* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t subtype;
	uint16_t i;
	uint16_t j;
	uint16_t k;

	rte_eth_macaddr_get(internals->port_id, &bond_mac);
	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
				COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {

			/* If packet is not pure L2 and is known, skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove packet from array if it is a slow packet, or the slave
			 * is not in collecting state, or the bonding interface is not in
			 * promiscuous mode and the packet address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
				!collecting || (!promisc &&
					!is_multicast_ether_addr(&hdr->d_addr) &&
					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
						internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	internals->active_slave = idx;
	return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}
	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
}
#endif

#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX/TX packet count for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	RTE_LOG(DEBUG, PMD, \
		"%s " \
		"port:%d " \
		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"DstIP:%s " \
		"%s " \
		"%d\n", \
		info, \
		port, \
		eth_h->s_addr.addr_bytes[0], \
		eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], \
		eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], \
		eth_h->s_addr.addr_bytes[5], \
		src_ip, \
		eth_h->d_addr.addr_bytes[0], \
		eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], \
		eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], \
		eth_h->d_addr.addr_bytes[5], \
		dst_ip, \
		arp_op, \
		burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	snprintf(buf, 16, "%s", info);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate each slave's buffer with the packets to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on
	 * the next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}

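/*
 * Worked example (illustrative): for a flow 10.0.0.1 -> 10.0.0.2,
 * ipv4_hash() XORs the two big-endian addresses, so only the bits of
 * the differing last octet survive (0x01 ^ 0x02 = 0x03). The
 * burst_xmit_*_hash() helpers below fold the hash with shifts and take
 * it modulo slave_count, so every packet of one flow maps to the same
 * slave.
 */
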
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint8_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint32_t hash;
	int i;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

		hash = ether_hash(eth_hdr);

		/* the loop already increments i; writing slaves[i++] here
		 * would skip every other entry */
		slaves[i] = (hash ^= hash >> 8) % slave_count;
	}
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint8_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	int i;

	uint32_t hash, l3hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		l3hash = 0;

		proto = eth_hdr->ether_type;
		hash = ether_hash(eth_hdr);

		vlan_offset = get_vlan_offset(eth_hdr, &proto);

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv4_hash(ipv4_hdr);

		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);
		}

		hash = hash ^ l3hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint8_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	int i;

	struct udp_hdr *udp_hdr;
	struct tcp_hdr *tcp_hdr;
	uint32_t hash, l3hash, l4hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		proto = eth_hdr->ether_type;
		vlan_offset = get_vlan_offset(eth_hdr, &proto);
		l3hash = 0;
		l4hash = 0;

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			size_t ip_hdr_offset;

			l3hash = ipv4_hash(ipv4_hdr);

			/* there is no L4 header in a fragmented packet */
			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
								== 0)) {
				ip_hdr_offset = (ipv4_hdr->version_ihl
					& IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
					tcp_hdr = (struct tcp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					l4hash = HASH_L4_PORTS(tcp_hdr);
				} else if (ipv4_hdr->next_proto_id ==
								IPPROTO_UDP) {
					udp_hdr = (struct udp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					l4hash = HASH_L4_PORTS(udp_hdr);
				}
			}
		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);

			if (ipv6_hdr->proto == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}

		hash = l3hash ^ l4hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}

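/*
 * Illustrative sketch (not part of the driver): applications choose
 * which of the three hash callbacks above is used via the public
 * bonding API, e.g. for the L3+L4 policy on a hypothetical port_id:
 *
 *	rte_eth_bond_xmit_policy_set(port_id, BALANCE_XMIT_POLICY_LAYER34);
 */
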
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}

static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;

	/* sort in descending order of bandwidth left */
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}

static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

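/*
 * Worked example (illustrative): for a 10G slave link_speed is 10000
 * (Mbps), so link_bwg starts as 10000 * 1000000 / 8 = 1.25e9 bytes/s.
 * Multiplying by (update_idx + 1) * REORDER_PERIOD_MS and comparing
 * against 1000 * load keeps a common x1000 factor on both terms, which
 * avoids a lossy division when converting the elapsed window (ms) to
 * seconds.
 */
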
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint16_t slave_id;
	uint16_t i;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = internals->active_slave_count;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	/* re-arm: this callback runs every REORDER_PERIOD_MS (10 ms) */
	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}

static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
			sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

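/*
 * Note on the source-MAC rewrite above: in TLB mode egress frames whose
 * source MAC equals the bond's primary MAC are rewritten to carry the
 * transmitting slave's own MAC, so the upstream switch learns a
 * per-slave path for the load-balanced traffic.
 */
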
void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}

static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
	/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	/* Array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves;
	 * sized per packet, one entry for each mbuf in the burst */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i, j;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	/*
	 * Populate each slave's buffer with the packets to be sent on it,
	 * selecting the output slave using a hash based on the xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			slave_tx_fail_count[i] = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count[i];

			/*
			 * Shift the unsent packets (which start at index
			 * slave_tx_count) to the beginning of the slave's
			 * array so they can be collected below
			 */
			for (j = 0; j < slave_tx_fail_count[i]; j++) {
				slave_bufs[i][j] =
					slave_bufs[i][slave_tx_count + j];
			}
		}
	}

	/*
	 * If there are tx burst failures we move packets to end of bufs to
	 * preserve expected PMD behaviour of all failed packets being at the
	 * end of the input mbuf array
	 */
	if (unlikely(total_tx_fail_count > 0)) {
		int bufs_idx = nb_bufs - total_tx_fail_count;

		for (i = 0; i < slave_count; i++) {
			if (slave_tx_fail_count[i] > 0) {
				for (j = 0; j < slave_tx_fail_count[i]; j++)
					bufs[bufs_idx++] = slave_bufs[i][j];
			}
		}
	}

	return total_tx_count;
}

static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	/* 2-D array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves;
	 * sized per packet, one entry for each mbuf in the burst */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i, j;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	/* a single distributing slave must still carry traffic, so the
	 * threshold is > 0, not > 1 */
	if (likely(dist_slave_count > 0)) {

		/*
		 * Populate each slave's buffer with the packets to be sent
		 * on it, selecting the output slave using a hash based on the
		 * xmit policy
		 */
		internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
				bufs_slave_port_idxs);

		for (i = 0; i < nb_bufs; i++) {
			/*
			 * Populate slave mbuf arrays with mbufs for that
			 * slave
			 */
			uint16_t slave_idx = bufs_slave_port_idxs[i];

			slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
					bufs[i];
		}

		/* Send packet burst on each slave device */
		for (i = 0; i < dist_slave_count; i++) {
			if (slave_nb_bufs[i] == 0)
				continue;

			slave_tx_count = rte_eth_tx_burst(
					dist_slave_port_ids[i],
					bd_tx_q->queue_id, slave_bufs[i],
					slave_nb_bufs[i]);

			total_tx_count += slave_tx_count;

			/* If tx burst fails move packets to end of bufs */
			if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
				slave_tx_fail_count[i] = slave_nb_bufs[i] -
						slave_tx_count;
				total_tx_fail_count += slave_tx_fail_count[i];

				/*
				 * Shift the unsent packets (starting at
				 * index slave_tx_count) to the beginning of
				 * the slave's array for collection below
				 */
				for (j = 0; j < slave_tx_fail_count[i]; j++)
					slave_bufs[i][j] =
						slave_bufs[i]
							[slave_tx_count + j];
			}
		}

		/*
		 * If there are tx burst failures we move packets to end of
		 * bufs to preserve expected PMD behaviour of all failed
		 * packets being at the end of the input mbuf array
		 */
		if (unlikely(total_tx_fail_count > 0)) {
			int bufs_idx = nb_bufs - total_tx_fail_count;

			for (i = 0; i < slave_count; i++) {
				if (slave_tx_fail_count[i] > 0) {
					for (j = 0;
							j < slave_tx_fail_count[i];
							j++)
						bufs[bufs_idx++] =
							slave_bufs[i][j];
				}
			}
		}
	}

	/* Check for LACP control packets and send if available */
	for (i = 0; i < slave_count; i++) {
		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
		struct rte_mbuf *ctrl_pkt = NULL;

		if (likely(rte_ring_empty(port->tx_ring)))
			continue;

		rte_ring_dequeue(port->tx_ring, (void **)&ctrl_pkt);

		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
				bd_tx_q->queue_id, &ctrl_pkt, 1);

		/*
		 * re-enqueue LAG control plane packets to buffering
		 * ring if transmission fails so the packet isn't lost.
		 */
		if (slave_tx_count != 1)
			rte_ring_enqueue(port->tx_ring, ctrl_pkt);
	}

	return total_tx_count;
}

static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t tx_failed_flag = 0, num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from the burst, the calling
	 * application is not expected to know about multiple references to
	 * packets, so we must handle failures of all packets except those of
	 * the most successful slave */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}

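/*
 * Note on the refcount handling above: rte_mbuf_refcnt_update(m, N - 1)
 * gives each of the N slaves its own reference to every mbuf, so each
 * slave's Tx path releases one reference and the mbuf returns to the
 * pool only after the last slave has sent (or this function has freed)
 * it.
 */
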
static void
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		/**
		 * If in mode 4 then save the link properties of the first
		 * slave; all subsequent slaves must match these properties
		 */
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		bond_link->link_autoneg = slave_link->link_autoneg;
		bond_link->link_duplex = slave_link->link_duplex;
		bond_link->link_speed = slave_link->link_speed;
	} else {
		/**
		 * In any other mode the link properties are set to default
		 * values of AUTONEG/DUPLEX
		 */
		ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
		ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
	}
}

static int
link_properties_valid(struct rte_eth_dev *ethdev,
		struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		if (bond_link->link_duplex != slave_link->link_duplex ||
			bond_link->link_autoneg != slave_link->link_autoneg ||
			bond_link->link_speed != slave_link->link_speed)
			return -1;
	}

	return 0;
}

int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
		return -1;
	}

	if (dst_mac_addr == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	ether_addr_copy(mac_addr, dst_mac_addr);
	return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (new_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	/* If new MAC is different to current MAC then update */
	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

	return 0;
}

int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	int i;

	/* Update slave devices MAC addresses */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++) {
			if (rte_eth_dev_default_mac_addr_set(
					internals->slaves[i].port_id,
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (rte_eth_dev_default_mac_addr_set(
						internals->primary_port,
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					return -1;
				}
			} else {
				if (rte_eth_dev_default_mac_addr_set(
						internals->slaves[i].port_id,
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
					return -1;
				}
			}
		}
	}

	return 0;
}

int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_8023AD:
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		if (internals->mode4.dedicated_queues.enabled == 0) {
			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
			RTE_LOG(WARNING, PMD,
				"Using mode 4, it is necessary to do TX burst "
				"and RX burst at least every 100ms.\n");
		} else {
			/* Use flow director's optimization */
			eth_dev->rx_pkt_burst =
					bond_ethdev_rx_burst_8023ad_fast_queue;
			eth_dev->tx_pkt_burst =
					bond_ethdev_tx_burst_8023ad_fast_queue;
		}
		break;
	case BONDING_MODE_TLB:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_ALB:
		if (bond_mode_alb_enable(eth_dev) != 0)
			return -1;

		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}

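/*
 * Illustrative sketch (not part of the driver): the burst handlers
 * above are installed when an application creates a bonded device and
 * selects a mode; names below are hypothetical.
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_8023AD, rte_socket_id());
 *	rte_eth_bond_slave_add(bond_port, slave_port_id);
 */
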
static int
slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	int errval = 0;
	struct bond_dev_private *internals = (struct bond_dev_private *)
		bonded_eth_dev->data->dev_private;
	struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];

	if (port->slow_pool == NULL) {
		char mem_name[256];
		int slave_id = slave_eth_dev->data->port_id;

		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
				slave_id);
		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			slave_eth_dev->data->numa_node);

		/* Any memory allocation failure in initialization is critical
		 * because resources can't be freed, so reinitialization is
		 * impossible. */
		if (port->slow_pool == NULL) {
			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
				slave_id, mem_name, rte_strerror(rte_errno));
		}
	}

	if (internals->mode4.dedicated_queues.enabled == 1) {
		/* Configure slow Rx queue */

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.rx_qid, 128,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL, port->slow_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.rx_qid,
					errval);
			return errval;
		}

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid, 512,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid,
				errval);
			return errval;
		}
	}
	return 0;
}

static int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;
	uint16_t nb_rx_queues;
	uint16_t nb_tx_queues;

	int errval;
	uint16_t q_id;
	struct rte_flow_error flow_error;

	struct bond_dev_private *internals = (struct bond_dev_private *)
		bonded_eth_dev->data->dev_private;

	/* Stop slave */
	rte_eth_dev_stop(slave_eth_dev->data->port_id);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* If RSS is enabled for bonding, try to enable it for slaves */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
		if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
				!= 0) {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
		} else {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
		}

		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
	}

	slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
			bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;

	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			nb_rx_queues++;
			nb_tx_queues++;
		}
	}

	/* Configure device */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			nb_rx_queues, nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Setup Rx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
				!= 0)
			return errval;

		if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
				slave_eth_dev->data->port_id) != 0) {
			RTE_BOND_LOG(ERR,
				"bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
				slave_eth_dev->data->port_id, errval);
			return -1;
		}

		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
			rte_flow_destroy(slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
					&flow_error);

		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
				slave_eth_dev->data->port_id);
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	/* If RSS is enabled for bonding, synchronize RETA */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		int i;
		struct bond_dev_private *internals;

		internals = bonded_eth_dev->data->dev_private;

		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
				errval = rte_eth_dev_rss_reta_update(
						slave_eth_dev->data->port_id,
						&internals->reta_conf[0],
						internals->slaves[i].reta_size);
				if (errval != 0) {
					RTE_LOG(WARNING, PMD,
							"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
							" RSS Configuration for bonding may be inconsistent.\n",
							slave_eth_dev->data->port_id, errval);
				}
				break;
			}
		}
	}

	/* If lsc interrupt is set, check initial slave's link status */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
			NULL);
	}

	return 0;
}

static void
slave_remove(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	uint16_t i;

	for (i = 0; i < internals->slave_count; i++)
		if (internals->slaves[i].port_id ==
				slave_eth_dev->data->port_id)
			break;

	if (i < (internals->slave_count - 1))
		memmove(&internals->slaves[i], &internals->slaves[i + 1],
				sizeof(internals->slaves[0]) *
				(internals->slave_count - i - 1));

	internals->slave_count--;

	/* force reconfiguration of slave interfaces */
	_rte_eth_dev_reset(slave_eth_dev);
}

static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

static void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* Mark slave devices that don't support interrupts so we can
	 * compensate when we start the bond
	 */
	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
		slave_details->link_status_poll_enabled = 1;
	}

	slave_details->link_status_wait_to_complete = 0;
	/* clean tlb_last_obytes when adding port for bonding device */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct ether_addr));
}

void
bond_ethdev_primary_set(struct bond_dev_private *internals,
		uint16_t slave_port_id)
{
	int i;

	if (internals->active_slave_count < 1)
		internals->current_primary_port = slave_port_id;
	else
		/* Search bonded device slave ports for new proposed primary port */
		for (i = 0; i < internals->active_slave_count; i++) {
			if (internals->active_slaves[i] == slave_port_id)
				internals->current_primary_port = slave_port_id;
		}
}

static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);

static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (check_for_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		return -1;
	}

	if (internals->user_defined_mac == 0) {
		struct ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			return -1;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			return -1;
		}
	}

	/* Update all slave devices MACs */
	if (mac_address_slaves_update(eth_dev) != 0)
		return -1;

	/* If bonded device is configured in promiscuous mode then re-apply config */
	if (internals->promiscuous_en)
		bond_ethdev_promiscuous_enable(eth_dev);

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			internals->mode4.dedicated_queues.rx_qid =
					eth_dev->data->nb_rx_queues;
			internals->mode4.dedicated_queues.tx_qid =
					eth_dev->data->nb_tx_queues;
		}
	}

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		struct rte_eth_dev *slave_ethdev =
				&(rte_eth_devices[internals->slaves[i].port_id]);
		if (slave_configure(eth_dev, slave_ethdev) != 0) {
			RTE_BOND_LOG(ERR,
				"bonded port (%d) failed to reconfigure slave device (%d)",
				eth_dev->data->port_id,
				internals->slaves[i].port_id);
			return -1;
		}
		/* We will need to poll for link status if any slave doesn't
		 * support interrupts
		 */
		if (internals->slaves[i].link_status_poll_enabled)
			internals->link_status_polling_enabled = 1;
	}

	/* start polling if needed */
	if (internals->link_status_polling_enabled) {
		rte_eal_alarm_set(
			internals->link_status_polling_interval_ms * 1000,
			bond_ethdev_slave_link_status_change_monitor,
			(void *)&rte_eth_devices[internals->port_id]);
	}

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB)
		bond_tlb_enable(internals);

	return 0;
}

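/*
 * Illustrative sketch (not part of the driver): from the application's
 * point of view the bonded port starts like any other ethdev; the call
 * lands here and reconfigures and starts every slave.
 *
 *	if (rte_eth_dev_start(bond_port_id) != 0)
 *		rte_exit(EXIT_FAILURE, "cannot start bonded port\n");
 */
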
static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
	int i;

	if (dev->data->rx_queues != NULL) {
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			rte_free(dev->data->rx_queues[i]);
			dev->data->rx_queues[i] = NULL;
		}
		dev->data->nb_rx_queues = 0;
	}

	if (dev->data->tx_queues != NULL) {
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			rte_free(dev->data->tx_queues[i]);
			dev->data->tx_queues[i] = NULL;
		}
		dev->data->nb_tx_queues = 0;
	}
}

2128 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2130 struct bond_dev_private *internals = eth_dev->data->dev_private;
2133 if (internals->mode == BONDING_MODE_8023AD) {
2137 bond_mode_8023ad_stop(eth_dev);
2139 /* Discard all messages to/from mode 4 state machines */
2140 for (i = 0; i < internals->active_slave_count; i++) {
2141 port = &mode_8023ad_ports[internals->active_slaves[i]];
2143 RTE_ASSERT(port->rx_ring != NULL);
2144 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2145 rte_pktmbuf_free(pkt);
2147 RTE_ASSERT(port->tx_ring != NULL);
2148 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2149 rte_pktmbuf_free(pkt);
2153 if (internals->mode == BONDING_MODE_TLB ||
2154 internals->mode == BONDING_MODE_ALB) {
2155 bond_tlb_disable(internals);
2156 for (i = 0; i < internals->active_slave_count; i++)
2157 tlb_last_obytets[internals->active_slaves[i]] = 0;
2160 internals->active_slave_count = 0;
2161 internals->link_status_polling_enabled = 0;
2162 for (i = 0; i < internals->slave_count; i++)
2163 internals->slaves[i].last_link_status = 0;
2165 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2166 eth_dev->data->dev_started = 0;
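/*
 * Device close: stop and detach every remaining slave (slaves that fail
 * to detach are skipped), then release the bonded device's queues and
 * reset the VLAN filter bitmap.
 */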
2170 bond_ethdev_close(struct rte_eth_dev *dev)
2172 struct bond_dev_private *internals = dev->data->dev_private;
2173 uint16_t bond_port_id = internals->port_id;
2176 RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
2177 while (internals->slave_count != skipped) {
2178 uint16_t port_id = internals->slaves[skipped].port_id;
2180 rte_eth_dev_stop(port_id);
2182 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2184 "Failed to remove port %d from bonded device "
2185 "%s\n", port_id, dev->device->name);
2189 bond_ethdev_free_queues(dev);
2190 rte_bitmap_reset(internals->vlan_filter_bmp);
2193 /* forward declaration */
2194 static int bond_ethdev_configure(struct rte_eth_dev *dev);
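/*
 * Capability reporting: queue limits are the minimum across all current
 * slaves, while the offload and RSS capabilities reported here are the
 * values accumulated in internals as slaves were added.
 */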
2197 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2199 struct bond_dev_private *internals = dev->data->dev_private;
2201 uint16_t max_nb_rx_queues = UINT16_MAX;
2202 uint16_t max_nb_tx_queues = UINT16_MAX;
2204 dev_info->max_mac_addrs = 1;
2206 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2207 internals->candidate_max_rx_pktlen :
2208 ETHER_MAX_JUMBO_FRAME_LEN;
2210 /* Max number of tx/rx queues that the bonded device can support is the
2211 * minimum of the values reported by the bonded slaves, as all slaves must
2212 * be capable of supporting the same number of tx/rx queues.
2214 if (internals->slave_count > 0) {
2215 struct rte_eth_dev_info slave_info;
2218 for (idx = 0; idx < internals->slave_count; idx++) {
2219 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2222 if (slave_info.max_rx_queues < max_nb_rx_queues)
2223 max_nb_rx_queues = slave_info.max_rx_queues;
2225 if (slave_info.max_tx_queues < max_nb_tx_queues)
2226 max_nb_tx_queues = slave_info.max_tx_queues;
2230 dev_info->max_rx_queues = max_nb_rx_queues;
2231 dev_info->max_tx_queues = max_nb_tx_queues;
2234 * If dedicated hw queues are enabled for the link bonding device in LACP
2235 * mode then we need to reduce the maximum number of data path queues by 1.
2237 if (internals->mode == BONDING_MODE_8023AD &&
2238 internals->mode4.dedicated_queues.enabled == 1) {
2239 dev_info->max_rx_queues--;
2240 dev_info->max_tx_queues--;
2243 dev_info->min_rx_bufsize = 0;
2245 dev_info->rx_offload_capa = internals->rx_offload_capa;
2246 dev_info->tx_offload_capa = internals->tx_offload_capa;
2247 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2249 dev_info->reta_size = internals->reta_size;
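/*
 * VLAN filter changes are recorded in the bonded device's bitmap and
 * propagated to every current slave; a slave without VLAN filter support
 * only triggers a warning so the remaining slaves stay in sync.
 */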
2253 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2257 struct bond_dev_private *internals = dev->data->dev_private;
2259 /* don't do this while a slave is being added */
2260 rte_spinlock_lock(&internals->lock);
2263 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2265 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2267 for (i = 0; i < internals->slave_count; i++) {
2268 uint16_t port_id = internals->slaves[i].port_id;
2270 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2272 RTE_LOG(WARNING, PMD,
2273 "Setting VLAN filter on slave port %u not supported.\n",
2277 rte_spinlock_unlock(&internals->lock);
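/*
 * Rx/tx queue setup on the bonded device only allocates a bookkeeping
 * structure capturing the requested configuration; the real queues are
 * created on the slaves when slave_configure() (see dev_start above)
 * applies this stored configuration to each slave port.
 */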
2282 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2283 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2284 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2286 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2287 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2288 0, dev->data->numa_node);
2289 if (bd_rx_q == NULL)
2292 bd_rx_q->queue_id = rx_queue_id;
2293 bd_rx_q->dev_private = dev->data->dev_private;
2295 bd_rx_q->nb_rx_desc = nb_rx_desc;
2297 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2298 bd_rx_q->mb_pool = mb_pool;
2300 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2306 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2307 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2308 const struct rte_eth_txconf *tx_conf)
2310 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2311 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2312 0, dev->data->numa_node);
2314 if (bd_tx_q == NULL)
2317 bd_tx_q->queue_id = tx_queue_id;
2318 bd_tx_q->dev_private = dev->data->dev_private;
2320 bd_tx_q->nb_tx_desc = nb_tx_desc;
2321 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2323 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2329 bond_ethdev_rx_queue_release(void *queue)
2338 bond_ethdev_tx_queue_release(void *queue)
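/*
 * Alarm callback used when at least one slave cannot deliver link status
 * interrupts: poll each such slave's link state under the device lock and
 * synthesize an LSC event whenever it differs from the last value seen.
 */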
2347 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2349 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2350 struct bond_dev_private *internals;
2352 /* Default to assuming a polling slave was found, as we don't want to
2353 * disable the polling alarm if we cannot get the lock */
2354 int i, polling_slave_found = 1;
2359 bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2360 internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2362 if (!bonded_ethdev->data->dev_started ||
2363 !internals->link_status_polling_enabled)
2366 /* If the device is currently being configured then don't check the
2367 * slaves' link status; wait until the next period */
2368 if (rte_spinlock_trylock(&internals->lock)) {
2369 if (internals->slave_count > 0)
2370 polling_slave_found = 0;
2372 for (i = 0; i < internals->slave_count; i++) {
2373 if (!internals->slaves[i].link_status_poll_enabled)
2376 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2377 polling_slave_found = 1;
2379 /* Update slave link status */
2380 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2381 internals->slaves[i].link_status_wait_to_complete);
2383 /* if link status has changed since last checked then call lsc
2385 if (slave_ethdev->data->dev_link.link_status !=
2386 internals->slaves[i].last_link_status) {
2387 internals->slaves[i].last_link_status =
2388 slave_ethdev->data->dev_link.link_status;
2390 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2391 RTE_ETH_EVENT_INTR_LSC,
2392 &bonded_ethdev->data->port_id,
2396 rte_spinlock_unlock(&internals->lock);
2399 if (polling_slave_found)
2400 /* Set alarm to continue monitoring link status of slave ethdevs */
2401 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2402 bond_ethdev_slave_link_status_change_monitor, cb_arg);
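/*
 * Link property derivation is mode dependent: broadcast reports the
 * minimum slave speed (every frame is sent on every slave), active-backup
 * reports the current primary's speed, and the load-sharing modes (round
 * robin, balance, 802.3ad, TLB, ALB) report the sum of the active slaves'
 * speeds. E.g. two active 10G slaves report 10G in broadcast mode but 20G
 * in balance mode.
 */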
2406 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2408 void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2410 struct bond_dev_private *bond_ctx;
2411 struct rte_eth_link slave_link;
2415 bond_ctx = ethdev->data->dev_private;
2417 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2419 if (ethdev->data->dev_started == 0 ||
2420 bond_ctx->active_slave_count == 0) {
2421 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2425 ethdev->data->dev_link.link_status = ETH_LINK_UP;
2427 if (wait_to_complete)
2428 link_update = rte_eth_link_get;
2430 link_update = rte_eth_link_get_nowait;
2432 switch (bond_ctx->mode) {
2433 case BONDING_MODE_BROADCAST:
2435 * Setting link speed to UINT32_MAX to ensure we pick up the
2436 * value of the first active slave
2438 ethdev->data->dev_link.link_speed = UINT32_MAX;
2441 * link speed is the minimum of all the slaves' link speeds, as packet
2442 * loss will occur on a slower slave if transmission at rates greater
2443 * than its link speed is attempted
2445 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2446 link_update(bond_ctx->active_slaves[idx], &slave_link);
2448 if (slave_link.link_speed <
2449 ethdev->data->dev_link.link_speed)
2450 ethdev->data->dev_link.link_speed =
2451 slave_link.link_speed;
2454 case BONDING_MODE_ACTIVE_BACKUP:
2455 /* Current primary slave */
2456 link_update(bond_ctx->current_primary_port, &slave_link);
2458 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2460 case BONDING_MODE_8023AD:
2461 ethdev->data->dev_link.link_autoneg =
2462 bond_ctx->mode4.slave_link.link_autoneg;
2463 ethdev->data->dev_link.link_duplex =
2464 bond_ctx->mode4.slave_link.link_duplex;
2465 /* fall through to update link speed */
2466 case BONDING_MODE_ROUND_ROBIN:
2467 case BONDING_MODE_BALANCE:
2468 case BONDING_MODE_TLB:
2469 case BONDING_MODE_ALB:
2472 * In these modes the maximum theoretical link speed is the sum
2475 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2477 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2478 link_update(bond_ctx->active_slaves[idx], &slave_link);
2480 ethdev->data->dev_link.link_speed +=
2481 slave_link.link_speed;
2491 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2493 struct bond_dev_private *internals = dev->data->dev_private;
2494 struct rte_eth_stats slave_stats;
2497 for (i = 0; i < internals->slave_count; i++) {
2498 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2500 stats->ipackets += slave_stats.ipackets;
2501 stats->opackets += slave_stats.opackets;
2502 stats->ibytes += slave_stats.ibytes;
2503 stats->obytes += slave_stats.obytes;
2504 stats->imissed += slave_stats.imissed;
2505 stats->ierrors += slave_stats.ierrors;
2506 stats->oerrors += slave_stats.oerrors;
2507 stats->rx_nombuf += slave_stats.rx_nombuf;
2509 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2510 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2511 stats->q_opackets[j] += slave_stats.q_opackets[j];
2512 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2513 stats->q_obytes[j] += slave_stats.q_obytes[j];
2514 stats->q_errors[j] += slave_stats.q_errors[j];
2523 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2525 struct bond_dev_private *internals = dev->data->dev_private;
2528 for (i = 0; i < internals->slave_count; i++)
2529 rte_eth_stats_reset(internals->slaves[i].port_id);
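/*
 * Promiscuous mode follows each mode's transmit behaviour: modes that may
 * transmit on any slave propagate the setting to all of them, the single
 * active modes only touch the current primary, and mode 4 manages it per
 * slave as members join and leave the aggregation.
 */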
2533 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2535 struct bond_dev_private *internals = eth_dev->data->dev_private;
2538 internals->promiscuous_en = 1;
2540 switch (internals->mode) {
2541 /* Promiscuous mode is propagated to all slaves */
2542 case BONDING_MODE_ROUND_ROBIN:
2543 case BONDING_MODE_BALANCE:
2544 case BONDING_MODE_BROADCAST:
2545 for (i = 0; i < internals->slave_count; i++)
2546 rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2548 /* In mode 4, promiscuous mode is managed per slave when it is added/removed */
2549 case BONDING_MODE_8023AD:
2551 /* Promiscuous mode is propagated only to primary slave */
2552 case BONDING_MODE_ACTIVE_BACKUP:
2553 case BONDING_MODE_TLB:
2554 case BONDING_MODE_ALB:
2556 rte_eth_promiscuous_enable(internals->current_primary_port);
2561 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2563 struct bond_dev_private *internals = dev->data->dev_private;
2566 internals->promiscuous_en = 0;
2568 switch (internals->mode) {
2569 /* Promiscuous mode is propagated to all slaves */
2570 case BONDING_MODE_ROUND_ROBIN:
2571 case BONDING_MODE_BALANCE:
2572 case BONDING_MODE_BROADCAST:
2573 for (i = 0; i < internals->slave_count; i++)
2574 rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2576 /* In mode 4, promiscuous mode is managed per slave when it is added/removed */
2577 case BONDING_MODE_8023AD:
2579 /* Promiscuous mode is propagated only to primary slave */
2580 case BONDING_MODE_ACTIVE_BACKUP:
2581 case BONDING_MODE_TLB:
2582 case BONDING_MODE_ALB:
2584 rte_eth_promiscuous_disable(internals->current_primary_port);
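/*
 * Slave LSC events may be debounced: when up/down propagation delays are
 * configured, notification of the application is deferred through an EAL
 * alarm and cancelled again if the link flaps back before it fires.
 */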
2589 bond_ethdev_delayed_lsc_propagation(void *arg)
2594 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2595 RTE_ETH_EVENT_INTR_LSC, NULL);
2599 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2600 void *param, void *ret_param __rte_unused)
2602 struct rte_eth_dev *bonded_eth_dev;
2603 struct bond_dev_private *internals;
2604 struct rte_eth_link link;
2607 int i, valid_slave = 0;
2609 uint8_t lsc_flag = 0;
2611 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2614 bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2616 if (check_for_bonded_ethdev(bonded_eth_dev))
2619 internals = bonded_eth_dev->data->dev_private;
2621 /* If the device isn't started don't handle interrupts */
2622 if (!bonded_eth_dev->data->dev_started)
2625 /* verify that port_id is a valid slave of bonded port */
2626 for (i = 0; i < internals->slave_count; i++) {
2627 if (internals->slaves[i].port_id == port_id) {
2636 /* Search for port in active port list */
2637 active_pos = find_slave_by_id(internals->active_slaves,
2638 internals->active_slave_count, port_id);
2640 rte_eth_link_get_nowait(port_id, &link);
2641 if (link.link_status) {
2642 if (active_pos < internals->active_slave_count)
2645 /* if no active slave ports then set this port to be primary port */
2646 if (internals->active_slave_count < 1) {
2647 /* If first active slave, then change link status */
2648 bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2649 internals->current_primary_port = port_id;
2652 mac_address_slaves_update(bonded_eth_dev);
2655 activate_slave(bonded_eth_dev, port_id);
2657 /* If user has defined the primary port then default to using it */
2658 if (internals->user_defined_primary_port &&
2659 internals->primary_port == port_id)
2660 bond_ethdev_primary_set(internals, port_id);
2662 if (active_pos == internals->active_slave_count)
2665 /* Remove from active slave list */
2666 deactivate_slave(bonded_eth_dev, port_id);
2668 if (internals->active_slave_count < 1)
2671 /* Update primary id: take the first active slave from the list, or if
2672 * none is available fall back to the configured primary port */
2673 if (port_id == internals->current_primary_port) {
2674 if (internals->active_slave_count > 0)
2675 bond_ethdev_primary_set(internals,
2676 internals->active_slaves[0]);
2678 internals->current_primary_port = internals->primary_port;
2683 * Update bonded device link properties after any change to active
2686 bond_ethdev_link_update(bonded_eth_dev, 0);
2689 /* Cancel any possible outstanding interrupts if delays are enabled */
2690 if (internals->link_up_delay_ms > 0 ||
2691 internals->link_down_delay_ms > 0)
2692 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2695 if (bonded_eth_dev->data->dev_link.link_status) {
2696 if (internals->link_up_delay_ms > 0)
2697 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2698 bond_ethdev_delayed_lsc_propagation,
2699 (void *)bonded_eth_dev);
2701 _rte_eth_dev_callback_process(bonded_eth_dev,
2702 RTE_ETH_EVENT_INTR_LSC,
2706 if (internals->link_down_delay_ms > 0)
2707 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2708 bond_ethdev_delayed_lsc_propagation,
2709 (void *)bonded_eth_dev);
2711 _rte_eth_dev_callback_process(bonded_eth_dev,
2712 RTE_ETH_EVENT_INTR_LSC,
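/*
 * RETA update: the user-supplied groups are cached on the bonded device,
 * tiled to fill the whole table, and then pushed to each slave using that
 * slave's own RETA size.
 */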
2720 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2721 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2725 int slave_reta_size;
2726 unsigned reta_count;
2727 struct bond_dev_private *internals = dev->data->dev_private;
2729 if (reta_size != internals->reta_size)
2732 /* Copy RETA table */
2733 reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2735 for (i = 0; i < reta_count; i++) {
2736 internals->reta_conf[i].mask = reta_conf[i].mask;
2737 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2738 if ((reta_conf[i].mask >> j) & 0x01)
2739 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2742 /* Fill rest of array */
2743 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2744 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2745 sizeof(internals->reta_conf[0]) * reta_count);
2747 /* Propagate RETA over slaves */
2748 for (i = 0; i < internals->slave_count; i++) {
2749 slave_reta_size = internals->slaves[i].reta_size;
2750 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2751 &internals->reta_conf[0], slave_reta_size);
2760 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2761 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2764 struct bond_dev_private *internals = dev->data->dev_private;
2766 if (reta_size != internals->reta_size)
2769 /* Copy RETA table */
2770 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2771 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2772 if ((reta_conf[i].mask >> j) & 0x01)
2773 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2779 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2780 struct rte_eth_rss_conf *rss_conf)
2783 struct bond_dev_private *internals = dev->data->dev_private;
2784 struct rte_eth_rss_conf bond_rss_conf;
2786 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2788 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2790 if (bond_rss_conf.rss_hf != 0)
2791 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2793 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2794 sizeof(internals->rss_key)) {
2795 if (bond_rss_conf.rss_key_len == 0)
2796 bond_rss_conf.rss_key_len = 40;
2797 internals->rss_key_len = bond_rss_conf.rss_key_len;
2798 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2799 internals->rss_key_len);
2802 for (i = 0; i < internals->slave_count; i++) {
2803 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2813 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2814 struct rte_eth_rss_conf *rss_conf)
2816 struct bond_dev_private *internals = dev->data->dev_private;
2818 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2819 rss_conf->rss_key_len = internals->rss_key_len;
2820 if (rss_conf->rss_key)
2821 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2826 const struct eth_dev_ops default_dev_ops = {
2827 .dev_start = bond_ethdev_start,
2828 .dev_stop = bond_ethdev_stop,
2829 .dev_close = bond_ethdev_close,
2830 .dev_configure = bond_ethdev_configure,
2831 .dev_infos_get = bond_ethdev_info,
2832 .vlan_filter_set = bond_ethdev_vlan_filter_set,
2833 .rx_queue_setup = bond_ethdev_rx_queue_setup,
2834 .tx_queue_setup = bond_ethdev_tx_queue_setup,
2835 .rx_queue_release = bond_ethdev_rx_queue_release,
2836 .tx_queue_release = bond_ethdev_tx_queue_release,
2837 .link_update = bond_ethdev_link_update,
2838 .stats_get = bond_ethdev_stats_get,
2839 .stats_reset = bond_ethdev_stats_reset,
2840 .promiscuous_enable = bond_ethdev_promiscuous_enable,
2841 .promiscuous_disable = bond_ethdev_promiscuous_disable,
2842 .reta_update = bond_ethdev_rss_reta_update,
2843 .reta_query = bond_ethdev_rss_reta_query,
2844 .rss_hash_update = bond_ethdev_rss_hash_update,
2845 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get
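/*
 * A bonded port can also be created programmatically instead of via
 * --vdev; a minimal sketch (port ids and names are placeholders):
 *
 *   int bond_port = rte_eth_bond_create("net_bonding0",
 *                                       BONDING_MODE_ALB, rte_socket_id());
 *   if (bond_port >= 0)
 *           rte_eth_bond_slave_add(bond_port, slave_port_id);
 *
 * Both paths end up allocating the device in bond_alloc() below.
 */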
2849 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2851 const char *name = rte_vdev_device_name(dev);
2852 uint8_t socket_id = dev->device.numa_node;
2853 struct bond_dev_private *internals = NULL;
2854 struct rte_eth_dev *eth_dev = NULL;
2855 uint32_t vlan_filter_bmp_size;
2857 /* now do all data allocation - for eth_dev structure, dummy pci driver
2858 * and internal (private) data
2861 /* reserve an ethdev entry */
2862 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2863 if (eth_dev == NULL) {
2864 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2868 internals = eth_dev->data->dev_private;
2869 eth_dev->data->nb_rx_queues = (uint16_t)1;
2870 eth_dev->data->nb_tx_queues = (uint16_t)1;
2872 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2874 if (eth_dev->data->mac_addrs == NULL) {
2875 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2879 eth_dev->dev_ops = &default_dev_ops;
2880 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2882 rte_spinlock_init(&internals->lock);
2884 internals->port_id = eth_dev->data->port_id;
2885 internals->mode = BONDING_MODE_INVALID;
2886 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2887 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2888 internals->burst_xmit_hash = burst_xmit_l2_hash;
2889 internals->user_defined_mac = 0;
2891 internals->link_status_polling_enabled = 0;
2893 internals->link_status_polling_interval_ms =
2894 DEFAULT_POLLING_INTERVAL_10_MS;
2895 internals->link_down_delay_ms = 0;
2896 internals->link_up_delay_ms = 0;
2898 internals->slave_count = 0;
2899 internals->active_slave_count = 0;
2900 internals->rx_offload_capa = 0;
2901 internals->tx_offload_capa = 0;
2902 internals->candidate_max_rx_pktlen = 0;
2903 internals->max_rx_pktlen = 0;
2905 /* Initially allow to choose any offload type */
2906 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2908 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2909 memset(internals->slaves, 0, sizeof(internals->slaves));
2911 /* Set mode 4 default configuration */
2912 bond_mode_8023ad_setup(eth_dev, NULL);
2913 if (bond_ethdev_mode_set(eth_dev, mode)) {
2914 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
2915 eth_dev->data->port_id, mode);
2919 vlan_filter_bmp_size =
2920 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2921 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2922 RTE_CACHE_LINE_SIZE);
2923 if (internals->vlan_filter_bmpmem == NULL) {
2925 "Failed to allocate vlan bitmap for bonded device %u\n",
2926 eth_dev->data->port_id);
2930 internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2931 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2932 if (internals->vlan_filter_bmp == NULL) {
2934 "Failed to init vlan bitmap for bonded device %u\n",
2935 eth_dev->data->port_id);
2936 rte_free(internals->vlan_filter_bmpmem);
2940 return eth_dev->data->port_id;
2943 rte_free(internals);
2944 if (eth_dev != NULL) {
2945 rte_free(eth_dev->data->mac_addrs);
2946 rte_eth_dev_release_port(eth_dev);
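/*
 * vdev probe entry point: parse the mode and socket kvargs, create the
 * bonded ethdev through bond_alloc() and stash the kvlist so that
 * bond_ethdev_configure() can process the remaining arguments once all
 * ports have been probed.
 */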
2952 bond_probe(struct rte_vdev_device *dev)
2955 struct bond_dev_private *internals;
2956 struct rte_kvargs *kvlist;
2957 uint8_t bonding_mode, socket_id;
2958 int arg_count, port_id;
2964 name = rte_vdev_device_name(dev);
2965 RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2967 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
2968 pmd_bond_init_valid_arguments);
2972 /* Parse link bonding mode */
2973 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2974 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2975 &bond_ethdev_parse_slave_mode_kvarg,
2976 &bonding_mode) != 0) {
2977 RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2982 RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
2983 "device %s\n", name);
2987 /* Parse socket id to create bonding device on */
2988 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2989 if (arg_count == 1) {
2990 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2991 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2993 RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
2994 "bonded device %s\n", name);
2997 } else if (arg_count > 1) {
2998 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
2999 "bonded device %s\n", name);
3002 socket_id = rte_socket_id();
3005 dev->device.numa_node = socket_id;
3007 /* Create link bonding eth device */
3008 port_id = bond_alloc(dev, bonding_mode);
3010 RTE_LOG(ERR, EAL, "Failed to create socket %s in mode %u on "
3011 "socket %u.\n", name, bonding_mode, socket_id);
3014 internals = rte_eth_devices[port_id].data->dev_private;
3015 internals->kvlist = kvlist;
3018 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3019 if (rte_kvargs_process(kvlist,
3020 PMD_BOND_AGG_MODE_KVARG,
3021 &bond_ethdev_parse_slave_agg_mode_kvarg,
3024 "Failed to parse agg selection mode for bonded device %s\n",
3029 if (internals->mode == BONDING_MODE_8023AD)
3030 rte_eth_bond_8023ad_agg_selection_set(port_id,
3033 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3036 RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
3037 "socket %u.\n", name, port_id, bonding_mode, socket_id);
3041 rte_kvargs_free(kvlist);
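/*
 * vdev remove entry point: the device is only torn down once all slaves
 * have been removed; it is then stopped, closed and its private data, MAC
 * array and VLAN filter bitmap are freed.
 */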
3047 bond_remove(struct rte_vdev_device *dev)
3049 struct rte_eth_dev *eth_dev;
3050 struct bond_dev_private *internals;
3056 name = rte_vdev_device_name(dev);
3057 RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
3059 /* now free all data allocation - for eth_dev structure,
3060 * dummy pci driver and internal (private) data
3063 /* find an ethdev entry */
3064 eth_dev = rte_eth_dev_allocated(name);
3065 if (eth_dev == NULL)
3068 RTE_ASSERT(eth_dev->device == &dev->device);
3070 internals = eth_dev->data->dev_private;
3071 if (internals->slave_count != 0)
3074 if (eth_dev->data->dev_started == 1) {
3075 bond_ethdev_stop(eth_dev);
3076 bond_ethdev_close(eth_dev);
3079 eth_dev->dev_ops = NULL;
3080 eth_dev->rx_pkt_burst = NULL;
3081 eth_dev->tx_pkt_burst = NULL;
3083 internals = eth_dev->data->dev_private;
3084 rte_bitmap_free(internals->vlan_filter_bmp);
3085 rte_free(internals->vlan_filter_bmpmem);
3086 rte_free(eth_dev->data->dev_private);
3087 rte_free(eth_dev->data->mac_addrs);
3089 rte_eth_dev_release_port(eth_dev);
3094 /* This resolves the slave port ids after all the other pdevs and vdevs
3095 * have been allocated */
3097 bond_ethdev_configure(struct rte_eth_dev *dev)
3099 const char *name = dev->device->name;
3100 struct bond_dev_private *internals = dev->data->dev_private;
3101 struct rte_kvargs *kvlist = internals->kvlist;
3103 uint16_t port_id = dev - rte_eth_devices;
3106 static const uint8_t default_rss_key[40] = {
3107 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3108 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3109 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3110 0xBE, 0xAC, 0x01, 0xFA
3115 /* If RSS is enabled, fill table and key with default values */
3116 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3117 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
3118 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
3119 memcpy(internals->rss_key, default_rss_key, 40);
3121 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3122 internals->reta_conf[i].mask = ~0LL;
3123 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3124 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
3128 /* set the max_rx_pktlen */
3129 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3132 * If no kvlist, it means that this bonded device has been created
3133 * through the bonding API.
3138 /* Parse MAC address for bonded device */
3139 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3140 if (arg_count == 1) {
3141 struct ether_addr bond_mac;
3143 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3144 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3145 RTE_LOG(ERR, EAL, "Invalid MAC address for bonded device %s\n",
3150 /* Set MAC address */
3151 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3153 "Failed to set mac address on bonded device %s\n",
3157 } else if (arg_count > 1) {
3159 "MAC address can be specified only once for bonded device %s\n",
3164 /* Parse/set balance mode transmit policy */
3165 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3166 if (arg_count == 1) {
3167 uint8_t xmit_policy;
3169 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3170 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3173 "Invalid xmit policy specified for bonded device %s\n",
3178 /* Set balance mode transmit policy */
3179 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3181 "Failed to set balance xmit policy on bonded device %s\n",
3185 } else if (arg_count > 1) {
3187 "Transmit policy can be specified only once for bonded device"
3192 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3193 if (rte_kvargs_process(kvlist,
3194 PMD_BOND_AGG_MODE_KVARG,
3195 &bond_ethdev_parse_slave_agg_mode_kvarg,
3198 "Failed to parse agg selection mode for bonded device %s\n",
3201 if (internals->mode == BONDING_MODE_8023AD)
3202 rte_eth_bond_8023ad_agg_selection_set(port_id,
3206 /* Parse/add slave ports to bonded device */
3207 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3208 struct bond_ethdev_slave_ports slave_ports;
3211 memset(&slave_ports, 0, sizeof(slave_ports));
3213 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3214 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3216 "Failed to parse slave ports for bonded device %s\n",
3221 for (i = 0; i < slave_ports.slave_count; i++) {
3222 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3224 "Failed to add port %d as slave to bonded device %s\n",
3225 slave_ports.slaves[i], name);
3230 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3234 /* Parse/set primary slave port id*/
3235 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3236 if (arg_count == 1) {
3237 uint16_t primary_slave_port_id;
3239 if (rte_kvargs_process(kvlist,
3240 PMD_BOND_PRIMARY_SLAVE_KVARG,
3241 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3242 &primary_slave_port_id) < 0) {
3244 "Invalid primary slave port id specified for bonded device"
3249 /* Set the primary slave port id */
3250 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3253 "Failed to set primary slave port %d on bonded device %s\n",
3254 primary_slave_port_id, name);
3257 } else if (arg_count > 1) {
3259 "Primary slave can be specified only once for bonded device"
3264 /* Parse link status monitor polling interval */
3265 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3266 if (arg_count == 1) {
3267 uint32_t lsc_poll_interval_ms;
3269 if (rte_kvargs_process(kvlist,
3270 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3271 &bond_ethdev_parse_time_ms_kvarg,
3272 &lsc_poll_interval_ms) < 0) {
3274 "Invalid lsc polling interval value specified for bonded"
3275 " device %s\n", name);
3279 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3282 "Failed to set lsc monitor polling interval (%u ms) on"
3283 " bonded device %s\n", lsc_poll_interval_ms, name);
3286 } else if (arg_count > 1) {
3288 "LSC polling interval can be specified only once for bonded"
3289 " device %s\n", name);
3293 /* Parse link up interrupt propagation delay */
3294 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3295 if (arg_count == 1) {
3296 uint32_t link_up_delay_ms;
3298 if (rte_kvargs_process(kvlist,
3299 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3300 &bond_ethdev_parse_time_ms_kvarg,
3301 &link_up_delay_ms) < 0) {
3303 "Invalid link up propagation delay value specified for"
3304 " bonded device %s\n", name);
3308 /* Set the link up propagation delay */
3309 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3312 "Failed to set link up propagation delay (%u ms) on bonded"
3313 " device %s\n", link_up_delay_ms, name);
3316 } else if (arg_count > 1) {
3318 "Link up propagation delay can be specified only once for"
3319 " bonded device %s\n", name);
3323 /* Parse link down interrupt propagation delay */
3324 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3325 if (arg_count == 1) {
3326 uint32_t link_down_delay_ms;
3328 if (rte_kvargs_process(kvlist,
3329 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3330 &bond_ethdev_parse_time_ms_kvarg,
3331 &link_down_delay_ms) < 0) {
3333 "Invalid link down propagation delay value specified for"
3334 " bonded device %s\n", name);
3338 /* Set the link down propagation delay */
3339 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3342 "Failed to set link down propagation delay (%u ms) on"
3343 " bonded device %s\n", link_down_delay_ms, name);
3346 } else if (arg_count > 1) {
3348 "Link down propagation delay can be specified only once for"
3349 " bonded device %s\n", name);
3356 struct rte_vdev_driver pmd_bond_drv = {
3357 .probe = bond_probe,
3358 .remove = bond_remove,
3361 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3362 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3364 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3368 "xmit_policy=[l2 | l23 | l34] "
3369 "agg_mode=[count | stable | bandwidth] "
3372 "lsc_poll_period_ms=<int> "
3374 "down_delay=<int>");