1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
5 #include <netinet/in.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev.h>
10 #include <rte_ethdev_vdev.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
21 #include "rte_eth_bond.h"
22 #include "rte_eth_bond_private.h"
23 #include "rte_eth_bond_8023ad_private.h"
25 #define REORDER_PERIOD_MS 10
26 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
28 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
30 /* Table for statistics in mode 5 TLB */
31 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
34 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
36 size_t vlan_offset = 0;
38 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
39 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
41 vlan_offset = sizeof(struct vlan_hdr);
42 *proto = vlan_hdr->eth_proto;
44 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
45 vlan_hdr = vlan_hdr + 1;
46 *proto = vlan_hdr->eth_proto;
47 vlan_offset += sizeof(struct vlan_hdr);
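/*
 * Editor's note, an illustrative walk-through (not part of the driver):
 * for a double VLAN tagged (QinQ) frame the header layout is
 *   ether_hdr | vlan_hdr | vlan_hdr | L3 payload
 * The first match advances vlan_offset to sizeof(struct vlan_hdr) and
 * loads the inner TPID; the second match doubles the offset, so callers
 * can locate the L3 header at (char *)(eth_hdr + 1) + vlan_offset.
 */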
54 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
56 struct bond_dev_private *internals;
58 uint16_t num_rx_slave = 0;
59 uint16_t num_rx_total = 0;
63 /* Cast to structure containing the bonded device's port id and queue id */
64 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
66 internals = bd_rx_q->dev_private;
69 for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
70 /* Offset of pointer to *bufs increases as packets are received
71 * from other slaves */
72 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
73 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
75 num_rx_total += num_rx_slave;
76 nb_pkts -= num_rx_slave;
84 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
87 struct bond_dev_private *internals;
89 /* Cast to structure containing the bonded device's port id and queue id */
90 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
92 internals = bd_rx_q->dev_private;
94 return rte_eth_rx_burst(internals->current_primary_port,
95 bd_rx_q->queue_id, bufs, nb_pkts);
99 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
101 const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
103 return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
104 (ethertype == ether_type_slow_be &&
105 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
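/*
 * Editor's note: ETHER_TYPE_SLOW (0x8809) is the IEEE 802.3 slow-protocols
 * ethertype; LACPDUs and marker PDUs are the two subtypes this driver
 * handles. The vlan_tci test rejects VLAN tagged frames, since
 * slow-protocol frames are never tagged.
 */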
108 /*****************************************************************************
109 * Flow director's setup for mode 4 optimization
112 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
113 .dst.addr_bytes = { 0 },
114 .src.addr_bytes = { 0 },
115 .type = RTE_BE16(ETHER_TYPE_SLOW),
118 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
119 .dst.addr_bytes = { 0 },
120 .src.addr_bytes = { 0 },
124 static struct rte_flow_item flow_item_8023ad[] = {
126 .type = RTE_FLOW_ITEM_TYPE_ETH,
127 .spec = &flow_item_eth_type_8023ad,
129 .mask = &flow_item_eth_mask_type_8023ad,
132 .type = RTE_FLOW_ITEM_TYPE_END,
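/*
 * Editor's note: the pattern above matches on the ethertype alone (the MAC
 * address bytes in both spec and mask are zeroed), so every slow-protocols
 * frame is steered to the dedicated LACP queue regardless of its MAC
 * addresses.
 */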
139 const struct rte_flow_attr flow_attr_8023ad = {
148 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
149 uint16_t slave_port) {
150 struct rte_eth_dev_info slave_info;
151 struct rte_flow_error error;
152 struct bond_dev_private *internals = (struct bond_dev_private *)
153 (bond_dev->data->dev_private);
155 const struct rte_flow_action_queue lacp_queue_conf = {
159 const struct rte_flow_action actions[] = {
161 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
162 .conf = &lacp_queue_conf
165 .type = RTE_FLOW_ACTION_TYPE_END,
169 int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
170 flow_item_8023ad, actions, &error);
172 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
173 __func__, error.message, slave_port,
174 internals->mode4.dedicated_queues.rx_qid);
178 rte_eth_dev_info_get(slave_port, &slave_info);
179 if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
180 slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
182 "%s: Slave %d capabilities doesn't allow to allocate additional queues",
183 __func__, slave_port);
191 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
192 struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
193 struct bond_dev_private *internals = (struct bond_dev_private *)
194 (bond_dev->data->dev_private);
195 struct rte_eth_dev_info bond_info;
198 /* Verify that all slaves in the bonding device support flow director */
199 if (internals->slave_count > 0) {
200 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
202 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
203 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
205 for (idx = 0; idx < internals->slave_count; idx++) {
206 if (bond_ethdev_8023ad_flow_verify(bond_dev,
207 internals->slaves[idx].port_id) != 0)
216 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
218 struct rte_flow_error error;
219 struct bond_dev_private *internals = (struct bond_dev_private *)
220 (bond_dev->data->dev_private);
222 struct rte_flow_action_queue lacp_queue_conf = {
223 .index = internals->mode4.dedicated_queues.rx_qid,
226 const struct rte_flow_action actions[] = {
228 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
229 .conf = &lacp_queue_conf
232 .type = RTE_FLOW_ACTION_TYPE_END,
236 internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
237 &flow_attr_8023ad, flow_item_8023ad, actions, &error);
238 if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
239 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
240 "(slave_port=%d queue_id=%d)",
241 error.message, slave_port,
242 internals->mode4.dedicated_queues.rx_qid);
250 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
253 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
254 struct bond_dev_private *internals = bd_rx_q->dev_private;
255 uint16_t num_rx_total = 0; /* Total number of received packets */
256 uint16_t slaves[RTE_MAX_ETHPORTS];
257 uint16_t slave_count;
261 /* Copy slave list to protect against slave up/down changes during tx
263 slave_count = internals->active_slave_count;
264 memcpy(slaves, internals->active_slaves,
265 sizeof(internals->active_slaves[0]) * slave_count);
267 for (i = 0, idx = internals->active_slave;
268 i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
269 idx = idx % slave_count;
271 /* Read packets from this slave */
272 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
273 &bufs[num_rx_total], nb_pkts - num_rx_total);
276 internals->active_slave = idx;
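/*
 * Editor's note: persisting the last polled slave index lets the next rx
 * burst resume where this one stopped, spreading the polling budget fairly
 * across the active slaves over successive calls.
 */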
282 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
285 struct bond_dev_private *internals;
286 struct bond_tx_queue *bd_tx_q;
288 uint16_t num_of_slaves;
289 uint16_t slaves[RTE_MAX_ETHPORTS];
290 /* positions in slaves, not ID */
291 uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
292 uint8_t distributing_count;
294 uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
295 uint16_t i, op_slave_idx;
297 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
299 /* Total amount of packets in slave_bufs */
300 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
301 /* Slow packets placed in each slave */
303 if (unlikely(nb_pkts == 0))
306 bd_tx_q = (struct bond_tx_queue *)queue;
307 internals = bd_tx_q->dev_private;
309 /* Copy slave list to protect against slave up/down changes during tx
311 num_of_slaves = internals->active_slave_count;
312 if (num_of_slaves < 1)
315 memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
318 distributing_count = 0;
319 for (i = 0; i < num_of_slaves; i++) {
320 struct port *port = &mode_8023ad_ports[slaves[i]];
321 if (ACTOR_STATE(port, DISTRIBUTING))
322 distributing_offsets[distributing_count++] = i;
325 if (likely(distributing_count > 0)) {
326 /* Populate slaves mbuf with the packets which are to be sent */
327 for (i = 0; i < nb_pkts; i++) {
328 /* Select output slave using hash based on xmit policy */
329 op_slave_idx = internals->xmit_hash(bufs[i],
332 /* Populate slave mbuf arrays with mbufs for that slave.
333 * Use only slaves that are currently distributing.
335 uint8_t slave_offset =
336 distributing_offsets[op_slave_idx];
337 slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
339 slave_nb_pkts[slave_offset]++;
343 /* Send packet burst on each slave device */
344 for (i = 0; i < num_of_slaves; i++) {
345 if (slave_nb_pkts[i] == 0)
348 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
349 slave_bufs[i], slave_nb_pkts[i]);
351 num_tx_total += num_tx_slave;
352 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
354 /* If tx burst fails move packets to end of bufs */
355 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
356 uint16_t j = nb_pkts - num_tx_fail_total;
357 for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
359 bufs[j] = slave_bufs[i][num_tx_slave];
368 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
371 /* Cast to structure containing the bonded device's port id and queue id */
372 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
373 struct bond_dev_private *internals = bd_rx_q->dev_private;
374 struct ether_addr bond_mac;
376 struct ether_hdr *hdr;
378 const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
379 uint16_t num_rx_total = 0; /* Total number of received packets */
380 uint16_t slaves[RTE_MAX_ETHPORTS];
381 uint16_t slave_count, idx;
383 uint8_t collecting; /* current slave collecting status */
384 const uint8_t promisc = internals->promiscuous_en;
388 rte_eth_macaddr_get(internals->port_id, &bond_mac);
389 /* Copy slave list to protect against slave up/down changes during tx
391 slave_count = internals->active_slave_count;
392 memcpy(slaves, internals->active_slaves,
393 sizeof(internals->active_slaves[0]) * slave_count);
395 idx = internals->active_slave;
396 if (idx >= slave_count) {
397 internals->active_slave = 0;
400 for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
402 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
405 /* Read packets from this slave */
406 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
407 &bufs[num_rx_total], nb_pkts - num_rx_total);
409 for (k = j; k < 2 && k < num_rx_total; k++)
410 rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
412 /* Handle slow protocol packets. */
413 while (j < num_rx_total) {
415 /* A known packet type beyond plain L2 cannot be a slow-protocol frame; skip the slow-protocol handling below */
416 if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
421 if (j + 3 < num_rx_total)
422 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
424 hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
425 subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
427 /* Remove packet from array if it is a slow packet or the slave is not
428 * in collecting state or bonding interface is not in promiscuous
429 * mode and packet address does not match. */
430 if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
431 !collecting || (!promisc &&
432 !is_multicast_ether_addr(&hdr->d_addr) &&
433 !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
435 if (hdr->ether_type == ether_type_slow_be) {
436 bond_mode_8023ad_handle_slow_pkt(
437 internals, slaves[idx], bufs[j]);
439 rte_pktmbuf_free(bufs[j]);
441 /* Packet is managed by mode 4 or dropped, shift the array */
443 if (j < num_rx_total) {
444 memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
450 if (unlikely(++idx == slave_count))
454 internals->active_slave = idx;
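/*
 * Editor's note, summarizing the filtering above: a frame is removed from
 * the burst when it is a slow-protocol PDU (handed to the mode 4 state
 * machine), when its slave is not COLLECTING, or when the bond is not
 * promiscuous and the destination MAC is neither multicast nor the bond's
 * own address. All other frames are passed up unchanged.
 */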
458 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
459 uint32_t burstnumberRX;
460 uint32_t burstnumberTX;
462 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
465 arp_op_name(uint16_t arp_op, char *buf)
469 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
472 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
474 case ARP_OP_REVREQUEST:
475 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
476 "Reverse ARP Request");
478 case ARP_OP_REVREPLY:
479 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
480 "Reverse ARP Reply");
482 case ARP_OP_INVREQUEST:
483 snprintf(buf, sizeof("Peer Identify Request"), "%s",
484 "Peer Identify Request");
486 case ARP_OP_INVREPLY:
487 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
488 "Peer Identify Reply");
493 snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
497 #define MaxIPv4String 16
499 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
503 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
504 snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
505 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
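/*
 * Editor's note, illustrative usage, assuming the IPv4() host-order
 * address builder from rte_ip.h:
 *
 *   char buf[MaxIPv4String];
 *   ipv4_addr_to_dot(rte_cpu_to_be_32(IPv4(10, 0, 0, 1)), buf, sizeof(buf));
 *
 * buf then contains "10.0.0.1".
 */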
509 #define MAX_CLIENTS_NUMBER 128
510 uint8_t active_clients;
511 struct client_stats_t {
514 uint32_t ipv4_rx_packets;
515 uint32_t ipv4_tx_packets;
517 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
520 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
524 for (; i < MAX_CLIENTS_NUMBER; i++) {
525 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
526 /* Just update RX packets number for this client */
527 if (TXorRXindicator == &burstnumberRX)
528 client_stats[i].ipv4_rx_packets++;
530 client_stats[i].ipv4_tx_packets++;
534 /* We have a new client. Insert it into the table and update its stats */
535 if (TXorRXindicator == &burstnumberRX)
536 client_stats[active_clients].ipv4_rx_packets++;
538 client_stats[active_clients].ipv4_tx_packets++;
539 client_stats[active_clients].ipv4_addr = addr;
540 client_stats[active_clients].port = port;
545 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
546 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
547 RTE_LOG(DEBUG, PMD, \
550 "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
552 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
558 eth_h->s_addr.addr_bytes[0], \
559 eth_h->s_addr.addr_bytes[1], \
560 eth_h->s_addr.addr_bytes[2], \
561 eth_h->s_addr.addr_bytes[3], \
562 eth_h->s_addr.addr_bytes[4], \
563 eth_h->s_addr.addr_bytes[5], \
565 eth_h->d_addr.addr_bytes[0], \
566 eth_h->d_addr.addr_bytes[1], \
567 eth_h->d_addr.addr_bytes[2], \
568 eth_h->d_addr.addr_bytes[3], \
569 eth_h->d_addr.addr_bytes[4], \
570 eth_h->d_addr.addr_bytes[5], \
577 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
578 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
580 struct ipv4_hdr *ipv4_h;
581 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
582 struct arp_hdr *arp_h;
589 uint16_t ether_type = eth_h->ether_type;
590 uint16_t offset = get_vlan_offset(eth_h, &ether_type);
592 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
593 snprintf(buf, 16, "%s", info);
596 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
597 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
598 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
599 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
600 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
601 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
603 update_client_stats(ipv4_h->src_addr, port, burstnumber);
605 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
606 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
607 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
608 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
609 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
610 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
611 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
618 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
620 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
621 struct bond_dev_private *internals = bd_tx_q->dev_private;
622 struct ether_hdr *eth_h;
623 uint16_t ether_type, offset;
624 uint16_t nb_recv_pkts;
627 nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
629 for (i = 0; i < nb_recv_pkts; i++) {
630 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
631 ether_type = eth_h->ether_type;
632 offset = get_vlan_offset(eth_h, &ether_type);
634 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
635 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
636 mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
638 bond_mode_alb_arp_recv(eth_h, offset, internals);
640 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
641 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
642 mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
650 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
653 struct bond_dev_private *internals;
654 struct bond_tx_queue *bd_tx_q;
656 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
657 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
659 uint16_t num_of_slaves;
660 uint16_t slaves[RTE_MAX_ETHPORTS];
662 uint16_t num_tx_total = 0, num_tx_slave;
664 static int slave_idx = 0;
665 int i, cslave_idx = 0, tx_fail_total = 0;
667 bd_tx_q = (struct bond_tx_queue *)queue;
668 internals = bd_tx_q->dev_private;
670 /* Copy slave list to protect against slave up/down changes during tx
672 num_of_slaves = internals->active_slave_count;
673 memcpy(slaves, internals->active_slaves,
674 sizeof(internals->active_slaves[0]) * num_of_slaves);
676 if (num_of_slaves < 1)
679 /* Populate each slave's mbuf array with the packets to be sent on it */
680 for (i = 0; i < nb_pkts; i++) {
681 cslave_idx = (slave_idx + i) % num_of_slaves;
682 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
685 /* increment current slave index so the next call to tx burst starts on the
687 slave_idx = ++cslave_idx;
689 /* Send packet burst on each slave device */
690 for (i = 0; i < num_of_slaves; i++) {
691 if (slave_nb_pkts[i] > 0) {
692 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
693 slave_bufs[i], slave_nb_pkts[i]);
695 /* if tx burst fails move packets to end of bufs */
696 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
697 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
699 tx_fail_total += tx_fail_slave;
701 memcpy(&bufs[nb_pkts - tx_fail_total],
702 &slave_bufs[i][num_tx_slave],
703 tx_fail_slave * sizeof(bufs[0]));
705 num_tx_total += num_tx_slave;
713 bond_ethdev_tx_burst_active_backup(void *queue,
714 struct rte_mbuf **bufs, uint16_t nb_pkts)
716 struct bond_dev_private *internals;
717 struct bond_tx_queue *bd_tx_q;
719 bd_tx_q = (struct bond_tx_queue *)queue;
720 internals = bd_tx_q->dev_private;
722 if (internals->active_slave_count < 1)
725 return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
729 static inline uint16_t
730 ether_hash(struct ether_hdr *eth_hdr)
732 unaligned_uint16_t *word_src_addr =
733 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
734 unaligned_uint16_t *word_dst_addr =
735 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
737 return (word_src_addr[0] ^ word_dst_addr[0]) ^
738 (word_src_addr[1] ^ word_dst_addr[1]) ^
739 (word_src_addr[2] ^ word_dst_addr[2]);
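/*
 * Editor's note: each MAC address is read as three unaligned 16-bit words
 * and the two addresses are XOR-folded, so e.g. equal source and
 * destination MACs hash to 0. XOR is symmetric, so both directions of a
 * conversation hash to the same value.
 */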
742 static inline uint32_t
743 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
745 return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
748 static inline uint32_t
749 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
751 unaligned_uint32_t *word_src_addr =
752 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
753 unaligned_uint32_t *word_dst_addr =
754 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
756 return (word_src_addr[0] ^ word_dst_addr[0]) ^
757 (word_src_addr[1] ^ word_dst_addr[1]) ^
758 (word_src_addr[2] ^ word_dst_addr[2]) ^
759 (word_src_addr[3] ^ word_dst_addr[3]);
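/*
 * Editor's note: a 16-byte IPv6 address is read as four unaligned 32-bit
 * words; source and destination are XOR-folded into one 32-bit value, so
 * the result is again symmetric in the two addresses.
 */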
763 xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
765 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
767 uint32_t hash = ether_hash(eth_hdr);
769 return (hash ^ (hash >> 8)) % slave_count;
773 xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
775 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
776 uint16_t proto = eth_hdr->ether_type;
777 size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
778 uint32_t hash, l3hash = 0;
780 hash = ether_hash(eth_hdr);
782 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
783 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
784 ((char *)(eth_hdr + 1) + vlan_offset);
785 l3hash = ipv4_hash(ipv4_hdr);
787 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
788 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
789 ((char *)(eth_hdr + 1) + vlan_offset);
790 l3hash = ipv6_hash(ipv6_hdr);
793 hash = hash ^ l3hash;
797 return hash % slave_count;
801 xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
803 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
804 uint16_t proto = eth_hdr->ether_type;
805 size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
807 struct udp_hdr *udp_hdr = NULL;
808 struct tcp_hdr *tcp_hdr = NULL;
809 uint32_t hash, l3hash = 0, l4hash = 0;
811 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
812 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
813 ((char *)(eth_hdr + 1) + vlan_offset);
814 size_t ip_hdr_offset;
816 l3hash = ipv4_hash(ipv4_hdr);
818 /* there is no L4 header in a fragmented packet */
819 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
820 ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
823 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
824 tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
826 l4hash = HASH_L4_PORTS(tcp_hdr);
827 } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
828 udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
830 l4hash = HASH_L4_PORTS(udp_hdr);
833 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
834 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
835 ((char *)(eth_hdr + 1) + vlan_offset);
836 l3hash = ipv6_hash(ipv6_hdr);
838 if (ipv6_hdr->proto == IPPROTO_TCP) {
839 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
840 l4hash = HASH_L4_PORTS(tcp_hdr);
841 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
842 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
843 l4hash = HASH_L4_PORTS(udp_hdr);
847 hash = l3hash ^ l4hash;
851 return hash % slave_count;
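/*
 * Editor's note, a worked example of the layer 3+4 policy: for a TCP flow
 * 10.0.0.1:1234 -> 10.0.0.2:80,
 *   l3hash = src_addr ^ dst_addr      (ipv4_hash, network byte order)
 *   l4hash = src_port ^ dst_port      (HASH_L4_PORTS)
 * and the slave index is derived from (l3hash ^ l4hash) % slave_count.
 * XOR is symmetric, so both directions of the flow pick the same slave.
 */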
855 uint64_t bwg_left_int;
856 uint64_t bwg_left_remainder;
861 bond_tlb_activate_slave(struct bond_dev_private *internals) {
864 for (i = 0; i < internals->active_slave_count; i++) {
865 tlb_last_obytets[internals->active_slaves[i]] = 0;
870 bandwidth_cmp(const void *a, const void *b)
872 const struct bwg_slave *bwg_a = a;
873 const struct bwg_slave *bwg_b = b;
874 int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
875 int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
876 (int64_t)bwg_a->bwg_left_remainder;
890 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
891 struct bwg_slave *bwg_slave)
893 struct rte_eth_link link_status;
895 rte_eth_link_get_nowait(port_id, &link_status);
896 uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
899 link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
900 bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
901 bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
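/*
 * Editor's note on units: link_speed is in Mbps, so link_bwg starts out as
 * bytes per second (speed * 10^6 / 8); for a 10G slave that is 1.25e9.
 * After scaling by the elapsed (update_idx + 1) * REORDER_PERIOD_MS
 * milliseconds, (link_bwg - 1000 * load) / link_bwg is the unused share of
 * the link. With integer division the quotient is almost always 0 or 1,
 * so the ordering in bandwidth_cmp is effectively decided by the
 * remainder.
 */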
905 bond_ethdev_update_tlb_slave_cb(void *arg)
907 struct bond_dev_private *internals = arg;
908 struct rte_eth_stats slave_stats;
909 struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
913 uint8_t update_stats = 0;
916 internals->slave_update_idx++;
919 if (internals->slave_update_idx >= REORDER_PERIOD_MS)
922 for (i = 0; i < internals->active_slave_count; i++) {
923 slave_id = internals->active_slaves[i];
924 rte_eth_stats_get(slave_id, &slave_stats);
925 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
926 bandwidth_left(slave_id, tx_bytes,
927 internals->slave_update_idx, &bwg_array[i]);
928 bwg_array[i].slave = slave_id;
931 tlb_last_obytets[slave_id] = slave_stats.obytes;
935 if (update_stats == 1)
936 internals->slave_update_idx = 0;
939 qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
940 for (i = 0; i < slave_count; i++)
941 internals->tlb_slaves_order[i] = bwg_array[i].slave;
943 rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
944 (struct bond_dev_private *)internals);
948 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
950 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
951 struct bond_dev_private *internals = bd_tx_q->dev_private;
953 struct rte_eth_dev *primary_port =
954 &rte_eth_devices[internals->primary_port];
955 uint16_t num_tx_total = 0;
958 uint16_t num_of_slaves = internals->active_slave_count;
959 uint16_t slaves[RTE_MAX_ETHPORTS];
961 struct ether_hdr *ether_hdr;
962 struct ether_addr primary_slave_addr;
963 struct ether_addr active_slave_addr;
965 if (num_of_slaves < 1)
968 memcpy(slaves, internals->tlb_slaves_order,
969 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
972 ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
975 for (i = 0; i < 3; i++)
976 rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
979 for (i = 0; i < num_of_slaves; i++) {
980 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
981 for (j = num_tx_total; j < nb_pkts; j++) {
983 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
985 ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
986 if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
987 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
988 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
989 mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
993 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
994 bufs + num_tx_total, nb_pkts - num_tx_total);
996 if (num_tx_total == nb_pkts)
1000 return num_tx_total;
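/*
 * Editor's note on the function above: TLB mode rewrites the source MAC of
 * frames still carrying the primary slave's address to that of the slave
 * actually transmitting them, so peers learn per-slave MACs and return
 * traffic spreads itself without any receive-side logic here. Slaves are
 * tried in tlb_slaves_order, i.e. most spare bandwidth first.
 */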
1004 bond_tlb_disable(struct bond_dev_private *internals)
1006 rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1010 bond_tlb_enable(struct bond_dev_private *internals)
1012 bond_ethdev_update_tlb_slave_cb(internals);
1016 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1018 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1019 struct bond_dev_private *internals = bd_tx_q->dev_private;
1021 struct ether_hdr *eth_h;
1022 uint16_t ether_type, offset;
1024 struct client_data *client_info;
1027 * We create transmit buffers for every slave and one additional to send
1028 * through tlb. In the worst case every packet will be sent on one port.
1030 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1031 uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1034 * We create separate transmit buffers for update packets as they won't
1035 * be counted in num_tx_total.
1037 struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1038 uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1040 struct rte_mbuf *upd_pkt;
1043 uint16_t num_send, num_not_send = 0;
1044 uint16_t num_tx_total = 0;
1049 /* Search tx buffer for ARP packets and forward them to alb */
1050 for (i = 0; i < nb_pkts; i++) {
1051 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1052 ether_type = eth_h->ether_type;
1053 offset = get_vlan_offset(eth_h, &ether_type);
1055 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1056 slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1058 /* Change src mac in eth header */
1059 rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1061 /* Add packet to slave tx buffer */
1062 slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1063 slave_bufs_pkts[slave_idx]++;
1065 /* If packet is not ARP, send it with TLB policy */
1066 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1068 slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1072 /* Update connected client ARP tables */
1073 if (internals->mode6.ntt) {
1074 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1075 client_info = &internals->mode6.client_table[i];
1077 if (client_info->in_use) {
1078 /* Allocate new packet to send ARP update on current slave */
1079 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1080 if (upd_pkt == NULL) {
1081 RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
1084 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1085 + client_info->vlan_count * sizeof(struct vlan_hdr);
1086 upd_pkt->data_len = pkt_size;
1087 upd_pkt->pkt_len = pkt_size;
1089 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1092 /* Add packet to update tx buffer */
1093 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1094 update_bufs_pkts[slave_idx]++;
1097 internals->mode6.ntt = 0;
1100 /* Send ARP packets on proper slaves */
1101 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1102 if (slave_bufs_pkts[i] > 0) {
1103 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1104 slave_bufs[i], slave_bufs_pkts[i]);
1105 for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1106 bufs[nb_pkts - 1 - num_not_send - j] =
1107 slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
1110 num_tx_total += num_send;
1111 num_not_send += slave_bufs_pkts[i] - num_send;
1113 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1114 /* Print TX stats including update packets */
1115 for (j = 0; j < slave_bufs_pkts[i]; j++) {
1116 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1117 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1123 /* Send update packets on proper slaves */
1124 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1125 if (update_bufs_pkts[i] > 0) {
1126 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1127 update_bufs_pkts[i]);
1128 for (j = num_send; j < update_bufs_pkts[i]; j++) {
1129 rte_pktmbuf_free(update_bufs[i][j]);
1131 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1132 for (j = 0; j < update_bufs_pkts[i]; j++) {
1133 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1134 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1140 /* Send non-ARP packets using tlb policy */
1141 if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1142 num_send = bond_ethdev_tx_burst_tlb(queue,
1143 slave_bufs[RTE_MAX_ETHPORTS],
1144 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1146 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
1147 bufs[nb_pkts - 1 - num_not_send - j] =
1148 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
1151 num_tx_total += num_send;
1154 return num_tx_total;
1158 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1161 struct bond_dev_private *internals;
1162 struct bond_tx_queue *bd_tx_q;
1164 uint16_t num_of_slaves;
1165 uint16_t slaves[RTE_MAX_ETHPORTS];
1167 uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
1171 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
1172 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1174 bd_tx_q = (struct bond_tx_queue *)queue;
1175 internals = bd_tx_q->dev_private;
1177 /* Copy slave list to protect against slave up/down changes during tx
1179 num_of_slaves = internals->active_slave_count;
1180 memcpy(slaves, internals->active_slaves,
1181 sizeof(internals->active_slaves[0]) * num_of_slaves);
1183 if (num_of_slaves < 1)
1184 return num_tx_total;
1186 /* Populate slaves mbuf with the packets which are to be sent on it */
1187 for (i = 0; i < nb_pkts; i++) {
1188 /* Select output slave using hash based on xmit policy */
1189 op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
1191 /* Populate slave mbuf arrays with mbufs for that slave */
1192 slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
1195 /* Send packet burst on each slave device */
1196 for (i = 0; i < num_of_slaves; i++) {
1197 if (slave_nb_pkts[i] > 0) {
1198 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1199 slave_bufs[i], slave_nb_pkts[i]);
1201 /* if tx burst fails move packets to end of bufs */
1202 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1203 int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
1205 tx_fail_total += slave_tx_fail_count;
1206 memcpy(&bufs[nb_pkts - tx_fail_total],
1207 &slave_bufs[i][num_tx_slave],
1208 slave_tx_fail_count * sizeof(bufs[0]));
1211 num_tx_total += num_tx_slave;
1215 return num_tx_total;
1219 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1222 struct bond_dev_private *internals;
1223 struct bond_tx_queue *bd_tx_q;
1225 uint16_t num_of_slaves;
1226 uint16_t slaves[RTE_MAX_ETHPORTS];
1227 /* positions in slaves, not ID */
1228 uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
1229 uint8_t distributing_count;
1231 uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
1232 uint16_t i, op_slave_idx;
1234 /* Allocate additional packets in case of 8023AD mode. */
1235 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
1237 /* Total amount of packets in slave_bufs */
1238 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1239 /* Slow packets placed in each slave */
1240 uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1242 bd_tx_q = (struct bond_tx_queue *)queue;
1243 internals = bd_tx_q->dev_private;
1245 /* Copy slave list to protect against slave up/down changes during tx
1247 num_of_slaves = internals->active_slave_count;
1248 if (num_of_slaves < 1)
1249 return num_tx_total;
1251 memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
1253 distributing_count = 0;
1254 for (i = 0; i < num_of_slaves; i++) {
1255 struct port *port = &mode_8023ad_ports[slaves[i]];
1257 if (ACTOR_STATE(port, DISTRIBUTING))
1258 distributing_offsets[distributing_count++] = i;
1261 if (likely(distributing_count > 0)) {
1262 /* Populate each slave's mbuf array with the packets to be sent on it */
1263 for (i = 0; i < nb_pkts; i++) {
1264 /* Select output slave using hash based on xmit policy */
1265 op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
1267 /* Populate slave mbuf arrays with mbufs for that slave. Use only
1268 * slaves that are currently distributing. */
1269 uint8_t slave_offset = distributing_offsets[op_slave_idx];
1270 slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
1271 slave_nb_pkts[slave_offset]++;
1275 /* Send packet burst on each slave device */
1276 for (i = 0; i < num_of_slaves; i++) {
1277 if (slave_nb_pkts[i] == 0)
1280 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1281 slave_bufs[i], slave_nb_pkts[i]);
1283 /* If tx burst fails drop slow packets */
1284 for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
1285 rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
1287 num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
1288 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
1290 /* If tx burst fails move packets to end of bufs */
1291 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1292 uint16_t j = nb_pkts - num_tx_fail_total;
1293 for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
1294 bufs[j] = slave_bufs[i][num_tx_slave];
1298 /* Check for LACP control packets and send if available */
1299 for (i = 0; i < num_of_slaves; i++) {
1300 struct port *port = &mode_8023ad_ports[slaves[i]];
1301 struct rte_mbuf *ctrl_pkt = NULL;
1303 int pkt_avail = rte_ring_dequeue(port->tx_ring,
1304 (void **)&ctrl_pkt);
1306 if (unlikely(pkt_avail == 0)) {
1307 num_tx_slave = rte_eth_tx_burst(slaves[i],
1308 bd_tx_q->queue_id, &ctrl_pkt, 1);
1311 * re-enqueue LAG control plane packets to buffering
1312 * ring if transmission fails so the packet isn't lost.
1314 if (num_tx_slave != 1)
1315 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1319 return num_tx_total;
1323 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1326 struct bond_dev_private *internals;
1327 struct bond_tx_queue *bd_tx_q;
1329 uint8_t tx_failed_flag = 0, num_of_slaves;
1330 uint16_t slaves[RTE_MAX_ETHPORTS];
1332 uint16_t max_nb_of_tx_pkts = 0;
1334 int slave_tx_total[RTE_MAX_ETHPORTS];
1335 int i, most_successful_tx_slave = -1;
1337 bd_tx_q = (struct bond_tx_queue *)queue;
1338 internals = bd_tx_q->dev_private;
1340 /* Copy slave list to protect against slave up/down changes during tx
1342 num_of_slaves = internals->active_slave_count;
1343 memcpy(slaves, internals->active_slaves,
1344 sizeof(internals->active_slaves[0]) * num_of_slaves);
1346 if (num_of_slaves < 1)
1349 /* Increment reference count on mbufs */
1350 for (i = 0; i < nb_pkts; i++)
1351 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
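/*
 * Editor's note: each mbuf is handed to all num_of_slaves slaves, so its
 * reference count is raised by (num_of_slaves - 1) up front; each slave's
 * tx path then releases its own reference on completion.
 */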
1353 /* Transmit burst on each active slave */
1354 for (i = 0; i < num_of_slaves; i++) {
1355 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1358 if (unlikely(slave_tx_total[i] < nb_pkts))
1361 /* record the value and slave index for the slave which transmits the
1362 * maximum number of packets */
1363 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1364 max_nb_of_tx_pkts = slave_tx_total[i];
1365 most_successful_tx_slave = i;
1369 /* if slaves fail to transmit packets from burst, the calling application
1370 * is not expected to know about multiple references to packets so we must
1371 * handle failures of all packets except those of the most successful slave
1373 if (unlikely(tx_failed_flag))
1374 for (i = 0; i < num_of_slaves; i++)
1375 if (i != most_successful_tx_slave)
1376 while (slave_tx_total[i] < nb_pkts)
1377 rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1379 return max_nb_of_tx_pkts;
1383 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1385 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1387 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1389 * If in mode 4 then save the link properties of the first
1390 * slave, all subsequent slaves must match these properties
1392 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1394 bond_link->link_autoneg = slave_link->link_autoneg;
1395 bond_link->link_duplex = slave_link->link_duplex;
1396 bond_link->link_speed = slave_link->link_speed;
1399 * In any other mode the link properties are set to default
1400 * values of AUTONEG/DUPLEX
1402 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1403 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1408 link_properties_valid(struct rte_eth_dev *ethdev,
1409 struct rte_eth_link *slave_link)
1411 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1413 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1414 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1416 if (bond_link->link_duplex != slave_link->link_duplex ||
1417 bond_link->link_autoneg != slave_link->link_autoneg ||
1418 bond_link->link_speed != slave_link->link_speed)
1426 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1428 struct ether_addr *mac_addr;
1430 if (eth_dev == NULL) {
1431 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1435 if (dst_mac_addr == NULL) {
1436 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1440 mac_addr = eth_dev->data->mac_addrs;
1442 ether_addr_copy(mac_addr, dst_mac_addr);
1447 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1449 struct ether_addr *mac_addr;
1451 if (eth_dev == NULL) {
1452 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1456 if (new_mac_addr == NULL) {
1457 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1461 mac_addr = eth_dev->data->mac_addrs;
1463 /* If new MAC is different from current MAC then update */
1464 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1465 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1471 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1473 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1476 /* Update slave devices MAC addresses */
1477 if (internals->slave_count < 1)
1480 switch (internals->mode) {
1481 case BONDING_MODE_ROUND_ROBIN:
1482 case BONDING_MODE_BALANCE:
1483 case BONDING_MODE_BROADCAST:
1484 for (i = 0; i < internals->slave_count; i++) {
1485 if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
1486 bonded_eth_dev->data->mac_addrs)) {
1487 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1488 internals->slaves[i].port_id);
1493 case BONDING_MODE_8023AD:
1494 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1496 case BONDING_MODE_ACTIVE_BACKUP:
1497 case BONDING_MODE_TLB:
1498 case BONDING_MODE_ALB:
1500 for (i = 0; i < internals->slave_count; i++) {
1501 if (internals->slaves[i].port_id ==
1502 internals->current_primary_port) {
1503 if (mac_address_set(&rte_eth_devices[internals->primary_port],
1504 bonded_eth_dev->data->mac_addrs)) {
1505 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1506 internals->current_primary_port);
1510 if (mac_address_set(
1511 &rte_eth_devices[internals->slaves[i].port_id],
1512 &internals->slaves[i].persisted_mac_addr)) {
1513 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1514 internals->slaves[i].port_id);
1525 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1527 struct bond_dev_private *internals;
1529 internals = eth_dev->data->dev_private;
1532 case BONDING_MODE_ROUND_ROBIN:
1533 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1534 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1536 case BONDING_MODE_ACTIVE_BACKUP:
1537 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1538 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1540 case BONDING_MODE_BALANCE:
1541 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1542 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1544 case BONDING_MODE_BROADCAST:
1545 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1546 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1548 case BONDING_MODE_8023AD:
1549 if (bond_mode_8023ad_enable(eth_dev) != 0)
1552 if (internals->mode4.dedicated_queues.enabled == 0) {
1553 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1554 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1555 RTE_LOG(WARNING, PMD,
1556 "Using mode 4, it is necessary to do TX burst "
1557 "and RX burst at least every 100ms.\n");
1559 /* Use flow director's optimization */
1560 eth_dev->rx_pkt_burst =
1561 bond_ethdev_rx_burst_8023ad_fast_queue;
1562 eth_dev->tx_pkt_burst =
1563 bond_ethdev_tx_burst_8023ad_fast_queue;
1566 case BONDING_MODE_TLB:
1567 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1568 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1570 case BONDING_MODE_ALB:
1571 if (bond_mode_alb_enable(eth_dev) != 0)
1574 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1575 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1581 internals->mode = mode;
1588 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1589 struct rte_eth_dev *slave_eth_dev)
1592 struct bond_dev_private *internals = (struct bond_dev_private *)
1593 bonded_eth_dev->data->dev_private;
1594 struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1596 if (port->slow_pool == NULL) {
1598 int slave_id = slave_eth_dev->data->port_id;
1600 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1602 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1603 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1604 slave_eth_dev->data->numa_node);
1606 /* Any memory allocation failure in initialization is critical because
1607 * resources can't be freed, so reinitialization is impossible. */
1608 if (port->slow_pool == NULL) {
1609 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1610 slave_id, mem_name, rte_strerror(rte_errno));
1614 if (internals->mode4.dedicated_queues.enabled == 1) {
1615 /* Configure slow Rx queue */
1617 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1618 internals->mode4.dedicated_queues.rx_qid, 128,
1619 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1620 NULL, port->slow_pool);
1623 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1624 slave_eth_dev->data->port_id,
1625 internals->mode4.dedicated_queues.rx_qid,
1630 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1631 internals->mode4.dedicated_queues.tx_qid, 512,
1632 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1636 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1637 slave_eth_dev->data->port_id,
1638 internals->mode4.dedicated_queues.tx_qid,
1647 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1648 struct rte_eth_dev *slave_eth_dev)
1650 struct bond_rx_queue *bd_rx_q;
1651 struct bond_tx_queue *bd_tx_q;
1652 uint16_t nb_rx_queues;
1653 uint16_t nb_tx_queues;
1657 struct rte_flow_error flow_error;
1659 struct bond_dev_private *internals = (struct bond_dev_private *)
1660 bonded_eth_dev->data->dev_private;
1663 rte_eth_dev_stop(slave_eth_dev->data->port_id);
1665 /* Enable interrupts on slave device if supported */
1666 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1667 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1669 /* If RSS is enabled for bonding, try to enable it for slaves */
1670 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1671 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1673 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1674 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1675 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1676 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1678 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1681 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1682 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1683 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1684 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1687 slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1688 bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1690 nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1691 nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1693 if (internals->mode == BONDING_MODE_8023AD) {
1694 if (internals->mode4.dedicated_queues.enabled == 1) {
1700 /* Configure device */
1701 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1702 nb_rx_queues, nb_tx_queues,
1703 &(slave_eth_dev->data->dev_conf));
1705 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1706 slave_eth_dev->data->port_id, errval);
1710 /* Setup Rx Queues */
1711 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1712 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1714 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1715 bd_rx_q->nb_rx_desc,
1716 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1717 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1720 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1721 slave_eth_dev->data->port_id, q_id, errval);
1726 /* Setup Tx Queues */
1727 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1728 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1730 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1731 bd_tx_q->nb_tx_desc,
1732 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1736 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1737 slave_eth_dev->data->port_id, q_id, errval);
1742 if (internals->mode == BONDING_MODE_8023AD &&
1743 internals->mode4.dedicated_queues.enabled == 1) {
1744 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
1748 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1749 slave_eth_dev->data->port_id) != 0) {
1751 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1752 slave_eth_dev->data->port_id, q_id, errval);
1756 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1757 rte_flow_destroy(slave_eth_dev->data->port_id,
1758 internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1761 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1762 slave_eth_dev->data->port_id);
1766 errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1768 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1769 slave_eth_dev->data->port_id, errval);
1773 /* If RSS is enabled for bonding, synchronize RETA */
1774 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1776 struct bond_dev_private *internals;
1778 internals = bonded_eth_dev->data->dev_private;
1780 for (i = 0; i < internals->slave_count; i++) {
1781 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1782 errval = rte_eth_dev_rss_reta_update(
1783 slave_eth_dev->data->port_id,
1784 &internals->reta_conf[0],
1785 internals->slaves[i].reta_size);
1787 RTE_LOG(WARNING, PMD,
1788 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1789 " RSS Configuration for bonding may be inconsistent.\n",
1790 slave_eth_dev->data->port_id, errval);
1797 /* If lsc interrupt is set, check initial slave's link status */
1798 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1799 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1800 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1801 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1809 slave_remove(struct bond_dev_private *internals,
1810 struct rte_eth_dev *slave_eth_dev)
1814 for (i = 0; i < internals->slave_count; i++)
1815 if (internals->slaves[i].port_id ==
1816 slave_eth_dev->data->port_id)
1819 if (i < (internals->slave_count - 1))
1820 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1821 sizeof(internals->slaves[0]) *
1822 (internals->slave_count - i - 1));
1824 internals->slave_count--;
1826 /* force reconfiguration of slave interfaces */
1827 _rte_eth_dev_reset(slave_eth_dev);
1831 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1834 slave_add(struct bond_dev_private *internals,
1835 struct rte_eth_dev *slave_eth_dev)
1837 struct bond_slave_details *slave_details =
1838 &internals->slaves[internals->slave_count];
1840 slave_details->port_id = slave_eth_dev->data->port_id;
1841 slave_details->last_link_status = 0;
1843 /* Mark slave devices that don't support interrupts so we can
1844 * compensate when we start the bond
1846 if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1847 slave_details->link_status_poll_enabled = 1;
1850 slave_details->link_status_wait_to_complete = 0;
1851 /* persist the slave's MAC address so it can be restored later */
1852 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1853 sizeof(struct ether_addr));
1857 bond_ethdev_primary_set(struct bond_dev_private *internals,
1858 uint16_t slave_port_id)
1862 if (internals->active_slave_count < 1)
1863 internals->current_primary_port = slave_port_id;
1865 /* Search bonded device slave ports for new proposed primary port */
1866 for (i = 0; i < internals->active_slave_count; i++) {
1867 if (internals->active_slaves[i] == slave_port_id)
1868 internals->current_primary_port = slave_port_id;
1873 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1876 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1878 struct bond_dev_private *internals;
1881 /* slave eth dev will be started by bonded device */
1882 if (check_for_bonded_ethdev(eth_dev)) {
1883 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1884 eth_dev->data->port_id);
1888 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1889 eth_dev->data->dev_started = 1;
1891 internals = eth_dev->data->dev_private;
1893 if (internals->slave_count == 0) {
1894 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1898 if (internals->user_defined_mac == 0) {
1899 struct ether_addr *new_mac_addr = NULL;
1901 for (i = 0; i < internals->slave_count; i++)
1902 if (internals->slaves[i].port_id == internals->primary_port)
1903 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1905 if (new_mac_addr == NULL)
1908 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1909 RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1910 eth_dev->data->port_id);
1915 /* Update all slave devices' MAC addresses */
1916 if (mac_address_slaves_update(eth_dev) != 0)
1919 /* If bonded device is configured in promiscuous mode then re-apply config */
1920 if (internals->promiscuous_en)
1921 bond_ethdev_promiscuous_enable(eth_dev);
1923 if (internals->mode == BONDING_MODE_8023AD) {
1924 if (internals->mode4.dedicated_queues.enabled == 1) {
1925 internals->mode4.dedicated_queues.rx_qid =
1926 eth_dev->data->nb_rx_queues;
1927 internals->mode4.dedicated_queues.tx_qid =
1928 eth_dev->data->nb_tx_queues;
1933 /* Reconfigure each slave device if starting bonded device */
1934 for (i = 0; i < internals->slave_count; i++) {
1935 struct rte_eth_dev *slave_ethdev =
1936 &(rte_eth_devices[internals->slaves[i].port_id]);
1937 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1939 "bonded port (%d) failed to reconfigure slave device (%d)",
1940 eth_dev->data->port_id,
1941 internals->slaves[i].port_id);
1944 /* We will need to poll for link status if any slave doesn't
1945 * support interrupts
1947 if (internals->slaves[i].link_status_poll_enabled)
1948 internals->link_status_polling_enabled = 1;
1950 /* start polling if needed */
1951 if (internals->link_status_polling_enabled) {
1953 internals->link_status_polling_interval_ms * 1000,
1954 bond_ethdev_slave_link_status_change_monitor,
1955 (void *)&rte_eth_devices[internals->port_id]);
1958 if (internals->user_defined_primary_port)
1959 bond_ethdev_primary_set(internals, internals->primary_port);
1961 if (internals->mode == BONDING_MODE_8023AD)
1962 bond_mode_8023ad_start(eth_dev);
1964 if (internals->mode == BONDING_MODE_TLB ||
1965 internals->mode == BONDING_MODE_ALB)
1966 bond_tlb_enable(internals);
1972 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1976 if (dev->data->rx_queues != NULL) {
1977 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1978 rte_free(dev->data->rx_queues[i]);
1979 dev->data->rx_queues[i] = NULL;
1981 dev->data->nb_rx_queues = 0;
1984 if (dev->data->tx_queues != NULL) {
1985 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1986 rte_free(dev->data->tx_queues[i]);
1987 dev->data->tx_queues[i] = NULL;
1989 dev->data->nb_tx_queues = 0;
1994 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1996 struct bond_dev_private *internals = eth_dev->data->dev_private;
1999 if (internals->mode == BONDING_MODE_8023AD) {
2003 bond_mode_8023ad_stop(eth_dev);
2005 /* Discard all messages to/from mode 4 state machines */
2006 for (i = 0; i < internals->active_slave_count; i++) {
2007 port = &mode_8023ad_ports[internals->active_slaves[i]];
2009 RTE_ASSERT(port->rx_ring != NULL);
2010 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2011 rte_pktmbuf_free(pkt);
2013 RTE_ASSERT(port->tx_ring != NULL);
2014 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2015 rte_pktmbuf_free(pkt);
2019 if (internals->mode == BONDING_MODE_TLB ||
2020 internals->mode == BONDING_MODE_ALB) {
2021 bond_tlb_disable(internals);
2022 for (i = 0; i < internals->active_slave_count; i++)
2023 tlb_last_obytets[internals->active_slaves[i]] = 0;
2026 internals->active_slave_count = 0;
2027 internals->link_status_polling_enabled = 0;
2028 for (i = 0; i < internals->slave_count; i++)
2029 internals->slaves[i].last_link_status = 0;
2031 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2032 eth_dev->data->dev_started = 0;
2036 bond_ethdev_close(struct rte_eth_dev *dev)
2038 struct bond_dev_private *internals = dev->data->dev_private;
2039 uint16_t bond_port_id = internals->port_id;
2042 RTE_LOG(INFO, PMD, "Closing bonded device %s\n", dev->device->name);
2043 while (internals->slave_count != skipped) {
2044 uint16_t port_id = internals->slaves[skipped].port_id;
2046 rte_eth_dev_stop(port_id);
2048 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2050 "Failed to remove port %d from bonded device "
2051 "%s\n", port_id, dev->device->name);
2055 bond_ethdev_free_queues(dev);
2056 rte_bitmap_reset(internals->vlan_filter_bmp);
2059 /* forward declaration */
2060 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2063 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2065 struct bond_dev_private *internals = dev->data->dev_private;
2067 uint16_t max_nb_rx_queues = UINT16_MAX;
2068 uint16_t max_nb_tx_queues = UINT16_MAX;
2070 dev_info->max_mac_addrs = 1;
2072 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2073 internals->candidate_max_rx_pktlen :
2074 ETHER_MAX_JUMBO_FRAME_LEN;
2076 /* The max number of tx/rx queues that the bonded device can support is
2077 * the minimum of the values reported by the bonded slaves, as all slaves
2078 * must be capable of supporting the same number of tx/rx queues.
2080 if (internals->slave_count > 0) {
2081 struct rte_eth_dev_info slave_info;
2084 for (idx = 0; idx < internals->slave_count; idx++) {
2085 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2088 if (slave_info.max_rx_queues < max_nb_rx_queues)
2089 max_nb_rx_queues = slave_info.max_rx_queues;
2091 if (slave_info.max_tx_queues < max_nb_tx_queues)
2092 max_nb_tx_queues = slave_info.max_tx_queues;
2096 dev_info->max_rx_queues = max_nb_rx_queues;
2097 dev_info->max_tx_queues = max_nb_tx_queues;
2100 * If dedicated hw queues enabled for link bonding device in LACP mode
2101 * then we need to reduce the maximum number of data path queues by 1.
2103 if (internals->mode == BONDING_MODE_8023AD &&
2104 internals->mode4.dedicated_queues.enabled == 1) {
2105 dev_info->max_rx_queues--;
2106 dev_info->max_tx_queues--;
2109 dev_info->min_rx_bufsize = 0;
2111 dev_info->rx_offload_capa = internals->rx_offload_capa;
2112 dev_info->tx_offload_capa = internals->tx_offload_capa;
2113 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2115 dev_info->reta_size = internals->reta_size;
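
/*
 * Illustrative usage (not part of the driver): an application sizing its
 * queue configuration from the limits reported above. The bonded port id
 * below is a hypothetical value.
 *
 *	struct rte_eth_dev_info dev_info;
 *	uint16_t bond_port = 4;
 *
 *	rte_eth_dev_info_get(bond_port, &dev_info);
 *	(dev_info.max_rx_queues/max_tx_queues now hold the minimum across
 *	all slaves, each reduced by one if LACP dedicated queues are on.)
 */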
static int
bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
	int res;
	uint16_t i;
	struct bond_dev_private *internals = dev->data->dev_private;

	/* don't do this while a slave is being added */
	rte_spinlock_lock(&internals->lock);

	if (on)
		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
	else
		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);

	for (i = 0; i < internals->slave_count; i++) {
		uint16_t port_id = internals->slaves[i].port_id;

		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
		if (res == -ENOTSUP)
			RTE_LOG(WARNING, PMD,
					"Setting VLAN filter on slave port %u not supported.\n",
					port_id);
	}

	rte_spinlock_unlock(&internals->lock);

	return 0;
}
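
/*
 * Illustrative usage (not part of the driver): enabling VLAN 100 on the
 * bonded port lands in bond_ethdev_vlan_filter_set() above and is
 * propagated to every slave. "bond_port" is a hypothetical port id.
 *
 *	if (rte_eth_dev_vlan_filter(bond_port, 100, 1) != 0)
 *		rte_exit(EXIT_FAILURE, "Cannot set VLAN filter\n");
 */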
static int
bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
					0, dev->data->numa_node);
	if (bd_rx_q == NULL)
		return -1;

	bd_rx_q->queue_id = rx_queue_id;
	bd_rx_q->dev_private = dev->data->dev_private;

	bd_rx_q->nb_rx_desc = nb_rx_desc;

	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
	bd_rx_q->mb_pool = mb_pool;

	dev->data->rx_queues[rx_queue_id] = bd_rx_q;

	return 0;
}
static int
bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_txconf *tx_conf)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
					0, dev->data->numa_node);

	if (bd_tx_q == NULL)
		return -1;

	bd_tx_q->queue_id = tx_queue_id;
	bd_tx_q->dev_private = dev->data->dev_private;

	bd_tx_q->nb_tx_desc = nb_tx_desc;
	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));

	dev->data->tx_queues[tx_queue_id] = bd_tx_q;

	return 0;
}
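
/*
 * Illustrative usage (not part of the driver): the application-facing
 * calls that reach the two setup functions above. Only the bonded
 * device's own queue bookkeeping is allocated here; the slaves' queues
 * are configured against these parameters when the bonded device is
 * started or a slave is added. Pool name and sizes are assumptions for
 * the sketch.
 *
 *	struct rte_mempool *mb_pool = rte_pktmbuf_pool_create("bond_mbufs",
 *			8192, 256, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
 *			rte_socket_id());
 *
 *	rte_eth_rx_queue_setup(bond_port, 0, 128, rte_socket_id(),
 *			NULL, mb_pool);
 *	rte_eth_tx_queue_setup(bond_port, 0, 512, rte_socket_id(), NULL);
 */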
static void
bond_ethdev_rx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}

static void
bond_ethdev_tx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
	struct bond_dev_private *internals;

	/* Default value for polling slave found is true as we don't want to
	 * disable the polling thread if we cannot get the lock */
	int i, polling_slave_found = 1;

	if (cb_arg == NULL)
		return;

	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;

	if (!bonded_ethdev->data->dev_started ||
			!internals->link_status_polling_enabled)
		return;

	/* If the device is currently being configured then don't check the
	 * slaves' link status; wait until the next polling period */
	if (rte_spinlock_trylock(&internals->lock)) {
		if (internals->slave_count > 0)
			polling_slave_found = 0;

		for (i = 0; i < internals->slave_count; i++) {
			if (!internals->slaves[i].link_status_poll_enabled)
				continue;

			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
			polling_slave_found = 1;

			/* Update slave link status */
			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
					internals->slaves[i].link_status_wait_to_complete);

			/* if the link status has changed since last checked then call
			 * the lsc event callback */
			if (slave_ethdev->data->dev_link.link_status !=
					internals->slaves[i].last_link_status) {
				internals->slaves[i].last_link_status =
						slave_ethdev->data->dev_link.link_status;

				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
						RTE_ETH_EVENT_INTR_LSC,
						&bonded_ethdev->data->port_id,
						NULL);
			}
		}
		rte_spinlock_unlock(&internals->lock);
	}

	if (polling_slave_found)
		/* Set alarm to continue monitoring link status of slave ethdevs */
		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
				bond_ethdev_slave_link_status_change_monitor, cb_arg);
}
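
/*
 * The polling interval used by the alarm above defaults to
 * DEFAULT_POLLING_INTERVAL_10_MS and can be tuned per bonded device.
 * Illustrative usage (not part of the driver), polling every 100 ms:
 *
 *	rte_eth_bond_link_monitoring_set(bond_port, 100);
 */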
static int
bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
{
	void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);

	struct bond_dev_private *bond_ctx;
	struct rte_eth_link slave_link;

	uint32_t idx;

	bond_ctx = ethdev->data->dev_private;

	ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

	if (ethdev->data->dev_started == 0 ||
			bond_ctx->active_slave_count == 0) {
		ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
		return 0;
	}

	ethdev->data->dev_link.link_status = ETH_LINK_UP;

	if (wait_to_complete)
		link_update = rte_eth_link_get;
	else
		link_update = rte_eth_link_get_nowait;

	switch (bond_ctx->mode) {
	case BONDING_MODE_BROADCAST:
		/**
		 * Link speed is set to UINT32_MAX so that the minimum
		 * comparison below picks up the speed of the first active
		 * slave.
		 */
		ethdev->data->dev_link.link_speed = UINT32_MAX;

		/**
		 * The link speed is the minimum of all the slaves' link
		 * speeds, as packet loss will occur on a slave if it is
		 * asked to transmit at a rate greater than its own link
		 * speed.
		 */
		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
			link_update(bond_ctx->active_slaves[idx], &slave_link);

			if (slave_link.link_speed <
					ethdev->data->dev_link.link_speed)
				ethdev->data->dev_link.link_speed =
						slave_link.link_speed;
		}
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		/* Current primary slave */
		link_update(bond_ctx->current_primary_port, &slave_link);

		ethdev->data->dev_link.link_speed = slave_link.link_speed;
		break;
	case BONDING_MODE_8023AD:
		ethdev->data->dev_link.link_autoneg =
				bond_ctx->mode4.slave_link.link_autoneg;
		ethdev->data->dev_link.link_duplex =
				bond_ctx->mode4.slave_link.link_duplex;
		/* fall through to update link speed */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		/**
		 * In these modes the maximum theoretical link speed is the
		 * sum of all the slaves' link speeds.
		 */
		ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
			link_update(bond_ctx->active_slaves[idx], &slave_link);

			ethdev->data->dev_link.link_speed +=
					slave_link.link_speed;
		}
	}

	return 0;
}
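
/*
 * Illustrative usage (not part of the driver): reading the aggregate
 * link computed above. In balance/round-robin modes link_speed is the
 * sum of the active slaves' speeds; in active-backup it is the primary
 * slave's speed; in broadcast it is the minimum.
 *
 *	struct rte_eth_link link;
 *
 *	rte_eth_link_get_nowait(bond_port, &link);
 *	printf("bond link %s, %u Mbps\n",
 *			link.link_status ? "up" : "down", link.link_speed);
 */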
static int
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_stats slave_stats;
	int i, j;

	for (i = 0; i < internals->slave_count; i++) {
		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);

		stats->ipackets += slave_stats.ipackets;
		stats->opackets += slave_stats.opackets;
		stats->ibytes += slave_stats.ibytes;
		stats->obytes += slave_stats.obytes;
		stats->imissed += slave_stats.imissed;
		stats->ierrors += slave_stats.ierrors;
		stats->oerrors += slave_stats.oerrors;
		stats->rx_nombuf += slave_stats.rx_nombuf;

		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
			stats->q_opackets[j] += slave_stats.q_opackets[j];
			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
			stats->q_obytes[j] += slave_stats.q_obytes[j];
			stats->q_errors[j] += slave_stats.q_errors[j];
		}
	}

	return 0;
}
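
/*
 * Illustrative usage (not part of the driver): reading the aggregated
 * counters. Note that per-queue counters are only summed for the first
 * RTE_ETHDEV_QUEUE_STAT_CNTRS queues of each slave.
 *
 *	struct rte_eth_stats stats;
 *
 *	if (rte_eth_stats_get(bond_port, &stats) == 0)
 *		printf("rx %" PRIu64 " tx %" PRIu64 "\n",
 *				stats.ipackets, stats.opackets);
 */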
static void
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	for (i = 0; i < internals->slave_count; i++)
		rte_eth_stats_reset(internals->slaves[i].port_id);
}
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	int i;

	internals->promiscuous_en = 1;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_enable(internals->slaves[i].port_id);
		break;
	/* In mode 4, promiscuous mode is managed per slave when it is
	 * added or removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to the primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_enable(internals->current_primary_port);
	}
}
static void
bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	internals->promiscuous_en = 0;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_disable(internals->slaves[i].port_id);
		break;
	/* In mode 4, promiscuous mode is managed per slave when it is
	 * added or removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to the primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_disable(internals->current_primary_port);
	}
}
static void
bond_ethdev_delayed_lsc_propagation(void *arg)
{
	if (arg == NULL)
		return;

	_rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
			RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
}
static int
bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
		void *param, void *ret_param __rte_unused)
{
	struct rte_eth_dev *bonded_eth_dev;
	struct bond_dev_private *internals;
	struct rte_eth_link link;
	int rc = -1;

	int i, valid_slave = 0;
	uint8_t active_pos;
	uint8_t lsc_flag = 0;

	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
		return rc;

	bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];

	if (check_for_bonded_ethdev(bonded_eth_dev))
		return rc;

	internals = bonded_eth_dev->data->dev_private;

	/* If the device isn't started, don't handle interrupts */
	if (!bonded_eth_dev->data->dev_started)
		return rc;

	/* verify that port_id is a valid slave of bonded port */
	for (i = 0; i < internals->slave_count; i++) {
		if (internals->slaves[i].port_id == port_id) {
			valid_slave = 1;
			break;
		}
	}

	if (!valid_slave)
		return rc;

	/* Search for port in active port list */
	active_pos = find_slave_by_id(internals->active_slaves,
			internals->active_slave_count, port_id);

	rte_eth_link_get_nowait(port_id, &link);
	if (link.link_status) {
		if (active_pos < internals->active_slave_count)
			return rc;

		/* if no active slave ports then set this port to be primary port */
		if (internals->active_slave_count < 1) {
			/* If first active slave, then change link status */
			bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
			internals->current_primary_port = port_id;
			lsc_flag = 1;

			mac_address_slaves_update(bonded_eth_dev);
		}

		activate_slave(bonded_eth_dev, port_id);

		/* If user has defined the primary port then default to using it */
		if (internals->user_defined_primary_port &&
				internals->primary_port == port_id)
			bond_ethdev_primary_set(internals, port_id);
	} else {
		if (active_pos == internals->active_slave_count)
			return rc;

		/* Remove from active slave list */
		deactivate_slave(bonded_eth_dev, port_id);

		if (internals->active_slave_count < 1)
			lsc_flag = 1;

		/* Update primary id; take the first active slave from the
		 * list or, if none is available, fall back to the configured
		 * primary port */
		if (port_id == internals->current_primary_port) {
			if (internals->active_slave_count > 0)
				bond_ethdev_primary_set(internals,
						internals->active_slaves[0]);
			else
				internals->current_primary_port = internals->primary_port;
		}
	}

	/**
	 * Update bonded device link properties after any change to active
	 * slaves
	 */
	bond_ethdev_link_update(bonded_eth_dev, 0);

	if (lsc_flag) {
		/* Cancel any possible outstanding interrupts if delays are enabled */
		if (internals->link_up_delay_ms > 0 ||
				internals->link_down_delay_ms > 0)
			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
					bonded_eth_dev);

		if (bonded_eth_dev->data->dev_link.link_status) {
			if (internals->link_up_delay_ms > 0)
				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL, NULL);
		} else {
			if (internals->link_down_delay_ms > 0)
				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL, NULL);
		}
	}

	return 0;
}
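
/*
 * Illustrative usage (not part of the driver): an application can observe
 * the LSC events propagated by _rte_eth_dev_callback_process() above by
 * registering its own callback on the bonded port. "app_lsc_cb" is a
 * hypothetical function matching rte_eth_dev_cb_fn.
 *
 *	rte_eth_dev_callback_register(bond_port, RTE_ETH_EVENT_INTR_LSC,
 *			app_lsc_cb, NULL);
 */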
static int
bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	unsigned i, j;
	int result = 0;
	int slave_reta_size;
	unsigned reta_count;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	reta_count = reta_size / RTE_RETA_GROUP_SIZE;

	for (i = 0; i < reta_count; i++) {
		internals->reta_conf[i].mask = reta_conf[i].mask;
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
	}

	/* Fill rest of array */
	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
				sizeof(internals->reta_conf[0]) * reta_count);

	/* Propagate RETA over slaves */
	for (i = 0; i < internals->slave_count; i++) {
		slave_reta_size = internals->slaves[i].reta_size;
		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
				&internals->reta_conf[0], slave_reta_size);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	int i, j;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];

	return 0;
}
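
/*
 * Illustrative usage (not part of the driver): updating one RETA group on
 * the bonded port; the driver mirrors the table to every slave, truncated
 * to each slave's own reta_size. Spreading over two rx queues is an
 * example choice.
 *
 *	struct rte_eth_rss_reta_entry64 reta_conf[1];
 *	int i;
 *
 *	reta_conf[0].mask = ~0ULL;
 *	for (i = 0; i < RTE_RETA_GROUP_SIZE; i++)
 *		reta_conf[0].reta[i] = i % 2;
 *	rte_eth_dev_rss_reta_update(bond_port, reta_conf,
 *			RTE_RETA_GROUP_SIZE);
 */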
static int
bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	int i, result = 0;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_rss_conf bond_rss_conf;

	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));

	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;

	if (bond_rss_conf.rss_hf != 0)
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;

	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
			sizeof(internals->rss_key)) {
		if (bond_rss_conf.rss_key_len == 0)
			bond_rss_conf.rss_key_len = 40;
		internals->rss_key_len = bond_rss_conf.rss_key_len;
		memcpy(internals->rss_key, bond_rss_conf.rss_key,
				internals->rss_key_len);
	}

	for (i = 0; i < internals->slave_count; i++) {
		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
				&bond_rss_conf);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	rss_conf->rss_key_len = internals->rss_key_len;
	if (rss_conf->rss_key)
		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);

	return 0;
}
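
/*
 * Illustrative usage (not part of the driver): restricting the RSS hash
 * to IPv4/TCP on the bonded port; unsupported bits are masked against
 * internals->flow_type_rss_offloads before being pushed to the slaves.
 * Passing rss_key = NULL keeps the current 40-byte key.
 *
 *	struct rte_eth_rss_conf rss_conf = {
 *		.rss_key = NULL,
 *		.rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *	};
 *
 *	rte_eth_dev_rss_hash_update(bond_port, &rss_conf);
 */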
const struct eth_dev_ops default_dev_ops = {
	.dev_start = bond_ethdev_start,
	.dev_stop = bond_ethdev_stop,
	.dev_close = bond_ethdev_close,
	.dev_configure = bond_ethdev_configure,
	.dev_infos_get = bond_ethdev_info,
	.vlan_filter_set = bond_ethdev_vlan_filter_set,
	.rx_queue_setup = bond_ethdev_rx_queue_setup,
	.tx_queue_setup = bond_ethdev_tx_queue_setup,
	.rx_queue_release = bond_ethdev_rx_queue_release,
	.tx_queue_release = bond_ethdev_tx_queue_release,
	.link_update = bond_ethdev_link_update,
	.stats_get = bond_ethdev_stats_get,
	.stats_reset = bond_ethdev_stats_reset,
	.promiscuous_enable = bond_ethdev_promiscuous_enable,
	.promiscuous_disable = bond_ethdev_promiscuous_disable,
	.reta_update = bond_ethdev_rss_reta_update,
	.reta_query = bond_ethdev_rss_reta_query,
	.rss_hash_update = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get = bond_ethdev_rss_hash_conf_get
};
static int
bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
{
	const char *name = rte_vdev_device_name(dev);
	uint8_t socket_id = dev->device.numa_node;
	struct bond_dev_private *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	uint32_t vlan_filter_bmp_size;

	/* now do all data allocation - for eth_dev structure, dummy pci driver
	 * and internal (private) data
	 */

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
		goto err;
	}

	internals = eth_dev->data->dev_private;
	eth_dev->data->nb_rx_queues = (uint16_t)1;
	eth_dev->data->nb_tx_queues = (uint16_t)1;

	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
			socket_id);
	if (eth_dev->data->mac_addrs == NULL) {
		RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
		goto err;
	}

	eth_dev->dev_ops = &default_dev_ops;
	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;

	rte_spinlock_init(&internals->lock);

	internals->port_id = eth_dev->data->port_id;
	internals->mode = BONDING_MODE_INVALID;
	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
	internals->xmit_hash = xmit_l2_hash;
	internals->user_defined_mac = 0;

	internals->link_status_polling_enabled = 0;

	internals->link_status_polling_interval_ms =
			DEFAULT_POLLING_INTERVAL_10_MS;
	internals->link_down_delay_ms = 0;
	internals->link_up_delay_ms = 0;

	internals->slave_count = 0;
	internals->active_slave_count = 0;
	internals->rx_offload_capa = 0;
	internals->tx_offload_capa = 0;
	internals->candidate_max_rx_pktlen = 0;
	internals->max_rx_pktlen = 0;

	/* Initially allow to choose any offload type */
	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;

	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
	memset(internals->slaves, 0, sizeof(internals->slaves));

	/* Set mode 4 default configuration */
	bond_mode_8023ad_setup(eth_dev, NULL);
	if (bond_ethdev_mode_set(eth_dev, mode)) {
		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
				eth_dev->data->port_id, mode);
		goto err;
	}

	vlan_filter_bmp_size =
			rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
			RTE_CACHE_LINE_SIZE);
	if (internals->vlan_filter_bmpmem == NULL) {
		RTE_BOND_LOG(ERR,
				"Failed to allocate vlan bitmap for bonded device %u\n",
				eth_dev->data->port_id);
		goto err;
	}

	internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
	if (internals->vlan_filter_bmp == NULL) {
		RTE_BOND_LOG(ERR,
				"Failed to init vlan bitmap for bonded device %u\n",
				eth_dev->data->port_id);
		rte_free(internals->vlan_filter_bmpmem);
		goto err;
	}

	return eth_dev->data->port_id;

err:
	rte_free(internals);
	if (eth_dev != NULL) {
		rte_free(eth_dev->data->mac_addrs);
		rte_eth_dev_release_port(eth_dev);
	}
	return -1;
}
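
/*
 * Illustrative usage (not part of the driver): the allocation path above
 * is also reachable through the bonding API, without devargs. Port ids 0
 * and 1 are assumed to be two already-probed physical ports.
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *
 *	rte_eth_bond_slave_add(bond_port, 0);
 *	rte_eth_bond_slave_add(bond_port, 1);
 *	rte_eth_bond_primary_set(bond_port, 0);
 */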
static int
bond_probe(struct rte_vdev_device *dev)
{
	const char *name;
	struct bond_dev_private *internals;
	struct rte_kvargs *kvlist;
	uint8_t bonding_mode, socket_id/*, agg_mode*/;
	int arg_count, port_id;
	uint8_t agg_mode = 0;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
			pmd_bond_init_valid_arguments);
	if (kvlist == NULL)
		return -1;

	/* Parse link bonding mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
				&bond_ethdev_parse_slave_mode_kvarg,
				&bonding_mode) != 0) {
			RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
					name);
			goto parse_error;
		}
	} else {
		RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
				"device %s\n", name);
		goto parse_error;
	}

	/* Parse socket id to create bonding device on */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
	if (arg_count == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
				!= 0) {
			RTE_LOG(ERR, EAL, "Invalid socket id specified for "
					"bonded device %s\n", name);
			goto parse_error;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
				"bonded device %s\n", name);
		goto parse_error;
	} else {
		socket_id = rte_socket_id();
	}

	dev->device.numa_node = socket_id;

	/* Create link bonding eth device */
	port_id = bond_alloc(dev, bonding_mode);
	if (port_id < 0) {
		RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
				"socket %u.\n", name, bonding_mode, socket_id);
		goto parse_error;
	}
	internals = rte_eth_devices[port_id].data->dev_private;
	internals->kvlist = kvlist;

	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse agg selection mode for bonded device %s\n",
					name);
			goto parse_error;
		}
		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
	} else {
		rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
	}

	RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
			"socket %u.\n", name, port_id, bonding_mode, socket_id);
	return 0;

parse_error:
	rte_kvargs_free(kvlist);
	return -1;
}
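
/*
 * Illustrative usage (not part of the driver): creating the same device
 * from the EAL command line; the kvargs below are parsed by bond_probe()
 * and bond_ethdev_configure(). The PCI addresses are placeholders.
 *
 *	testpmd -l 0-3 -n 4 --vdev \
 *		'net_bonding0,mode=4,agg_mode=stable,slave=0000:02:00.0,slave=0000:02:00.1' \
 *		-- -i
 */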
static int
bond_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;
	struct bond_dev_private *internals;
	const char *name;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);

	/* now free all data allocation - for eth_dev structure,
	 * dummy pci driver and internal (private) data
	 */

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return -ENODEV;

	RTE_ASSERT(eth_dev->device == &dev->device);

	internals = eth_dev->data->dev_private;
	if (internals->slave_count != 0)
		return -EBUSY;

	if (eth_dev->data->dev_started == 1) {
		bond_ethdev_stop(eth_dev);
		bond_ethdev_close(eth_dev);
	}

	eth_dev->dev_ops = NULL;
	eth_dev->rx_pkt_burst = NULL;
	eth_dev->tx_pkt_burst = NULL;

	internals = eth_dev->data->dev_private;
	rte_bitmap_free(internals->vlan_filter_bmp);
	rte_free(internals->vlan_filter_bmpmem);
	rte_free(eth_dev->data->dev_private);
	rte_free(eth_dev->data->mac_addrs);

	rte_eth_dev_release_port(eth_dev);

	return 0;
}
/* this part will resolve the slave port ids after all the other pdevs and
 * vdevs have been allocated */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	const char *name = dev->device->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	int arg_count;
	uint16_t port_id = dev - rte_eth_devices;
	uint8_t agg_mode;

	static const uint8_t default_rss_key[40] = {
		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
		0xBE, 0xAC, 0x01, 0xFA
	};

	unsigned i, j;

	/* If RSS is enabled, fill table and key with default values */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
		memcpy(internals->rss_key, default_rss_key, 40);

		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] =
						j % dev->data->nb_rx_queues;
		}
	}

	/* set the max_rx_pktlen */
	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

	/*
	 * If there is no kvlist, it means that this bonded device has been
	 * created through the bonding API.
	 */
	if (kvlist == NULL)
		return 0;
	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
			RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
					name);
			return -1;
		}

		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set mac address on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"MAC address can be specified only once for bonded device %s\n",
				name);
		return -1;
	}
	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
						0) {
			RTE_LOG(INFO, EAL,
					"Invalid xmit policy specified for bonded device %s\n",
					name);
			return -1;
		}

		/* Set balance mode transmit policy */
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set balance xmit policy on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"Transmit policy can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}
	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse agg selection mode for bonded device %s\n",
					name);
		}
		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
	}
	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;
		unsigned i;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse slave ports for bonded device %s\n",
					name);
			return -1;
		}

		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
				RTE_LOG(ERR, EAL,
						"Failed to add port %d as slave to bonded device %s\n",
						slave_ports.slaves[i], name);
			}
		}
	} else {
		RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
		return -1;
	}
	/* Parse/set primary slave port id */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint16_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid primary slave port id specified for bonded device"
					" %s\n", name);
			return -1;
		}

		/* Set primary slave port id */
		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set primary slave port %d on bonded device %s\n",
					primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Primary slave can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}
	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid lsc polling interval value specified for bonded"
					" device %s\n", name);
			return -1;
		}

		/* Set link status monitor polling interval */
		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set lsc monitor polling interval (%u ms) on"
					" bonded device %s\n", lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"LSC polling interval can be specified only once for bonded"
				" device %s\n", name);
		return -1;
	}
	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link up propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link up propagation delay (%u ms) on bonded"
					" device %s\n", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link up propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}
	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link down propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link down propagation delay (%u ms) on"
					" bonded device %s\n", link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link down propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	return 0;
}
static struct rte_vdev_driver pmd_bond_drv = {
	.probe = bond_probe,
	.remove = bond_remove,
};

RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
	"slave=<ifc> "
	"primary=<ifc> "
	"mode=[0-6] "
	"xmit_policy=[l2 | l23 | l34] "
	"agg_mode=[count | stable | bandwidth] "
	"socket_id=<int> "
	"mac=<mac addr> "
	"lsc_poll_period_ms=<int> "
	"up_delay=<int> "
	"down_delay=<int>");