1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
5 #include <netinet/in.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev.h>
10 #include <rte_ethdev_vdev.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
21 #include "rte_eth_bond.h"
22 #include "rte_eth_bond_private.h"
23 #include "rte_eth_bond_8023ad_private.h"
25 #define REORDER_PERIOD_MS 10
26 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
28 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
30 /* Table for statistics in mode 5 TLB */
31 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
34 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
36 size_t vlan_offset = 0;
38 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
39 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
41 vlan_offset = sizeof(struct vlan_hdr);
42 *proto = vlan_hdr->eth_proto;
44 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
45 vlan_hdr = vlan_hdr + 1;
46 *proto = vlan_hdr->eth_proto;
47 vlan_offset += sizeof(struct vlan_hdr);
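/*
 * Usage sketch (illustrative, not part of the driver): callers read the
 * outer ether_type, let get_vlan_offset() step past up to two stacked VLAN
 * tags, and then locate the L3 header relative to the Ethernet header:
 *
 *	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
 *	uint16_t proto = eth_hdr->ether_type;
 *	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
 *
 *	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
 *		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
 *				((char *)(eth_hdr + 1) + vlan_offset);
 *		...
 *	}
 */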
54 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
56 struct bond_dev_private *internals;
58 uint16_t num_rx_slave = 0;
59 uint16_t num_rx_total = 0;
63 /* Cast to structure containing the bonded device's port id and queue id */
64 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
66 internals = bd_rx_q->dev_private;
69 for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
70 /* Offset of pointer to *bufs increases as packets are received
71 * from other slaves */
72 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
73 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
75 num_rx_total += num_rx_slave;
76 nb_pkts -= num_rx_slave;
84 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
87 struct bond_dev_private *internals;
89 /* Cast to structure containing the bonded device's port id and queue id */
90 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
92 internals = bd_rx_q->dev_private;
94 return rte_eth_rx_burst(internals->current_primary_port,
95 bd_rx_q->queue_id, bufs, nb_pkts);
99 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
101 const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
103 return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
104 (ethertype == ether_type_slow_be &&
105 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
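/*
 * Identification sketch (illustrative): a slow-protocol frame is untagged,
 * carries ether_type ETHER_TYPE_SLOW (0x8809), and its first payload byte
 * is the subtype (the LACP and marker subtypes are accepted here). The
 * mode 4 Rx path extracts those fields as:
 *
 *	struct ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
 *	uint8_t subtype =
 *		((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
 *	if (is_lacp_packets(hdr->ether_type, subtype, mbuf))
 *		... divert the frame to the mode 4 state machine ...
 */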
108 /*****************************************************************************
109 * Flow director's setup for mode 4 optimization
112 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
113 .dst.addr_bytes = { 0 },
114 .src.addr_bytes = { 0 },
115 .type = RTE_BE16(ETHER_TYPE_SLOW),
118 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
119 .dst.addr_bytes = { 0 },
120 .src.addr_bytes = { 0 },
124 static struct rte_flow_item flow_item_8023ad[] = {
126 .type = RTE_FLOW_ITEM_TYPE_ETH,
127 .spec = &flow_item_eth_type_8023ad,
129 .mask = &flow_item_eth_mask_type_8023ad,
132 .type = RTE_FLOW_ITEM_TYPE_END,
139 const struct rte_flow_attr flow_attr_8023ad = {
148 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
149 uint16_t slave_port) {
150 struct rte_eth_dev_info slave_info;
151 struct rte_flow_error error;
152 struct bond_dev_private *internals = (struct bond_dev_private *)
153 (bond_dev->data->dev_private);
155 const struct rte_flow_action_queue lacp_queue_conf = {
159 const struct rte_flow_action actions[] = {
161 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
162 .conf = &lacp_queue_conf
165 .type = RTE_FLOW_ACTION_TYPE_END,
169 int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
170 flow_item_8023ad, actions, &error);
172 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
173 __func__, error.message, slave_port,
174 internals->mode4.dedicated_queues.rx_qid);
178 rte_eth_dev_info_get(slave_port, &slave_info);
179 if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
180 slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
182 "%s: Slave %d capabilities doesn't allow to allocate additional queues",
183 __func__, slave_port);
191 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
192 struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
193 struct bond_dev_private *internals = (struct bond_dev_private *)
194 (bond_dev->data->dev_private);
195 struct rte_eth_dev_info bond_info;
199 /* Verify that all slaves in the bonding device support flow director */
199 if (internals->slave_count > 0) {
200 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
202 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
203 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
205 for (idx = 0; idx < internals->slave_count; idx++) {
206 if (bond_ethdev_8023ad_flow_verify(bond_dev,
207 internals->slaves[idx].port_id) != 0)
216 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
218 struct rte_flow_error error;
219 struct bond_dev_private *internals = (struct bond_dev_private *)
220 (bond_dev->data->dev_private);
222 struct rte_flow_action_queue lacp_queue_conf = {
223 .index = internals->mode4.dedicated_queues.rx_qid,
226 const struct rte_flow_action actions[] = {
228 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
229 .conf = &lacp_queue_conf
232 .type = RTE_FLOW_ACTION_TYPE_END,
236 internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
237 &flow_attr_8023ad, flow_item_8023ad, actions, &error);
238 if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
239 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
240 "(slave_port=%d queue_id=%d)",
241 error.message, slave_port,
242 internals->mode4.dedicated_queues.rx_qid);
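/*
 * Standalone sketch of the rule being installed (illustrative; port_id and
 * rx_qid are placeholders): steer every ETHER_TYPE_SLOW frame to one
 * dedicated Rx queue so the data-path queues never see LACPDUs.
 *
 *	struct rte_flow_error err;
 *	const struct rte_flow_action_queue q = { .index = rx_qid };
 *	const struct rte_flow_action acts[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &q },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow *f = rte_flow_create(port_id, &flow_attr_8023ad,
 *			flow_item_8023ad, acts, &err);
 *	if (f == NULL)
 *		... fall back to software LACP filtering in the Rx burst ...
 */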
250 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
253 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
254 struct bond_dev_private *internals = bd_rx_q->dev_private;
255 uint16_t num_rx_total = 0; /* Total number of received packets */
256 uint16_t slaves[RTE_MAX_ETHPORTS];
257 uint16_t slave_count;
261 /* Copy slave list to protect against slave up/down changes during tx
263 slave_count = internals->active_slave_count;
264 memcpy(slaves, internals->active_slaves,
265 sizeof(internals->active_slaves[0]) * slave_count);
267 for (i = 0, idx = internals->active_slave;
268 i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
269 idx = idx % slave_count;
271 /* Read packets from this slave */
272 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
273 &bufs[num_rx_total], nb_pkts - num_rx_total);
276 internals->active_slave = idx;
282 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
285 struct bond_dev_private *internals;
286 struct bond_tx_queue *bd_tx_q;
288 uint16_t num_of_slaves;
289 uint16_t slaves[RTE_MAX_ETHPORTS];
290 /* positions in the slaves array, not port IDs */
291 uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
292 uint8_t distributing_count;
294 uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
295 uint16_t i, op_slave_idx;
297 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
299 /* Total amount of packets in slave_bufs */
300 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
301 /* Slow packets placed in each slave */
303 if (unlikely(nb_pkts == 0))
306 bd_tx_q = (struct bond_tx_queue *)queue;
307 internals = bd_tx_q->dev_private;
309 /* Copy slave list to protect against slave up/down changes during tx
311 num_of_slaves = internals->active_slave_count;
312 if (num_of_slaves < 1)
315 memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
318 distributing_count = 0;
319 for (i = 0; i < num_of_slaves; i++) {
320 struct port *port = &mode_8023ad_ports[slaves[i]];
321 if (ACTOR_STATE(port, DISTRIBUTING))
322 distributing_offsets[distributing_count++] = i;
325 if (likely(distributing_count > 0)) {
326 /* Populate each slave's mbuf array with the packets to be sent */
327 for (i = 0; i < nb_pkts; i++) {
328 /* Select output slave using hash based on xmit policy */
329 op_slave_idx = internals->xmit_hash(bufs[i],
332 /* Populate slave mbuf arrays with mbufs for that slave.
333 * Use only slaves that are currently distributing.
335 uint8_t slave_offset =
336 distributing_offsets[op_slave_idx];
337 slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
339 slave_nb_pkts[slave_offset]++;
343 /* Send packet burst on each slave device */
344 for (i = 0; i < num_of_slaves; i++) {
345 if (slave_nb_pkts[i] == 0)
348 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
349 slave_bufs[i], slave_nb_pkts[i]);
351 num_tx_total += num_tx_slave;
352 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
354 /* If tx burst fails move packets to end of bufs */
355 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
356 uint16_t j = nb_pkts - num_tx_fail_total;
357 for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
359 bufs[j] = slave_bufs[i][num_tx_slave];
368 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
371 /* Cast to structure containing the bonded device's port id and queue id */
372 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
373 struct bond_dev_private *internals = bd_rx_q->dev_private;
374 struct ether_addr bond_mac;
376 struct ether_hdr *hdr;
378 const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
379 uint16_t num_rx_total = 0; /* Total number of received packets */
380 uint16_t slaves[RTE_MAX_ETHPORTS];
381 uint16_t slave_count, idx;
383 uint8_t collecting; /* current slave collecting status */
384 const uint8_t promisc = internals->promiscuous_en;
388 rte_eth_macaddr_get(internals->port_id, &bond_mac);
389 /* Copy slave list to protect against slave up/down changes during tx
391 slave_count = internals->active_slave_count;
392 memcpy(slaves, internals->active_slaves,
393 sizeof(internals->active_slaves[0]) * slave_count);
395 idx = internals->active_slave;
396 if (idx >= slave_count) {
397 internals->active_slave = 0;
400 for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
402 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
405 /* Read packets from this slave */
406 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
407 &bufs[num_rx_total], nb_pkts - num_rx_total);
409 for (k = j; k < 2 && k < num_rx_total; k++)
410 rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
412 /* Handle slow protocol packets. */
413 while (j < num_rx_total) {
415 /* If packet is not pure L2 and is known, skip it */
416 if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
421 if (j + 3 < num_rx_total)
422 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
424 hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
425 subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
427 /* Remove packet from array if it is slow packet or slave is not
428 * in collecting state or bonding interface is not in promiscuous
429 * mode and packet address does not match. */
430 if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
431 !collecting || (!promisc &&
432 !is_multicast_ether_addr(&hdr->d_addr) &&
433 !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
435 if (hdr->ether_type == ether_type_slow_be) {
436 bond_mode_8023ad_handle_slow_pkt(
437 internals, slaves[idx], bufs[j]);
439 rte_pktmbuf_free(bufs[j]);
441 /* Packet is managed by mode 4 or dropped, shift the array */
443 if (j < num_rx_total) {
444 memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
450 if (unlikely(++idx == slave_count))
454 internals->active_slave = idx;
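/*
 * Application-side sketch (illustrative; bond_port is a placeholder):
 * because this Rx path also feeds the LACP state machine, mode 4 depends
 * on the application polling the bonded port frequently (the driver warns
 * that it expects Rx/Tx bursts at least every 100ms):
 *
 *	struct rte_mbuf *pkts[32];
 *	for (;;) {
 *		uint16_t n = rte_eth_rx_burst(bond_port, 0, pkts, 32);
 *		... process n packets, then transmit or free them ...
 *	}
 */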
458 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
459 uint32_t burstnumberRX;
460 uint32_t burstnumberTX;
462 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
465 arp_op_name(uint16_t arp_op, char *buf)
469 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
472 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
474 case ARP_OP_REVREQUEST:
475 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
476 "Reverse ARP Request");
478 case ARP_OP_REVREPLY:
479 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
480 "Reverse ARP Reply");
482 case ARP_OP_INVREQUEST:
483 snprintf(buf, sizeof("Peer Identify Request"), "%s",
484 "Peer Identify Request");
486 case ARP_OP_INVREPLY:
487 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
488 "Peer Identify Reply");
493 snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
497 #define MaxIPv4String 16
499 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
503 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
504 snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
505 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
509 #define MAX_CLIENTS_NUMBER 128
510 uint8_t active_clients;
511 struct client_stats_t {
514 uint32_t ipv4_rx_packets;
515 uint32_t ipv4_tx_packets;
517 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
520 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
524 for (; i < MAX_CLIENTS_NUMBER; i++) {
525 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
526 /* Just update RX packets number for this client */
527 if (TXorRXindicator == &burstnumberRX)
528 client_stats[i].ipv4_rx_packets++;
530 client_stats[i].ipv4_tx_packets++;
534 /* We have a new client. Insert him to the table, and increment stats */
535 if (TXorRXindicator == &burstnumberRX)
536 client_stats[active_clients].ipv4_rx_packets++;
538 client_stats[active_clients].ipv4_tx_packets++;
539 client_stats[active_clients].ipv4_addr = addr;
540 client_stats[active_clients].port = port;
545 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
546 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
547 RTE_LOG(DEBUG, PMD, \
550 "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
552 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
558 eth_h->s_addr.addr_bytes[0], \
559 eth_h->s_addr.addr_bytes[1], \
560 eth_h->s_addr.addr_bytes[2], \
561 eth_h->s_addr.addr_bytes[3], \
562 eth_h->s_addr.addr_bytes[4], \
563 eth_h->s_addr.addr_bytes[5], \
565 eth_h->d_addr.addr_bytes[0], \
566 eth_h->d_addr.addr_bytes[1], \
567 eth_h->d_addr.addr_bytes[2], \
568 eth_h->d_addr.addr_bytes[3], \
569 eth_h->d_addr.addr_bytes[4], \
570 eth_h->d_addr.addr_bytes[5], \
577 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
578 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
580 struct ipv4_hdr *ipv4_h;
581 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
582 struct arp_hdr *arp_h;
589 uint16_t ether_type = eth_h->ether_type;
590 uint16_t offset = get_vlan_offset(eth_h, &ether_type);
592 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
593 snprintf(buf, 16, "%s", info);
596 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
597 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
598 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
599 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
600 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
601 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
603 update_client_stats(ipv4_h->src_addr, port, burstnumber);
605 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
606 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
607 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
608 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
609 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
610 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
611 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
618 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
620 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
621 struct bond_dev_private *internals = bd_rx_q->dev_private;
622 struct ether_hdr *eth_h;
623 uint16_t ether_type, offset;
624 uint16_t nb_recv_pkts;
627 nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
629 for (i = 0; i < nb_recv_pkts; i++) {
630 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
631 ether_type = eth_h->ether_type;
632 offset = get_vlan_offset(eth_h, &ether_type);
634 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
635 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
636 mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
638 bond_mode_alb_arp_recv(eth_h, offset, internals);
640 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
641 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
642 mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
650 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
653 struct bond_dev_private *internals;
654 struct bond_tx_queue *bd_tx_q;
656 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
657 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
659 uint16_t num_of_slaves;
660 uint16_t slaves[RTE_MAX_ETHPORTS];
662 uint16_t num_tx_total = 0, num_tx_slave;
664 static int slave_idx = 0;
665 int i, cslave_idx = 0, tx_fail_total = 0;
667 bd_tx_q = (struct bond_tx_queue *)queue;
668 internals = bd_tx_q->dev_private;
670 /* Copy slave list to protect against slave up/down changes during tx
672 num_of_slaves = internals->active_slave_count;
673 memcpy(slaves, internals->active_slaves,
674 sizeof(internals->active_slaves[0]) * num_of_slaves);
676 if (num_of_slaves < 1)
679 /* Populate each slave's mbuf array with the packets to be sent on it */
680 for (i = 0; i < nb_pkts; i++) {
681 cslave_idx = (slave_idx + i) % num_of_slaves;
682 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
685 /* increment current slave index so the next call to tx burst starts on the
687 slave_idx = ++cslave_idx;
689 /* Send packet burst on each slave device */
690 for (i = 0; i < num_of_slaves; i++) {
691 if (slave_nb_pkts[i] > 0) {
692 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
693 slave_bufs[i], slave_nb_pkts[i]);
695 /* if tx burst fails move packets to end of bufs */
696 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
697 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
699 tx_fail_total += tx_fail_slave;
701 memcpy(&bufs[nb_pkts - tx_fail_total],
702 &slave_bufs[i][num_tx_slave],
703 tx_fail_slave * sizeof(bufs[0]));
705 num_tx_total += num_tx_slave;
713 bond_ethdev_tx_burst_active_backup(void *queue,
714 struct rte_mbuf **bufs, uint16_t nb_pkts)
716 struct bond_dev_private *internals;
717 struct bond_tx_queue *bd_tx_q;
719 bd_tx_q = (struct bond_tx_queue *)queue;
720 internals = bd_tx_q->dev_private;
722 if (internals->active_slave_count < 1)
725 return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
729 static inline uint16_t
730 ether_hash(struct ether_hdr *eth_hdr)
732 unaligned_uint16_t *word_src_addr =
733 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
734 unaligned_uint16_t *word_dst_addr =
735 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
737 return (word_src_addr[0] ^ word_dst_addr[0]) ^
738 (word_src_addr[1] ^ word_dst_addr[1]) ^
739 (word_src_addr[2] ^ word_dst_addr[2]);
742 static inline uint32_t
743 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
745 return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
748 static inline uint32_t
749 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
751 unaligned_uint32_t *word_src_addr =
752 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
753 unaligned_uint32_t *word_dst_addr =
754 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
756 return (word_src_addr[0] ^ word_dst_addr[0]) ^
757 (word_src_addr[1] ^ word_dst_addr[1]) ^
758 (word_src_addr[2] ^ word_dst_addr[2]) ^
759 (word_src_addr[3] ^ word_dst_addr[3]);
763 xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
765 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
767 uint32_t hash = ether_hash(eth_hdr);
769 return (hash ^= hash >> 8) % slave_count;
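/*
 * Worked example (illustrative): ether_hash() XORs the three 16-bit words
 * of the source and destination MACs; xmit_l2_hash() then folds the high
 * byte into the low byte and reduces modulo the slave count:
 *
 *	hash = ether_hash(eth_hdr);	// 16-bit fold of src^dst words
 *	hash ^= hash >> 8;		// mix the high byte into the low byte
 *	slave = hash % slave_count;	// e.g. one of 2 slaves
 *
 * Every packet of a given L2 flow therefore lands on the same slave, which
 * preserves per-flow ordering.
 */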
773 xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
775 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
776 uint16_t proto = eth_hdr->ether_type;
777 size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
778 uint32_t hash, l3hash = 0;
780 hash = ether_hash(eth_hdr);
782 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
783 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
784 ((char *)(eth_hdr + 1) + vlan_offset);
785 l3hash = ipv4_hash(ipv4_hdr);
787 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
788 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
789 ((char *)(eth_hdr + 1) + vlan_offset);
790 l3hash = ipv6_hash(ipv6_hdr);
793 hash = hash ^ l3hash;
797 return hash % slave_count;
801 xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
803 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
804 uint16_t proto = eth_hdr->ether_type;
805 size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
807 struct udp_hdr *udp_hdr = NULL;
808 struct tcp_hdr *tcp_hdr = NULL;
809 uint32_t hash, l3hash = 0, l4hash = 0;
811 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
812 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
813 ((char *)(eth_hdr + 1) + vlan_offset);
814 size_t ip_hdr_offset;
816 l3hash = ipv4_hash(ipv4_hdr);
818 /* there is no L4 header in fragmented packet */
819 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
820 ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
823 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
824 tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
826 l4hash = HASH_L4_PORTS(tcp_hdr);
827 } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
828 udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
830 l4hash = HASH_L4_PORTS(udp_hdr);
833 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
834 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
835 ((char *)(eth_hdr + 1) + vlan_offset);
836 l3hash = ipv6_hash(ipv6_hdr);
838 if (ipv6_hdr->proto == IPPROTO_TCP) {
839 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
840 l4hash = HASH_L4_PORTS(tcp_hdr);
841 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
842 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
843 l4hash = HASH_L4_PORTS(udp_hdr);
847 hash = l3hash ^ l4hash;
851 return hash % slave_count;
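/*
 * Selection sketch (illustrative; bond_port is a placeholder): the three
 * hash callbacks above map to the public balance policies declared in
 * rte_eth_bond.h and can be selected at runtime:
 *
 *	if (rte_eth_bond_xmit_policy_set(bond_port,
 *			BALANCE_XMIT_POLICY_LAYER34) != 0)
 *		... error: not a bonded device or invalid policy ...
 *
 * after which internals->xmit_hash points at xmit_l34_hash().
 */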
855 uint64_t bwg_left_int;
856 uint64_t bwg_left_remainder;
861 bond_tlb_activate_slave(struct bond_dev_private *internals) {
864 for (i = 0; i < internals->active_slave_count; i++) {
865 tlb_last_obytets[internals->active_slaves[i]] = 0;
870 bandwidth_cmp(const void *a, const void *b)
872 const struct bwg_slave *bwg_a = a;
873 const struct bwg_slave *bwg_b = b;
874 int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
875 int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
876 (int64_t)bwg_a->bwg_left_remainder;
890 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
891 struct bwg_slave *bwg_slave)
893 struct rte_eth_link link_status;
895 rte_eth_link_get_nowait(port_id, &link_status);
896 uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
899 link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
900 bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
901 bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
905 bond_ethdev_update_tlb_slave_cb(void *arg)
907 struct bond_dev_private *internals = arg;
908 struct rte_eth_stats slave_stats;
909 struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
913 uint8_t update_stats = 0;
916 internals->slave_update_idx++;
919 if (internals->slave_update_idx >= REORDER_PERIOD_MS)
922 for (i = 0; i < internals->active_slave_count; i++) {
923 slave_id = internals->active_slaves[i];
924 rte_eth_stats_get(slave_id, &slave_stats);
925 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
926 bandwidth_left(slave_id, tx_bytes,
927 internals->slave_update_idx, &bwg_array[i]);
928 bwg_array[i].slave = slave_id;
931 tlb_last_obytets[slave_id] = slave_stats.obytes;
935 if (update_stats == 1)
936 internals->slave_update_idx = 0;
939 qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
940 for (i = 0; i < slave_count; i++)
941 internals->tlb_slaves_order[i] = bwg_array[i].slave;
943 rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
944 (struct bond_dev_private *)internals);
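/*
 * Scheduling sketch (illustrative): the TLB slave ordering is refreshed
 * from an EAL alarm rather than from the data path; the callback re-arms
 * itself, yielding a periodic timer with REORDER_PERIOD_MS granularity:
 *
 *	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000,	// microseconds
 *			bond_ethdev_update_tlb_slave_cb, internals);
 *	...
 *	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
 *
 * The Tx path then tries slaves in tlb_slaves_order[], most spare
 * bandwidth first.
 */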
948 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
950 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
951 struct bond_dev_private *internals = bd_tx_q->dev_private;
953 struct rte_eth_dev *primary_port =
954 &rte_eth_devices[internals->primary_port];
955 uint16_t num_tx_total = 0;
958 uint16_t num_of_slaves = internals->active_slave_count;
959 uint16_t slaves[RTE_MAX_ETHPORTS];
961 struct ether_hdr *ether_hdr;
962 struct ether_addr primary_slave_addr;
963 struct ether_addr active_slave_addr;
965 if (num_of_slaves < 1)
968 memcpy(slaves, internals->tlb_slaves_order,
969 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
972 ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
975 for (i = 0; i < 3; i++)
976 rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
979 for (i = 0; i < num_of_slaves; i++) {
980 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
981 for (j = num_tx_total; j < nb_pkts; j++) {
983 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
985 ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
986 if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
987 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
988 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
989 mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
993 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
994 bufs + num_tx_total, nb_pkts - num_tx_total);
996 if (num_tx_total == nb_pkts)
1000 return num_tx_total;
1004 bond_tlb_disable(struct bond_dev_private *internals)
1006 rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1010 bond_tlb_enable(struct bond_dev_private *internals)
1012 bond_ethdev_update_tlb_slave_cb(internals);
1016 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1018 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1019 struct bond_dev_private *internals = bd_tx_q->dev_private;
1021 struct ether_hdr *eth_h;
1022 uint16_t ether_type, offset;
1024 struct client_data *client_info;
1027 * We create transmit buffers for every slave and one additional to send
1028 * through tlb. In the worst case every packet will be sent on one port.
1030 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1031 uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1034 * We create separate transmit buffers for update packets as they won't
1035 * be counted in num_tx_total.
1037 struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1038 uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1040 struct rte_mbuf *upd_pkt;
1043 uint16_t num_send, num_not_send = 0;
1044 uint16_t num_tx_total = 0;
1049 /* Search tx buffer for ARP packets and forward them to alb */
1050 for (i = 0; i < nb_pkts; i++) {
1051 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1052 ether_type = eth_h->ether_type;
1053 offset = get_vlan_offset(eth_h, &ether_type);
1055 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1056 slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1058 /* Change src mac in eth header */
1059 rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1061 /* Add packet to slave tx buffer */
1062 slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1063 slave_bufs_pkts[slave_idx]++;
1065 /* If packet is not ARP, send it with TLB policy */
1066 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1068 slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1072 /* Update connected client ARP tables */
1073 if (internals->mode6.ntt) {
1074 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1075 client_info = &internals->mode6.client_table[i];
1077 if (client_info->in_use) {
1078 /* Allocate new packet to send ARP update on current slave */
1079 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1080 if (upd_pkt == NULL) {
1081 RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
1084 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1085 + client_info->vlan_count * sizeof(struct vlan_hdr);
1086 upd_pkt->data_len = pkt_size;
1087 upd_pkt->pkt_len = pkt_size;
1089 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1092 /* Add packet to update tx buffer */
1093 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1094 update_bufs_pkts[slave_idx]++;
1097 internals->mode6.ntt = 0;
1100 /* Send ARP packets on proper slaves */
1101 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1102 if (slave_bufs_pkts[i] > 0) {
1103 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1104 slave_bufs[i], slave_bufs_pkts[i]);
1105 for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1106 bufs[nb_pkts - 1 - num_not_send - j] =
1107 slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
1110 num_tx_total += num_send;
1111 num_not_send += slave_bufs_pkts[i] - num_send;
1113 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1114 /* Print TX stats including update packets */
1115 for (j = 0; j < slave_bufs_pkts[i]; j++) {
1116 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1117 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1123 /* Send update packets on proper slaves */
1124 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1125 if (update_bufs_pkts[i] > 0) {
1126 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1127 update_bufs_pkts[i]);
1128 for (j = num_send; j < update_bufs_pkts[i]; j++) {
1129 rte_pktmbuf_free(update_bufs[i][j]);
1131 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1132 for (j = 0; j < update_bufs_pkts[i]; j++) {
1133 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1134 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1140 /* Send non-ARP packets using tlb policy */
1141 if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1142 num_send = bond_ethdev_tx_burst_tlb(queue,
1143 slave_bufs[RTE_MAX_ETHPORTS],
1144 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1146 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
1147 bufs[nb_pkts - 1 - num_not_send - j] =
1148 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
1151 num_tx_total += num_send;
1154 return num_tx_total;
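/*
 * Sizing note (illustrative arithmetic): an untagged ARP update generated
 * above occupies sizeof(struct ether_hdr) + sizeof(struct arp_hdr) =
 * 14 + 28 = 42 bytes; each VLAN tag on the client connection adds
 * sizeof(struct vlan_hdr) = 4 bytes, so a single-tagged client yields a
 * 46-byte frame (before the NIC pads to the 60-byte minimum).
 */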
1158 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1161 struct bond_dev_private *internals;
1162 struct bond_tx_queue *bd_tx_q;
1164 uint16_t num_of_slaves;
1165 uint16_t slaves[RTE_MAX_ETHPORTS];
1167 uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
1171 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
1172 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1174 bd_tx_q = (struct bond_tx_queue *)queue;
1175 internals = bd_tx_q->dev_private;
1177 /* Copy slave list to protect against slave up/down changes during tx
1179 num_of_slaves = internals->active_slave_count;
1180 memcpy(slaves, internals->active_slaves,
1181 sizeof(internals->active_slaves[0]) * num_of_slaves);
1183 if (num_of_slaves < 1)
1184 return num_tx_total;
1186 /* Populate slaves mbuf with the packets which are to be sent on it */
1187 for (i = 0; i < nb_pkts; i++) {
1188 /* Select output slave using hash based on xmit policy */
1189 op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
1191 /* Populate slave mbuf arrays with mbufs for that slave */
1192 slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
1195 /* Send packet burst on each slave device */
1196 for (i = 0; i < num_of_slaves; i++) {
1197 if (slave_nb_pkts[i] > 0) {
1198 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1199 slave_bufs[i], slave_nb_pkts[i]);
1201 /* if tx burst fails move packets to end of bufs */
1202 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1203 int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
1205 tx_fail_total += slave_tx_fail_count;
1206 memcpy(&bufs[nb_pkts - tx_fail_total],
1207 &slave_bufs[i][num_tx_slave],
1208 slave_tx_fail_count * sizeof(bufs[0]));
1211 num_tx_total += num_tx_slave;
1215 return num_tx_total;
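/*
 * Setup sketch (illustrative; slave0/slave1 are placeholder port ids): a
 * balance-mode bonded device that reaches this Tx path is typically built
 * through the public API:
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_BALANCE, rte_socket_id());
 *	rte_eth_bond_slave_add(bond_port, slave0);
 *	rte_eth_bond_slave_add(bond_port, slave1);
 *	rte_eth_bond_xmit_policy_set(bond_port, BALANCE_XMIT_POLICY_LAYER23);
 *
 * after which each burst is split per slave by internals->xmit_hash().
 */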
1219 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1222 struct bond_dev_private *internals;
1223 struct bond_tx_queue *bd_tx_q;
1225 uint16_t num_of_slaves;
1226 uint16_t slaves[RTE_MAX_ETHPORTS];
1227 /* positions in the slaves array, not port IDs */
1228 uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
1229 uint8_t distributing_count;
1231 uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
1232 uint16_t i, op_slave_idx;
1234 /* Per-slave Tx buffers for the packets to be sent in 802.3ad mode. */
1235 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
1237 /* Total amount of packets in slave_bufs */
1238 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1239 /* Slow packets placed in each slave */
1240 uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1242 bd_tx_q = (struct bond_tx_queue *)queue;
1243 internals = bd_tx_q->dev_private;
1245 /* Copy slave list to protect against slave up/down changes during tx
1247 num_of_slaves = internals->active_slave_count;
1248 if (num_of_slaves < 1)
1249 return num_tx_total;
1251 memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
1253 distributing_count = 0;
1254 for (i = 0; i < num_of_slaves; i++) {
1255 struct port *port = &mode_8023ad_ports[slaves[i]];
1257 if (ACTOR_STATE(port, DISTRIBUTING))
1258 distributing_offsets[distributing_count++] = i;
1261 if (likely(distributing_count > 0)) {
1262 /* Populate each slave's mbuf array with the packets to be sent on it */
1263 for (i = 0; i < nb_pkts; i++) {
1264 /* Select output slave using hash based on xmit policy */
1265 op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
1267 /* Populate slave mbuf arrays with mbufs for that slave. Use only
1268 * slaves that are currently distributing. */
1269 uint8_t slave_offset = distributing_offsets[op_slave_idx];
1270 slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
1271 slave_nb_pkts[slave_offset]++;
1275 /* Send packet burst on each slave device */
1276 for (i = 0; i < num_of_slaves; i++) {
1277 if (slave_nb_pkts[i] == 0)
1280 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1281 slave_bufs[i], slave_nb_pkts[i]);
1283 /* If tx burst fails drop slow packets */
1284 for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
1285 rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
1287 num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
1288 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
1290 /* If tx burst fails move packets to end of bufs */
1291 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1292 uint16_t j = nb_pkts - num_tx_fail_total;
1293 for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
1294 bufs[j] = slave_bufs[i][num_tx_slave];
1298 /* Check for LACP control packets and send if available */
1299 for (i = 0; i < num_of_slaves; i++) {
1300 struct port *port = &mode_8023ad_ports[slaves[i]];
1301 struct rte_mbuf *ctrl_pkt = NULL;
1303 int pkt_avail = rte_ring_dequeue(port->tx_ring,
1304 (void **)&ctrl_pkt);
1306 if (unlikely(pkt_avail == 0)) {
1307 num_tx_slave = rte_eth_tx_burst(slaves[i],
1308 bd_tx_q->queue_id, &ctrl_pkt, 1);
1311 * re-enqueue LAG control plane packets to buffering
1312 * ring if transmission fails so the packet isn't lost.
1314 if (num_tx_slave != 1)
1315 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1319 return num_tx_total;
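/*
 * Hand-off sketch (illustrative; bond_port is a placeholder): the software
 * slow-path handling above can be bypassed when the application enables
 * dedicated queues before starting the device (rte_eth_bond_8023ad.h), in
 * which case the *_8023ad_fast_queue burst handlers are installed instead:
 *
 *	if (rte_eth_bond_8023ad_dedicated_queues_enable(bond_port) == 0) {
 *		... each slave reserves an extra Rx/Tx queue pair for LACP,
 *		    steered by the rte_flow rule shown earlier ...
 *	}
 */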
1323 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1326 struct bond_dev_private *internals;
1327 struct bond_tx_queue *bd_tx_q;
1329 uint8_t tx_failed_flag = 0; uint16_t num_of_slaves;
1330 uint16_t slaves[RTE_MAX_ETHPORTS];
1332 uint16_t max_nb_of_tx_pkts = 0;
1334 int slave_tx_total[RTE_MAX_ETHPORTS];
1335 int i, most_successful_tx_slave = -1;
1337 bd_tx_q = (struct bond_tx_queue *)queue;
1338 internals = bd_tx_q->dev_private;
1340 /* Copy slave list to protect against slave up/down changes during tx
1342 num_of_slaves = internals->active_slave_count;
1343 memcpy(slaves, internals->active_slaves,
1344 sizeof(internals->active_slaves[0]) * num_of_slaves);
1346 if (num_of_slaves < 1)
1349 /* Increment reference count on mbufs */
1350 for (i = 0; i < nb_pkts; i++)
1351 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1353 /* Transmit burst on each active slave */
1354 for (i = 0; i < num_of_slaves; i++) {
1355 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1358 if (unlikely(slave_tx_total[i] < nb_pkts))
1361 /* record the value and slave index for the slave which transmits the
1362 * maximum number of packets */
1363 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1364 max_nb_of_tx_pkts = slave_tx_total[i];
1365 most_successful_tx_slave = i;
1369 /* if slaves fail to transmit packets from burst, the calling application
1370 * is not expected to know about multiple references to packets so we must
1371 * handle failures of all packets except those of the most successful slave
1373 if (unlikely(tx_failed_flag))
1374 for (i = 0; i < num_of_slaves; i++)
1375 if (i != most_successful_tx_slave)
1376 while (slave_tx_total[i] < nb_pkts)
1377 rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1379 return max_nb_of_tx_pkts;
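/*
 * Reference-count sketch (illustrative): with three active slaves each
 * mbuf's refcnt is raised by num_of_slaves - 1 = 2, so one buffer can be
 * handed to all three Tx queues:
 *
 *	refcnt = 1 (owner) + 2 (update) = 3
 *
 * Each slave's Tx completion eventually frees the mbuf once, and the
 * buffer returns to its pool only after the third free. Failed
 * transmissions are freed explicitly above so the count stays exact.
 */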
1383 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1385 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1387 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1389 * If in mode 4 then save the link properties of the first
1390 * slave; all subsequent slaves must match these properties
1392 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1394 bond_link->link_autoneg = slave_link->link_autoneg;
1395 bond_link->link_duplex = slave_link->link_duplex;
1396 bond_link->link_speed = slave_link->link_speed;
1399 * In any other mode the link properties are set to default
1400 * values of AUTONEG/DUPLEX
1402 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1403 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1408 link_properties_valid(struct rte_eth_dev *ethdev,
1409 struct rte_eth_link *slave_link)
1411 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1413 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1414 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1416 if (bond_link->link_duplex != slave_link->link_duplex ||
1417 bond_link->link_autoneg != slave_link->link_autoneg ||
1418 bond_link->link_speed != slave_link->link_speed)
1426 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1428 struct ether_addr *mac_addr;
1430 if (eth_dev == NULL) {
1431 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1435 if (dst_mac_addr == NULL) {
1436 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1440 mac_addr = eth_dev->data->mac_addrs;
1442 ether_addr_copy(mac_addr, dst_mac_addr);
1447 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1449 struct ether_addr *mac_addr;
1451 if (eth_dev == NULL) {
1452 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1456 if (new_mac_addr == NULL) {
1457 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1461 mac_addr = eth_dev->data->mac_addrs;
1463 /* If new MAC is different to current MAC then update */
1464 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1465 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1471 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1473 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1476 /* Update slave devices MAC addresses */
1477 if (internals->slave_count < 1)
1480 switch (internals->mode) {
1481 case BONDING_MODE_ROUND_ROBIN:
1482 case BONDING_MODE_BALANCE:
1483 case BONDING_MODE_BROADCAST:
1484 for (i = 0; i < internals->slave_count; i++) {
1485 if (rte_eth_dev_default_mac_addr_set(
1486 internals->slaves[i].port_id,
1487 bonded_eth_dev->data->mac_addrs)) {
1488 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1489 internals->slaves[i].port_id);
1494 case BONDING_MODE_8023AD:
1495 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1497 case BONDING_MODE_ACTIVE_BACKUP:
1498 case BONDING_MODE_TLB:
1499 case BONDING_MODE_ALB:
1501 for (i = 0; i < internals->slave_count; i++) {
1502 if (internals->slaves[i].port_id ==
1503 internals->current_primary_port) {
1504 if (rte_eth_dev_default_mac_addr_set(
1505 internals->current_primary_port,
1506 bonded_eth_dev->data->mac_addrs)) {
1507 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1508 internals->current_primary_port);
1512 if (rte_eth_dev_default_mac_addr_set(
1513 internals->slaves[i].port_id,
1514 &internals->slaves[i].persisted_mac_addr)) {
1515 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1516 internals->slaves[i].port_id);
1527 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1529 struct bond_dev_private *internals;
1531 internals = eth_dev->data->dev_private;
1534 case BONDING_MODE_ROUND_ROBIN:
1535 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1536 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1538 case BONDING_MODE_ACTIVE_BACKUP:
1539 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1540 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1542 case BONDING_MODE_BALANCE:
1543 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1544 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1546 case BONDING_MODE_BROADCAST:
1547 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1548 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1550 case BONDING_MODE_8023AD:
1551 if (bond_mode_8023ad_enable(eth_dev) != 0)
1554 if (internals->mode4.dedicated_queues.enabled == 0) {
1555 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1556 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1557 RTE_LOG(WARNING, PMD,
1558 "Using mode 4, it is necessary to do TX burst "
1559 "and RX burst at least every 100ms.\n");
1561 /* Use flow director's optimization */
1562 eth_dev->rx_pkt_burst =
1563 bond_ethdev_rx_burst_8023ad_fast_queue;
1564 eth_dev->tx_pkt_burst =
1565 bond_ethdev_tx_burst_8023ad_fast_queue;
1568 case BONDING_MODE_TLB:
1569 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1570 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1572 case BONDING_MODE_ALB:
1573 if (bond_mode_alb_enable(eth_dev) != 0)
1576 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1577 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1583 internals->mode = mode;
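/*
 * Mode-change sketch (illustrative; bond_port is a placeholder): the burst
 * handlers above are swapped whenever the application changes mode through
 * the public API:
 *
 *	if (rte_eth_bond_mode_set(bond_port, BONDING_MODE_ACTIVE_BACKUP) != 0)
 *		... error: not a bonded device or unsupported mode ...
 *
 * which lands in bond_ethdev_mode_set() on the bonded ethdev.
 */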
1590 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1591 struct rte_eth_dev *slave_eth_dev)
1594 struct bond_dev_private *internals = (struct bond_dev_private *)
1595 bonded_eth_dev->data->dev_private;
1596 struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1598 if (port->slow_pool == NULL) {
1600 int slave_id = slave_eth_dev->data->port_id;
1602 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1604 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1605 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1606 slave_eth_dev->data->numa_node);
1608 /* Any memory allocation failure in initialization is critical because
1609 * resources can't be freed, so reinitialization is impossible. */
1610 if (port->slow_pool == NULL) {
1611 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1612 slave_id, mem_name, rte_strerror(rte_errno));
1616 if (internals->mode4.dedicated_queues.enabled == 1) {
1617 /* Configure slow Rx queue */
1619 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1620 internals->mode4.dedicated_queues.rx_qid, 128,
1621 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1622 NULL, port->slow_pool);
1625 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1626 slave_eth_dev->data->port_id,
1627 internals->mode4.dedicated_queues.rx_qid,
1632 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1633 internals->mode4.dedicated_queues.tx_qid, 512,
1634 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1638 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1639 slave_eth_dev->data->port_id,
1640 internals->mode4.dedicated_queues.tx_qid,
1649 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1650 struct rte_eth_dev *slave_eth_dev)
1652 struct bond_rx_queue *bd_rx_q;
1653 struct bond_tx_queue *bd_tx_q;
1654 uint16_t nb_rx_queues;
1655 uint16_t nb_tx_queues;
1659 struct rte_flow_error flow_error;
1661 struct bond_dev_private *internals = (struct bond_dev_private *)
1662 bonded_eth_dev->data->dev_private;
1665 rte_eth_dev_stop(slave_eth_dev->data->port_id);
1667 /* Enable interrupts on slave device if supported */
1668 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1669 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1671 /* If RSS is enabled for bonding, try to enable it for slaves */
1672 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1673 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1675 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1676 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1677 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1678 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1680 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1683 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1684 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1685 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1686 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1689 slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1690 bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1692 nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1693 nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1695 if (internals->mode == BONDING_MODE_8023AD) {
1696 if (internals->mode4.dedicated_queues.enabled == 1) {
1702 /* Configure device */
1703 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1704 nb_rx_queues, nb_tx_queues,
1705 &(slave_eth_dev->data->dev_conf));
1707 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1708 slave_eth_dev->data->port_id, errval);
1712 /* Setup Rx Queues */
1713 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1714 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1716 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1717 bd_rx_q->nb_rx_desc,
1718 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1719 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1722 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1723 slave_eth_dev->data->port_id, q_id, errval);
1728 /* Setup Tx Queues */
1729 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1730 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1732 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1733 bd_tx_q->nb_tx_desc,
1734 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1738 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1739 slave_eth_dev->data->port_id, q_id, errval);
1744 if (internals->mode == BONDING_MODE_8023AD &&
1745 internals->mode4.dedicated_queues.enabled == 1) {
1746 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
1750 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1751 slave_eth_dev->data->port_id) != 0) {
1753 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1754 slave_eth_dev->data->port_id, q_id, errval);
1758 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1759 rte_flow_destroy(slave_eth_dev->data->port_id,
1760 internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1763 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1764 slave_eth_dev->data->port_id);
1768 errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1770 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1771 slave_eth_dev->data->port_id, errval);
1775 /* If RSS is enabled for bonding, synchronize RETA */
1776 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1778 struct bond_dev_private *internals;
1780 internals = bonded_eth_dev->data->dev_private;
1782 for (i = 0; i < internals->slave_count; i++) {
1783 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1784 errval = rte_eth_dev_rss_reta_update(
1785 slave_eth_dev->data->port_id,
1786 &internals->reta_conf[0],
1787 internals->slaves[i].reta_size);
1789 RTE_LOG(WARNING, PMD,
1790 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1791 " RSS Configuration for bonding may be inconsistent.\n",
1792 slave_eth_dev->data->port_id, errval);
1799 /* If lsc interrupt is set, check initial slave's link status */
1800 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1801 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1802 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1803 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1811 slave_remove(struct bond_dev_private *internals,
1812 struct rte_eth_dev *slave_eth_dev)
1816 for (i = 0; i < internals->slave_count; i++)
1817 if (internals->slaves[i].port_id ==
1818 slave_eth_dev->data->port_id)
1821 if (i < (internals->slave_count - 1))
1822 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1823 sizeof(internals->slaves[0]) *
1824 (internals->slave_count - i - 1));
1826 internals->slave_count--;
1828 /* force reconfiguration of slave interfaces */
1829 _rte_eth_dev_reset(slave_eth_dev);
1833 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1836 slave_add(struct bond_dev_private *internals,
1837 struct rte_eth_dev *slave_eth_dev)
1839 struct bond_slave_details *slave_details =
1840 &internals->slaves[internals->slave_count];
1842 slave_details->port_id = slave_eth_dev->data->port_id;
1843 slave_details->last_link_status = 0;
1845 /* Mark slave devices that don't support interrupts so we can
1846 * compensate when we start the bond
1848 if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1849 slave_details->link_status_poll_enabled = 1;
1852 slave_details->link_status_wait_to_complete = 0;
1853 /* Persist the slave's MAC address so it can be restored on removal */
1854 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1855 sizeof(struct ether_addr));
1859 bond_ethdev_primary_set(struct bond_dev_private *internals,
1860 uint16_t slave_port_id)
1864 if (internals->active_slave_count < 1)
1865 internals->current_primary_port = slave_port_id;
1867 /* Search bonded device slave ports for new proposed primary port */
1868 for (i = 0; i < internals->active_slave_count; i++) {
1869 if (internals->active_slaves[i] == slave_port_id)
1870 internals->current_primary_port = slave_port_id;
1875 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1878 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1880 struct bond_dev_private *internals;
1883 /* slave eth dev will be started by bonded device */
1884 if (check_for_bonded_ethdev(eth_dev)) {
1885 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1886 eth_dev->data->port_id);
1890 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1891 eth_dev->data->dev_started = 1;
1893 internals = eth_dev->data->dev_private;
1895 if (internals->slave_count == 0) {
1896 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1900 if (internals->user_defined_mac == 0) {
1901 struct ether_addr *new_mac_addr = NULL;
1903 for (i = 0; i < internals->slave_count; i++)
1904 if (internals->slaves[i].port_id == internals->primary_port)
1905 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1907 if (new_mac_addr == NULL)
1910 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1911 RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1912 eth_dev->data->port_id);
1917 /* Update all slave devices' MACs */
1918 if (mac_address_slaves_update(eth_dev) != 0)
1921 /* If the bonded device is configured in promiscuous mode then re-apply the config */
1922 if (internals->promiscuous_en)
1923 bond_ethdev_promiscuous_enable(eth_dev);
1925 if (internals->mode == BONDING_MODE_8023AD) {
1926 if (internals->mode4.dedicated_queues.enabled == 1) {
1927 internals->mode4.dedicated_queues.rx_qid =
1928 eth_dev->data->nb_rx_queues;
1929 internals->mode4.dedicated_queues.tx_qid =
1930 eth_dev->data->nb_tx_queues;
1935 /* Reconfigure each slave device if starting bonded device */
1936 for (i = 0; i < internals->slave_count; i++) {
1937 struct rte_eth_dev *slave_ethdev =
1938 &(rte_eth_devices[internals->slaves[i].port_id]);
1939 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1941 "bonded port (%d) failed to reconfigure slave device (%d)",
1942 eth_dev->data->port_id,
1943 internals->slaves[i].port_id);
1946 /* We will need to poll for link status if any slave doesn't
1947 * support interrupts
1949 if (internals->slaves[i].link_status_poll_enabled)
1950 internals->link_status_polling_enabled = 1;
1952 /* start polling if needed */
1953 if (internals->link_status_polling_enabled) {
1955 internals->link_status_polling_interval_ms * 1000,
1956 bond_ethdev_slave_link_status_change_monitor,
1957 (void *)&rte_eth_devices[internals->port_id]);
1960 if (internals->user_defined_primary_port)
1961 bond_ethdev_primary_set(internals, internals->primary_port);
1963 if (internals->mode == BONDING_MODE_8023AD)
1964 bond_mode_8023ad_start(eth_dev);
1966 if (internals->mode == BONDING_MODE_TLB ||
1967 internals->mode == BONDING_MODE_ALB)
1968 bond_tlb_enable(internals);
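/*
 * Start-up sketch (illustrative; device names, PCI addresses and port_conf
 * are placeholders): a bonded device is usually created from EAL arguments
 * and started like any other port, e.g.:
 *
 *	--vdev 'net_bonding0,mode=2,slave=0000:02:00.0,slave=0000:02:00.1'
 *
 *	uint16_t port;
 *	rte_eth_dev_get_port_by_name("net_bonding0", &port);
 *	rte_eth_dev_configure(port, 1, 1, &port_conf);
 *	... set up the Rx and Tx queues ...
 *	rte_eth_dev_start(port);	// ends up in bond_ethdev_start()
 */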
1974 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1978 if (dev->data->rx_queues != NULL) {
1979 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1980 rte_free(dev->data->rx_queues[i]);
1981 dev->data->rx_queues[i] = NULL;
1983 dev->data->nb_rx_queues = 0;
1986 if (dev->data->tx_queues != NULL) {
1987 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1988 rte_free(dev->data->tx_queues[i]);
1989 dev->data->tx_queues[i] = NULL;
1991 dev->data->nb_tx_queues = 0;
1996 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1998 struct bond_dev_private *internals = eth_dev->data->dev_private;
2001 if (internals->mode == BONDING_MODE_8023AD) {
2005 bond_mode_8023ad_stop(eth_dev);
2007 /* Discard all messages to/from mode 4 state machines */
2008 for (i = 0; i < internals->active_slave_count; i++) {
2009 port = &mode_8023ad_ports[internals->active_slaves[i]];
2011 RTE_ASSERT(port->rx_ring != NULL);
2012 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2013 rte_pktmbuf_free(pkt);
2015 RTE_ASSERT(port->tx_ring != NULL);
2016 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2017 rte_pktmbuf_free(pkt);
2021 if (internals->mode == BONDING_MODE_TLB ||
2022 internals->mode == BONDING_MODE_ALB) {
2023 bond_tlb_disable(internals);
2024 for (i = 0; i < internals->active_slave_count; i++)
2025 tlb_last_obytets[internals->active_slaves[i]] = 0;
2028 internals->active_slave_count = 0;
2029 internals->link_status_polling_enabled = 0;
2030 for (i = 0; i < internals->slave_count; i++)
2031 internals->slaves[i].last_link_status = 0;
2033 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2034 eth_dev->data->dev_started = 0;
2038 bond_ethdev_close(struct rte_eth_dev *dev)
2040 struct bond_dev_private *internals = dev->data->dev_private;
2041 uint8_t bond_port_id = internals->port_id;
2044 RTE_LOG(INFO, PMD, "Closing bonded device %s\n", dev->device->name);
2045 while (internals->slave_count != skipped) {
2046 uint16_t port_id = internals->slaves[skipped].port_id;
2048 rte_eth_dev_stop(port_id);
2050 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2052 "Failed to remove port %d from bonded device "
2053 "%s\n", port_id, dev->device->name);
2057 bond_ethdev_free_queues(dev);
2058 rte_bitmap_reset(internals->vlan_filter_bmp);
2061 /* forward declaration */
2062 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2065 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2067 struct bond_dev_private *internals = dev->data->dev_private;
2069 uint16_t max_nb_rx_queues = UINT16_MAX;
2070 uint16_t max_nb_tx_queues = UINT16_MAX;
2072 dev_info->max_mac_addrs = 1;
2074 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2075 internals->candidate_max_rx_pktlen :
2076 ETHER_MAX_JUMBO_FRAME_LEN;
2078 /* The max number of tx/rx queues that the bonded device can support is the
2079 * minimum of the bonded slaves' values, as all slaves must be capable
2080 * of supporting the same number of tx/rx queues.
2082 if (internals->slave_count > 0) {
2083 struct rte_eth_dev_info slave_info;
2086 for (idx = 0; idx < internals->slave_count; idx++) {
2087 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2090 if (slave_info.max_rx_queues < max_nb_rx_queues)
2091 max_nb_rx_queues = slave_info.max_rx_queues;
2093 if (slave_info.max_tx_queues < max_nb_tx_queues)
2094 max_nb_tx_queues = slave_info.max_tx_queues;
2098 dev_info->max_rx_queues = max_nb_rx_queues;
2099 dev_info->max_tx_queues = max_nb_tx_queues;
2102 * If dedicated hw queues enabled for link bonding device in LACP mode
2103 * then we need to reduce the maximum number of data path queues by 1.
2105 if (internals->mode == BONDING_MODE_8023AD &&
2106 internals->mode4.dedicated_queues.enabled == 1) {
2107 dev_info->max_rx_queues--;
2108 dev_info->max_tx_queues--;
2111 dev_info->min_rx_bufsize = 0;
2113 dev_info->rx_offload_capa = internals->rx_offload_capa;
2114 dev_info->tx_offload_capa = internals->tx_offload_capa;
2115 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2117 dev_info->reta_size = internals->reta_size;
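
/*
 * Illustrative sketch (not part of the driver): because the reported queue
 * limits are the minimum across all slaves, an application should size its
 * queues from rte_eth_dev_info_get() on the bonded port itself. The names
 * below are hypothetical.
 *
 *	struct rte_eth_dev_info info;
 *	rte_eth_dev_info_get(bond_port_id, &info);
 *	uint16_t nb_rxq = RTE_MIN(desired_rxq, info.max_rx_queues);
 *	uint16_t nb_txq = RTE_MIN(desired_txq, info.max_tx_queues);
 */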
static int
bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
	int res;
	uint16_t i;
	struct bond_dev_private *internals = dev->data->dev_private;

	/* don't do this while a slave is being added */
	rte_spinlock_lock(&internals->lock);

	if (on)
		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
	else
		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);

	for (i = 0; i < internals->slave_count; i++) {
		uint16_t port_id = internals->slaves[i].port_id;

		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
		if (res == -ENOTSUP)
			RTE_LOG(WARNING, PMD,
					"Setting VLAN filter on slave port %u not supported.\n",
					port_id);
	}

	rte_spinlock_unlock(&internals->lock);
	return 0;
}
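
/*
 * Illustrative sketch (not part of the driver): VLAN filters set on the
 * bonded port are recorded in the bitmap above and replayed onto every
 * slave. "bond_port_id" is hypothetical.
 *
 *	rte_eth_dev_vlan_filter(bond_port_id, 100, 1);   (add VLAN 100)
 *	rte_eth_dev_vlan_filter(bond_port_id, 100, 0);   (remove it again)
 */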
static int
bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
					0, dev->data->numa_node);
	if (bd_rx_q == NULL)
		return -1;

	bd_rx_q->queue_id = rx_queue_id;
	bd_rx_q->dev_private = dev->data->dev_private;

	bd_rx_q->nb_rx_desc = nb_rx_desc;

	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
	bd_rx_q->mb_pool = mb_pool;

	dev->data->rx_queues[rx_queue_id] = bd_rx_q;

	return 0;
}
static int
bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_txconf *tx_conf)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
					0, dev->data->numa_node);

	if (bd_tx_q == NULL)
		return -1;

	bd_tx_q->queue_id = tx_queue_id;
	bd_tx_q->dev_private = dev->data->dev_private;

	bd_tx_q->nb_tx_desc = nb_tx_desc;
	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));

	dev->data->tx_queues[tx_queue_id] = bd_tx_q;

	return 0;
}
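
/*
 * Illustrative sketch (not part of the driver): the bonded port's queue
 * objects are only bookkeeping; real descriptor rings live on the slaves.
 * They are configured through the usual ethdev calls; names below are
 * hypothetical.
 *
 *	rte_eth_rx_queue_setup(bond_port_id, 0, 512,
 *			rte_eth_dev_socket_id(bond_port_id), NULL, mbuf_pool);
 *	rte_eth_tx_queue_setup(bond_port_id, 0, 512,
 *			rte_eth_dev_socket_id(bond_port_id), NULL);
 */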
static void
bond_ethdev_rx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}

static void
bond_ethdev_tx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
	struct bond_dev_private *internals;

	/* Default value for polling slave found is true as we don't want to
	 * disable the polling thread if we cannot get the lock */
	int i, polling_slave_found = 1;

	if (cb_arg == NULL)
		return;

	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;

	if (!bonded_ethdev->data->dev_started ||
			!internals->link_status_polling_enabled)
		return;

	/* If device is currently being configured then don't check slaves link
	 * status, wait until next period */
	if (rte_spinlock_trylock(&internals->lock)) {
		if (internals->slave_count > 0)
			polling_slave_found = 0;

		for (i = 0; i < internals->slave_count; i++) {
			if (!internals->slaves[i].link_status_poll_enabled)
				continue;

			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
			polling_slave_found = 1;

			/* Update slave link status */
			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
					internals->slaves[i].link_status_wait_to_complete);

			/* if link status has changed since last checked then call lsc
			 * event callback */
			if (slave_ethdev->data->dev_link.link_status !=
					internals->slaves[i].last_link_status) {
				internals->slaves[i].last_link_status =
						slave_ethdev->data->dev_link.link_status;

				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
						RTE_ETH_EVENT_INTR_LSC,
						&bonded_ethdev->data->port_id,
						NULL);
			}
		}
		rte_spinlock_unlock(&internals->lock);
	}

	if (polling_slave_found)
		/* Set alarm to continue monitoring link status of slave ethdevs */
		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
				bond_ethdev_slave_link_status_change_monitor, cb_arg);
}
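
/*
 * Illustrative sketch (not part of the driver): the monitor above reschedules
 * itself with the EAL alarm API, giving periodic polling without a dedicated
 * thread. The same self-rearming pattern in application code would look like
 * this; the callback name, context and 10 ms interval are hypothetical.
 *
 *	static void poll_cb(void *arg)
 *	{
 *		... do periodic work ...
 *		rte_eal_alarm_set(10 * 1000, poll_cb, arg);   (re-arm, in us)
 *	}
 *
 *	rte_eal_alarm_set(10 * 1000, poll_cb, ctx);      (kick off)
 *	rte_eal_alarm_cancel(poll_cb, ctx);              (stop polling)
 */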
static int
bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
{
	void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);

	struct bond_dev_private *bond_ctx;
	struct rte_eth_link slave_link;

	uint32_t idx;

	bond_ctx = ethdev->data->dev_private;

	ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

	if (ethdev->data->dev_started == 0 ||
			bond_ctx->active_slave_count == 0) {
		ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
		return 0;
	}

	ethdev->data->dev_link.link_status = ETH_LINK_UP;

	if (wait_to_complete)
		link_update = rte_eth_link_get;
	else
		link_update = rte_eth_link_get_nowait;

	switch (bond_ctx->mode) {
	case BONDING_MODE_BROADCAST:
		/*
		 * Setting link speed to UINT32_MAX to ensure we pick up the
		 * value of the first active slave
		 */
		ethdev->data->dev_link.link_speed = UINT32_MAX;

		/*
		 * link speed is the minimum of all the slaves' link speeds,
		 * as packet loss will occur on the slowest slave if
		 * transmission at rates greater than its speed is attempted
		 */
		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
			link_update(bond_ctx->active_slaves[idx], &slave_link);

			if (slave_link.link_speed <
					ethdev->data->dev_link.link_speed)
				ethdev->data->dev_link.link_speed =
						slave_link.link_speed;
		}
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		/* Current primary slave */
		link_update(bond_ctx->current_primary_port, &slave_link);

		ethdev->data->dev_link.link_speed = slave_link.link_speed;
		break;
	case BONDING_MODE_8023AD:
		ethdev->data->dev_link.link_autoneg =
				bond_ctx->mode4.slave_link.link_autoneg;
		ethdev->data->dev_link.link_duplex =
				bond_ctx->mode4.slave_link.link_duplex;
		/* fall through to update link speed */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		/*
		 * In these modes the maximum theoretical link speed is the
		 * sum of all the slaves' speeds
		 */
		ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;

		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
			link_update(bond_ctx->active_slaves[idx], &slave_link);

			ethdev->data->dev_link.link_speed +=
					slave_link.link_speed;
		}
	}

	return 0;
}
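
/*
 * Illustrative sketch (not part of the driver): an application reads the
 * aggregated link state through the standard API; in balance/round-robin
 * modes link_speed is the sum of the active slaves' speeds, in broadcast the
 * minimum, and in active-backup the primary's. "bond_port_id" is
 * hypothetical.
 *
 *	struct rte_eth_link link;
 *	rte_eth_link_get_nowait(bond_port_id, &link);
 *	if (link.link_status == ETH_LINK_UP)
 *		printf("bond up at %u Mbps\n", link.link_speed);
 */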
static int
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_stats slave_stats;
	int i, j;

	for (i = 0; i < internals->slave_count; i++) {
		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);

		stats->ipackets += slave_stats.ipackets;
		stats->opackets += slave_stats.opackets;
		stats->ibytes += slave_stats.ibytes;
		stats->obytes += slave_stats.obytes;
		stats->imissed += slave_stats.imissed;
		stats->ierrors += slave_stats.ierrors;
		stats->oerrors += slave_stats.oerrors;
		stats->rx_nombuf += slave_stats.rx_nombuf;

		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
			stats->q_opackets[j] += slave_stats.q_opackets[j];
			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
			stats->q_obytes[j] += slave_stats.q_obytes[j];
			stats->q_errors[j] += slave_stats.q_errors[j];
		}
	}

	return 0;
}
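
/*
 * Illustrative sketch (not part of the driver): stats read on the bonded
 * port are the per-slave counters summed above, so counters contributed by
 * a slave are no longer included once that slave is removed.
 * "bond_port_id" is hypothetical; PRIu64 needs <inttypes.h>.
 *
 *	struct rte_eth_stats stats;
 *	if (rte_eth_stats_get(bond_port_id, &stats) == 0)
 *		printf("rx=%" PRIu64 " tx=%" PRIu64 "\n",
 *				stats.ipackets, stats.opackets);
 */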
static void
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	for (i = 0; i < internals->slave_count; i++)
		rte_eth_stats_reset(internals->slaves[i].port_id);
}
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	int i;

	internals->promiscuous_en = 1;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_enable(internals->slaves[i].port_id);
		break;
	/* In mode 4, promiscuous mode is managed when slaves are added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to the primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_enable(internals->current_primary_port);
	}
}
static void
bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	internals->promiscuous_en = 0;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_disable(internals->slaves[i].port_id);
		break;
	/* In mode 4, promiscuous mode is managed when slaves are added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to the primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_disable(internals->current_primary_port);
	}
}
static void
bond_ethdev_delayed_lsc_propagation(void *arg)
{
	if (arg == NULL)
		return;

	_rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
			RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
}
int
bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
		void *param, void *ret_param __rte_unused)
{
	struct rte_eth_dev *bonded_eth_dev;
	struct bond_dev_private *internals;
	struct rte_eth_link link;

	int i, valid_slave = 0;
	uint8_t active_pos;
	uint8_t lsc_flag = 0;

	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
		return 0;

	bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];

	if (check_for_bonded_ethdev(bonded_eth_dev))
		return 0;

	internals = bonded_eth_dev->data->dev_private;

	/* If the device isn't started don't handle interrupts */
	if (!bonded_eth_dev->data->dev_started)
		return 0;

	/* verify that port_id is a valid slave of bonded port */
	for (i = 0; i < internals->slave_count; i++) {
		if (internals->slaves[i].port_id == port_id) {
			valid_slave = 1;
			break;
		}
	}

	if (!valid_slave)
		return 0;

	/* Search for port in active port list */
	active_pos = find_slave_by_id(internals->active_slaves,
			internals->active_slave_count, port_id);

	rte_eth_link_get_nowait(port_id, &link);
	if (link.link_status) {
		if (active_pos < internals->active_slave_count)
			return 0;

		/* if no active slave ports then set this port to be primary port */
		if (internals->active_slave_count < 1) {
			/* If first active slave, then change link status */
			bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
			internals->current_primary_port = port_id;
			lsc_flag = 1;

			mac_address_slaves_update(bonded_eth_dev);
		}

		activate_slave(bonded_eth_dev, port_id);

		/* If user has defined the primary port then default to using it */
		if (internals->user_defined_primary_port &&
				internals->primary_port == port_id)
			bond_ethdev_primary_set(internals, port_id);
	} else {
		if (active_pos == internals->active_slave_count)
			return 0;

		/* Remove from active slave list */
		deactivate_slave(bonded_eth_dev, port_id);

		if (internals->active_slave_count < 1)
			lsc_flag = 1;

		/* Update primary id, take first active slave from list or if none
		 * available set to -1 */
		if (port_id == internals->current_primary_port) {
			if (internals->active_slave_count > 0)
				bond_ethdev_primary_set(internals,
						internals->active_slaves[0]);
			else
				internals->current_primary_port = internals->primary_port;
		}
	}

	/*
	 * Update bonded device link properties after any change to active
	 * slaves
	 */
	bond_ethdev_link_update(bonded_eth_dev, 0);

	if (lsc_flag) {
		/* Cancel any possible outstanding interrupts if delays are enabled */
		if (internals->link_up_delay_ms > 0 ||
				internals->link_down_delay_ms > 0)
			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
					bonded_eth_dev);

		if (bonded_eth_dev->data->dev_link.link_status) {
			if (internals->link_up_delay_ms > 0)
				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL, NULL);
		} else {
			if (internals->link_down_delay_ms > 0)
				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC,
						NULL, NULL);
		}
	}

	return 0;
}
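
/*
 * Illustrative sketch (not part of the driver): an application that wants to
 * observe bond-level link transitions registers its own callback on the
 * bonded port; the slave events handled above are collapsed into a single
 * event on the bond. The callback name and port id below are hypothetical.
 *
 *	static int on_bond_lsc(uint16_t port, enum rte_eth_event_type ev,
 *			void *cb_arg, void *ret_param)
 *	{
 *		... react to link change on the bonded port ...
 *		return 0;
 *	}
 *
 *	rte_eth_dev_callback_register(bond_port_id, RTE_ETH_EVENT_INTR_LSC,
 *			on_bond_lsc, NULL);
 */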
static int
bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	unsigned int i, j;
	int result = 0;
	int slave_reta_size;
	unsigned int reta_count;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	reta_count = reta_size / RTE_RETA_GROUP_SIZE;

	for (i = 0; i < reta_count; i++) {
		internals->reta_conf[i].mask = reta_conf[i].mask;
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
	}

	/* Fill rest of array */
	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
				sizeof(internals->reta_conf[0]) * reta_count);

	/* Propagate RETA over slaves */
	for (i = 0; i < internals->slave_count; i++) {
		slave_reta_size = internals->slaves[i].reta_size;
		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
				&internals->reta_conf[0], slave_reta_size);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	int i, j;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];

	return 0;
}
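
/*
 * Illustrative sketch (not part of the driver): RETA updates on the bonded
 * port must use the bond's own reta_size (queried via dev_info) and are then
 * replayed onto each slave at that slave's table size. "RETA_SZ",
 * "bond_port_id" and "nb_rx_queues" are hypothetical.
 *
 *	struct rte_eth_rss_reta_entry64 reta[RETA_SZ / RTE_RETA_GROUP_SIZE];
 *	unsigned int k;
 *
 *	memset(reta, 0, sizeof(reta));
 *	reta[0].mask = ~0ULL;
 *	for (k = 0; k < RTE_RETA_GROUP_SIZE; k++)
 *		reta[0].reta[k] = k % nb_rx_queues;
 *	rte_eth_dev_rss_reta_update(bond_port_id, reta, RETA_SZ);
 */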
static int
bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	int i, result = 0;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_rss_conf bond_rss_conf;

	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));

	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;

	if (bond_rss_conf.rss_hf != 0)
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;

	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
			sizeof(internals->rss_key)) {
		if (bond_rss_conf.rss_key_len == 0)
			bond_rss_conf.rss_key_len = 40;
		internals->rss_key_len = bond_rss_conf.rss_key_len;
		memcpy(internals->rss_key, bond_rss_conf.rss_key,
				internals->rss_key_len);
	}

	for (i = 0; i < internals->slave_count; i++) {
		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
				&bond_rss_conf);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	rss_conf->rss_key_len = internals->rss_key_len;
	if (rss_conf->rss_key)
		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);

	return 0;
}
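
/*
 * Illustrative sketch (not part of the driver): the hash functions requested
 * on the bonded port are masked against the intersection of the slaves'
 * capabilities (flow_type_rss_offloads) before being pushed to each slave.
 * "bond_port_id" is hypothetical; passing rss_key = NULL keeps the current
 * key.
 *
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = NULL,
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *	};
 *	rte_eth_dev_rss_hash_update(bond_port_id, &conf);
 */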
const struct eth_dev_ops default_dev_ops = {
	.dev_start = bond_ethdev_start,
	.dev_stop = bond_ethdev_stop,
	.dev_close = bond_ethdev_close,
	.dev_configure = bond_ethdev_configure,
	.dev_infos_get = bond_ethdev_info,
	.vlan_filter_set = bond_ethdev_vlan_filter_set,
	.rx_queue_setup = bond_ethdev_rx_queue_setup,
	.tx_queue_setup = bond_ethdev_tx_queue_setup,
	.rx_queue_release = bond_ethdev_rx_queue_release,
	.tx_queue_release = bond_ethdev_tx_queue_release,
	.link_update = bond_ethdev_link_update,
	.stats_get = bond_ethdev_stats_get,
	.stats_reset = bond_ethdev_stats_reset,
	.promiscuous_enable = bond_ethdev_promiscuous_enable,
	.promiscuous_disable = bond_ethdev_promiscuous_disable,
	.reta_update = bond_ethdev_rss_reta_update,
	.reta_query = bond_ethdev_rss_reta_query,
	.rss_hash_update = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get = bond_ethdev_rss_hash_conf_get
};
static int
bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
{
	const char *name = rte_vdev_device_name(dev);
	uint8_t socket_id = dev->device.numa_node;
	struct bond_dev_private *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	uint32_t vlan_filter_bmp_size;

	/* now do all data allocation - for eth_dev structure, dummy pci driver
	 * and internal (private) data
	 */

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
		goto err;
	}

	internals = eth_dev->data->dev_private;
	eth_dev->data->nb_rx_queues = (uint16_t)1;
	eth_dev->data->nb_tx_queues = (uint16_t)1;

	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
			socket_id);
	if (eth_dev->data->mac_addrs == NULL) {
		RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
		goto err;
	}

	eth_dev->dev_ops = &default_dev_ops;
	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;

	rte_spinlock_init(&internals->lock);

	internals->port_id = eth_dev->data->port_id;
	internals->mode = BONDING_MODE_INVALID;
	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
	internals->xmit_hash = xmit_l2_hash;
	internals->user_defined_mac = 0;

	internals->link_status_polling_enabled = 0;

	internals->link_status_polling_interval_ms =
			DEFAULT_POLLING_INTERVAL_10_MS;
	internals->link_down_delay_ms = 0;
	internals->link_up_delay_ms = 0;

	internals->slave_count = 0;
	internals->active_slave_count = 0;
	internals->rx_offload_capa = 0;
	internals->tx_offload_capa = 0;
	internals->candidate_max_rx_pktlen = 0;
	internals->max_rx_pktlen = 0;

	/* Initially allow to choose any offload type */
	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;

	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
	memset(internals->slaves, 0, sizeof(internals->slaves));

	/* Set mode 4 default configuration */
	bond_mode_8023ad_setup(eth_dev, NULL);
	if (bond_ethdev_mode_set(eth_dev, mode)) {
		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
				eth_dev->data->port_id, mode);
		goto err;
	}

	vlan_filter_bmp_size =
			rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
			RTE_CACHE_LINE_SIZE);
	if (internals->vlan_filter_bmpmem == NULL) {
		RTE_BOND_LOG(ERR,
				"Failed to allocate vlan bitmap for bonded device %u\n",
				eth_dev->data->port_id);
		goto err;
	}

	internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
	if (internals->vlan_filter_bmp == NULL) {
		RTE_BOND_LOG(ERR,
				"Failed to init vlan bitmap for bonded device %u\n",
				eth_dev->data->port_id);
		rte_free(internals->vlan_filter_bmpmem);
		goto err;
	}

	return eth_dev->data->port_id;

err:
	rte_free(internals);
	if (eth_dev != NULL) {
		rte_free(eth_dev->data->mac_addrs);
		rte_eth_dev_release_port(eth_dev);
	}
	return -1;
}
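
/*
 * Illustrative sketch (not part of the driver): besides the vdev probe path
 * below, a bonded device can be created at runtime through the bonding API,
 * which ends up in bond_alloc(). The device name and slave port ids below
 * are hypothetical.
 *
 *	int bond_port_id = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *	if (bond_port_id >= 0) {
 *		rte_eth_bond_slave_add(bond_port_id, slave0_port_id);
 *		rte_eth_bond_slave_add(bond_port_id, slave1_port_id);
 *	}
 */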
static int
bond_probe(struct rte_vdev_device *dev)
{
	const char *name;
	struct bond_dev_private *internals;
	struct rte_kvargs *kvlist;
	uint8_t bonding_mode, socket_id;
	int arg_count, port_id;
	uint8_t agg_mode;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
			pmd_bond_init_valid_arguments);
	if (kvlist == NULL)
		return -1;

	/* Parse link bonding mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
				&bond_ethdev_parse_slave_mode_kvarg,
				&bonding_mode) != 0) {
			RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
					name);
			goto parse_error;
		}
	} else {
		RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
				"device %s\n", name);
		goto parse_error;
	}

	/* Parse socket id to create bonding device on */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
	if (arg_count == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
				!= 0) {
			RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
					"bonded device %s\n", name);
			goto parse_error;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
				"bonded device %s\n", name);
		goto parse_error;
	} else {
		socket_id = rte_socket_id();
	}

	dev->device.numa_node = socket_id;

	/* Create link bonding eth device */
	port_id = bond_alloc(dev, bonding_mode);
	if (port_id < 0) {
		RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
				"socket %u.\n", name, bonding_mode, socket_id);
		goto parse_error;
	}
	internals = rte_eth_devices[port_id].data->dev_private;
	internals->kvlist = kvlist;

	/* Parse the aggregator selection mode for 802.3ad (mode 4) */
	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse agg selection mode for bonded device %s\n",
					name);
			goto parse_error;
		}

		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
	} else {
		rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
	}

	RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
			"socket %u.\n", name, port_id, bonding_mode, socket_id);
	return 0;

parse_error:
	rte_kvargs_free(kvlist);
	return -1;
}
static int
bond_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;
	struct bond_dev_private *internals;
	const char *name;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);

	/* now free all data allocation - for eth_dev structure,
	 * dummy pci driver and internal (private) data
	 */

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return -ENODEV;

	RTE_ASSERT(eth_dev->device == &dev->device);

	internals = eth_dev->data->dev_private;
	if (internals->slave_count != 0)
		return -EBUSY;

	if (eth_dev->data->dev_started == 1) {
		bond_ethdev_stop(eth_dev);
		bond_ethdev_close(eth_dev);
	}

	eth_dev->dev_ops = NULL;
	eth_dev->rx_pkt_burst = NULL;
	eth_dev->tx_pkt_burst = NULL;

	rte_bitmap_free(internals->vlan_filter_bmp);
	rte_free(internals->vlan_filter_bmpmem);
	rte_free(eth_dev->data->dev_private);
	rte_free(eth_dev->data->mac_addrs);

	rte_eth_dev_release_port(eth_dev);

	return 0;
}
/* this part will resolve the slave portids after all the other pdev and vdev
 * have been allocated */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	const char *name = dev->device->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	int arg_count;
	uint16_t port_id = dev - rte_eth_devices;

	static const uint8_t default_rss_key[40] = {
		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
		0xBE, 0xAC, 0x01, 0xFA
	};

	unsigned int i, j;

	/* If RSS is enabled, fill table and key with default values */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
		memcpy(internals->rss_key, default_rss_key, 40);

		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
		}
	}

	/* set the max_rx_pktlen */
	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

	/*
	 * if no kvlist, it means that this bonded device has been created
	 * through the bonding api.
	 */
	if (!kvlist)
		return 0;

	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
			RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
					name);
			return -1;
		}

		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set mac address on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"MAC address can be specified only once for bonded device %s\n",
				name);
		return -1;
	}

	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg,
				&xmit_policy) != 0) {
			RTE_LOG(INFO, EAL,
					"Invalid xmit policy specified for bonded device %s\n",
					name);
			return -1;
		}

		/* Set balance mode transmit policy */
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set balance xmit policy on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"Transmit policy can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}

	/* Parse the aggregator selection mode for 802.3ad (mode 4) */
	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		uint8_t agg_mode;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse agg selection mode for bonded device %s\n",
					name);
			return -1;
		}
		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
	}

	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;
		unsigned int i;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse slave ports for bonded device %s\n",
					name);
			return -1;
		}

		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
				RTE_LOG(ERR, EAL,
						"Failed to add port %d as slave to bonded device %s\n",
						slave_ports.slaves[i], name);
			}
		}

	} else {
		RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
		return -1;
	}

	/* Parse/set primary slave port id */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint16_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid primary slave port id specified for bonded device"
					" %s\n", name);
			return -1;
		}

		/* Set the primary slave port */
		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set primary slave port %d on bonded device %s\n",
					primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Primary slave can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}

	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid lsc polling interval value specified for bonded"
					" device %s\n", name);
			return -1;
		}

		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set lsc monitor polling interval (%u ms) on"
					" bonded device %s\n", lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"LSC polling interval can be specified only once for bonded"
				" device %s\n", name);
		return -1;
	}

	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link up propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link up propagation delay (%u ms) on bonded"
					" device %s\n", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link up propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link down propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link down propagation delay (%u ms) on"
					" bonded device %s\n", link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link down propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	return 0;
}
static struct rte_vdev_driver pmd_bond_drv = {
	.probe = bond_probe,
	.remove = bond_remove,
};

RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
	"slave=<ifc> "
	"primary=<ifc> "
	"mode=[0-6] "
	"xmit_policy=[l2 | l23 | l34] "
	"agg_mode=[count | stable | bandwidth] "
	"socket_id=<int> "
	"mac=<mac addr> "
	"lsc_poll_period_ms=<int> "
	"up_delay=<int> "
	"down_delay=<int>");