/*
 * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in
 *   the documentation and/or other materials provided with the
 *   distribution.
 * * Neither the name of Intel Corporation nor the names of its
 *   contributors may be used to endorse or promote products derived
 *   from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_dev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
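
/*
 * Return the size (in bytes) of the VLAN tag(s) that follow the Ethernet
 * header, handling both single-tagged and double-tagged (QinQ) frames.
 * *proto is advanced to the encapsulated EtherType so the caller can find
 * the L3 header at (eth_hdr + 1) + offset.
 */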
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}
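
/*
 * Basic RX burst used when no mode-specific filtering is needed: poll each
 * active slave in turn, appending received packets to bufs, until either
 * nb_pkts packets have been gathered or every active slave has been polled.
 */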
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_slave = 0;
	uint16_t num_rx_total = 0;

	int i;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
	}

	return num_rx_total;
}
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}
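
/*
 * RX burst for mode 4 (802.3ad). Besides collecting packets from every
 * collecting slave, it filters out slow protocol (LACP) frames and hands
 * them to the mode 4 state machines, and drops unicast frames that were
 * only received because the slave NIC runs in promiscuous mode.
 */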
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_addr bond_mac;

	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint8_t slaves[RTE_MAX_ETHPORTS];
	uint8_t slave_count;

	uint8_t collecting;	/* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t i, j, k;

	rte_eth_macaddr_get(internals->port_id, &bond_mac);
	/* Copy slave list to protect against slave up/down changes during tx
	 * bursts */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {
			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			/* Remove packet from array if it is a slow packet, or the slave
			 * is not in collecting state, or the bonding interface is not in
			 * promiscuous mode and the packet address does not match. */
			if (unlikely(hdr->ether_type == ether_type_slow_be ||
				!collecting || (!promisc &&
					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
						bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
	}

	return num_rx_total;
}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;
#endif

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}

	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
	return;
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint8_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
static void
update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX packets number for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update its stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	RTE_LOG(DEBUG, PMD, \
		"%s " \
		"port:%d " \
		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"DstIP:%s " \
		"%s " \
		"%d\n", \
		info, \
		port, \
		eth_h->s_addr.addr_bytes[0], \
		eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], \
		eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], \
		eth_h->s_addr.addr_bytes[5], \
		src_ip, \
		eth_h->d_addr.addr_bytes[0], \
		eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], \
		eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], \
		eth_h->d_addr.addr_bytes[5], \
		dst_ip, \
		arp_op, \
		++burstnumber)
#endif
static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	snprintf(buf, 16, "%s", info);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif
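
/*
 * RX burst for mode 6 (ALB): receive as in the basic burst above, then pass
 * any ARP packet to the ALB logic so the client table stays up to date.
 */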
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}
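
/*
 * TX burst for round-robin mode: distribute packets over the active slaves
 * in rotation, remembering the last used slave (slave_idx) across calls.
 * Packets a slave fails to send are moved to the tail of bufs so the caller
 * can retry or free them.
 */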
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursts */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate each slave's mbuf array with the packets to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on
	 * the next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr);
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}
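
/*
 * Transmit hash policies used by the balance and 802.3ad modes. All three
 * fold the selected header fields into a hash and reduce it modulo
 * slave_count: xmit_l2_hash uses only the Ethernet addresses, xmit_l23_hash
 * additionally mixes in the IPv4/IPv6 addresses, and xmit_l34_hash combines
 * the L3 addresses with the TCP/UDP ports.
 */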
uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

	uint32_t hash = ether_hash(eth_hdr);

	return (hash ^= hash >> 8) % slave_count;
}

uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
	uint32_t hash, l3hash = 0;

	hash = ether_hash(eth_hdr);

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv4_hash(ipv4_hdr);

	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);
	}

	hash = hash ^ l3hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}
uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

	struct udp_hdr *udp_hdr = NULL;
	struct tcp_hdr *tcp_hdr = NULL;
	uint32_t hash, l3hash = 0, l4hash = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		size_t ip_hdr_offset;

		l3hash = ipv4_hash(ipv4_hdr);

		ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
				IPV4_IHL_MULTIPLIER;

		if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
			tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
					ip_hdr_offset);
			l4hash = HASH_L4_PORTS(tcp_hdr);
		} else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
			udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
					ip_hdr_offset);
			l4hash = HASH_L4_PORTS(udp_hdr);
		}
	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);

		if (ipv6_hdr->proto == IPPROTO_TCP) {
			tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(tcp_hdr);
		} else if (ipv6_hdr->proto == IPPROTO_UDP) {
			udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(udp_hdr);
		}
	}

	hash = l3hash ^ l4hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}
static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}
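
/*
 * Estimate how much of a slave's link capacity is still unused within the
 * current reorder window: link_bwg is the line rate in bytes, scaled by the
 * number of elapsed update intervals, and load is the byte count sent since
 * the last snapshot. The quotient and remainder are kept separately so that
 * bandwidth_cmp() can order slaves precisely.
 */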
static void
bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint8_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint8_t i, slave_id;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}
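
/*
 * TX burst for mode 5 (TLB): walk the slaves in the order computed by
 * bond_ethdev_update_tlb_slave_cb() (most spare bandwidth first), rewriting
 * the source MAC of packets that still carry the bonded device's address to
 * that of the transmitting slave before each burst.
 */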
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint8_t i, j;

	uint8_t num_of_slaves = internals->active_slave_count;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
			sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}
void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}
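
/*
 * TX burst for mode 6 (ALB): ARP packets are assigned to slaves by the ALB
 * client table (with their source MAC rewritten to the chosen slave), any
 * pending ARP update packets are generated and sent, and all remaining
 * traffic is transmitted using the TLB policy above. Unsent packets are
 * compacted to the tail of bufs for the caller.
 */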
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint8_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
		num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
	}

	return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

	int i, op_slave_id;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursts */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate each slave's mbuf array with the packets to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		/* Select output slave using hash based on xmit policy */
		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

		/* Populate slave mbuf arrays with mbufs for that slave */
		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += slave_tx_fail_count;
				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						slave_tx_fail_count * sizeof(bufs[0]));
			}

			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
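
/*
 * TX burst for mode 4 (802.3ad): slow protocol (LACP) frames queued by the
 * state machines on each slave's tx_ring are sent first, then data packets
 * are hashed across the slaves that are currently in DISTRIBUTING state.
 * Slow packets that cannot be sent are freed here; failed data packets are
 * moved to the end of bufs and excluded from the returned count.
 */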
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, j, op_slave_idx;
	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

	/* Allocate additional packets in case of 802.3ad mode. */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

	/* Total amount of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
	/* Slow packets placed in each slave */
	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursts */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];

		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
		slave_nb_pkts[i] = slave_slow_nb_pkts[i];

		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
			slave_bufs[i][j] = slow_pkts[j];

		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate each slave's mbuf array with the packets to be sent on it */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave. Use only
			 * slaves that are currently distributing. */
			uint8_t slave_offset = distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		/* If tx burst fails drop slow packets */
		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}
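
/*
 * TX burst for broadcast mode: every packet is sent on every active slave,
 * so each mbuf's reference count is first bumped by (num_of_slaves - 1). On
 * partial failure, only the most successful slave's count is reported to
 * the caller; the surplus references held for the other slaves are released
 * here.
 */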
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t tx_failed_flag = 0, num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursts */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}
void
link_properties_set(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_link *slave_dev_link)
{
	struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	if (slave_dev_link->link_status &&
		bonded_eth_dev->data->dev_started) {
		bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
		bonded_dev_link->link_speed = slave_dev_link->link_speed;

		internals->link_props_set = 1;
	}
}

void
link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	memset(&(bonded_eth_dev->data->dev_link), 0,
			sizeof(bonded_eth_dev->data->dev_link));

	internals->link_props_set = 0;
}

int
link_properties_valid(struct rte_eth_link *bonded_dev_link,
		struct rte_eth_link *slave_dev_link)
{
	if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
		bonded_dev_link->link_speed != slave_dev_link->link_speed)
		return -1;

	return 0;
}
int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
		return -1;
	}

	if (dst_mac_addr == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	ether_addr_copy(mac_addr, dst_mac_addr);
	return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (new_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	/* If new MAC is different to current MAC then update */
	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

	return 0;
}
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	int i;

	/* Update slave devices MAC addresses */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++) {
			if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (mac_address_set(&rte_eth_devices[internals->primary_port],
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					return -1;
				}
			} else {
				if (mac_address_set(
						&rte_eth_devices[internals->slaves[i].port_id],
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
					return -1;
				}
			}
		}
	}

	return 0;
}
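
/*
 * Bind the RX/TX burst functions matching the requested bonding mode to the
 * ethdev, enabling the mode-specific machinery (802.3ad state machines, ALB
 * client table) where needed.
 */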
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_8023AD:
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
		RTE_LOG(WARNING, PMD,
				"Using mode 4, it is necessary to do TX burst and RX burst "
				"at least every 100ms.\n");
		break;
	case BONDING_MODE_TLB:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_ALB:
		if (bond_mode_alb_enable(eth_dev) != 0)
			return -1;

		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}
static int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;

	int errval;
	uint16_t q_id;

	/* Stop slave */
	rte_eth_dev_stop(slave_eth_dev->data->port_id);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* If RSS is enabled for bonding, try to enable it for slaves */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
		if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
				!= 0) {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
		} else {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
		}

		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
	}

	/* Configure device */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			bonded_eth_dev->data->nb_rx_queues,
			bonded_eth_dev->data->nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Setup Rx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	/* If RSS is enabled for bonding, synchronize RETA */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		int i;
		struct bond_dev_private *internals;

		internals = bonded_eth_dev->data->dev_private;

		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
				errval = rte_eth_dev_rss_reta_update(
						slave_eth_dev->data->port_id,
						&internals->reta_conf[0],
						internals->slaves[i].reta_size);
				if (errval != 0) {
					RTE_LOG(WARNING, PMD,
							"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
							" RSS Configuration for bonding may be inconsistent.\n",
							slave_eth_dev->data->port_id, errval);
				}
				break;
			}
		}
	}

	/* If lsc interrupt is set, check initial slave's link status */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
				RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);

	return 0;
}
static void
slave_remove(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	uint8_t i;

	for (i = 0; i < internals->slave_count; i++)
		if (internals->slaves[i].port_id ==
				slave_eth_dev->data->port_id)
			break;

	if (i < (internals->slave_count - 1))
		memmove(&internals->slaves[i], &internals->slaves[i + 1],
				sizeof(internals->slaves[0]) *
				(internals->slave_count - i - 1));

	internals->slave_count--;
}

static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* If slave device doesn't support interrupts then we need to enable
	 * polling to monitor link status */
	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
		slave_details->link_status_poll_enabled = 1;

		if (!internals->link_status_polling_enabled) {
			internals->link_status_polling_enabled = 1;

			rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
					bond_ethdev_slave_link_status_change_monitor,
					(void *)&rte_eth_devices[internals->port_id]);
		}
	}

	slave_details->link_status_wait_to_complete = 0;
	/* clean tlb_last_obytets when adding port for bonding device */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct ether_addr));
}
void
bond_ethdev_primary_set(struct bond_dev_private *internals,
		uint8_t slave_port_id)
{
	int i;

	if (internals->active_slave_count < 1)
		internals->current_primary_port = slave_port_id;
	else
		/* Search bonded device slave ports for new proposed primary port */
		for (i = 0; i < internals->active_slave_count; i++) {
			if (internals->active_slaves[i] == slave_port_id)
				internals->current_primary_port = slave_port_id;
		}
}

static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (check_for_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	eth_dev->data->dev_link.link_status = 0;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		return -1;
	}

	if (internals->user_defined_mac == 0) {
		struct ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			return -1;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			return -1;
		}
	}

	/* Update all slave devices MACs */
	if (mac_address_slaves_update(eth_dev) != 0)
		return -1;

	/* If bonded device is configured in promiscuous mode then re-apply config */
	if (internals->promiscuous_en)
		bond_ethdev_promiscuous_enable(eth_dev);

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		if (slave_configure(eth_dev,
				&(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
			RTE_BOND_LOG(ERR,
					"bonded port (%d) failed to reconfigure slave device (%d)",
					eth_dev->data->port_id, internals->slaves[i].port_id);
			return -1;
		}
	}

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB)
		bond_tlb_enable(internals);

	return 0;
}
static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
	uint8_t i;

	if (dev->data->rx_queues != NULL) {
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			rte_free(dev->data->rx_queues[i]);
			dev->data->rx_queues[i] = NULL;
		}
		dev->data->nb_rx_queues = 0;
	}

	if (dev->data->tx_queues != NULL) {
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			rte_free(dev->data->tx_queues[i]);
			dev->data->tx_queues[i] = NULL;
		}
		dev->data->nb_tx_queues = 0;
	}
}

void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	uint8_t i;

	if (internals->mode == BONDING_MODE_8023AD) {
		struct port *port;
		void *pkt = NULL;

		bond_mode_8023ad_stop(eth_dev);

		/* Discard all messages to/from mode 4 state machines */
		for (i = 0; i < internals->active_slave_count; i++) {
			port = &mode_8023ad_ports[internals->active_slaves[i]];

			RTE_VERIFY(port->rx_ring != NULL);
			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);

			RTE_VERIFY(port->tx_ring != NULL);
			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);
		}
	}

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB) {
		bond_tlb_disable(internals);
		for (i = 0; i < internals->active_slave_count; i++)
			tlb_last_obytets[internals->active_slaves[i]] = 0;
	}

	internals->active_slave_count = 0;
	internals->link_status_polling_enabled = 0;

	eth_dev->data->dev_link.link_status = 0;
	eth_dev->data->dev_started = 0;
}

void
bond_ethdev_close(struct rte_eth_dev *dev)
{
	bond_ethdev_free_queues(dev);
}
/* forward declaration */
static int bond_ethdev_configure(struct rte_eth_dev *dev);

static void
bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	dev_info->max_mac_addrs = 1;

	dev_info->max_rx_pktlen = (uint32_t)2048;

	dev_info->max_rx_queues = (uint16_t)128;
	dev_info->max_tx_queues = (uint16_t)512;

	dev_info->min_rx_bufsize = 0;
	dev_info->pci_dev = NULL;

	dev_info->rx_offload_capa = internals->rx_offload_capa;
	dev_info->tx_offload_capa = internals->tx_offload_capa;
	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;

	dev_info->reta_size = internals->reta_size;
}
static int
bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
					0, dev->data->numa_node);
	if (bd_rx_q == NULL)
		return -1;

	bd_rx_q->queue_id = rx_queue_id;
	bd_rx_q->dev_private = dev->data->dev_private;

	bd_rx_q->nb_rx_desc = nb_rx_desc;

	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
	bd_rx_q->mb_pool = mb_pool;

	dev->data->rx_queues[rx_queue_id] = bd_rx_q;

	return 0;
}

static int
bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_txconf *tx_conf)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
					0, dev->data->numa_node);

	if (bd_tx_q == NULL)
		return -1;

	bd_tx_q->queue_id = tx_queue_id;
	bd_tx_q->dev_private = dev->data->dev_private;

	bd_tx_q->nb_tx_desc = nb_tx_desc;
	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));

	dev->data->tx_queues[tx_queue_id] = bd_tx_q;

	return 0;
}

static void
bond_ethdev_rx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}

static void
bond_ethdev_tx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
	struct bond_dev_private *internals;

	/* Default value for polling slave found is true as we don't want to
	 * disable the polling thread if we cannot get the lock */
	int i, polling_slave_found = 1;

	if (cb_arg == NULL)
		return;

	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;

	if (!bonded_ethdev->data->dev_started ||
		!internals->link_status_polling_enabled)
		return;

	/* If device is currently being configured then don't check slaves link
	 * status, wait until next period */
	if (rte_spinlock_trylock(&internals->lock)) {
		if (internals->slave_count > 0)
			polling_slave_found = 0;

		for (i = 0; i < internals->slave_count; i++) {
			if (!internals->slaves[i].link_status_poll_enabled)
				continue;

			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
			polling_slave_found = 1;

			/* Update slave link status */
			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
					internals->slaves[i].link_status_wait_to_complete);

			/* if link status has changed since last checked then call lsc
			 * event callback */
			if (slave_ethdev->data->dev_link.link_status !=
					internals->slaves[i].last_link_status) {
				internals->slaves[i].last_link_status =
						slave_ethdev->data->dev_link.link_status;

				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
						RTE_ETH_EVENT_INTR_LSC,
						&bonded_ethdev->data->port_id);
			}
		}
		rte_spinlock_unlock(&internals->lock);
	}

	if (polling_slave_found)
		/* Set alarm to continue monitoring link status of slave ethdev's */
		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
				bond_ethdev_slave_link_status_change_monitor, cb_arg);
}
static int
bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
		int wait_to_complete)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	if (!bonded_eth_dev->data->dev_started ||
		internals->active_slave_count == 0) {
		bonded_eth_dev->data->dev_link.link_status = 0;
		return 0;
	} else {
		struct rte_eth_dev *slave_eth_dev;
		int i, link_up = 0;

		for (i = 0; i < internals->active_slave_count; i++) {
			slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];

			(*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
					wait_to_complete);
			if (slave_eth_dev->data->dev_link.link_status == 1) {
				link_up = 1;
				break;
			}
		}
		bonded_eth_dev->data->dev_link.link_status = link_up;
	}

	return 0;
}
static void
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_stats slave_stats;
	int i, j;

	for (i = 0; i < internals->slave_count; i++) {
		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);

		stats->ipackets += slave_stats.ipackets;
		stats->opackets += slave_stats.opackets;
		stats->ibytes += slave_stats.ibytes;
		stats->obytes += slave_stats.obytes;
		stats->imissed += slave_stats.imissed;
		stats->ierrors += slave_stats.ierrors;
		stats->oerrors += slave_stats.oerrors;
		stats->imcasts += slave_stats.imcasts;
		stats->rx_nombuf += slave_stats.rx_nombuf;

		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
			stats->q_opackets[j] += slave_stats.q_opackets[j];
			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
			stats->q_obytes[j] += slave_stats.q_obytes[j];
			stats->q_errors[j] += slave_stats.q_errors[j];
		}
	}
}

static void
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	for (i = 0; i < internals->slave_count; i++)
		rte_eth_stats_reset(internals->slaves[i].port_id);
}
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	int i;

	internals->promiscuous_en = 1;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_enable(internals->slaves[i].port_id);
		break;
	/* In mode 4, promiscuous mode is managed when a slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_enable(internals->current_primary_port);
	}
}

static void
bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	internals->promiscuous_en = 0;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_disable(internals->slaves[i].port_id);
		break;
	/* In mode 4, promiscuous mode is managed when a slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_disable(internals->current_primary_port);
	}
}
static void
bond_ethdev_delayed_lsc_propagation(void *arg)
{
	if (arg == NULL)
		return;

	_rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
			RTE_ETH_EVENT_INTR_LSC);
}

void
bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
		void *param)
{
	struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
	struct bond_dev_private *internals;
	struct rte_eth_link link;

	int i, valid_slave = 0;
	uint8_t active_pos;
	uint8_t lsc_flag = 0;

	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
		return;

	bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
	slave_eth_dev = &rte_eth_devices[port_id];

	if (check_for_bonded_ethdev(bonded_eth_dev))
		return;

	internals = bonded_eth_dev->data->dev_private;

	/* If the device isn't started don't handle interrupts */
	if (!bonded_eth_dev->data->dev_started)
		return;

	/* verify that port_id is a valid slave of bonded port */
	for (i = 0; i < internals->slave_count; i++) {
		if (internals->slaves[i].port_id == port_id) {
			valid_slave = 1;
			break;
		}
	}

	if (!valid_slave)
		return;

	/* Search for port in active port list */
	active_pos = find_slave_by_id(internals->active_slaves,
			internals->active_slave_count, port_id);

	rte_eth_link_get_nowait(port_id, &link);
	if (link.link_status) {
		if (active_pos < internals->active_slave_count)
			return;

		/* if no active slave ports then set this port to be primary port */
		if (internals->active_slave_count < 1) {
			/* If first active slave, then change link status */
			bonded_eth_dev->data->dev_link.link_status = 1;
			internals->current_primary_port = port_id;
			lsc_flag = 1;

			mac_address_slaves_update(bonded_eth_dev);

			/* Inherit eth dev link properties from first active slave */
			link_properties_set(bonded_eth_dev,
					&(slave_eth_dev->data->dev_link));
		}

		activate_slave(bonded_eth_dev, port_id);

		/* If user has defined the primary port then default to using it */
		if (internals->user_defined_primary_port &&
				internals->primary_port == port_id)
			bond_ethdev_primary_set(internals, port_id);
	} else {
		if (active_pos == internals->active_slave_count)
			return;

		/* Remove from active slave list */
		deactivate_slave(bonded_eth_dev, port_id);

		/* No active slaves, change link status to down and reset other
		 * link properties */
		if (internals->active_slave_count < 1) {
			lsc_flag = 1;
			bonded_eth_dev->data->dev_link.link_status = 0;

			link_properties_reset(bonded_eth_dev);
		}

		/* Update primary id, take first active slave from list or if none
		 * available set to -1 */
		if (port_id == internals->current_primary_port) {
			if (internals->active_slave_count > 0)
				bond_ethdev_primary_set(internals,
						internals->active_slaves[0]);
			else
				internals->current_primary_port = internals->primary_port;
		}
	}

	if (lsc_flag) {
		/* Cancel any possible outstanding interrupts if delays are enabled */
		if (internals->link_up_delay_ms > 0 ||
				internals->link_down_delay_ms > 0)
			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
					bonded_eth_dev);

		if (bonded_eth_dev->data->dev_link.link_status) {
			if (internals->link_up_delay_ms > 0)
				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC);
		} else {
			if (internals->link_down_delay_ms > 0)
				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC);
		}
	}
}
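
/*
 * Store the new RETA in the bonded device (replicating the supplied groups
 * across the whole table) and propagate it to every slave so RSS behaves
 * consistently regardless of which slave receives a flow.
 */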
static int
bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	unsigned i, j;
	int result = 0;
	int slave_reta_size;
	unsigned reta_count;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	reta_count = reta_size / RTE_RETA_GROUP_SIZE;

	for (i = 0; i < reta_count; i++) {
		internals->reta_conf[i].mask = reta_conf[i].mask;
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
	}

	/* Fill rest of array */
	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
				sizeof(internals->reta_conf[0]) * reta_count);

	/* Propagate RETA over slaves */
	for (i = 0; i < internals->slave_count; i++) {
		slave_reta_size = internals->slaves[i].reta_size;
		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
				&internals->reta_conf[0], slave_reta_size);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	unsigned i, j;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];

	return 0;
}
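
/*
 * Update the RSS hash configuration and propagate it to the slaves. The
 * requested hash functions are masked with the offloads supported by every
 * slave, and a caller-supplied key replaces the stored one only if it fits
 * the internal buffer.
 */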
static int
bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	unsigned i;
	int result = 0;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_rss_conf bond_rss_conf;

	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));

	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;

	if (bond_rss_conf.rss_hf != 0)
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;

	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
			sizeof(internals->rss_key)) {
		/* Zero key length means the default 40-byte key size */
		if (bond_rss_conf.rss_key_len == 0)
			bond_rss_conf.rss_key_len = 40;
		internals->rss_key_len = bond_rss_conf.rss_key_len;
		memcpy(internals->rss_key, bond_rss_conf.rss_key,
				internals->rss_key_len);
	}

	for (i = 0; i < internals->slave_count; i++) {
		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
				&bond_rss_conf);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	rss_conf->rss_key_len = internals->rss_key_len;
	if (rss_conf->rss_key)
		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);

	return 0;
}
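
/* Generic dev_ops for the bonded device. The rx/tx burst handlers are not
 * listed here; they are installed on the ethdev when the bonding mode is
 * set. */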
struct eth_dev_ops default_dev_ops = {
	.dev_start = bond_ethdev_start,
	.dev_stop = bond_ethdev_stop,
	.dev_close = bond_ethdev_close,
	.dev_configure = bond_ethdev_configure,
	.dev_infos_get = bond_ethdev_info,
	.rx_queue_setup = bond_ethdev_rx_queue_setup,
	.tx_queue_setup = bond_ethdev_tx_queue_setup,
	.rx_queue_release = bond_ethdev_rx_queue_release,
	.tx_queue_release = bond_ethdev_tx_queue_release,
	.link_update = bond_ethdev_link_update,
	.stats_get = bond_ethdev_stats_get,
	.stats_reset = bond_ethdev_stats_reset,
	.promiscuous_enable = bond_ethdev_promiscuous_enable,
	.promiscuous_disable = bond_ethdev_promiscuous_disable,
	.reta_update = bond_ethdev_rss_reta_update,
	.reta_query = bond_ethdev_rss_reta_query,
	.rss_hash_update = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get = bond_ethdev_rss_hash_conf_get
};
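
/*
 * Entry point used when a bonded device is created from the EAL command
 * line. Only the mandatory mode and the optional socket_id kvargs are
 * consumed here; the remaining arguments are stashed in internals->kvlist
 * and applied later by bond_ethdev_configure(). As an illustrative example
 * (device and slave names are placeholders), a bonded device could be
 * requested with something like:
 *
 *   --vdev 'eth_bond0,mode=2,slave=0000:02:00.0,slave=0000:02:00.1'
 */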
static int
bond_init(const char *name, const char *params)
{
	struct bond_dev_private *internals;
	struct rte_kvargs *kvlist;
	uint8_t bonding_mode, socket_id;
	int arg_count, port_id;

	RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);

	kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
	if (kvlist == NULL)
		return -1;

	/* Parse link bonding mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
				&bond_ethdev_parse_slave_mode_kvarg,
				&bonding_mode) != 0) {
			RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
					name);
			goto parse_error;
		}
	} else {
		RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
				"device %s\n", name);
		goto parse_error;
	}

	/* Parse socket id to create bonding device on */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
	if (arg_count == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
				!= 0) {
			RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
					"bonded device %s\n", name);
			goto parse_error;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
				"bonded device %s\n", name);
		goto parse_error;
	} else {
		socket_id = rte_socket_id();
	}

	/* Create link bonding eth device */
	port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
	if (port_id < 0) {
		RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
				"socket %u.\n", name, bonding_mode, socket_id);
		goto parse_error;
	}
	internals = rte_eth_devices[port_id].data->dev_private;
	internals->kvlist = kvlist;

	RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
			"socket %u.\n", name, port_id, bonding_mode, socket_id);
	return 0;

parse_error:
	rte_kvargs_free(kvlist);

	return -1;
}
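
/* Tear down a bonded device created through bond_init() when its virtual
 * device is uninitialized. */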
static int
bond_uninit(const char *name)
{
	int ret;

	if (name == NULL)
		return -EINVAL;

	RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);

	/* free link bonding eth device */
	ret = rte_eth_bond_free(name);
	if (ret < 0)
		RTE_LOG(ERR, EAL, "Failed to free %s\n", name);

	return ret;
}
/* this part will resolve the slave portids after all the other pdev and vdev
 * have been allocated */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	char *name = dev->data->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	int arg_count;
	uint8_t port_id = dev - rte_eth_devices;

	/* Default 40-byte hash key used when RSS is enabled but the
	 * application has not supplied one */
	static const uint8_t default_rss_key[40] = {
		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
		0xBE, 0xAC, 0x01, 0xFA
	};

	unsigned i, j;

	/* If RSS is enabled, fill table and key with default values */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
		memcpy(internals->rss_key, default_rss_key, 40);

		/* Spread the RETA entries round-robin over the rx queues */
		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
		}
	}

	/*
	 * if no kvlist, it means that this bonded device has been created
	 * through the bonding api.
	 */
	if (kvlist == NULL)
		return 0;
	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
			RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
					name);
			return -1;
		}

		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set mac address on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"MAC address can be specified only once for bonded device %s\n",
				name);
		return -1;
	}
	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
						0) {
			RTE_LOG(ERR, EAL,
					"Invalid xmit policy specified for bonded device %s\n",
					name);
			return -1;
		}

		/* Set balance mode transmit policy */
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set balance xmit policy on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"Transmit policy can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}
	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;
		unsigned i;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse slave ports for bonded device %s\n",
					name);
			return -1;
		}

		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
				RTE_LOG(ERR, EAL,
						"Failed to add port %d as slave to bonded device %s\n",
						slave_ports.slaves[i], name);
			}
		}
	} else {
		RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
		return -1;
	}
	/* Parse/set primary slave port id */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint8_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
			RTE_LOG(ERR, EAL,
					"Invalid primary slave port id specified for bonded device"
					" %s\n", name);
			return -1;
		}

		/* Set primary slave port id */
		if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set primary slave port %d on bonded device %s\n",
					primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"Primary slave can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}
	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_LOG(ERR, EAL,
					"Invalid lsc polling interval value specified for bonded"
					" device %s\n", name);
			return -1;
		}

		/* Set link status monitor polling interval */
		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set lsc monitor polling interval (%u ms) on"
					" bonded device %s\n", lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"LSC polling interval can be specified only once for bonded"
				" device %s\n", name);
		return -1;
	}
	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_LOG(ERR, EAL,
					"Invalid link up propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link up propagation delay (%u ms) on bonded"
					" device %s\n", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"Link up propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}
	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_LOG(ERR, EAL,
					"Invalid link down propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link down propagation delay (%u ms) on"
					" bonded device %s\n", link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"Link down propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	return 0;
}
static struct rte_driver bond_drv = {
	.name = "eth_bond",
	.type = PMD_VDEV,
	.init = bond_init,
	.uninit = bond_uninit,
};

PMD_REGISTER_DRIVER(bond_drv);