/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_dev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}

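/*
 * Illustrative usage sketch (not part of the driver): for a frame laid out
 * as [ether_hdr][vlan_hdr][ipv4_hdr], callers such as the transmit hash
 * functions below typically do
 *
 *	uint16_t proto = eth_hdr->ether_type;
 *	size_t off = get_vlan_offset(eth_hdr, &proto);
 *	struct ipv4_hdr *ip = (struct ipv4_hdr *)((char *)(eth_hdr + 1) + off);
 *
 * after which proto holds the inner EtherType and off accounts for up to
 * two stacked VLAN tags.
 */
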
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_slave = 0;
	uint16_t num_rx_total = 0;

	int i;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
		if (num_rx_slave) {
			num_rx_total += num_rx_slave;
			nb_pkts -= num_rx_slave;
		}
	}

	return num_rx_total;
}

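/*
 * Note (illustrative): the loop above polls slaves in array order, so with
 * three active slaves and nb_pkts == 32 it may, for example, take 20 packets
 * from slave 0, 12 from slave 1 and none from slave 2; bufs fills
 * contiguously because each rte_eth_rx_burst() writes at bufs + num_rx_total.
 */
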
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}

static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_addr bond_mac;

	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint8_t slaves[RTE_MAX_ETHPORTS];
	uint8_t slave_count;

	uint8_t collecting;	/* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t i, j, k;

	rte_eth_macaddr_get(internals->port_id, &bond_mac);
	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {
			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			/* Remove packet from array if it is a slow packet or the
			 * slave is not in collecting state, or the bonding
			 * interface is not in promiscuous mode and the packet
			 * address does not match. */
			if (unlikely(hdr->ether_type == ether_type_slow_be ||
				!collecting || (!promisc &&
					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
						bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
	}

	return num_rx_total;
}

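/*
 * Worked example of the compaction above (illustrative): if bufs holds
 * {A, LACPDU, B} and j points at the LACPDU, the slow packet is handed to
 * bond_mode_8023ad_handle_slow_pkt(), num_rx_total drops to 2 and the
 * memmove shifts B left, so the application only ever sees {A, B}.
 */
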
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}

	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
	return;
}
#endif

#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint8_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX packets number for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update its stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)	\
	RTE_LOG(DEBUG, PMD, \
		"%s " \
		"port:%d " \
		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"DstIP:%s " \
		"%s " \
		"%d\n", \
		info, \
		port, \
		eth_h->s_addr.addr_bytes[0], \
		eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], \
		eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], \
		eth_h->s_addr.addr_bytes[5], \
		src_ip, \
		eth_h->d_addr.addr_bytes[0], \
		eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], \
		eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], \
		eth_h->d_addr.addr_bytes[5], \
		dst_ip, \
		arp_op, \
		++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	snprintf(buf, 16, "%s", info);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate slaves mbuf with which packets are to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return (ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr);
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}

uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

	uint32_t hash = ether_hash(eth_hdr);

	return (hash ^= hash >> 8) % slave_count;
}

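/*
 * Worked example (illustrative): ether_hash() XORs the three 16-bit words
 * of the source and destination MACs, and xmit_l2_hash() folds the next
 * byte down (hash ^= hash >> 8) before taking the modulo, so a frame from
 * 00:00:00:00:00:01 to 00:00:00:00:00:02 always lands on the same slave
 * index for a given slave_count.
 */
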
uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
	uint32_t hash, l3hash = 0;

	hash = ether_hash(eth_hdr);

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv4_hash(ipv4_hdr);

	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);
	}

	hash = hash ^ l3hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}

uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

	struct udp_hdr *udp_hdr = NULL;
	struct tcp_hdr *tcp_hdr = NULL;
	uint32_t hash, l3hash = 0, l4hash = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		size_t ip_hdr_offset;

		l3hash = ipv4_hash(ipv4_hdr);

		ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
				IPV4_IHL_MULTIPLIER;

		if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
			tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
					ip_hdr_offset);
			l4hash = HASH_L4_PORTS(tcp_hdr);
		} else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
			udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
					ip_hdr_offset);
			l4hash = HASH_L4_PORTS(udp_hdr);
		}
	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);

		if (ipv6_hdr->proto == IPPROTO_TCP) {
			tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(tcp_hdr);
		} else if (ipv6_hdr->proto == IPPROTO_UDP) {
			udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(udp_hdr);
		}
	}

	hash = l3hash ^ l4hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}

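/*
 * Note (illustrative): because the l3/l4 hash is computed only from the
 * address and port fields, every packet of a given TCP or UDP flow maps to
 * the same slave index, preserving per-flow ordering; e.g. all segments of
 * 10.0.0.1:1234 -> 10.0.0.2:80 hash identically and therefore always pick
 * the same slave for a fixed slave_count.
 */
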
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}

static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}

static void
bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

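/*
 * Worked example (illustrative, assuming a 10G slave, link_speed == 10000):
 * link_bwg starts as 10000 * 1000000 / 8 = 1.25e9 bytes/s and is then
 * scaled by (update_idx + 1) * REORDER_PERIOD_MS. With update_idx == 0 and
 * load == 5000000 bytes sent since the last counter reset, bwg_left_int is
 * (1.25e10 - 5e9) / 1.25e10 == 0 and bwg_left_remainder is 7.5e9; the
 * remainder is what actually orders equally-loaded slaves in bandwidth_cmp().
 */
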
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint8_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint8_t i, slave_id;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}

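/*
 * Note: the callback re-arms itself via rte_eal_alarm_set() every
 * REORDER_PERIOD_MS * 1000 us (10 ms), and resets the per-slave byte
 * counters once slave_update_idx reaches REORDER_PERIOD_MS, i.e. roughly
 * every 100 ms of wall-clock time.
 */
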
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint8_t i, j;

	uint8_t num_of_slaves = internals->active_slave_count;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
			sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}

static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint8_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
	/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
		num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

	int i, op_slave_id;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate slaves mbuf with the packets which are to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		/* Select output slave using hash based on xmit policy */
		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

		/* Populate slave mbuf arrays with mbufs for that slave */
		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += slave_tx_fail_count;
				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						slave_tx_fail_count * sizeof(bufs[0]));
			}

			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, j, op_slave_idx;
	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

	/* Allocate additional packets in case 8023AD mode. */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

	/* Total amount of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
	/* Slow packets placed in each slave */
	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];

		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
		slave_nb_pkts[i] = slave_slow_nb_pkts[i];

		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
			slave_bufs[i][j] = slow_pkts[j];

		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate slaves mbuf with the packets which are to be sent on it */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave. Use only
			 * slaves that are currently distributing. */
			uint8_t slave_offset = distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		/* If tx burst fails drop slow packets */
		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t tx_failed_flag = 0, num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}

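/*
 * Worked example of the refcounting above (illustrative): with three active
 * slaves each mbuf refcnt is bumped by 2, so the three rte_eth_tx_burst()
 * calls each consume one reference. If slave 1 sends all nb_pkts but slave
 * 2 sends none, the cleanup loop frees slave 2's untransmitted references
 * while the most successful slave's packets are left alone.
 */
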
void
link_properties_set(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_link *slave_dev_link)
{
	struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	if (slave_dev_link->link_status &&
		bonded_eth_dev->data->dev_started) {
		bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
		bonded_dev_link->link_speed = slave_dev_link->link_speed;

		internals->link_props_set = 1;
	}
}

void
link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	memset(&(bonded_eth_dev->data->dev_link), 0,
			sizeof(bonded_eth_dev->data->dev_link));

	internals->link_props_set = 0;
}

int
link_properties_valid(struct rte_eth_link *bonded_dev_link,
		struct rte_eth_link *slave_dev_link)
{
	if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
		bonded_dev_link->link_speed != slave_dev_link->link_speed)
		return -1;

	return 0;
}

int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
		return -1;
	}

	if (dst_mac_addr == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	ether_addr_copy(mac_addr, dst_mac_addr);
	return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (new_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	/* If new MAC is different to current MAC then update */
	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

	return 0;
}

int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	int i;

	/* Update slave devices MAC addresses */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++) {
			if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (mac_address_set(&rte_eth_devices[internals->primary_port],
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					return -1;
				}
			} else {
				if (mac_address_set(
						&rte_eth_devices[internals->slaves[i].port_id],
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
					return -1;
				}
			}
		}
	}

	return 0;
}

int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_8023AD:
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
		RTE_LOG(WARNING, PMD,
				"Using mode 4, it is necessary to do TX burst and RX burst "
				"at least every 100ms.\n");
		break;
	case BONDING_MODE_TLB:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_ALB:
		if (bond_mode_alb_enable(eth_dev) != 0)
			return -1;

		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}

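/*
 * Illustrative sketch (not part of the driver): an application normally
 * reaches bond_ethdev_mode_set() through the public API declared in
 * rte_eth_bond.h, e.g.
 *
 *	int port = rte_eth_bond_create("bond0", BONDING_MODE_BALANCE, 0);
 *	if (port >= 0) {
 *		rte_eth_bond_slave_add(port, slave0);
 *		rte_eth_bond_slave_add(port, slave1);
 *	}
 *
 * which selects bond_ethdev_tx_burst_balance()/bond_ethdev_rx_burst() as
 * the burst functions; "bond0", slave0 and slave1 are placeholder names.
 */
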
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;

	int errval;
	uint16_t q_id;

	/* Stop slave */
	rte_eth_dev_stop(slave_eth_dev->data->port_id);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* If RSS is enabled for bonding, try to enable it for slaves */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
				!= 0) {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
		} else {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
		}

		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
		slave_eth_dev->data->dev_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
	}

	/* Configure device */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			bonded_eth_dev->data->nb_rx_queues,
			bonded_eth_dev->data->nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Setup Rx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	/* If RSS is enabled for bonding, synchronize RETA */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		int i;
		struct bond_dev_private *internals;

		internals = bonded_eth_dev->data->dev_private;

		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
				errval = rte_eth_dev_rss_reta_update(
						slave_eth_dev->data->port_id,
						&internals->reta_conf[0],
						internals->slaves[i].reta_size);
				if (errval != 0) {
					RTE_LOG(WARNING, PMD,
							"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
							" RSS Configuration for bonding may be inconsistent.\n",
							slave_eth_dev->data->port_id, errval);
				}
				break;
			}
		}
	}

	/* If lsc interrupt is set, check initial slave's link status */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
				RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);

	return 0;
}

void
slave_remove(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	uint8_t i;

	for (i = 0; i < internals->slave_count; i++)
		if (internals->slaves[i].port_id ==
				slave_eth_dev->data->port_id)
			break;

	if (i < (internals->slave_count - 1))
		memmove(&internals->slaves[i], &internals->slaves[i + 1],
				sizeof(internals->slaves[0]) *
				(internals->slave_count - i - 1));

	internals->slave_count--;
}

static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* If slave device doesn't support interrupts then we need to enable
	 * polling to monitor link status */
	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
		slave_details->link_status_poll_enabled = 1;

		if (!internals->link_status_polling_enabled) {
			internals->link_status_polling_enabled = 1;

			rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
					bond_ethdev_slave_link_status_change_monitor,
					(void *)&rte_eth_devices[internals->port_id]);
		}
	}

	slave_details->link_status_wait_to_complete = 0;
	/* clean tlb_last_obytes when adding port for bonding device */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct ether_addr));
}

void
bond_ethdev_primary_set(struct bond_dev_private *internals,
		uint8_t slave_port_id)
{
	int i;

	if (internals->active_slave_count < 1)
		internals->current_primary_port = slave_port_id;
	else
		/* Search bonded device slave ports for new proposed primary port */
		for (i = 0; i < internals->active_slave_count; i++) {
			if (internals->active_slaves[i] == slave_port_id)
				internals->current_primary_port = slave_port_id;
		}
}

static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);

static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (check_for_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	eth_dev->data->dev_link.link_status = 0;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		return -1;
	}

	if (internals->user_defined_mac == 0) {
		struct ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			return -1;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			return -1;
		}
	}

	/* Update all slave devices MACs */
	if (mac_address_slaves_update(eth_dev) != 0)
		return -1;

	/* If bonded device is configured in promiscuous mode then re-apply config */
	if (internals->promiscuous_en)
		bond_ethdev_promiscuous_enable(eth_dev);

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		if (slave_configure(eth_dev,
				&(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
			RTE_BOND_LOG(ERR,
					"bonded port (%d) failed to reconfigure slave device (%d)",
					eth_dev->data->port_id, internals->slaves[i].port_id);
			return -1;
		}
	}

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB)
		bond_tlb_enable(internals);

	return 0;
}

static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
	uint8_t i;

	if (dev->data->rx_queues != NULL) {
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			rte_free(dev->data->rx_queues[i]);
			dev->data->rx_queues[i] = NULL;
		}
		dev->data->nb_rx_queues = 0;
	}

	if (dev->data->tx_queues != NULL) {
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			rte_free(dev->data->tx_queues[i]);
			dev->data->tx_queues[i] = NULL;
		}
		dev->data->nb_tx_queues = 0;
	}
}

void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	uint8_t i;

	if (internals->mode == BONDING_MODE_8023AD) {
		struct port *port;
		void *pkt = NULL;

		bond_mode_8023ad_stop(eth_dev);

		/* Discard all messages to/from mode 4 state machines */
		for (i = 0; i < internals->active_slave_count; i++) {
			port = &mode_8023ad_ports[internals->active_slaves[i]];

			RTE_VERIFY(port->rx_ring != NULL);
			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);

			RTE_VERIFY(port->tx_ring != NULL);
			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);
		}
	}

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB) {
		bond_tlb_disable(internals);
		for (i = 0; i < internals->active_slave_count; i++)
			tlb_last_obytets[internals->active_slaves[i]] = 0;
	}

	internals->active_slave_count = 0;
	internals->link_status_polling_enabled = 0;

	eth_dev->data->dev_link.link_status = 0;
	eth_dev->data->dev_started = 0;
}

void
bond_ethdev_close(struct rte_eth_dev *dev)
{
	bond_ethdev_free_queues(dev);
}

/* forward declaration */
static int bond_ethdev_configure(struct rte_eth_dev *dev);

static void
bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	dev_info->max_mac_addrs = 1;

	dev_info->max_rx_pktlen = (uint32_t)2048;

	dev_info->max_rx_queues = (uint16_t)128;
	dev_info->max_tx_queues = (uint16_t)512;

	dev_info->min_rx_bufsize = 0;
	dev_info->pci_dev = NULL;

	dev_info->rx_offload_capa = internals->rx_offload_capa;
	dev_info->tx_offload_capa = internals->tx_offload_capa;
	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;

	dev_info->reta_size = internals->reta_size;
}

static int
bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
					0, dev->data->numa_node);
	if (bd_rx_q == NULL)
		return -1;

	bd_rx_q->queue_id = rx_queue_id;
	bd_rx_q->dev_private = dev->data->dev_private;

	bd_rx_q->nb_rx_desc = nb_rx_desc;

	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
	bd_rx_q->mb_pool = mb_pool;

	dev->data->rx_queues[rx_queue_id] = bd_rx_q;

	return 0;
}

static int
bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
		const struct rte_eth_txconf *tx_conf)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
					0, dev->data->numa_node);

	if (bd_tx_q == NULL)
		return -1;

	bd_tx_q->queue_id = tx_queue_id;
	bd_tx_q->dev_private = dev->data->dev_private;

	bd_tx_q->nb_tx_desc = nb_tx_desc;
	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));

	dev->data->tx_queues[tx_queue_id] = bd_tx_q;

	return 0;
}

static void
bond_ethdev_rx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}

static void
bond_ethdev_tx_queue_release(void *queue)
{
	if (queue == NULL)
		return;

	rte_free(queue);
}

static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
	struct bond_dev_private *internals;

	/* Default value for polling slave found is true as we don't want to
	 * disable the polling thread if we cannot get the lock */
	int i, polling_slave_found = 1;

	if (cb_arg == NULL)
		return;

	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;

	if (!bonded_ethdev->data->dev_started ||
		!internals->link_status_polling_enabled)
		return;

	/* If device is currently being configured then don't check slaves link
	 * status, wait until next period */
	if (rte_spinlock_trylock(&internals->lock)) {
		if (internals->slave_count > 0)
			polling_slave_found = 0;

		for (i = 0; i < internals->slave_count; i++) {
			if (!internals->slaves[i].link_status_poll_enabled)
				continue;

			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
			polling_slave_found = 1;

			/* Update slave link status */
			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
					internals->slaves[i].link_status_wait_to_complete);

			/* if link status has changed since last checked then call lsc
			 * event callback */
			if (slave_ethdev->data->dev_link.link_status !=
					internals->slaves[i].last_link_status) {
				internals->slaves[i].last_link_status =
						slave_ethdev->data->dev_link.link_status;

				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
						RTE_ETH_EVENT_INTR_LSC,
						&bonded_ethdev->data->port_id);
			}
		}
		rte_spinlock_unlock(&internals->lock);
	}

	if (polling_slave_found)
		/* Set alarm to continue monitoring link status of slave ethdev's */
		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
				bond_ethdev_slave_link_status_change_monitor, cb_arg);
}

static int
bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
		int wait_to_complete)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

	if (!bonded_eth_dev->data->dev_started ||
			internals->active_slave_count == 0) {
		bonded_eth_dev->data->dev_link.link_status = 0;
		return 0;
	} else {
		struct rte_eth_dev *slave_eth_dev;
		int i, link_up = 0;

		for (i = 0; i < internals->active_slave_count; i++) {
			slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];

			(*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
					wait_to_complete);
			if (slave_eth_dev->data->dev_link.link_status == 1) {
				link_up = 1;
				break;
			}
		}
		bonded_eth_dev->data->dev_link.link_status = link_up;
	}

	return 0;
}

static void
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_stats slave_stats;
	int i, j;

	for (i = 0; i < internals->slave_count; i++) {
		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);

		stats->ipackets += slave_stats.ipackets;
		stats->opackets += slave_stats.opackets;
		stats->ibytes += slave_stats.ibytes;
		stats->obytes += slave_stats.obytes;
		stats->ierrors += slave_stats.ierrors;
		stats->oerrors += slave_stats.oerrors;
		stats->imcasts += slave_stats.imcasts;
		stats->rx_nombuf += slave_stats.rx_nombuf;
		stats->fdirmatch += slave_stats.fdirmatch;
		stats->fdirmiss += slave_stats.fdirmiss;
		stats->tx_pause_xon += slave_stats.tx_pause_xon;
		stats->rx_pause_xon += slave_stats.rx_pause_xon;
		stats->tx_pause_xoff += slave_stats.tx_pause_xoff;
		stats->rx_pause_xoff += slave_stats.rx_pause_xoff;

		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
			stats->q_opackets[j] += slave_stats.q_opackets[j];
			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
			stats->q_obytes[j] += slave_stats.q_obytes[j];
			stats->q_errors[j] += slave_stats.q_errors[j];
		}
	}
}

static void
bond_ethdev_stats_reset(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	for (i = 0; i < internals->slave_count; i++)
		rte_eth_stats_reset(internals->slaves[i].port_id);
}

static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	int i;

	internals->promiscuous_en = 1;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_enable(internals->slaves[i].port_id);
		break;
	/* In mode4 promiscuous mode is managed when slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_enable(internals->current_primary_port);
	}
}

static void
bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	int i;

	internals->promiscuous_en = 0;

	switch (internals->mode) {
	/* Promiscuous mode is propagated to all slaves */
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++)
			rte_eth_promiscuous_disable(internals->slaves[i].port_id);
		break;
	/* In mode4 promiscuous mode is managed when slave is added/removed */
	case BONDING_MODE_8023AD:
		break;
	/* Promiscuous mode is propagated only to primary slave */
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		rte_eth_promiscuous_disable(internals->current_primary_port);
	}
}

static void
bond_ethdev_delayed_lsc_propagation(void *arg)
{
	if (arg == NULL)
		return;

	_rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
			RTE_ETH_EVENT_INTR_LSC);
}

void
bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
		void *param)
{
	struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
	struct bond_dev_private *internals;
	struct rte_eth_link link;

	int i, valid_slave = 0;
	uint8_t active_pos;
	uint8_t lsc_flag = 0;

	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
		return;

	bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
	slave_eth_dev = &rte_eth_devices[port_id];

	if (check_for_bonded_ethdev(bonded_eth_dev))
		return;

	internals = bonded_eth_dev->data->dev_private;

	/* If the device isn't started don't handle interrupts */
	if (!bonded_eth_dev->data->dev_started)
		return;

	/* verify that port_id is a valid slave of bonded port */
	for (i = 0; i < internals->slave_count; i++) {
		if (internals->slaves[i].port_id == port_id) {
			valid_slave = 1;
			break;
		}
	}

	if (!valid_slave)
		return;

	/* Search for port in active port list */
	active_pos = find_slave_by_id(internals->active_slaves,
			internals->active_slave_count, port_id);

	rte_eth_link_get_nowait(port_id, &link);
	if (link.link_status) {
		if (active_pos < internals->active_slave_count)
			return;

		/* if no active slave ports then set this port to be primary port */
		if (internals->active_slave_count < 1) {
			/* If first active slave, then change link status */
			bonded_eth_dev->data->dev_link.link_status = 1;
			internals->current_primary_port = port_id;
			lsc_flag = 1;

			mac_address_slaves_update(bonded_eth_dev);

			/* Inherit eth dev link properties from first active slave */
			link_properties_set(bonded_eth_dev,
					&(slave_eth_dev->data->dev_link));
		}

		activate_slave(bonded_eth_dev, port_id);

		/* If user has defined the primary port then default to using it */
		if (internals->user_defined_primary_port &&
				internals->primary_port == port_id)
			bond_ethdev_primary_set(internals, port_id);
	} else {
		if (active_pos == internals->active_slave_count)
			return;

		/* Remove from active slave list */
		deactivate_slave(bonded_eth_dev, port_id);

		/* No active slaves, change link status to down and reset other
		 * link properties */
		if (internals->active_slave_count < 1) {
			lsc_flag = 1;
			bonded_eth_dev->data->dev_link.link_status = 0;

			link_properties_reset(bonded_eth_dev);
		}

		/* Update primary id, take first active slave from list or if none
		 * available set to -1 */
		if (port_id == internals->current_primary_port) {
			if (internals->active_slave_count > 0)
				bond_ethdev_primary_set(internals,
						internals->active_slaves[0]);
			else
				internals->current_primary_port = internals->primary_port;
		}
	}

	if (lsc_flag) {
		/* Cancel any possible outstanding interrupts if delays are enabled */
		if (internals->link_up_delay_ms > 0 ||
				internals->link_down_delay_ms > 0)
			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
					bonded_eth_dev);

		if (bonded_eth_dev->data->dev_link.link_status) {
			if (internals->link_up_delay_ms > 0)
				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC);
		} else {
			if (internals->link_down_delay_ms > 0)
				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
						bond_ethdev_delayed_lsc_propagation,
						(void *)bonded_eth_dev);
			else
				_rte_eth_dev_callback_process(bonded_eth_dev,
						RTE_ETH_EVENT_INTR_LSC);
		}
	}
}

static int
bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	unsigned i, j;
	int result = 0;
	int slave_reta_size;
	unsigned reta_count;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	reta_count = reta_size / RTE_RETA_GROUP_SIZE;

	for (i = 0; i < reta_count; i++) {
		internals->reta_conf[i].mask = reta_conf[i].mask;
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
	}

	/* Fill rest of array */
	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
				sizeof(internals->reta_conf[0]) * reta_count);

	/* Propagate RETA over slaves */
	for (i = 0; i < internals->slave_count; i++) {
		slave_reta_size = internals->slaves[i].reta_size;
		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
				&internals->reta_conf[0], slave_reta_size);
		if (result < 0)
			return result;
	}

	return 0;
}

static int
bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	int i, j;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];

	return 0;
}
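/*
 * Illustrative sketch (not part of the driver): programming the bonded
 * port's RETA from an application through the two ops above. reta_size must
 * match the size the bonded device reports; the 128-entry table and the
 * two-queue spread below are hypothetical.
 *
 *	struct rte_eth_rss_reta_entry64 reta_conf[2];
 *	unsigned i, j;
 *
 *	for (i = 0; i < 2; i++) {
 *		reta_conf[i].mask = ~0ULL;
 *		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
 *			reta_conf[i].reta[j] = j % 2;	// alternate queues 0 and 1
 *	}
 *	// 2 groups of 64 entries -> reta_size of 128
 *	rte_eth_dev_rss_reta_update(bonded_port_id, reta_conf, 128);
 *
 *	// Read it back through bond_ethdev_rss_reta_query():
 *	rte_eth_dev_rss_reta_query(bonded_port_id, reta_conf, 128);
 */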
static int
bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	unsigned i;
	int result = 0;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_rss_conf bond_rss_conf;

	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));

	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;

	if (bond_rss_conf.rss_hf != 0)
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;

	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
			sizeof(internals->rss_key)) {
		if (bond_rss_conf.rss_key_len == 0)
			bond_rss_conf.rss_key_len = 40;
		internals->rss_key_len = bond_rss_conf.rss_key_len;
		memcpy(internals->rss_key, bond_rss_conf.rss_key,
				internals->rss_key_len);
	}

	/* Propagate the (masked) RSS configuration to every slave */
	for (i = 0; i < internals->slave_count; i++) {
		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
				&bond_rss_conf);
		if (result < 0)
			return result;
	}

	return 0;
}
static int
bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	rss_conf->rss_key_len = internals->rss_key_len;
	if (rss_conf->rss_key)
		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);

	return 0;
}
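/*
 * Illustrative sketch (not part of the driver): updating the bonded port's
 * RSS hash configuration from an application. The 40-byte key length matches
 * the default used by this driver; the key contents and hash-function mask
 * are hypothetical.
 *
 *	uint8_t key[40] = { 0 };	// fill with a real 40-byte key
 *	struct rte_eth_rss_conf rss_conf = {
 *		.rss_key = key,
 *		.rss_key_len = sizeof(key),
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_UDP,
 *	};
 *
 *	rte_eth_dev_rss_hash_update(bonded_port_id, &rss_conf);
 */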
struct eth_dev_ops default_dev_ops = {
	.dev_start = bond_ethdev_start,
	.dev_stop = bond_ethdev_stop,
	.dev_close = bond_ethdev_close,
	.dev_configure = bond_ethdev_configure,
	.dev_infos_get = bond_ethdev_info,
	.rx_queue_setup = bond_ethdev_rx_queue_setup,
	.tx_queue_setup = bond_ethdev_tx_queue_setup,
	.rx_queue_release = bond_ethdev_rx_queue_release,
	.tx_queue_release = bond_ethdev_tx_queue_release,
	.link_update = bond_ethdev_link_update,
	.stats_get = bond_ethdev_stats_get,
	.stats_reset = bond_ethdev_stats_reset,
	.promiscuous_enable = bond_ethdev_promiscuous_enable,
	.promiscuous_disable = bond_ethdev_promiscuous_disable,
	.reta_update = bond_ethdev_rss_reta_update,
	.reta_query = bond_ethdev_rss_reta_query,
	.rss_hash_update = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get = bond_ethdev_rss_hash_conf_get
};
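/*
 * Note: these ops are never called directly; the generic rte_ethdev layer
 * dispatches to them. For example, rte_eth_dev_configure() on the bonded
 * port lands in bond_ethdev_configure(), and rte_eth_dev_rss_reta_update()
 * in bond_ethdev_rss_reta_update() above.
 */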
static int
bond_init(const char *name, const char *params)
{
	struct bond_dev_private *internals;
	struct rte_kvargs *kvlist;
	uint8_t bonding_mode, socket_id;
	int arg_count, port_id;

	RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);

	kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
	if (kvlist == NULL)
		return -1;

	/* Parse link bonding mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
				&bond_ethdev_parse_slave_mode_kvarg,
				&bonding_mode) != 0) {
			RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
					name);
			goto parse_error;
		}
	} else {
		RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
				"device %s\n", name);
		goto parse_error;
	}

	/* Parse socket id to create bonding device on */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
	if (arg_count == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
				!= 0) {
			RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
					"bonded device %s\n", name);
			goto parse_error;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
				"bonded device %s\n", name);
		goto parse_error;
	} else {
		socket_id = rte_socket_id();
	}

	/* Create link bonding eth device */
	port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
	if (port_id < 0) {
		RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
				"socket %u.\n", name, bonding_mode, socket_id);
		goto parse_error;
	}
	internals = rte_eth_devices[port_id].data->dev_private;
	internals->kvlist = kvlist;

	RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
			"socket %u.\n", name, port_id, bonding_mode, socket_id);
	return 0;

parse_error:
	rte_kvargs_free(kvlist);
	return -1;
}
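/*
 * Illustrative sketch (not part of the driver): bond_init() is reached when
 * a bonded device is created from the EAL command line via --vdev. The PCI
 * addresses below are hypothetical; "mode" is mandatory and accepted exactly
 * once, and "socket_id" defaults to rte_socket_id() when omitted.
 *
 *	./app -c 0xf -n 4 \
 *		--vdev 'eth_bond0,mode=1,slave=0000:0a:00.01,slave=0000:04:00.00'
 */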
static int
bond_uninit(const char *name)
{
	int ret;

	if (name == NULL)
		return -EINVAL;

	RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);

	/* free link bonding eth device */
	ret = rte_eth_bond_free(name);
	if (ret < 0)
		RTE_LOG(ERR, EAL, "Failed to free %s\n", name);

	return ret;
}
/* this part will resolve the slave portids after all the other pdev and vdev
 * have been allocated */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	char *name = dev->data->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	int arg_count;
	uint8_t port_id = dev - rte_eth_devices;

	static const uint8_t default_rss_key[40] = {
		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
		0xBE, 0xAC, 0x01, 0xFA
	};

	unsigned i, j;

	/* If RSS is enabled, fill table and key with default values */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
		memcpy(internals->rss_key, default_rss_key, 40);

		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
		}
	}
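	/*
	 * For example (hypothetical queue count): with nb_rx_queues == 4 the
	 * default RETA above becomes 0,1,2,3,0,1,2,3,... across every group,
	 * spreading flows evenly over the four RX queues.
	 */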
	/*
	 * if no kvlist, it means that this bonded device has been created
	 * through the bonding api.
	 */
	if (!kvlist)
		return 0;

	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
			RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
					name);
			return -1;
		}

		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set mac address on bonded device %s\n", name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"MAC address can be specified only once for bonded device %s\n",
				name);
		return -1;
	}

	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg,
				&xmit_policy) != 0) {
			RTE_LOG(INFO, EAL,
					"Invalid xmit policy specified for bonded device %s\n",
					name);
			return -1;
		}

		/* Set balance mode transmit policy */
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set balance xmit policy on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"Transmit policy can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}

	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse slave ports for bonded device %s\n",
					name);
			return -1;
		}

		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
				RTE_LOG(ERR, EAL,
						"Failed to add port %d as slave to bonded device %s\n",
						slave_ports.slaves[i], name);
			}
		}
	} else {
		RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
		return -1;
	}

	/* Parse/set primary slave port id */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint8_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid primary slave port id specified for bonded device"
					" %s\n", name);
			return -1;
		}

		/* Set primary slave port id */
		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set primary slave port %d on bonded device %s\n",
					primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Primary slave can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}

	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid lsc polling interval value specified for bonded"
					" device %s\n", name);
			return -1;
		}

		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set lsc monitor polling interval (%u ms) on"
					" bonded device %s\n", lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"LSC polling interval can be specified only once for bonded"
				" device %s\n", name);
		return -1;
	}

	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link up propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link up propagation delay (%u ms) on bonded"
					" device %s\n", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link up propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link down propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link down propagation delay (%u ms) on"
					" bonded device %s\n", link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link down propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	return 0;
}
static struct rte_driver bond_drv = {
	.name = "eth_bond",
	.type = PMD_VDEV,
	.init = bond_init,
	.uninit = bond_uninit,
};

PMD_REGISTER_DRIVER(bond_drv);