X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fbonding%2Frte_eth_bond_pmd.c;h=8847d20c237eeacd1d1a0a90721b7a7bb4fc1d27;hb=caccf8b318cafcdafe39faa3c5ce3eef67007621;hp=c0a2e83f3d04eb6372ef503a7115af35db61913b;hpb=5566a3e35866ce9e5eacf886c27b460ebfcd6ee9;p=dpdk.git diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c index c0a2e83f3d..8847d20c23 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include @@ -17,6 +17,7 @@ #include #include #include +#include #include "rte_eth_bond.h" #include "rte_eth_bond_private.h" @@ -280,87 +281,114 @@ bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs, static uint16_t bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) + uint16_t nb_bufs) { - struct bond_dev_private *internals; - struct bond_tx_queue *bd_tx_q; + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; - uint16_t num_of_slaves; - uint16_t slaves[RTE_MAX_ETHPORTS]; - /* positions in slaves, not ID */ - uint8_t distributing_offsets[RTE_MAX_ETHPORTS]; - uint8_t distributing_count; + uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t slave_count; - uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0; - uint16_t i, op_slave_idx; + uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t dist_slave_count; - struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts]; + /* 2-D array to sort mbufs for transmission on each slave into */ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; + /* Number of mbufs for transmission on each slave */ + uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; + /* Mapping array generated by hash function to map mbufs to slaves */ + uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 }; - /* Total amount of packets in slave_bufs */ - uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; - /* Slow packets placed in each slave */ + uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t total_tx_count = 0, total_tx_fail_count = 0; - if (unlikely(nb_pkts == 0)) - return 0; + uint16_t i, j; - bd_tx_q = (struct bond_tx_queue *)queue; - internals = bd_tx_q->dev_private; + if (unlikely(nb_bufs == 0)) + return 0; /* Copy slave list to protect against slave up/down changes during tx * bursting */ - num_of_slaves = internals->active_slave_count; - if (num_of_slaves < 1) - return num_tx_total; + slave_count = internals->active_slave_count; + if (unlikely(slave_count < 1)) + return 0; - memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * - num_of_slaves); + memcpy(slave_port_ids, internals->active_slaves, + sizeof(slave_port_ids[0]) * slave_count); + + + dist_slave_count = 0; + for (i = 0; i < slave_count; i++) { + struct port *port = &mode_8023ad_ports[slave_port_ids[i]]; - distributing_count = 0; - for (i = 0; i < num_of_slaves; i++) { - struct port *port = &mode_8023ad_ports[slaves[i]]; if (ACTOR_STATE(port, DISTRIBUTING)) - distributing_offsets[distributing_count++] = i; + dist_slave_port_ids[dist_slave_count++] = + slave_port_ids[i]; } - if (likely(distributing_count > 0)) { - /* Populate slaves mbuf with the packets which are to be sent */ - for (i = 0; i < nb_pkts; i++) { - /* Select output slave using hash based on xmit policy */ - op_slave_idx = internals->xmit_hash(bufs[i], - distributing_count); + if (unlikely(dist_slave_count < 1)) + return 0; - /* Populate slave mbuf arrays with mbufs for that slave. - * Use only slaves that are currently distributing. - */ - uint8_t slave_offset = - distributing_offsets[op_slave_idx]; - slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = - bufs[i]; - slave_nb_pkts[slave_offset]++; - } + /* + * Populate slaves mbuf with the packets which are to be sent on it + * selecting output slave using hash based on xmit policy + */ + internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count, + bufs_slave_port_idxs); + + for (i = 0; i < nb_bufs; i++) { + /* Populate slave mbuf arrays with mbufs for that slave. */ + uint8_t slave_idx = bufs_slave_port_idxs[i]; + + slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i]; } + /* Send packet burst on each slave device */ - for (i = 0; i < num_of_slaves; i++) { - if (slave_nb_pkts[i] == 0) + for (i = 0; i < dist_slave_count; i++) { + if (slave_nb_bufs[i] == 0) continue; - num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, - slave_bufs[i], slave_nb_pkts[i]); + slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i], + bd_tx_q->queue_id, slave_bufs[i], + slave_nb_bufs[i]); - num_tx_total += num_tx_slave; - num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave; + total_tx_count += slave_tx_count; /* If tx burst fails move packets to end of bufs */ - if (unlikely(num_tx_slave < slave_nb_pkts[i])) { - uint16_t j = nb_pkts - num_tx_fail_total; - for ( ; num_tx_slave < slave_nb_pkts[i]; j++, - num_tx_slave++) - bufs[j] = slave_bufs[i][num_tx_slave]; + if (unlikely(slave_tx_count < slave_nb_bufs[i])) { + slave_tx_fail_count[i] = slave_nb_bufs[i] - + slave_tx_count; + total_tx_fail_count += slave_tx_fail_count[i]; + + /* + * Shift bufs to beginning of array to allow reordering + * later + */ + for (j = 0; j < slave_tx_fail_count[i]; j++) { + slave_bufs[i][j] = + slave_bufs[i][(slave_tx_count - 1) + j]; + } } } - return num_tx_total; + /* + * If there are tx burst failures we move packets to end of bufs to + * preserve expected PMD behaviour of all failed transmitted being + * at the end of the input mbuf array + */ + if (unlikely(total_tx_fail_count > 0)) { + int bufs_idx = nb_bufs - total_tx_fail_count - 1; + + for (i = 0; i < slave_count; i++) { + if (slave_tx_fail_count[i] > 0) { + for (j = 0; j < slave_tx_fail_count[i]; j++) + bufs[bufs_idx++] = slave_bufs[i][j]; + } + } + } + + return total_tx_count; } @@ -590,7 +618,7 @@ mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h, uint16_t offset = get_vlan_offset(eth_h, ðer_type); #ifdef RTE_LIBRTE_BOND_DEBUG_ALB - snprintf(buf, 16, "%s", info); + strlcpy(buf, info, 16); #endif if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { @@ -759,96 +787,129 @@ ipv6_hash(struct ipv6_hdr *ipv6_hdr) (word_src_addr[3] ^ word_dst_addr[3]); } -uint16_t -xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count) + +void +burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint8_t slave_count, uint16_t *slaves) { - struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); + struct ether_hdr *eth_hdr; + uint32_t hash; + int i; + + for (i = 0; i < nb_pkts; i++) { + eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *); - uint32_t hash = ether_hash(eth_hdr); + hash = ether_hash(eth_hdr); - return (hash ^= hash >> 8) % slave_count; + slaves[i] = (hash ^= hash >> 8) % slave_count; + } } -uint16_t -xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count) +void +burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint8_t slave_count, uint16_t *slaves) { - struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); - uint16_t proto = eth_hdr->ether_type; - size_t vlan_offset = get_vlan_offset(eth_hdr, &proto); - uint32_t hash, l3hash = 0; + uint16_t i; + struct ether_hdr *eth_hdr; + uint16_t proto; + size_t vlan_offset; + uint32_t hash, l3hash; - hash = ether_hash(eth_hdr); + for (i = 0; i < nb_pkts; i++) { + eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *); + l3hash = 0; - if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { - struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - l3hash = ipv4_hash(ipv4_hdr); + proto = eth_hdr->ether_type; + hash = ether_hash(eth_hdr); - } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { - struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - l3hash = ipv6_hash(ipv6_hdr); - } + vlan_offset = get_vlan_offset(eth_hdr, &proto); - hash = hash ^ l3hash; - hash ^= hash >> 16; - hash ^= hash >> 8; + if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { + struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv4_hash(ipv4_hdr); - return hash % slave_count; -} + } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { + struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv6_hash(ipv6_hdr); + } -uint16_t -xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count) -{ - struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); - uint16_t proto = eth_hdr->ether_type; - size_t vlan_offset = get_vlan_offset(eth_hdr, &proto); + hash = hash ^ l3hash; + hash ^= hash >> 16; + hash ^= hash >> 8; - struct udp_hdr *udp_hdr = NULL; - struct tcp_hdr *tcp_hdr = NULL; - uint32_t hash, l3hash = 0, l4hash = 0; + slaves[i] = hash % slave_count; + } +} - if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { - struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - size_t ip_hdr_offset; +void +burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint8_t slave_count, uint16_t *slaves) +{ + struct ether_hdr *eth_hdr; + uint16_t proto; + size_t vlan_offset; + int i; - l3hash = ipv4_hash(ipv4_hdr); + struct udp_hdr *udp_hdr; + struct tcp_hdr *tcp_hdr; + uint32_t hash, l3hash, l4hash; - /* there is no L4 header in fragmented packet */ - if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) { - ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) * + for (i = 0; i < nb_pkts; i++) { + eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *); + proto = eth_hdr->ether_type; + vlan_offset = get_vlan_offset(eth_hdr, &proto); + l3hash = 0; + l4hash = 0; + + if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { + struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + size_t ip_hdr_offset; + + l3hash = ipv4_hash(ipv4_hdr); + + /* there is no L4 header in fragmented packet */ + if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) + == 0)) { + ip_hdr_offset = (ipv4_hdr->version_ihl + & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER; - if (ipv4_hdr->next_proto_id == IPPROTO_TCP) { - tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + - ip_hdr_offset); + if (ipv4_hdr->next_proto_id == IPPROTO_TCP) { + tcp_hdr = (struct tcp_hdr *) + ((char *)ipv4_hdr + + ip_hdr_offset); + l4hash = HASH_L4_PORTS(tcp_hdr); + } else if (ipv4_hdr->next_proto_id == + IPPROTO_UDP) { + udp_hdr = (struct udp_hdr *) + ((char *)ipv4_hdr + + ip_hdr_offset); + l4hash = HASH_L4_PORTS(udp_hdr); + } + } + } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { + struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv6_hash(ipv6_hdr); + + if (ipv6_hdr->proto == IPPROTO_TCP) { + tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1); l4hash = HASH_L4_PORTS(tcp_hdr); - } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) { - udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + - ip_hdr_offset); + } else if (ipv6_hdr->proto == IPPROTO_UDP) { + udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1); l4hash = HASH_L4_PORTS(udp_hdr); } } - } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { - struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - l3hash = ipv6_hash(ipv6_hdr); - - if (ipv6_hdr->proto == IPPROTO_TCP) { - tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1); - l4hash = HASH_L4_PORTS(tcp_hdr); - } else if (ipv6_hdr->proto == IPPROTO_UDP) { - udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1); - l4hash = HASH_L4_PORTS(udp_hdr); - } - } - hash = l3hash ^ l4hash; - hash ^= hash >> 16; - hash ^= hash >> 8; + hash = l3hash ^ l4hash; + hash ^= hash >> 16; + hash ^= hash >> 8; - return hash % slave_count; + slaves[i] = hash % slave_count; + } } struct bwg_slave { @@ -1156,156 +1217,239 @@ bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) static uint16_t bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) + uint16_t nb_bufs) { - struct bond_dev_private *internals; - struct bond_tx_queue *bd_tx_q; + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; - uint16_t num_of_slaves; - uint16_t slaves[RTE_MAX_ETHPORTS]; + uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t slave_count; - uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0; + /* Array to sort mbufs for transmission on each slave into */ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; + /* Number of mbufs for transmission on each slave */ + uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; + /* Mapping array generated by hash function to map mbufs to slaves */ + uint16_t bufs_slave_port_idxs[nb_bufs]; - int i, op_slave_id; + uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t total_tx_count = 0, total_tx_fail_count = 0; - struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts]; - uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t i, j; - bd_tx_q = (struct bond_tx_queue *)queue; - internals = bd_tx_q->dev_private; + if (unlikely(nb_bufs == 0)) + return 0; /* Copy slave list to protect against slave up/down changes during tx * bursting */ - num_of_slaves = internals->active_slave_count; - memcpy(slaves, internals->active_slaves, - sizeof(internals->active_slaves[0]) * num_of_slaves); + slave_count = internals->active_slave_count; + if (unlikely(slave_count < 1)) + return 0; - if (num_of_slaves < 1) - return num_tx_total; + memcpy(slave_port_ids, internals->active_slaves, + sizeof(slave_port_ids[0]) * slave_count); - /* Populate slaves mbuf with the packets which are to be sent on it */ - for (i = 0; i < nb_pkts; i++) { - /* Select output slave using hash based on xmit policy */ - op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves); + /* + * Populate slaves mbuf with the packets which are to be sent on it + * selecting output slave using hash based on xmit policy + */ + internals->burst_xmit_hash(bufs, nb_bufs, slave_count, + bufs_slave_port_idxs); + + for (i = 0; i < nb_bufs; i++) { + /* Populate slave mbuf arrays with mbufs for that slave. */ + uint8_t slave_idx = bufs_slave_port_idxs[i]; - /* Populate slave mbuf arrays with mbufs for that slave */ - slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i]; + slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i]; } /* Send packet burst on each slave device */ - for (i = 0; i < num_of_slaves; i++) { - if (slave_nb_pkts[i] > 0) { - num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, - slave_bufs[i], slave_nb_pkts[i]); + for (i = 0; i < slave_count; i++) { + if (slave_nb_bufs[i] == 0) + continue; - /* if tx burst fails move packets to end of bufs */ - if (unlikely(num_tx_slave < slave_nb_pkts[i])) { - int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave; + slave_tx_count = rte_eth_tx_burst(slave_port_ids[i], + bd_tx_q->queue_id, slave_bufs[i], + slave_nb_bufs[i]); - tx_fail_total += slave_tx_fail_count; - memcpy(&bufs[nb_pkts - tx_fail_total], - &slave_bufs[i][num_tx_slave], - slave_tx_fail_count * sizeof(bufs[0])); + total_tx_count += slave_tx_count; + + /* If tx burst fails move packets to end of bufs */ + if (unlikely(slave_tx_count < slave_nb_bufs[i])) { + slave_tx_fail_count[i] = slave_nb_bufs[i] - + slave_tx_count; + total_tx_fail_count += slave_tx_fail_count[i]; + + /* + * Shift bufs to beginning of array to allow reordering + * later + */ + for (j = 0; j < slave_tx_fail_count[i]; j++) { + slave_bufs[i][j] = + slave_bufs[i][(slave_tx_count - 1) + j]; } + } + } - num_tx_total += num_tx_slave; + /* + * If there are tx burst failures we move packets to end of bufs to + * preserve expected PMD behaviour of all failed transmitted being + * at the end of the input mbuf array + */ + if (unlikely(total_tx_fail_count > 0)) { + int bufs_idx = nb_bufs - total_tx_fail_count - 1; + + for (i = 0; i < slave_count; i++) { + if (slave_tx_fail_count[i] > 0) { + for (j = 0; j < slave_tx_fail_count[i]; j++) + bufs[bufs_idx++] = slave_bufs[i][j]; + } } } - return num_tx_total; + return total_tx_count; } static uint16_t bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) + uint16_t nb_bufs) { - struct bond_dev_private *internals; - struct bond_tx_queue *bd_tx_q; + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; - uint16_t num_of_slaves; - uint16_t slaves[RTE_MAX_ETHPORTS]; - /* positions in slaves, not ID */ - uint8_t distributing_offsets[RTE_MAX_ETHPORTS]; - uint8_t distributing_count; + uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t slave_count; - uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0; - uint16_t i, j, op_slave_idx; - const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1; + uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t dist_slave_count; - /* Allocate additional packets in case 8023AD mode. */ - struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size]; - void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL }; + /* 2-D array to sort mbufs for transmission on each slave into */ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; + /* Number of mbufs for transmission on each slave */ + uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; + /* Mapping array generated by hash function to map mbufs to slaves */ + uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 }; - /* Total amount of packets in slave_bufs */ - uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; - /* Slow packets placed in each slave */ - uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t total_tx_count = 0, total_tx_fail_count = 0; - bd_tx_q = (struct bond_tx_queue *)queue; - internals = bd_tx_q->dev_private; + uint16_t i, j; + + if (unlikely(nb_bufs == 0)) + return 0; /* Copy slave list to protect against slave up/down changes during tx * bursting */ - num_of_slaves = internals->active_slave_count; - if (num_of_slaves < 1) - return num_tx_total; - - memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves); - - distributing_count = 0; - for (i = 0; i < num_of_slaves; i++) { - struct port *port = &mode_8023ad_ports[slaves[i]]; + slave_count = internals->active_slave_count; + if (unlikely(slave_count < 1)) + return 0; - slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring, - slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS, - NULL); - slave_nb_pkts[i] = slave_slow_nb_pkts[i]; + memcpy(slave_port_ids, internals->active_slaves, + sizeof(slave_port_ids[0]) * slave_count); - for (j = 0; j < slave_slow_nb_pkts[i]; j++) - slave_bufs[i][j] = slow_pkts[j]; + dist_slave_count = 0; + for (i = 0; i < slave_count; i++) { + struct port *port = &mode_8023ad_ports[slave_port_ids[i]]; if (ACTOR_STATE(port, DISTRIBUTING)) - distributing_offsets[distributing_count++] = i; + dist_slave_port_ids[dist_slave_count++] = + slave_port_ids[i]; } - if (likely(distributing_count > 0)) { - /* Populate slaves mbuf with the packets which are to be sent on it */ - for (i = 0; i < nb_pkts; i++) { - /* Select output slave using hash based on xmit policy */ - op_slave_idx = internals->xmit_hash(bufs[i], distributing_count); + if (likely(dist_slave_count > 1)) { + + /* + * Populate slaves mbuf with the packets which are to be sent + * on it, selecting output slave using hash based on xmit policy + */ + internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count, + bufs_slave_port_idxs); - /* Populate slave mbuf arrays with mbufs for that slave. Use only - * slaves that are currently distributing. */ - uint8_t slave_offset = distributing_offsets[op_slave_idx]; - slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i]; - slave_nb_pkts[slave_offset]++; + for (i = 0; i < nb_bufs; i++) { + /* + * Populate slave mbuf arrays with mbufs for that + * slave + */ + uint8_t slave_idx = bufs_slave_port_idxs[i]; + + slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = + bufs[i]; } - } - /* Send packet burst on each slave device */ - for (i = 0; i < num_of_slaves; i++) { - if (slave_nb_pkts[i] == 0) - continue; - num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, - slave_bufs[i], slave_nb_pkts[i]); + /* Send packet burst on each slave device */ + for (i = 0; i < dist_slave_count; i++) { + if (slave_nb_bufs[i] == 0) + continue; + + slave_tx_count = rte_eth_tx_burst( + dist_slave_port_ids[i], + bd_tx_q->queue_id, slave_bufs[i], + slave_nb_bufs[i]); + + total_tx_count += slave_tx_count; + + /* If tx burst fails move packets to end of bufs */ + if (unlikely(slave_tx_count < slave_nb_bufs[i])) { + slave_tx_fail_count[i] = slave_nb_bufs[i] - + slave_tx_count; + total_tx_fail_count += slave_tx_fail_count[i]; + + /* + * Shift bufs to beginning of array to allow + * reordering later + */ + for (j = 0; j < slave_tx_fail_count[i]; j++) + slave_bufs[i][j] = + slave_bufs[i] + [(slave_tx_count - 1) + + j]; + } + } + + /* + * If there are tx burst failures we move packets to end of + * bufs to preserve expected PMD behaviour of all failed + * transmitted being at the end of the input mbuf array + */ + if (unlikely(total_tx_fail_count > 0)) { + int bufs_idx = nb_bufs - total_tx_fail_count - 1; + + for (i = 0; i < slave_count; i++) { + if (slave_tx_fail_count[i] > 0) { + for (j = 0; + j < slave_tx_fail_count[i]; + j++) { + bufs[bufs_idx++] = + slave_bufs[i][j]; + } + } + } + } + } - /* If tx burst fails drop slow packets */ - for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++) - rte_pktmbuf_free(slave_bufs[i][num_tx_slave]); + /* Check for LACP control packets and send if available */ + for (i = 0; i < slave_count; i++) { + struct port *port = &mode_8023ad_ports[slave_port_ids[i]]; + struct rte_mbuf *ctrl_pkt = NULL; - num_tx_total += num_tx_slave - slave_slow_nb_pkts[i]; - num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave; + if (likely(rte_ring_empty(port->tx_ring))) + continue; - /* If tx burst fails move packets to end of bufs */ - if (unlikely(num_tx_slave < slave_nb_pkts[i])) { - uint16_t j = nb_pkts - num_tx_fail_total; - for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++) - bufs[j] = slave_bufs[i][num_tx_slave]; + if (rte_ring_dequeue(port->tx_ring, + (void **)&ctrl_pkt) != -ENOENT) { + slave_tx_count = rte_eth_tx_burst(slave_port_ids[i], + bd_tx_q->queue_id, &ctrl_pkt, 1); + /* + * re-enqueue LAG control plane packets to buffering + * ring if transmission fails so the packet isn't lost. + */ + if (slave_tx_count != 1) + rte_ring_enqueue(port->tx_ring, ctrl_pkt); } } - return num_tx_total; + return total_tx_count; } static uint16_t @@ -1471,7 +1615,8 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev) case BONDING_MODE_BALANCE: case BONDING_MODE_BROADCAST: for (i = 0; i < internals->slave_count; i++) { - if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id], + if (rte_eth_dev_default_mac_addr_set( + internals->slaves[i].port_id, bonded_eth_dev->data->mac_addrs)) { RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", internals->slaves[i].port_id); @@ -1489,15 +1634,16 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev) for (i = 0; i < internals->slave_count; i++) { if (internals->slaves[i].port_id == internals->current_primary_port) { - if (mac_address_set(&rte_eth_devices[internals->primary_port], + if (rte_eth_dev_default_mac_addr_set( + internals->primary_port, bonded_eth_dev->data->mac_addrs)) { RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", internals->current_primary_port); return -1; } } else { - if (mac_address_set( - &rte_eth_devices[internals->slaves[i].port_id], + if (rte_eth_dev_default_mac_addr_set( + internals->slaves[i].port_id, &internals->slaves[i].persisted_mac_addr)) { RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", internals->slaves[i].port_id); @@ -1673,8 +1819,13 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, bonded_eth_dev->data->dev_conf.rxmode.mq_mode; } - slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter = - bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter; + if (bonded_eth_dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_FILTER) + slave_eth_dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_VLAN_FILTER; + else + slave_eth_dev->data->dev_conf.rxmode.offloads &= + ~DEV_RX_OFFLOAD_VLAN_FILTER; nb_rx_queues = bonded_eth_dev->data->nb_rx_queues; nb_tx_queues = bonded_eth_dev->data->nb_tx_queues; @@ -1686,6 +1837,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, } } + errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id, + bonded_eth_dev->data->mtu); + if (errval != 0 && errval != -ENOTSUP) { + RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)", + slave_eth_dev->data->port_id, errval); + return errval; + } + /* Configure device */ errval = rte_eth_dev_configure(slave_eth_dev->data->port_id, nb_rx_queues, nb_tx_queues, @@ -1881,7 +2040,7 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev) if (internals->slave_count == 0) { RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices"); - return -1; + goto out_err; } if (internals->user_defined_mac == 0) { @@ -1892,18 +2051,18 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev) new_mac_addr = &internals->slaves[i].persisted_mac_addr; if (new_mac_addr == NULL) - return -1; + goto out_err; if (mac_address_set(eth_dev, new_mac_addr) != 0) { RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address", eth_dev->data->port_id); - return -1; + goto out_err; } } /* Update all slave devices MACs*/ if (mac_address_slaves_update(eth_dev) != 0) - return -1; + goto out_err; /* If bonded device is configure in promiscuous mode then re-apply config */ if (internals->promiscuous_en) @@ -1928,7 +2087,7 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev) "bonded port (%d) failed to reconfigure slave device (%d)", eth_dev->data->port_id, internals->slaves[i].port_id); - return -1; + goto out_err; } /* We will need to poll for link status if any slave doesn't * support interrupts @@ -1936,6 +2095,7 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev) if (internals->slaves[i].link_status_poll_enabled) internals->link_status_polling_enabled = 1; } + /* start polling if needed */ if (internals->link_status_polling_enabled) { rte_eal_alarm_set( @@ -1955,6 +2115,10 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev) bond_tlb_enable(internals); return 0; + +out_err: + eth_dev->data->dev_started = 0; + return -1; } static void @@ -2099,6 +2263,8 @@ bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->rx_offload_capa = internals->rx_offload_capa; dev_info->tx_offload_capa = internals->tx_offload_capa; + dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa; + dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa; dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads; dev_info->reta_size = internals->reta_size; @@ -2447,7 +2613,7 @@ bond_ethdev_delayed_lsc_propagation(void *arg) return; _rte_eth_dev_callback_process((struct rte_eth_dev *)arg, - RTE_ETH_EVENT_INTR_LSC, NULL, NULL); + RTE_ETH_EVENT_INTR_LSC, NULL); } int @@ -2555,7 +2721,7 @@ bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type, else _rte_eth_dev_callback_process(bonded_eth_dev, RTE_ETH_EVENT_INTR_LSC, - NULL, NULL); + NULL); } else { if (internals->link_down_delay_ms > 0) @@ -2565,7 +2731,7 @@ bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type, else _rte_eth_dev_callback_process(bonded_eth_dev, RTE_ETH_EVENT_INTR_LSC, - NULL, NULL); + NULL); } } return 0; @@ -2678,6 +2844,45 @@ bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev, return 0; } +static int +bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +{ + struct rte_eth_dev *slave_eth_dev; + struct bond_dev_private *internals = dev->data->dev_private; + int ret, i; + + rte_spinlock_lock(&internals->lock); + + for (i = 0; i < internals->slave_count; i++) { + slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id]; + if (*slave_eth_dev->dev_ops->mtu_set == NULL) { + rte_spinlock_unlock(&internals->lock); + return -ENOTSUP; + } + } + for (i = 0; i < internals->slave_count; i++) { + ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu); + if (ret < 0) { + rte_spinlock_unlock(&internals->lock); + return ret; + } + } + + rte_spinlock_unlock(&internals->lock); + return 0; +} + +static int +bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr) +{ + if (mac_address_set(dev, addr)) { + RTE_BOND_LOG(ERR, "Failed to update MAC address"); + return -EINVAL; + } + + return 0; +} + const struct eth_dev_ops default_dev_ops = { .dev_start = bond_ethdev_start, .dev_stop = bond_ethdev_stop, @@ -2697,7 +2902,9 @@ const struct eth_dev_ops default_dev_ops = { .reta_update = bond_ethdev_rss_reta_update, .reta_query = bond_ethdev_rss_reta_query, .rss_hash_update = bond_ethdev_rss_hash_update, - .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get + .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get, + .mtu_set = bond_ethdev_mtu_set, + .mac_addr_set = bond_ethdev_mac_address_set }; static int @@ -2740,7 +2947,7 @@ bond_alloc(struct rte_vdev_device *dev, uint8_t mode) internals->mode = BONDING_MODE_INVALID; internals->current_primary_port = RTE_MAX_ETHPORTS + 1; internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2; - internals->xmit_hash = xmit_l2_hash; + internals->burst_xmit_hash = burst_xmit_l2_hash; internals->user_defined_mac = 0; internals->link_status_polling_enabled = 0; @@ -2754,6 +2961,8 @@ bond_alloc(struct rte_vdev_device *dev, uint8_t mode) internals->active_slave_count = 0; internals->rx_offload_capa = 0; internals->tx_offload_capa = 0; + internals->rx_queue_offload_capa = 0; + internals->tx_queue_offload_capa = 0; internals->candidate_max_rx_pktlen = 0; internals->max_rx_pktlen = 0;