net/bonding: burst mode hash calculation
author Declan Doherty <declan.doherty@intel.com>
Tue, 9 Jan 2018 11:34:09 +0000 (11:34 +0000)
committer Ferruh Yigit <ferruh.yigit@intel.com>
Tue, 16 Jan 2018 17:47:49 +0000 (18:47 +0100)
Change the xmit_hash functions to handle bursts of packets instead of
single packets at a time, and update the affected tx_burst functions.

Signed-off-by: Declan Doherty <declan.doherty@intel.com>
Signed-off-by: Keith Wiles <keith.wiles@intel.com>
drivers/net/bonding/rte_eth_bond_api.c
drivers/net/bonding/rte_eth_bond_pmd.c
drivers/net/bonding/rte_eth_bond_private.h

drivers/net/bonding/rte_eth_bond_api.c
index 07659c6..534a890 100644
@@ -665,15 +665,12 @@ rte_eth_bond_xmit_policy_set(uint16_t bonded_port_id, uint8_t policy)
        switch (policy) {
        case BALANCE_XMIT_POLICY_LAYER2:
                internals->balance_xmit_policy = policy;
-               internals->xmit_hash = xmit_l2_hash;
                break;
        case BALANCE_XMIT_POLICY_LAYER23:
                internals->balance_xmit_policy = policy;
-               internals->xmit_hash = xmit_l23_hash;
                break;
        case BALANCE_XMIT_POLICY_LAYER34:
                internals->balance_xmit_policy = policy;
-               internals->xmit_hash = xmit_l34_hash;
                break;
 
        default:
drivers/net/bonding/rte_eth_bond_pmd.c
index f7563bf..2e45108 100644
@@ -280,87 +280,114 @@ bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
 
 static uint16_t
 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
-               uint16_t nb_pkts)
+               uint16_t nb_bufs)
 {
-       struct bond_dev_private *internals;
-       struct bond_tx_queue *bd_tx_q;
+       struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+       struct bond_dev_private *internals = bd_tx_q->dev_private;
 
-       uint16_t num_of_slaves;
-       uint16_t slaves[RTE_MAX_ETHPORTS];
-        /* positions in slaves, not ID */
-       uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
-       uint8_t distributing_count;
+       uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
+       uint16_t slave_count;
 
-       uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
-       uint16_t i, op_slave_idx;
+       uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
+       uint16_t dist_slave_count;
 
-       struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
+       /* 2-D array to sort mbufs for transmission on each slave into */
+       struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
+       /* Number of mbufs for transmission on each slave */
+       uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
+       /* Mapping array generated by hash function to map mbufs to slaves */
+       uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
 
-       /* Total amount of packets in slave_bufs */
-       uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
-       /* Slow packets placed in each slave */
+       uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+       uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-       if (unlikely(nb_pkts == 0))
-               return 0;
+       uint16_t i, j;
 
-       bd_tx_q = (struct bond_tx_queue *)queue;
-       internals = bd_tx_q->dev_private;
+       if (unlikely(nb_bufs == 0))
+               return 0;
 
        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
-       num_of_slaves = internals->active_slave_count;
-       if (num_of_slaves < 1)
-               return num_tx_total;
+       slave_count = internals->active_slave_count;
+       if (unlikely(slave_count < 1))
+               return 0;
 
-       memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
-                       num_of_slaves);
+       memcpy(slave_port_ids, internals->active_slaves,
+                       sizeof(slave_port_ids[0]) * slave_count);
+
+
+       dist_slave_count = 0;
+       for (i = 0; i < slave_count; i++) {
+               struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
 
-       distributing_count = 0;
-       for (i = 0; i < num_of_slaves; i++) {
-               struct port *port = &mode_8023ad_ports[slaves[i]];
                if (ACTOR_STATE(port, DISTRIBUTING))
-                       distributing_offsets[distributing_count++] = i;
+                       dist_slave_port_ids[dist_slave_count++] =
+                                       slave_port_ids[i];
        }
 
-       if (likely(distributing_count > 0)) {
-               /* Populate slaves mbuf with the packets which are to be sent */
-               for (i = 0; i < nb_pkts; i++) {
-                       /* Select output slave using hash based on xmit policy */
-                       op_slave_idx = internals->xmit_hash(bufs[i],
-                                       distributing_count);
+       if (unlikely(dist_slave_count < 1))
+               return 0;
 
-                       /* Populate slave mbuf arrays with mbufs for that slave.
-                        * Use only slaves that are currently distributing.
-                        */
-                       uint8_t slave_offset =
-                                       distributing_offsets[op_slave_idx];
-                       slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
-                                       bufs[i];
-                       slave_nb_pkts[slave_offset]++;
-               }
+       /*
+        * Select the output slave for each packet using a hash based on the
+        * xmit policy and populate the per-slave mbuf arrays accordingly
+        */
+       internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
+                       bufs_slave_port_idxs);
+
+       for (i = 0; i < nb_bufs; i++) {
+               /* Populate slave mbuf arrays with mbufs for that slave. */
+               uint8_t slave_idx = bufs_slave_port_idxs[i];
+
+               slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }
 
+
        /* Send packet burst on each slave device */
-       for (i = 0; i < num_of_slaves; i++) {
-               if (slave_nb_pkts[i] == 0)
+       for (i = 0; i < dist_slave_count; i++) {
+               if (slave_nb_bufs[i] == 0)
                        continue;
 
-               num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
-                               slave_bufs[i], slave_nb_pkts[i]);
+               slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
+                               bd_tx_q->queue_id, slave_bufs[i],
+                               slave_nb_bufs[i]);
 
-               num_tx_total += num_tx_slave;
-               num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
+               total_tx_count += slave_tx_count;
 
                /* If tx burst fails move packets to end of bufs */
-               if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
-                       uint16_t j = nb_pkts - num_tx_fail_total;
-                       for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
-                                       num_tx_slave++)
-                               bufs[j] = slave_bufs[i][num_tx_slave];
+               if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
+                       slave_tx_fail_count[i] = slave_nb_bufs[i] -
+                                       slave_tx_count;
+                       total_tx_fail_count += slave_tx_fail_count[i];
+
+                       /*
+                        * Shift bufs to beginning of array to allow reordering
+                        * later
+                        */
+                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
+                               slave_bufs[i][j] =
+                                       slave_bufs[i][slave_tx_count + j];
+                       }
                }
        }
 
-       return num_tx_total;
+       /*
+        * If there are tx burst failures we move packets to the end of bufs
+        * to preserve the expected PMD behaviour of all unsent packets being
+        * at the end of the input mbuf array
+        */
+       if (unlikely(total_tx_fail_count > 0)) {
+               int bufs_idx = nb_bufs - total_tx_fail_count;
+
+               for (i = 0; i < slave_count; i++) {
+                       if (slave_tx_fail_count[i] > 0) {
+                               for (j = 0; j < slave_tx_fail_count[i]; j++)
+                                       bufs[bufs_idx++] = slave_bufs[i][j];
+                       }
+               }
+       }
+
+       return total_tx_count;
 }
 
 
@@ -759,96 +786,129 @@ ipv6_hash(struct ipv6_hdr *ipv6_hdr)
                        (word_src_addr[3] ^ word_dst_addr[3]);
 }
 
-uint16_t
-xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
+
+void
+burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+               uint8_t slave_count, uint16_t *slaves)
 {
-       struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
+       struct ether_hdr *eth_hdr;
+       uint32_t hash;
+       int i;
 
-       uint32_t hash = ether_hash(eth_hdr);
+       for (i = 0; i < nb_pkts; i++) {
+               eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
+
+               hash = ether_hash(eth_hdr);
 
-       return (hash ^= hash >> 8) % slave_count;
+               slaves[i] = (hash ^= hash >> 8) % slave_count;
+       }
 }
 
-uint16_t
-xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
+void
+burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+               uint8_t slave_count, uint16_t *slaves)
 {
-       struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
-       uint16_t proto = eth_hdr->ether_type;
-       size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
-       uint32_t hash, l3hash = 0;
+       uint16_t i;
+       struct ether_hdr *eth_hdr;
+       uint16_t proto;
+       size_t vlan_offset;
+       uint32_t hash, l3hash;
 
-       hash = ether_hash(eth_hdr);
+       for (i = 0; i < nb_pkts; i++) {
+               eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
+               l3hash = 0;
 
-       if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
-               struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
-                               ((char *)(eth_hdr + 1) + vlan_offset);
-               l3hash = ipv4_hash(ipv4_hdr);
+               proto = eth_hdr->ether_type;
+               hash = ether_hash(eth_hdr);
 
-       } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
-               struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
-                               ((char *)(eth_hdr + 1) + vlan_offset);
-               l3hash = ipv6_hash(ipv6_hdr);
-       }
+               vlan_offset = get_vlan_offset(eth_hdr, &proto);
 
-       hash = hash ^ l3hash;
-       hash ^= hash >> 16;
-       hash ^= hash >> 8;
+               if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
+                       struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
+                                       ((char *)(eth_hdr + 1) + vlan_offset);
+                       l3hash = ipv4_hash(ipv4_hdr);
 
-       return hash % slave_count;
-}
+               } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
+                       struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
+                                       ((char *)(eth_hdr + 1) + vlan_offset);
+                       l3hash = ipv6_hash(ipv6_hdr);
+               }
 
-uint16_t
-xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
-{
-       struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
-       uint16_t proto = eth_hdr->ether_type;
-       size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
+               hash = hash ^ l3hash;
+               hash ^= hash >> 16;
+               hash ^= hash >> 8;
 
-       struct udp_hdr *udp_hdr = NULL;
-       struct tcp_hdr *tcp_hdr = NULL;
-       uint32_t hash, l3hash = 0, l4hash = 0;
+               slaves[i] = hash % slave_count;
+       }
+}
 
-       if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
-               struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
-                               ((char *)(eth_hdr + 1) + vlan_offset);
-               size_t ip_hdr_offset;
+void
+burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+               uint8_t slave_count, uint16_t *slaves)
+{
+       struct ether_hdr *eth_hdr;
+       uint16_t proto;
+       size_t vlan_offset;
+       int i;
 
-               l3hash = ipv4_hash(ipv4_hdr);
+       struct udp_hdr *udp_hdr;
+       struct tcp_hdr *tcp_hdr;
+       uint32_t hash, l3hash, l4hash;
 
-               /* there is no L4 header in fragmented packet */
-               if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
-                       ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
+       for (i = 0; i < nb_pkts; i++) {
+               eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
+               proto = eth_hdr->ether_type;
+               vlan_offset = get_vlan_offset(eth_hdr, &proto);
+               l3hash = 0;
+               l4hash = 0;
+
+               if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
+                       struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
+                                       ((char *)(eth_hdr + 1) + vlan_offset);
+                       size_t ip_hdr_offset;
+
+                       l3hash = ipv4_hash(ipv4_hdr);
+
+                       /* there is no L4 header in fragmented packet */
+                       if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
+                                                               == 0)) {
+                               ip_hdr_offset = (ipv4_hdr->version_ihl
+                                       & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;
 
-                       if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
-                               tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
-                                               ip_hdr_offset);
+                               if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
+                                       tcp_hdr = (struct tcp_hdr *)
+                                               ((char *)ipv4_hdr +
+                                                       ip_hdr_offset);
+                                       l4hash = HASH_L4_PORTS(tcp_hdr);
+                               } else if (ipv4_hdr->next_proto_id ==
+                                                               IPPROTO_UDP) {
+                                       udp_hdr = (struct udp_hdr *)
+                                               ((char *)ipv4_hdr +
+                                                       ip_hdr_offset);
+                                       l4hash = HASH_L4_PORTS(udp_hdr);
+                               }
+                       }
+               } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
+                       struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
+                                       ((char *)(eth_hdr + 1) + vlan_offset);
+                       l3hash = ipv6_hash(ipv6_hdr);
+
+                       if (ipv6_hdr->proto == IPPROTO_TCP) {
+                               tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
-                       } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
-                               udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
-                                               ip_hdr_offset);
+                       } else if (ipv6_hdr->proto == IPPROTO_UDP) {
+                               udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }
-       } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
-               struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
-                               ((char *)(eth_hdr + 1) + vlan_offset);
-               l3hash = ipv6_hash(ipv6_hdr);
-
-               if (ipv6_hdr->proto == IPPROTO_TCP) {
-                       tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
-                       l4hash = HASH_L4_PORTS(tcp_hdr);
-               } else if (ipv6_hdr->proto == IPPROTO_UDP) {
-                       udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
-                       l4hash = HASH_L4_PORTS(udp_hdr);
-               }
-       }
 
-       hash = l3hash ^ l4hash;
-       hash ^= hash >> 16;
-       hash ^= hash >> 8;
+               hash = l3hash ^ l4hash;
+               hash ^= hash >> 16;
+               hash ^= hash >> 8;
 
-       return hash % slave_count;
+               slaves[i] = hash % slave_count;
+       }
 }
 
 struct bwg_slave {
@@ -1156,167 +1216,239 @@ bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 static uint16_t
 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
-               uint16_t nb_pkts)
+               uint16_t nb_bufs)
 {
-       struct bond_dev_private *internals;
-       struct bond_tx_queue *bd_tx_q;
+       struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+       struct bond_dev_private *internals = bd_tx_q->dev_private;
 
-       uint16_t num_of_slaves;
-       uint16_t slaves[RTE_MAX_ETHPORTS];
+       uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
+       uint16_t slave_count;
 
-       uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
+       /* Array to sort mbufs for transmission on each slave into */
+       struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
+       /* Number of mbufs for transmission on each slave */
+       uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
+       /* Mapping array generated by hash function to map mbufs to slaves */
+       uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
 
-       int i, op_slave_id;
+       uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+       uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-       struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
-       uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+       uint16_t i, j;
 
-       bd_tx_q = (struct bond_tx_queue *)queue;
-       internals = bd_tx_q->dev_private;
+       if (unlikely(nb_bufs == 0))
+               return 0;
 
        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
-       num_of_slaves = internals->active_slave_count;
-       memcpy(slaves, internals->active_slaves,
-                       sizeof(internals->active_slaves[0]) * num_of_slaves);
+       slave_count = internals->active_slave_count;
+       if (unlikely(slave_count < 1))
+               return 0;
 
-       if (num_of_slaves < 1)
-               return num_tx_total;
+       memcpy(slave_port_ids, internals->active_slaves,
+                       sizeof(slave_port_ids[0]) * slave_count);
 
-       /* Populate slaves mbuf with the packets which are to be sent on it  */
-       for (i = 0; i < nb_pkts; i++) {
-               /* Select output slave using hash based on xmit policy */
-               op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
+       /*
+        * Select the output slave for each packet using a hash based on the
+        * xmit policy and populate the per-slave mbuf arrays accordingly
+        */
+       internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
+                       bufs_slave_port_idxs);
+
+       for (i = 0; i < nb_bufs; i++) {
+               /* Populate slave mbuf arrays with mbufs for that slave. */
+               uint8_t slave_idx = bufs_slave_port_idxs[i];
 
-               /* Populate slave mbuf arrays with mbufs for that slave */
-               slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
+               slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }
 
        /* Send packet burst on each slave device */
-       for (i = 0; i < num_of_slaves; i++) {
-               if (slave_nb_pkts[i] > 0) {
-                       num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
-                                       slave_bufs[i], slave_nb_pkts[i]);
+       for (i = 0; i < slave_count; i++) {
+               if (slave_nb_bufs[i] == 0)
+                       continue;
 
-                       /* if tx burst fails move packets to end of bufs */
-                       if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
-                               int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
+               slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
+                               bd_tx_q->queue_id, slave_bufs[i],
+                               slave_nb_bufs[i]);
 
-                               tx_fail_total += slave_tx_fail_count;
-                               memcpy(&bufs[nb_pkts - tx_fail_total],
-                                               &slave_bufs[i][num_tx_slave],
-                                               slave_tx_fail_count * sizeof(bufs[0]));
+               total_tx_count += slave_tx_count;
+
+               /* If tx burst fails move packets to end of bufs */
+               if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
+                       slave_tx_fail_count[i] = slave_nb_bufs[i] -
+                                       slave_tx_count;
+                       total_tx_fail_count += slave_tx_fail_count[i];
+
+                       /*
+                        * Shift bufs to beginning of array to allow reordering
+                        * later
+                        */
+                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
+                               slave_bufs[i][j] =
+                                       slave_bufs[i][slave_tx_count + j];
                        }
+               }
+       }
 
-                       num_tx_total += num_tx_slave;
+       /*
+        * If there are tx burst failures we move packets to the end of bufs
+        * to preserve the expected PMD behaviour of all unsent packets being
+        * at the end of the input mbuf array
+        */
+       if (unlikely(total_tx_fail_count > 0)) {
+               int bufs_idx = nb_bufs - total_tx_fail_count;
+
+               for (i = 0; i < slave_count; i++) {
+                       if (slave_tx_fail_count[i] > 0) {
+                               for (j = 0; j < slave_tx_fail_count[i]; j++)
+                                       bufs[bufs_idx++] = slave_bufs[i][j];
+                       }
                }
        }
 
-       return num_tx_total;
+       return total_tx_count;
 }
 
 static uint16_t
 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
-               uint16_t nb_pkts)
+               uint16_t nb_bufs)
 {
-       struct bond_dev_private *internals;
-       struct bond_tx_queue *bd_tx_q;
+       struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+       struct bond_dev_private *internals = bd_tx_q->dev_private;
 
-       uint16_t num_of_slaves;
-       uint16_t slaves[RTE_MAX_ETHPORTS];
-        /* positions in slaves, not ID */
-       uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
-       uint8_t distributing_count;
+       uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
+       uint16_t slave_count;
 
-       uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
-       uint16_t i, op_slave_idx;
+       uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
+       uint16_t dist_slave_count;
 
-       /* Allocate additional packets in case 8023AD mode. */
-       struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
+       /* 2-D array to sort mbufs for transmission on each slave into */
+       struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
+       /* Number of mbufs for transmission on each slave */
+       uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
+       /* Mapping array generated by hash function to map mbufs to slaves */
+       uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
 
-       /* Total amount of packets in slave_bufs */
-       uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
-       /* Slow packets placed in each slave */
-       uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+       uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+       uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-       bd_tx_q = (struct bond_tx_queue *)queue;
-       internals = bd_tx_q->dev_private;
+       uint16_t i, j;
+
+       if (unlikely(nb_bufs == 0))
+               return 0;
 
        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
-       num_of_slaves = internals->active_slave_count;
-       if (num_of_slaves < 1)
-               return num_tx_total;
+       slave_count = internals->active_slave_count;
+       if (unlikely(slave_count < 1))
+               return 0;
 
-       memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
+       memcpy(slave_port_ids, internals->active_slaves,
+                       sizeof(slave_port_ids[0]) * slave_count);
 
-       distributing_count = 0;
-       for (i = 0; i < num_of_slaves; i++) {
-               struct port *port = &mode_8023ad_ports[slaves[i]];
+       dist_slave_count = 0;
+       for (i = 0; i < slave_count; i++) {
+               struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
 
                if (ACTOR_STATE(port, DISTRIBUTING))
-                       distributing_offsets[distributing_count++] = i;
+                       dist_slave_port_ids[dist_slave_count++] =
+                                       slave_port_ids[i];
        }
 
-       if (likely(distributing_count > 0)) {
-               /* Populate slaves mbuf with the packets which are to be sent on it */
-               for (i = 0; i < nb_pkts; i++) {
-                       /* Select output slave using hash based on xmit policy */
-                       op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
+       if (likely(dist_slave_count > 0)) {
 
-                       /* Populate slave mbuf arrays with mbufs for that slave. Use only
-                        * slaves that are currently distributing. */
-                       uint8_t slave_offset = distributing_offsets[op_slave_idx];
-                       slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
-                       slave_nb_pkts[slave_offset]++;
-               }
-       }
+               /*
+                * Select the output slave for each packet using a hash based
+                * on the xmit policy and populate the per-slave mbuf arrays
+                */
+               internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
+                               bufs_slave_port_idxs);
 
-       /* Send packet burst on each slave device */
-       for (i = 0; i < num_of_slaves; i++) {
-               if (slave_nb_pkts[i] == 0)
-                       continue;
+               for (i = 0; i < nb_bufs; i++) {
+                       /*
+                        * Populate slave mbuf arrays with mbufs for that
+                        * slave
+                        */
+                       uint8_t slave_idx = bufs_slave_port_idxs[i];
+
+                       slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
+                                       bufs[i];
+               }
 
-               num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
-                               slave_bufs[i], slave_nb_pkts[i]);
 
-               /* If tx burst fails drop slow packets */
-               for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
-                       rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
+               /* Send packet burst on each slave device */
+               for (i = 0; i < dist_slave_count; i++) {
+                       if (slave_nb_bufs[i] == 0)
+                               continue;
 
-               num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
-               num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
+                       slave_tx_count = rte_eth_tx_burst(
+                                       dist_slave_port_ids[i],
+                                       bd_tx_q->queue_id, slave_bufs[i],
+                                       slave_nb_bufs[i]);
+
+                       total_tx_count += slave_tx_count;
+
+                       /* If tx burst fails move packets to end of bufs */
+                       if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
+                               slave_tx_fail_count[i] = slave_nb_bufs[i] -
+                                               slave_tx_count;
+                               total_tx_fail_count += slave_tx_fail_count[i];
+
+                               /*
+                                * Shift bufs to beginning of array to allow
+                                * reordering later
+                                */
+                               for (j = 0; j < slave_tx_fail_count[i]; j++)
+                                       slave_bufs[i][j] =
+                                               slave_bufs[i]
+                                                       [slave_tx_count
+                                                       + j];
+                       }
+               }
 
-               /* If tx burst fails move packets to end of bufs */
-               if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
-                       uint16_t j = nb_pkts - num_tx_fail_total;
-                       for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
-                               bufs[j] = slave_bufs[i][num_tx_slave];
+               /*
+                * If there are tx burst failures we move packets to the end
+                * of bufs to preserve the expected PMD behaviour of all
+                * unsent packets being at the end of the input mbuf array
+                */
+               if (unlikely(total_tx_fail_count > 0)) {
+                       int bufs_idx = nb_bufs - total_tx_fail_count;
+
+                       for (i = 0; i < slave_count; i++) {
+                               if (slave_tx_fail_count[i] > 0) {
+                                       for (j = 0;
+                                               j < slave_tx_fail_count[i];
+                                               j++) {
+                                               bufs[bufs_idx++] =
+                                                       slave_bufs[i][j];
+                                       }
+                               }
+                       }
                }
        }
 
        /* Check for LACP control packets and send if available */
-       for (i = 0; i < num_of_slaves; i++) {
-               struct port *port = &mode_8023ad_ports[slaves[i]];
+       for (i = 0; i < slave_count; i++) {
+               struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
                struct rte_mbuf *ctrl_pkt = NULL;
 
-               int pkt_avail = rte_ring_dequeue(port->tx_ring,
-                               (void **)&ctrl_pkt);
+               if (likely(rte_ring_empty(port->tx_ring)))
+                       continue;
+
+               rte_ring_dequeue(port->tx_ring, (void **)&ctrl_pkt);
 
-               if (unlikely(pkt_avail == 0)) {
-                       num_tx_slave = rte_eth_tx_burst(slaves[i],
+               slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                        bd_tx_q->queue_id, &ctrl_pkt, 1);
 
-                       /*
-                        * re-enqueue LAG control plane packets to buffering
-                        * ring if transmission fails so the packet isn't lost.
-                        */
-                       if (num_tx_slave != nb_pkts)
-                               rte_ring_enqueue(port->tx_ring, ctrl_pkt);
-               }
+               /*
+                * re-enqueue LAG control plane packets to buffering
+                * ring if transmission fails so the packet isn't lost.
+                */
+               if (slave_tx_count != 1)
+                       rte_ring_enqueue(port->tx_ring, ctrl_pkt);
        }
 
-       return num_tx_total;
+       return total_tx_count;
 }
 
 static uint16_t
@@ -2753,7 +2885,7 @@ bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
        internals->mode = BONDING_MODE_INVALID;
        internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
        internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
-       internals->xmit_hash = xmit_l2_hash;
+       internals->burst_xmit_hash = burst_xmit_l2_hash;
        internals->user_defined_mac = 0;
 
        internals->link_status_polling_enabled = 0;
drivers/net/bonding/rte_eth_bond_private.h
index 3b32311..e2b717f 100644
@@ -80,8 +80,8 @@ struct bond_slave_details {
        uint16_t reta_size;
 };
 
-
-typedef uint16_t (*xmit_hash_t)(const struct rte_mbuf *buf, uint8_t slave_count);
+typedef void (*burst_xmit_hash_t)(struct rte_mbuf **buf, uint16_t nb_pkts,
+               uint8_t slave_count, uint16_t *slaves);
 
 /** Link Bonding PMD device private configuration Structure */
 struct bond_dev_private {
@@ -97,7 +97,7 @@ struct bond_dev_private {
 
        uint8_t balance_xmit_policy;
        /**< Transmit policy - l2 / l23 / l34 for operation in balance mode */
-       xmit_hash_t xmit_hash;
+       burst_xmit_hash_t burst_xmit_hash;
        /**< Transmit policy hash function */
 
        uint8_t user_defined_mac;
@@ -219,14 +219,18 @@ void
 slave_add(struct bond_dev_private *internals,
                struct rte_eth_dev *slave_eth_dev);
 
-uint16_t
-xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count);
+void
+burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+               uint8_t slave_count, uint16_t *slaves);
 
-uint16_t
-xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count);
+void
+burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+               uint8_t slave_count, uint16_t *slaves);
+
+void
+burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+               uint8_t slave_count, uint16_t *slaves);
 
-uint16_t
-xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count);
 
 void
 bond_ethdev_primary_set(struct bond_dev_private *internals,