bond: fix vlan flag interpretation
authorDeclan Doherty <declan.doherty@intel.com>
Tue, 16 Dec 2014 12:45:10 +0000 (12:45 +0000)
committerThomas Monjalon <thomas.monjalon@6wind.com>
Thu, 15 Jan 2015 12:41:39 +0000 (13:41 +0100)
This patch contains a fix for link bonding handling of vlan tagged packets in mode 3 and 5.
Currently xmit_slave_hash function misinterprets the PKT_RX_VLAN_PKT flag to mean that
there is a vlan tag within the packet when in actually means that there is a valid entry
in the vlan_tci field in the mbuf.

- Fixed VLAN tag support in hashing functions.
- Adds support for TCP in layer 4 header hashing.
- Splits transmit hashing function into separate functions for each policy to
  reduce branching and to make the code clearer.
- Fixed incorrect flag set in test application packet generator.

Test report: http://dpdk.org/ml/archives/dev/2015-January/010792.html

Signed-off-by: Declan Doherty <declan.doherty@intel.com>
Acked-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
Tested-by: SunX Jiajia <sunx.jiajia@intel.com>
app/test/packet_burst_generator.c
lib/librte_net/rte_ip.h
lib/librte_pmd_bond/rte_eth_bond_api.c
lib/librte_pmd_bond/rte_eth_bond_pmd.c
lib/librte_pmd_bond/rte_eth_bond_private.h

index b2824dc..4a89663 100644 (file)
@@ -97,7 +97,7 @@ initialize_eth_header(struct ether_hdr *eth_hdr, struct ether_addr *src_mac,
                vhdr->eth_proto =  rte_cpu_to_be_16(ETHER_TYPE_IPv4);
                vhdr->vlan_tci = van_id;
        } else {
-               eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
+               eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
        }
 
 }
index f0ec543..64935d9 100644 (file)
@@ -110,6 +110,14 @@ struct ipv4_hdr {
                                           (((c) & 0xff) << 8)  | \
                                           ((d) & 0xff))
 
+/** Internet header length mask for version_ihl field */
+#define IPV4_HDR_IHL_MASK      (0x0f)
+/**
+ * Internet header length field multiplier (IHL field specifies overall header
+ * length in number of 4-byte words)
+ */
+#define IPV4_IHL_MULTIPLIER    (4)
+
 /* Fragment Offset * Flags. */
 #define        IPV4_HDR_DF_SHIFT       14
 #define        IPV4_HDR_MF_SHIFT       13
index c2a99a3..4ab3267 100644 (file)
@@ -272,6 +272,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
        internals->mode = BONDING_MODE_INVALID;
        internals->current_primary_port = 0;
        internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
+       internals->xmit_hash = xmit_l2_hash;
        internals->user_defined_mac = 0;
        internals->link_props_set = 0;
 
@@ -714,9 +715,16 @@ rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy)
 
        switch (policy) {
        case BALANCE_XMIT_POLICY_LAYER2:
+               internals->balance_xmit_policy = policy;
+               internals->xmit_hash = xmit_l2_hash;
+               break;
        case BALANCE_XMIT_POLICY_LAYER23:
+               internals->balance_xmit_policy = policy;
+               internals->xmit_hash = xmit_l23_hash;
+               break;
        case BALANCE_XMIT_POLICY_LAYER34:
                internals->balance_xmit_policy = policy;
+               internals->xmit_hash = xmit_l34_hash;
                break;
 
        default:
index bb4a537..e9cec2a 100644 (file)
@@ -31,6 +31,8 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include <stdlib.h>
+#include <netinet/in.h>
+
 #include <rte_mbuf.h>
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
@@ -48,6 +50,9 @@
 #include "rte_eth_bond_8023ad_private.h"
 
 #define REORDER_PERIOD_MS 10
+
+#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
+
 /* Table for statistics in mode 5 TLB */
 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
 
@@ -276,90 +281,105 @@ ipv6_hash(struct ipv6_hdr *ipv6_hdr)
                        (word_src_addr[3] ^ word_dst_addr[3]);
 }
 
-static uint32_t
-udp_hash(struct udp_hdr *hdr)
+static inline size_t
+get_vlan_offset(struct ether_hdr *eth_hdr)
 {
-       return hdr->src_port ^ hdr->dst_port;
+       size_t vlan_offset = 0;
+
+       /* Calculate VLAN offset */
+       if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == eth_hdr->ether_type) {
+               struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+               vlan_offset = sizeof(struct vlan_hdr);
+
+               while (rte_cpu_to_be_16(ETHER_TYPE_VLAN) ==
+                               vlan_hdr->eth_proto) {
+                       vlan_hdr = vlan_hdr + 1;
+                       vlan_offset += sizeof(struct vlan_hdr);
+               }
+       }
+       return vlan_offset;
 }
 
-static inline uint16_t
-xmit_slave_hash(const struct rte_mbuf *buf, uint8_t slave_count, uint8_t policy)
+uint16_t
+xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
 {
-       struct ether_hdr *eth_hdr;
-       struct udp_hdr *udp_hdr;
-       size_t eth_offset = 0;
-       uint32_t hash = 0;
-
-       if (slave_count == 1)
-               return 0;
+       struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
 
-       switch (policy) {
-       case BALANCE_XMIT_POLICY_LAYER2:
-               eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
+       uint32_t hash = ether_hash(eth_hdr);
 
-               hash = ether_hash(eth_hdr);
-               hash ^= hash >> 8;
-               return hash % slave_count;
+       return (hash ^= hash >> 8) % slave_count;
+}
 
-       case BALANCE_XMIT_POLICY_LAYER23:
-               eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
+uint16_t
+xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
+{
+       struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
+       size_t vlan_offset = get_vlan_offset(eth_hdr);
+       uint32_t hash, l3hash = 0;
 
-               if (buf->ol_flags & PKT_RX_VLAN_PKT)
-                       eth_offset = sizeof(struct ether_hdr) + sizeof(struct vlan_hdr);
-               else
-                       eth_offset = sizeof(struct ether_hdr);
+       hash = ether_hash(eth_hdr);
 
-               if (buf->ol_flags & PKT_RX_IPV4_HDR) {
-                       struct ipv4_hdr *ipv4_hdr;
-                       ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(buf,
-                                       unsigned char *) + eth_offset);
+       if (buf->ol_flags & PKT_RX_IPV4_HDR) {
+               struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
+                               ((char *)(eth_hdr + 1) + vlan_offset);
+               l3hash = ipv4_hash(ipv4_hdr);
 
-                       hash = ether_hash(eth_hdr) ^ ipv4_hash(ipv4_hdr);
+       } else if  (buf->ol_flags & PKT_RX_IPV6_HDR) {
+               struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
+                               ((char *)(eth_hdr + 1) + vlan_offset);
+               l3hash = ipv6_hash(ipv6_hdr);
+       }
 
-               } else {
-                       struct ipv6_hdr *ipv6_hdr;
+       hash = hash ^ l3hash;
+       hash ^= hash >> 16;
+       hash ^= hash >> 8;
 
-                       ipv6_hdr = (struct ipv6_hdr *)(rte_pktmbuf_mtod(buf,
-                                       unsigned char *) + eth_offset);
+       return hash % slave_count;
+}
 
-                       hash = ether_hash(eth_hdr) ^ ipv6_hash(ipv6_hdr);
+uint16_t
+xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
+{
+       struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
+       size_t vlan_offset = get_vlan_offset(eth_hdr);
+       struct udp_hdr *udp_hdr = NULL;
+       struct tcp_hdr *tcp_hdr = NULL;
+       uint32_t hash, l3hash = 0, l4hash = 0;
+
+       if (buf->ol_flags & PKT_RX_IPV4_HDR) {
+               struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
+                               ((char *)(eth_hdr + 1) + vlan_offset);
+               size_t ip_hdr_offset;
+
+               l3hash = ipv4_hash(ipv4_hdr);
+
+               ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
+                               IPV4_IHL_MULTIPLIER;
+
+               if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
+                       tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
+                                       ip_hdr_offset);
+                       l4hash = HASH_L4_PORTS(tcp_hdr);
+               } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
+                       udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
+                                       ip_hdr_offset);
+                       l4hash = HASH_L4_PORTS(udp_hdr);
                }
-               break;
-
-       case BALANCE_XMIT_POLICY_LAYER34:
-               if (buf->ol_flags & PKT_RX_VLAN_PKT)
-                       eth_offset = sizeof(struct ether_hdr) + sizeof(struct vlan_hdr);
-               else
-                       eth_offset = sizeof(struct ether_hdr);
-
-               if (buf->ol_flags & PKT_RX_IPV4_HDR) {
-                       struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
-                                       (rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset);
-
-                       if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
-                               udp_hdr = (struct udp_hdr *)
-                                               (rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset +
-                                                               sizeof(struct ipv4_hdr));
-                               hash = ipv4_hash(ipv4_hdr) ^ udp_hash(udp_hdr);
-                       } else {
-                               hash = ipv4_hash(ipv4_hdr);
-                       }
-               } else {
-                       struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
-                                       (rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset);
-
-                       if (ipv6_hdr->proto == IPPROTO_UDP) {
-                               udp_hdr = (struct udp_hdr *)
-                                               (rte_pktmbuf_mtod(buf, unsigned char *) + eth_offset +
-                                                               sizeof(struct ipv6_hdr));
-                               hash = ipv6_hash(ipv6_hdr) ^ udp_hash(udp_hdr);
-                       } else {
-                               hash = ipv6_hash(ipv6_hdr);
-                       }
+       } else if  (buf->ol_flags & PKT_RX_IPV6_HDR) {
+               struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
+                               ((char *)(eth_hdr + 1) + vlan_offset);
+               l3hash = ipv6_hash(ipv6_hdr);
+
+               if (ipv6_hdr->proto == IPPROTO_TCP) {
+                       tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
+                       l4hash = HASH_L4_PORTS(tcp_hdr);
+               } else if (ipv6_hdr->proto == IPPROTO_UDP) {
+                       udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
+                       l4hash = HASH_L4_PORTS(udp_hdr);
                }
-               break;
        }
 
+       hash = l3hash ^ l4hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;
 
@@ -536,8 +556,7 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
        /* Populate slaves mbuf with the packets which are to be sent on it  */
        for (i = 0; i < nb_pkts; i++) {
                /* Select output slave using hash based on xmit policy */
-               op_slave_id = xmit_slave_hash(bufs[i], num_of_slaves,
-                               internals->balance_xmit_policy);
+               op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
 
                /* Populate slave mbuf arrays with mbufs for that slave */
                slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
@@ -575,7 +594,7 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
 
        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];
-        /* possitions in slaves, not ID */
+        /* positions in slaves, not ID */
        uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
        uint8_t distributing_count;
 
@@ -622,8 +641,7 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                /* Populate slaves mbuf with the packets which are to be sent on it */
                for (i = 0; i < nb_pkts; i++) {
                        /* Select output slave using hash based on xmit policy */
-                       op_slave_idx = xmit_slave_hash(bufs[i], distributing_count,
-                                       internals->balance_xmit_policy);
+                       op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
 
                        /* Populate slave mbuf arrays with mbufs for that slave. Use only
                         * slaves that are currently distributing. */
index f913c5b..e01e66b 100644 (file)
@@ -108,6 +108,9 @@ struct bond_slave_details {
        struct ether_addr persisted_mac_addr;
 };
 
+
+typedef uint16_t (*xmit_hash_t)(const struct rte_mbuf *buf, uint8_t slave_count);
+
 /** Link Bonding PMD device private configuration Structure */
 struct bond_dev_private {
        uint8_t port_id;                                        /**< Port Id of Bonded Port */
@@ -122,6 +125,9 @@ struct bond_dev_private {
 
        uint8_t balance_xmit_policy;
        /**< Transmit policy - l2 / l23 / l34 for operation in balance mode */
+       xmit_hash_t xmit_hash;
+       /**< Transmit policy hash function */
+
        uint8_t user_defined_mac;
        /**< Flag for whether MAC address is user defined or not */
        uint8_t promiscuous_en;
@@ -225,6 +231,15 @@ void
 slave_add(struct bond_dev_private *internals,
                struct rte_eth_dev *slave_eth_dev);
 
+uint16_t
+xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count);
+
+uint16_t
+xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count);
+
+uint16_t
+xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count);
+
 void
 bond_ethdev_primary_set(struct bond_dev_private *internals,
                uint8_t slave_port_id);