X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_hash%2Frte_thash.h;h=a6ddb7bf7f72b264697928c53def5b6315288cb7;hb=2396806765e4b797dd30bc265126c2bbc1a879cc;hp=6156e8a28e3de068ea1b6334297625ba43a9360a;hpb=7574c3ef74287a5a2911705c550806914400a1ed;p=dpdk.git diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h index 6156e8a28e..a6ddb7bf7f 100644 --- a/lib/librte_hash/rte_thash.h +++ b/lib/librte_hash/rte_thash.h @@ -53,14 +53,21 @@ extern "C" { #include #include -#include +#include #include +#include +#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_NEON) +#include +#endif + +#ifdef RTE_ARCH_X86 /* Byte swap mask used for converting IPv6 address * 4-byte chunks to CPU byte order */ static const __m128i rte_thash_ipv6_bswap_mask = { - 0x0405060700010203, 0x0C0D0E0F08090A0B}; + 0x0405060700010203ULL, 0x0C0D0E0F08090A0BULL}; +#endif /** * length in dwords of input tuple to @@ -97,6 +104,7 @@ static const __m128i rte_thash_ipv6_bswap_mask = { struct rte_ipv4_tuple { uint32_t src_addr; uint32_t dst_addr; + RTE_STD_C11 union { struct { uint16_t dport; @@ -114,6 +122,7 @@ struct rte_ipv4_tuple { struct rte_ipv6_tuple { uint8_t src_addr[16]; uint8_t dst_addr[16]; + RTE_STD_C11 union { struct { uint16_t dport; @@ -126,7 +135,11 @@ struct rte_ipv6_tuple { union rte_thash_tuple { struct rte_ipv4_tuple v4; struct rte_ipv6_tuple v6; +#ifdef RTE_ARCH_X86 } __attribute__((aligned(XMM_SIZE))); +#else +}; +#endif /** * Prepare special converted key to use with rte_softrss_be() @@ -157,12 +170,27 @@ rte_convert_rss_key(const uint32_t *orig, uint32_t *targ, int len) static inline void rte_thash_load_v6_addrs(const struct ipv6_hdr *orig, union rte_thash_tuple *targ) { +#ifdef RTE_ARCH_X86 __m128i ipv6 = _mm_loadu_si128((const __m128i *)orig->src_addr); *(__m128i *)targ->v6.src_addr = _mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask); ipv6 = _mm_loadu_si128((const __m128i *)orig->dst_addr); *(__m128i *)targ->v6.dst_addr = _mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask); +#elif defined(RTE_MACHINE_CPUFLAG_NEON) + uint8x16_t ipv6 = vld1q_u8((uint8_t const *)orig->src_addr); + vst1q_u8((uint8_t *)targ->v6.src_addr, vrev32q_u8(ipv6)); + ipv6 = vld1q_u8((uint8_t const *)orig->dst_addr); + vst1q_u8((uint8_t *)targ->v6.dst_addr, vrev32q_u8(ipv6)); +#else + int i; + for (i = 0; i < 4; i++) { + *((uint32_t *)targ->v6.src_addr + i) = + rte_be_to_cpu_32(*((const uint32_t *)orig->src_addr + i)); + *((uint32_t *)targ->v6.dst_addr + i) = + rte_be_to_cpu_32(*((const uint32_t *)orig->dst_addr + i)); + } +#endif } /** @@ -180,15 +208,14 @@ static inline uint32_t rte_softrss(uint32_t *input_tuple, uint32_t input_len, const uint8_t *rss_key) { - uint32_t i, j, ret = 0; + uint32_t i, j, map, ret = 0; for (j = 0; j < input_len; j++) { - for (i = 0; i < 32; i++) { - if (input_tuple[j] & (1 << (31 - i))) { - ret ^= rte_cpu_to_be_32(((const uint32_t *)rss_key)[j]) << i | + for (map = input_tuple[j]; map; map &= (map - 1)) { + i = rte_bsf32(map); + ret ^= rte_cpu_to_be_32(((const uint32_t *)rss_key)[j]) << (31 - i) | (uint32_t)((uint64_t)(rte_cpu_to_be_32(((const uint32_t *)rss_key)[j + 1])) >> - (32 - i)); - } + (i + 1)); } } return ret; @@ -211,14 +238,13 @@ static inline uint32_t rte_softrss_be(uint32_t *input_tuple, uint32_t input_len, const uint8_t *rss_key) { - uint32_t i, j, ret = 0; + uint32_t i, j, map, ret = 0; for (j = 0; j < input_len; j++) { - for (i = 0; i < 32; i++) { - if (input_tuple[j] & (1 << (31 - i))) { - ret ^= ((const uint32_t *)rss_key)[j] << i | - (uint32_t)((uint64_t)(((const uint32_t *)rss_key)[j + 1]) >> (32 - i)); - } + for (map = input_tuple[j]; map; map &= (map - 1)) { + i = rte_bsf32(map); + ret ^= ((const uint32_t *)rss_key)[j] << (31 - i) | + (uint32_t)((uint64_t)(((const uint32_t *)rss_key)[j + 1]) >> (i + 1)); } } return ret;