From f1237c33d418e3566474ad40996de7a8647eb6c5 Mon Sep 17 00:00:00 2001 From: Pablo de Lara Date: Wed, 10 Jun 2015 16:25:23 +0100 Subject: [PATCH] hash: update jhash function with the latest available Jenkins hash function was developed originally in 1996, and was integrated in first versions of DPDK. The function has been improved in 2006, achieving up to 35% better performance, compared to the original one. This patch integrates that code into the rte_jhash library. It also updates the precalculated hash values in the unit test, as the code now returns different values (expected). A final note has been added in release notes for stating the changes made. Signed-off-by: Pablo de Lara Acked-by: Bruce Richardson --- app/test/test_hash_functions.c | 12 +- doc/guides/rel_notes/new_features.rst | 5 + lib/librte_hash/rte_jhash.h | 269 +++++++++++++++++++------- 3 files changed, 207 insertions(+), 79 deletions(-) diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c index fcc9d0573a..c6cdccf1a4 100644 --- a/app/test/test_hash_functions.c +++ b/app/test/test_hash_functions.c @@ -55,14 +55,14 @@ * key size = 8, key = 0x0706050403020100 */ static uint32_t hash_values_jhash[2][10] = {{ - 0x821cc2db, 0xa491f494, 0xace4cd87, 0x9e867842, - 0xd32442d6, 0x5fbafeab, 0x9cac434c, 0xecad9b0d, - 0x2dcf235e, 0xaab655d0 + 0xe4cf1d42, 0xd4ccb93c, 0x5e84eafc, 0x21362cfe, + 0x2f4775ab, 0x9ff036cc, 0xeca51474, 0xbc9d6816, + 0x12926a31, 0x1c9fa888 }, { - 0xc1111b14, 0x9a95039e, 0x84f208a0, 0xfa28f3fb, - 0xfa13f7d3, 0xc7aed470, 0x74caa938, 0xa9288066, - 0xd0140735, 0xbf00519d + 0x8270ac65, 0x05fa6668, 0x762df861, 0xda088f2f, + 0x59614cd4, 0x7a94f690, 0xdc1e4993, 0x30825494, + 0x91d0e462, 0x768087fc } }; static uint32_t hash_values_crc[2][10] = {{ diff --git a/doc/guides/rel_notes/new_features.rst b/doc/guides/rel_notes/new_features.rst index 4b4974d403..5b724ab255 100644 --- a/doc/guides/rel_notes/new_features.rst +++ b/doc/guides/rel_notes/new_features.rst @@ -121,4 +121,9 @@ New Features * Job Stats library and Sample Application. +* Enhanced Jenkins hash (jhash) library + +.. note:: The hash values returned by the new jhash library are different + from the ones returned by the previous library. + For further features supported in this release, see Chapter 3 Supported Features. diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h index a4bf5a1b38..1cb5c444be 100644 --- a/lib/librte_hash/rte_jhash.h +++ b/lib/librte_hash/rte_jhash.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -45,38 +45,62 @@ extern "C" { #endif #include +#include +#include /* jhash.h: Jenkins hash support. * - * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net) + * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net) * * http://burtleburtle.net/bob/hash/ * * These are the credits from Bob's sources: * - * lookup2.c, by Bob Jenkins, December 1996, Public Domain. - * hash(), hash2(), hash3, and mix() are externally useful functions. - * Routines to test the hash are included if SELF_TEST is defined. - * You can use this free for any purpose. It has no warranty. + * lookup3.c, by Bob Jenkins, May 2006, Public Domain. + * + * These are functions for producing 32-bit hashes for hash table lookup. + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() + * are externally useful functions. Routines to test the hash are included + * if SELF_TEST is defined. You can use this free for any purpose. It's in + * the public domain. It has no warranty. * * $FreeBSD$ */ +#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k)))) + /** @internal Internal function. NOTE: Arguments are modified. */ #define __rte_jhash_mix(a, b, c) do { \ - a -= b; a -= c; a ^= (c>>13); \ - b -= c; b -= a; b ^= (a<<8); \ - c -= a; c -= b; c ^= (b>>13); \ - a -= b; a -= c; a ^= (c>>12); \ - b -= c; b -= a; b ^= (a<<16); \ - c -= a; c -= b; c ^= (b>>5); \ - a -= b; a -= c; a ^= (c>>3); \ - b -= c; b -= a; b ^= (a<<10); \ - c -= a; c -= b; c ^= (b>>15); \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c, 16); c += b; \ + b -= a; b ^= rot(a, 19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} while (0) + +#define __rte_jhash_final(a, b, c) do { \ + c ^= b; c -= rot(b, 14); \ + a ^= c; a -= rot(c, 11); \ + b ^= a; b -= rot(a, 25); \ + c ^= b; c -= rot(b, 16); \ + a ^= c; a -= rot(c, 4); \ + b ^= a; b -= rot(a, 14); \ + c ^= b; c -= rot(b, 24); \ } while (0) /** The golden ratio: an arbitrary value. */ -#define RTE_JHASH_GOLDEN_RATIO 0x9e3779b9 +#define RTE_JHASH_GOLDEN_RATIO 0xdeadbeef + +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN +#define BIT_SHIFT(x, y, k) (((x) >> (k)) | ((uint64_t)(y) << (32-(k)))) +#else +#define BIT_SHIFT(x, y, k) (((uint64_t)(x) << (k)) | ((y) >> (32-(k)))) +#endif + +#define LOWER8b_MASK rte_le_to_cpu_32(0xff) +#define LOWER16b_MASK rte_le_to_cpu_32(0xffff) +#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff) /** * The most generic version, hashes an arbitrary sequence @@ -95,42 +119,130 @@ extern "C" { static inline uint32_t rte_jhash(const void *key, uint32_t length, uint32_t initval) { - uint32_t a, b, c, len; - const uint8_t *k = (const uint8_t *)key; - const uint32_t *k32 = (const uint32_t *)key; + uint32_t a, b, c; - len = length; - a = b = RTE_JHASH_GOLDEN_RATIO; - c = initval; + /* Set up the internal state */ + a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval; - while (len >= 12) { - a += k32[0]; - b += k32[1]; - c += k32[2]; + /* Check key alignment. For x86 architecture, first case is always optimal */ +#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32) + const uint32_t *k = key; + const uint32_t s = 0; +#else + const uint32_t *k = (uint32_t *)(uintptr_t)key & (uintptr_t)~3); + const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT; +#endif - __rte_jhash_mix(a,b,c); + if (s == 0) { + while (length > 12) { + a += k[0]; + b += k[1]; + c += k[2]; - k += (3 * sizeof(uint32_t)), k32 += 3; - len -= (3 * sizeof(uint32_t)); - } + __rte_jhash_mix(a, b, c); - c += length; - switch (len) { - case 11: c += ((uint32_t)k[10] << 24); - case 10: c += ((uint32_t)k[9] << 16); - case 9 : c += ((uint32_t)k[8] << 8); - case 8 : b += ((uint32_t)k[7] << 24); - case 7 : b += ((uint32_t)k[6] << 16); - case 6 : b += ((uint32_t)k[5] << 8); - case 5 : b += k[4]; - case 4 : a += ((uint32_t)k[3] << 24); - case 3 : a += ((uint32_t)k[2] << 16); - case 2 : a += ((uint32_t)k[1] << 8); - case 1 : a += k[0]; - default: break; - }; + k += 3; + length -= 12; + } - __rte_jhash_mix(a,b,c); + switch (length) { + case 12: + c += k[2]; b += k[1]; a += k[0]; break; + case 11: + c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break; + case 10: + c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break; + case 9: + c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break; + case 8: + b += k[1]; a += k[0]; break; + case 7: + b += k[1] & LOWER24b_MASK; a += k[0]; break; + case 6: + b += k[1] & LOWER16b_MASK; a += k[0]; break; + case 5: + b += k[1] & LOWER8b_MASK; a += k[0]; break; + case 4: + a += k[0]; break; + case 3: + a += k[0] & LOWER24b_MASK; break; + case 2: + a += k[0] & LOWER16b_MASK; break; + case 1: + a += k[0] & LOWER8b_MASK; break; + /* zero length strings require no mixing */ + case 0: + return c; + }; + } else { + /* all but the last block: affect some 32 bits of (a, b, c) */ + while (length > 12) { + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s); + c += BIT_SHIFT(k[2], k[3], s); + __rte_jhash_mix(a, b, c); + + k += 3; + length -= 12; + } + + /* last block: affect all 32 bits of (c) */ + switch (length) { + case 12: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s); + c += BIT_SHIFT(k[2], k[3], s); + break; + case 11: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s); + c += BIT_SHIFT(k[2], k[3], s) & LOWER24b_MASK; + break; + case 10: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s); + c += BIT_SHIFT(k[2], k[3], s) & LOWER16b_MASK; + break; + case 9: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s); + c += BIT_SHIFT(k[2], k[3], s) & LOWER8b_MASK; + break; + case 8: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s); + break; + case 7: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s) & LOWER24b_MASK; + break; + case 6: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s) & LOWER16b_MASK; + break; + case 5: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s) & LOWER8b_MASK; + break; + case 4: + a += BIT_SHIFT(k[0], k[1], s); + break; + case 3: + a += BIT_SHIFT(k[0], k[1], s) & LOWER24b_MASK; + break; + case 2: + a += BIT_SHIFT(k[0], k[1], s) & LOWER16b_MASK; + break; + case 1: + a += BIT_SHIFT(k[0], k[1], s) & LOWER8b_MASK; + break; + /* zero length strings require no mixing */ + case 0: + return c; + } + } + + __rte_jhash_final(a, b, c); return c; } @@ -151,33 +263,54 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval) static inline uint32_t rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval) { - uint32_t a, b, c, len; + uint32_t a, b, c; - a = b = RTE_JHASH_GOLDEN_RATIO; - c = initval; - len = length; + /* Set up the internal state */ + a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + initval; - while (len >= 3) { + /* Handle most of the key */ + while (length > 3) { a += k[0]; b += k[1]; c += k[2]; + __rte_jhash_mix(a, b, c); - k += 3; len -= 3; - } - c += length * 4; + k += 3; + length -= 3; + } - switch (len) { - case 2 : b += k[1]; - case 1 : a += k[0]; - default: break; + /* Handle the last 3 uint32_t's */ + switch (length) { + case 3: + c += k[2]; + /* Fallthrough */ + case 2: + b += k[1]; + /* Fallthrough */ + case 1: + a += k[0]; + __rte_jhash_final(a, b, c); + /* Fallthrough */ + /* case 0: nothing left to add */ + case 0: + break; }; - __rte_jhash_mix(a,b,c); - return c; } +static inline uint32_t +__rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval) +{ + a += RTE_JHASH_GOLDEN_RATIO + initval; + b += RTE_JHASH_GOLDEN_RATIO + initval; + c += RTE_JHASH_GOLDEN_RATIO + initval; + + __rte_jhash_final(a, b, c); + + return c; +} /** * A special ultra-optimized versions that knows it is hashing exactly @@ -197,17 +330,7 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval) static inline uint32_t rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval) { - a += RTE_JHASH_GOLDEN_RATIO; - b += RTE_JHASH_GOLDEN_RATIO; - c += initval; - - __rte_jhash_mix(a, b, c); - - /* - * NOTE: In particular the "c += length; __rte_jhash_mix(a,b,c);" - * normally done at the end is not done here. - */ - return c; + return __rte_jhash_3words(a + 12, b + 12, c + 12, initval); } /** @@ -226,7 +349,7 @@ rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval) static inline uint32_t rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval) { - return rte_jhash_3words(a, b, 0, initval); + return __rte_jhash_3words(a + 8, b + 8, 8, initval); } /** @@ -243,7 +366,7 @@ rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval) static inline uint32_t rte_jhash_1word(uint32_t a, uint32_t initval) { - return rte_jhash_3words(a, 0, 0, initval); + return __rte_jhash_3words(a + 4, 4, 4, initval); } #ifdef __cplusplus -- 2.20.1