hash: add scalable multi-writer insertion with Intel TSX
authorWei Shen <wei1.shen@intel.com>
Thu, 16 Jun 2016 22:14:14 +0000 (15:14 -0700)
committerThomas Monjalon <thomas.monjalon@6wind.com>
Fri, 24 Jun 2016 14:25:07 +0000 (16:25 +0200)
This patch introduces scalable multi-writer Cuckoo Hash insertion
based on a split Cuckoo Search and Move operation using Intel
TSX. It can do scalable hash insertion with 22 cores with little
performance loss and a negligible TSX abort rate.

* Added an extra rte_hash flag definition to switch default single writer
  Cuckoo Hash behavior to multiwriter.
    - If HTM is available, it would use hardware feature for concurrency.
    - If HTM is not available, it would fall back to spinlock.

* Created a rte_cuckoo_hash_x86.h file to hold all x86-arch related
  cuckoo_hash functions. rte_cuckoo_hash.c uses a compile-time flag to
  select the x86 file or other platform-specific implementations, while the
  HTM check is still done at runtime (same idea as
  RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT).

* Moved rte_hash private struct definitions to rte_cuckoo_hash.h, so that
  rte_cuckoo_hash_x86.h or future platform-dependent files can include them.

* The following new functions are created so that names stay consistent when
  TM support for new platforms is added.
    - rte_hash_cuckoo_move_insert_mw_tm: do insertion with bucket movement.
    - rte_hash_cuckoo_insert_mw_tm: do insertion without bucket movement.

* One extra multi-writer test case is added.

Signed-off-by: Wei Shen <wei1.shen@intel.com>
Signed-off-by: Sameh Gobriel <sameh.gobriel@intel.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
app/test/Makefile
app/test/test_hash_multiwriter.c [new file with mode: 0644]
doc/guides/rel_notes/release_16_07.rst
lib/librte_hash/rte_cuckoo_hash.c
lib/librte_hash/rte_cuckoo_hash.h [new file with mode: 0644]
lib/librte_hash/rte_cuckoo_hash_x86.h [new file with mode: 0644]
lib/librte_hash/rte_hash.h

index 9fa03fb..be23e8b 100644 (file)
@@ -123,6 +123,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_thash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_functions.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_scaling.c
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_multiwriter.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm.c
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm_perf.c
diff --git a/app/test/test_hash_multiwriter.c b/app/test/test_hash_multiwriter.c
new file mode 100644 (file)
index 0000000..b0f31b0
--- /dev/null
@@ -0,0 +1,287 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in
+ *      the documentation and/or other materials provided with the
+ *      distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *      contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <inttypes.h>
+#include <locale.h>
+
+#include <rte_cycles.h>
+#include <rte_hash.h>
+#include <rte_hash_crc.h>
+#include <rte_launch.h>
+#include <rte_malloc.h>
+#include <rte_random.h>
+#include <rte_spinlock.h>
+
+#include "test.h"
+
+/*
+ * Check condition and return an error if true. Assumes that "handle" is the
+ * name of the hash structure pointer to be freed.
+ *
+ * When cond is true the macro prints "ERROR line <n>: " followed by the
+ * printf-style message built from str and the optional arguments, frees
+ * "handle" if it is non-NULL, and makes the enclosing function return -1.
+ * It can therefore only be used inside functions that return int and have
+ * a variable named "handle" in scope.
+ */
+#define RETURN_IF_ERROR(cond, str, ...) do {                            \
+       if (cond) {                                                     \
+               printf("ERROR line %d: " str "\n", __LINE__,            \
+                                                       ##__VA_ARGS__); \
+               if (handle)                                             \
+                       rte_hash_free(handle);                          \
+               return -1;                                              \
+       }                                                               \
+} while (0)
+
+/*
+ * Sentinel written over keys[] entries that a worker did not insert; the
+ * verification pass skips entries equal to it. keys[i] is initialised to i,
+ * so entry 0 always equals the sentinel and is never verified.
+ */
+#define RTE_APP_TEST_HASH_MULTIWRITER_FAILED 0
+
+/* State shared between the launching lcore and the insertion workers. */
+struct {
+       uint32_t *keys;             /* keys to insert, one slice per worker */
+       uint32_t *found;            /* per-key hit count filled by the iterate pass */
+       uint32_t nb_tsx_insertion;  /* number of keys each worker inserts */
+       struct rte_hash *h;         /* hash table under test */
+} tbl_multiwriter_test_params;
+
+/* Capacity of the hash table and of the keys[]/found[] arrays. */
+const uint32_t nb_entries = 16*1024*1024;
+/* Total number of insertions to split across all lcores. */
+const uint32_t nb_total_tsx_insertion = 15*1024*1024;
+/*
+ * NOTE(review): test_hash_multiwriter() declares a local variable with the
+ * same name that shadows this one; no visible code reads the global.
+ */
+uint32_t rounded_nb_total_tsx_insertion;
+
+/* Accumulated across workers: cycles spent inserting and keys inserted. */
+static rte_atomic64_t gcycles;
+static rte_atomic64_t ginsertions;
+
+/* Non-zero when the table is created with the transactional-memory flag. */
+static int use_htm;
+
+/*
+ * Insertion worker run on every enabled lcore.
+ *
+ * Each worker inserts its own contiguous slice of
+ * tbl_multiwriter_test_params.keys, nb_tsx_insertion keys long, into the
+ * shared hash table. The slice is selected by the worker's rank among the
+ * enabled lcores (not by the raw lcore id), so the slices always tile
+ * [0, lcore_count * nb_tsx_insertion) even when lcore ids are sparse or the
+ * master lcore is not the lowest id.
+ *
+ * The cycles spent and the number of successful insertions are added to
+ * gcycles and ginsertions. Keys that were not inserted because an add
+ * failed are overwritten with RTE_APP_TEST_HASH_MULTIWRITER_FAILED so the
+ * verification pass skips them.
+ *
+ * Always returns 0.
+ */
+static int
+test_hash_multiwriter_worker(__attribute__((unused)) void *arg)
+{
+       uint64_t i, offset;
+       uint32_t lcore_id = rte_lcore_id();
+       uint64_t begin, cycles;
+       unsigned int id, pos = 0;
+
+       /* Rank of this lcore among the enabled lcores. */
+       RTE_LCORE_FOREACH(id) {
+               if (id < lcore_id)
+                       pos++;
+       }
+
+       offset = (uint64_t)pos
+               * tbl_multiwriter_test_params.nb_tsx_insertion;
+
+       printf("Core #%u inserting %u: %'"PRIu64" - %'"PRIu64"\n",
+              lcore_id, tbl_multiwriter_test_params.nb_tsx_insertion,
+              offset, offset + tbl_multiwriter_test_params.nb_tsx_insertion);
+
+       begin = rte_rdtsc_precise();
+
+       for (i = offset;
+            i < offset + tbl_multiwriter_test_params.nb_tsx_insertion;
+            i++) {
+               /* Stop at the first failed insertion. */
+               if (rte_hash_add_key(tbl_multiwriter_test_params.h,
+                                    tbl_multiwriter_test_params.keys + i) < 0)
+                       break;
+       }
+
+       cycles = rte_rdtsc_precise() - begin;
+       rte_atomic64_add(&gcycles, cycles);
+       rte_atomic64_add(&ginsertions, i - offset);
+
+       /* Mark the keys that were never inserted. */
+       for (; i < offset + tbl_multiwriter_test_params.nb_tsx_insertion; i++)
+               tbl_multiwriter_test_params.keys[i]
+                       = RTE_APP_TEST_HASH_MULTIWRITER_FAILED;
+
+       return 0;
+}
+
+
+/*
+ * Run one multi-writer insertion test.
+ *
+ * Creates a hash table with the multi-writer flag (plus the
+ * transactional-memory flag when use_htm is set), launches
+ * test_hash_multiwriter_worker() on every lcore including the master, then
+ * iterates over the table and checks that every key the workers report as
+ * inserted is present exactly once.
+ *
+ * Returns 0 on success and -1 on any failure (table creation, allocation,
+ * no key inserted, duplicated key or lost key).
+ */
+static int
+test_hash_multiwriter(void)
+{
+       unsigned int i, rounded_nb_total_tsx_insertion;
+       static unsigned calledCount = 1;
+
+       uint32_t *keys = NULL;
+       uint32_t *found = NULL;
+       int64_t insertions;
+
+       struct rte_hash_parameters hash_params = {
+               .entries = nb_entries,
+               .key_len = sizeof(uint32_t),
+               .hash_func = rte_hash_crc,
+               .hash_func_init_val = 0,
+               .socket_id = rte_socket_id(),
+       };
+       if (use_htm)
+               hash_params.extra_flag =
+                       RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT
+                               | RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
+       else
+               hash_params.extra_flag =
+                       RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
+
+       struct rte_hash *handle;
+       char name[RTE_HASH_NAMESIZE];
+
+       const void *next_key;
+       void *next_data;
+       uint32_t iter = 0;
+
+       uint32_t duplicated_keys = 0;
+       uint32_t lost_keys = 0;
+
+       /* Each call needs a unique table name. */
+       snprintf(name, sizeof(name), "test%u", calledCount++);
+       hash_params.name = name;
+
+       handle = rte_hash_create(&hash_params);
+       RETURN_IF_ERROR(handle == NULL, "hash creation failed");
+
+       tbl_multiwriter_test_params.h = handle;
+       tbl_multiwriter_test_params.nb_tsx_insertion =
+               nb_total_tsx_insertion / rte_lcore_count();
+
+       /* Only whole per-worker slices are inserted and verified. */
+       rounded_nb_total_tsx_insertion = (nb_total_tsx_insertion /
+               tbl_multiwriter_test_params.nb_tsx_insertion)
+               * tbl_multiwriter_test_params.nb_tsx_insertion;
+
+       rte_srand(rte_rdtsc());
+
+       keys = rte_malloc(NULL, sizeof(uint32_t) * nb_entries, 0);
+
+       if (keys == NULL) {
+               printf("RTE_MALLOC failed\n");
+               goto err1;
+       }
+
+       found = rte_zmalloc(NULL, sizeof(uint32_t) * nb_entries, 0);
+       if (found == NULL) {
+               printf("RTE_ZMALLOC failed\n");
+               goto err2;
+       }
+
+       for (i = 0; i < nb_entries; i++)
+               keys[i] = i;
+
+       tbl_multiwriter_test_params.keys = keys;
+       tbl_multiwriter_test_params.found = found;
+
+       rte_atomic64_init(&gcycles);
+       rte_atomic64_clear(&gcycles);
+
+       rte_atomic64_init(&ginsertions);
+       rte_atomic64_clear(&ginsertions);
+
+       /* Fire all threads. */
+       rte_eal_mp_remote_launch(test_hash_multiwriter_worker,
+                                NULL, CALL_MASTER);
+       rte_eal_mp_wait_lcore();
+
+       while (rte_hash_iterate(handle, &next_key, &next_data, &iter) >= 0) {
+               /* Count how many times each key shows up in the table. */
+               i = *(const uint32_t *)next_key;
+               found[i]++;
+       }
+
+       /* Stop at the first duplicated or lost key. */
+       for (i = 0; i < rounded_nb_total_tsx_insertion; i++) {
+               if (keys[i] != RTE_APP_TEST_HASH_MULTIWRITER_FAILED) {
+                       if (found[i] > 1) {
+                               duplicated_keys++;
+                               break;
+                       }
+                       if (found[i] == 0) {
+                               lost_keys++;
+                               printf("key %u is lost\n", i);
+                               break;
+                       }
+               }
+       }
+
+       if (duplicated_keys > 0) {
+               printf("%u key duplicated\n", duplicated_keys);
+               goto err3;
+       }
+
+       if (lost_keys > 0) {
+               printf("%u key lost\n", lost_keys);
+               goto err3;
+       }
+
+       /* Nothing inserted means nothing was tested; also avoids a /0. */
+       insertions = rte_atomic64_read(&ginsertions);
+       if (insertions == 0) {
+               printf("No key was inserted\n");
+               goto err3;
+       }
+
+       printf("No key corrupted during multiwriter insertion.\n");
+
+       unsigned long long int cycles_per_insertion =
+               rte_atomic64_read(&gcycles) / insertions;
+
+       printf(" cycles per insertion: %llu\n", cycles_per_insertion);
+
+       rte_free(found);
+       rte_free(keys);
+       tbl_multiwriter_test_params.found = NULL;
+       tbl_multiwriter_test_params.keys = NULL;
+       rte_hash_free(handle);
+       return 0;
+
+       /*
+        * The error paths free the local pointers: the shared parameter
+        * block may not have been updated yet and could still hold pointers
+        * that a previous run already freed.
+        */
+err3:
+       rte_free(found);
+err2:
+       rte_free(keys);
+err1:
+       tbl_multiwriter_test_params.found = NULL;
+       tbl_multiwriter_test_params.keys = NULL;
+       rte_hash_free(handle);
+       return -1;
+}
+
+/*
+ * Entry point of the multi-writer autotest.
+ *
+ * Skips the test (returning 0) when only one lcore is available. Otherwise
+ * runs test_hash_multiwriter() with hardware transactional memory when
+ * rte_tm_supported() reports it, and then once more without it.
+ *
+ * Returns 0 only if every run that was executed succeeded, -1 otherwise.
+ * A failure of the transactional-memory run is reported immediately rather
+ * than being overwritten by the result of the second run.
+ */
+static int
+test_hash_multiwriter_main(void)
+{
+       if (rte_lcore_count() == 1) {
+               printf("More than one lcore is required to do multiwriter test\n");
+               return 0;
+       }
+
+       /* Lets the %' printf flag used by the workers group digits. */
+       setlocale(LC_NUMERIC, "");
+
+       if (!rte_tm_supported()) {
+               printf("Hardware transactional memory (lock elision) "
+                       "is NOT supported\n");
+       } else {
+               printf("Hardware transactional memory (lock elision) "
+                       "is supported\n");
+
+               printf("Test multi-writer with Hardware transactional memory\n");
+
+               use_htm = 1;
+               if (test_hash_multiwriter() < 0)
+                       return -1;
+       }
+
+       printf("Test multi-writer without Hardware transactional memory\n");
+       use_htm = 0;
+       if (test_hash_multiwriter() < 0)
+               return -1;
+
+       return 0;
+}
+
+
+/* Registers the test under the "hash_multiwriter_autotest" command. */
+static struct test_command hash_multiwriter_cmd = {
+       .command = "hash_multiwriter_autotest",
+       .callback = test_hash_multiwriter_main,
+};
+
+REGISTER_TEST_COMMAND(hash_multiwriter_cmd);
index 4fc2aa3..54ebdc7 100644 (file)
@@ -100,6 +100,18 @@ New Features
   A new Crypto PMD has been added, which provides KASUMI F8 (UEA1) ciphering
   and KASUMI F9 (UIA1) hashing.
 
+* **Added multi-writer support for RTE Hash with Intel TSX.**
+
+  The following features/modifications have been added to rte_hash library:
+
+  * Enabled application developers to use an extra flag at rte_hash creation
+    to specify the default behavior (multi-thread safe/unsafe) of the
+    rte_hash_add_key function.
+  * Changed Cuckoo search algorithm to breadth first search for multi-writer
+    routine and split Cuckoo Search and Move operations in order to reduce
+    transactional code region and improve TSX performance.
+  * Added a hash multi-writer test case for test app.
+
 * **Improved IP Pipeline Application.**
 
   The following features have been added to ip_pipeline application:
index 7b7d1f8..e3cc3a7 100644 (file)
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
 #include <rte_compat.h>
 
 #include "rte_hash.h"
-#if defined(RTE_ARCH_X86)
-#include "rte_cmp_x86.h"
-#endif
+#include "rte_cuckoo_hash.h"
 
-#if defined(RTE_ARCH_ARM64)
-#include "rte_cmp_arm64.h"
+#if defined(RTE_ARCH_X86)
+#include "rte_cuckoo_hash_x86.h"
 #endif
 
 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
@@ -74,153 +72,6 @@ static struct rte_tailq_elem rte_hash_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_hash_tailq)
 
-/* Macro to enable/disable run-time checking of function parameters */
-#if defined(RTE_LIBRTE_HASH_DEBUG)
-#define RETURN_IF_TRUE(cond, retval) do { \
-       if (cond) \
-               return retval; \
-} while (0)
-#else
-#define RETURN_IF_TRUE(cond, retval)
-#endif
-
-/* Hash function used if none is specified */
-#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32)
-#include <rte_hash_crc.h>
-#define DEFAULT_HASH_FUNC       rte_hash_crc
-#else
-#include <rte_jhash.h>
-#define DEFAULT_HASH_FUNC       rte_jhash
-#endif
-
-/** Number of items per bucket. */
-#define RTE_HASH_BUCKET_ENTRIES                4
-
-#define NULL_SIGNATURE                 0
-
-#define KEY_ALIGNMENT                  16
-
-#define LCORE_CACHE_SIZE               8
-
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
-/*
- * All different options to select a key compare function,
- * based on the key size and custom function.
- */
-enum cmp_jump_table_case {
-       KEY_CUSTOM = 0,
-       KEY_16_BYTES,
-       KEY_32_BYTES,
-       KEY_48_BYTES,
-       KEY_64_BYTES,
-       KEY_80_BYTES,
-       KEY_96_BYTES,
-       KEY_112_BYTES,
-       KEY_128_BYTES,
-       KEY_OTHER_BYTES,
-       NUM_KEY_CMP_CASES,
-};
-
-/*
- * Table storing all different key compare functions
- * (multi-process supported)
- */
-const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
-       NULL,
-       rte_hash_k16_cmp_eq,
-       rte_hash_k32_cmp_eq,
-       rte_hash_k48_cmp_eq,
-       rte_hash_k64_cmp_eq,
-       rte_hash_k80_cmp_eq,
-       rte_hash_k96_cmp_eq,
-       rte_hash_k112_cmp_eq,
-       rte_hash_k128_cmp_eq,
-       memcmp
-};
-#else
-/*
- * All different options to select a key compare function,
- * based on the key size and custom function.
- */
-enum cmp_jump_table_case {
-       KEY_CUSTOM = 0,
-       KEY_OTHER_BYTES,
-       NUM_KEY_CMP_CASES,
-};
-
-/*
- * Table storing all different key compare functions
- * (multi-process supported)
- */
-const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
-       NULL,
-       memcmp
-};
-
-#endif
-
-struct lcore_cache {
-       unsigned len; /**< Cache len */
-       void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */
-} __rte_cache_aligned;
-
-/** A hash table structure. */
-struct rte_hash {
-       char name[RTE_HASH_NAMESIZE];   /**< Name of the hash. */
-       uint32_t entries;               /**< Total table entries. */
-       uint32_t num_buckets;           /**< Number of buckets in table. */
-       uint32_t key_len;               /**< Length of hash key. */
-       rte_hash_function hash_func;    /**< Function used to calculate hash. */
-       uint32_t hash_func_init_val;    /**< Init value used by hash_func. */
-       rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
-       /**< Custom function used to compare keys. */
-       enum cmp_jump_table_case cmp_jump_table_idx;
-       /**< Indicates which compare function to use. */
-       uint32_t bucket_bitmask;        /**< Bitmask for getting bucket index
-                                               from hash signature. */
-       uint32_t key_entry_size;         /**< Size of each key entry. */
-
-       struct rte_ring *free_slots;    /**< Ring that stores all indexes
-                                               of the free slots in the key table */
-       void *key_store;                /**< Table storing all keys and data */
-       struct rte_hash_bucket *buckets;        /**< Table with buckets storing all the
-                                                       hash values and key indexes
-                                                       to the key table*/
-       uint8_t hw_trans_mem_support;   /**< Hardware transactional
-                                                       memory support */
-       struct lcore_cache *local_free_slots;
-       /**< Local cache per lcore, storing some indexes of the free slots */
-} __rte_cache_aligned;
-
-/* Structure storing both primary and secondary hashes */
-struct rte_hash_signatures {
-       union {
-               struct {
-                       hash_sig_t current;
-                       hash_sig_t alt;
-               };
-               uint64_t sig;
-       };
-};
-
-/* Structure that stores key-value pair */
-struct rte_hash_key {
-       union {
-               uintptr_t idata;
-               void *pdata;
-       };
-       /* Variable key size */
-       char key[0];
-} __attribute__((aligned(KEY_ALIGNMENT)));
-
-/** Bucket structure */
-struct rte_hash_bucket {
-       struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES];
-       /* Includes dummy key index that always contains index 0 */
-       uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1];
-       uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
-} __rte_cache_aligned;
-
 struct rte_hash *
 rte_hash_find_existing(const char *name)
 {
@@ -372,7 +223,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
 
 /*
  * If x86 architecture is used, select appropriate compare function,
- * which may use x86 instrinsics, otherwise use memcmp
+ * which may use x86 intrinsics, otherwise use memcmp
  */
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
        /* Select function to compare keys */
@@ -431,7 +282,23 @@ rte_hash_create(const struct rte_hash_parameters *params)
        h->free_slots = r;
        h->hw_trans_mem_support = hw_trans_mem_support;
 
-       /* populate the free slots ring. Entry zero is reserved for key misses */
+       /* Turn on multi-writer only with explicit flag from user; use TM
+        * when it is supported, otherwise fall back to a spinlock.
+        */
+       if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
+               if (h->hw_trans_mem_support) {
+                       h->add_key = ADD_KEY_MULTIWRITER_TM;
+               } else {
+                       h->add_key = ADD_KEY_MULTIWRITER;
+                       h->multiwriter_lock = rte_malloc(NULL,
+                                                       sizeof(rte_spinlock_t),
+                                                       LCORE_CACHE_SIZE);
+                       rte_spinlock_init(h->multiwriter_lock);
+               }
+       } else
+               h->add_key = ADD_KEY_SINGLEWRITER;
+
+       /* Populate free slots ring. Entry zero is reserved for key misses. */
        for (i = 1; i < params->entries + 1; i++)
                rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
 
@@ -482,6 +349,8 @@ rte_hash_free(struct rte_hash *h)
        if (h->hw_trans_mem_support)
                rte_free(h->local_free_slots);
 
+       if (h->add_key == ADD_KEY_MULTIWRITER)
+               rte_free(h->multiwriter_lock);
        rte_ring_free(h->free_slots);
        rte_free(h->key_store);
        rte_free(h->buckets);
@@ -632,6 +501,9 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
        unsigned lcore_id;
        struct lcore_cache *cached_free_slots = NULL;
 
+       if (h->add_key == ADD_KEY_MULTIWRITER)
+               rte_spinlock_lock(h->multiwriter_lock);
+
        prim_bucket_idx = sig & h->bucket_bitmask;
        prim_bkt = &h->buckets[prim_bucket_idx];
        rte_prefetch0(prim_bkt);
@@ -712,35 +584,67 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
        rte_memcpy(new_k->key, key, h->key_len);
        new_k->pdata = data;
 
-       /* Insert new entry is there is room in the primary bucket */
-       for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-               /* Check if slot is available */
-               if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) {
-                       prim_bkt->signatures[i].current = sig;
-                       prim_bkt->signatures[i].alt = alt_hash;
-                       prim_bkt->key_idx[i] = new_idx;
+#if defined(RTE_ARCH_X86) /* currently only x86 support HTM */
+       if (h->add_key == ADD_KEY_MULTIWRITER_TM) {
+               ret = rte_hash_cuckoo_insert_mw_tm(prim_bkt,
+                               sig, alt_hash, new_idx);
+               if (ret >= 0)
+                       return new_idx - 1;
+
+               /* Primary bucket full, need to make space for new entry */
+               ret = rte_hash_cuckoo_make_space_mw_tm(h, prim_bkt, sig,
+                                                       alt_hash, new_idx);
+
+               if (ret >= 0)
+                       return new_idx - 1;
+
+               /* Also search secondary bucket to get better occupancy */
+               ret = rte_hash_cuckoo_make_space_mw_tm(h, sec_bkt, sig,
+                                                       alt_hash, new_idx);
+
+               if (ret >= 0)
+                       return new_idx - 1;
+       } else {
+#endif
+               for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+                       /* Check if slot is available */
+                       if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) {
+                               prim_bkt->signatures[i].current = sig;
+                               prim_bkt->signatures[i].alt = alt_hash;
+                               prim_bkt->key_idx[i] = new_idx;
+                               break;
+                       }
+               }
+
+               if (i != RTE_HASH_BUCKET_ENTRIES) {
+                       if (h->add_key == ADD_KEY_MULTIWRITER)
+                               rte_spinlock_unlock(h->multiwriter_lock);
                        return new_idx - 1;
                }
-       }
 
-       /* Primary bucket is full, so we need to make space for new entry */
-       ret = make_space_bucket(h, prim_bkt);
-       /*
-        * After recursive function.
-        * Insert the new entry in the position of the pushed entry
-        * if successful or return error and
-        * store the new slot back in the ring
-        */
-       if (ret >= 0) {
-               prim_bkt->signatures[ret].current = sig;
-               prim_bkt->signatures[ret].alt = alt_hash;
-               prim_bkt->key_idx[ret] = new_idx;
-               return new_idx - 1;
+               /* Primary bucket full, need to make space for new entry
+                * After recursive function.
+                * Insert the new entry in the position of the pushed entry
+                * if successful or return error and
+                * store the new slot back in the ring
+                */
+               ret = make_space_bucket(h, prim_bkt);
+               if (ret >= 0) {
+                       prim_bkt->signatures[ret].current = sig;
+                       prim_bkt->signatures[ret].alt = alt_hash;
+                       prim_bkt->key_idx[ret] = new_idx;
+                       if (h->add_key == ADD_KEY_MULTIWRITER)
+                               rte_spinlock_unlock(h->multiwriter_lock);
+                       return new_idx - 1;
+               }
+#if defined(RTE_ARCH_X86)
        }
-
+#endif
        /* Error in addition, store new slot back in the ring and return error */
        enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx));
 
+       if (h->add_key == ADD_KEY_MULTIWRITER)
+               rte_spinlock_unlock(h->multiwriter_lock);
        return ret;
 }
 
diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
new file mode 100644 (file)
index 0000000..6c76700
--- /dev/null
@@ -0,0 +1,219 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* rte_cuckoo_hash.h
+ * This file holds Cuckoo Hash private data structures so that they can be
+ * included from platform-specific files like rte_cuckoo_hash_x86.h
+ */
+
+#ifndef _RTE_CUCKOO_HASH_H_
+#define _RTE_CUCKOO_HASH_H_
+
+#if defined(RTE_ARCH_X86)
+#include "rte_cmp_x86.h"
+#endif
+
+#if defined(RTE_ARCH_ARM64)
+#include "rte_cmp_arm64.h"
+#endif
+
+/* Macro to enable/disable run-time checking of function parameters.
+ *
+ * With RTE_LIBRTE_HASH_DEBUG defined, the enclosing function returns retval
+ * when cond is true. Without it the macro expands to nothing, so cond is
+ * never evaluated.
+ */
+#if defined(RTE_LIBRTE_HASH_DEBUG)
+#define RETURN_IF_TRUE(cond, retval) do { \
+       if (cond) \
+               return retval; \
+} while (0)
+#else
+#define RETURN_IF_TRUE(cond, retval)
+#endif
+
+/* Hash function used if none is specified.
+ *
+ * rte_hash_crc is selected when the build defines the SSE4.2 or CRC32
+ * machine CPU flag; rte_jhash is used otherwise.
+ */
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32)
+#include <rte_hash_crc.h>
+#define DEFAULT_HASH_FUNC       rte_hash_crc
+#else
+#include <rte_jhash.h>
+#define DEFAULT_HASH_FUNC       rte_jhash
+#endif
+
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+/*
+ * All different options to select a key compare function,
+ * based on the key size and custom function.
+ *
+ * The enumerators are listed in the same order as the entries of
+ * cmp_jump_table below; NUM_KEY_CMP_CASES is the table size.
+ */
+enum cmp_jump_table_case {
+       KEY_CUSTOM = 0,
+       KEY_16_BYTES,
+       KEY_32_BYTES,
+       KEY_48_BYTES,
+       KEY_64_BYTES,
+       KEY_80_BYTES,
+       KEY_96_BYTES,
+       KEY_112_BYTES,
+       KEY_128_BYTES,
+       KEY_OTHER_BYTES,
+       NUM_KEY_CMP_CASES,
+};
+
+/*
+ * Table storing all different key compare functions
+ * (multi-process supported)
+ *
+ * The KEY_CUSTOM entry is NULL; presumably the custom function stored in
+ * struct rte_hash is used in that case - confirm against the lookup code.
+ *
+ * NOTE(review): this is an object definition in a header, so every file
+ * that includes the header gets its own definition.
+ */
+const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
+       NULL,
+       rte_hash_k16_cmp_eq,
+       rte_hash_k32_cmp_eq,
+       rte_hash_k48_cmp_eq,
+       rte_hash_k64_cmp_eq,
+       rte_hash_k80_cmp_eq,
+       rte_hash_k96_cmp_eq,
+       rte_hash_k112_cmp_eq,
+       rte_hash_k128_cmp_eq,
+       memcmp
+};
+#else
+/*
+ * All different options to select a key compare function,
+ * based on the key size and custom function.
+ *
+ * Reduced variant for other architectures: only the custom function and
+ * memcmp are available.
+ */
+enum cmp_jump_table_case {
+       KEY_CUSTOM = 0,
+       KEY_OTHER_BYTES,
+       NUM_KEY_CMP_CASES,
+};
+
+/*
+ * Table storing all different key compare functions
+ * (multi-process supported)
+ */
+const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
+       NULL,
+       memcmp
+};
+
+#endif
+
+/*
+ * Insertion mode of a hash table, chosen once in rte_hash_create().
+ */
+enum add_key_case {
+       /* RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD not requested. */
+       ADD_KEY_SINGLEWRITER = 0,
+       /* Multi-writer requested without TM support: uses multiwriter_lock. */
+       ADD_KEY_MULTIWRITER,
+       /* Multi-writer requested with hardware TM support enabled. */
+       ADD_KEY_MULTIWRITER_TM,
+};
+
+/** Number of items per bucket. */
+#define RTE_HASH_BUCKET_ENTRIES                4
+
+/* Value of a bucket signature slot that holds no entry. */
+#define NULL_SIGNATURE                 0
+
+/* Alignment applied to struct rte_hash_key. */
+#define KEY_ALIGNMENT                  16
+
+/* Number of object pointers held by one struct lcore_cache. */
+#define LCORE_CACHE_SIZE               64
+
+/* NOTE(review): presumably the capacity of the BFS queue used by the
+ * multi-writer cuckoo path search - its use is not visible here.
+ */
+#define RTE_HASH_BFS_QUEUE_MAX_LEN       1000
+
+/* NOTE(review): presumably the abort code passed to the TSX abort
+ * instruction when a cuckoo path is found to be stale - confirm at the
+ * call site.
+ */
+#define RTE_XABORT_CUCKOO_PATH_INVALIDED 0x4
+
+/* NOTE(review): presumably the number of times a TSX transaction is
+ * retried before giving up - confirm at the call site.
+ */
+#define RTE_HASH_TSX_MAX_RETRY  10
+
+/*
+ * Fixed-size array of object pointers plus its fill level. struct rte_hash
+ * describes local_free_slots as a per-lcore cache of free slot indexes.
+ */
+struct lcore_cache {
+       unsigned len; /**< Cache len */
+       void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */
+} __rte_cache_aligned;
+
+/* Structure storing both primary and secondary hashes
+ *
+ * The two hashes are overlaid with the single integer "sig", which the
+ * insertion code compares against NULL_SIGNATURE to test whether a slot is
+ * free.
+ */
+struct rte_hash_signatures {
+       union {
+               struct {
+                       hash_sig_t current;
+                       hash_sig_t alt;
+               };
+               uint64_t sig;
+       };
+};
+
+/* Structure that stores key-value pair
+ *
+ * The value is kept either as an integer or as a pointer in the leading
+ * union. The key bytes follow it directly; the insertion code copies
+ * key_len bytes into "key".
+ */
+struct rte_hash_key {
+       union {
+               uintptr_t idata;
+               void *pdata;
+       };
+       /* Variable key size */
+       char key[0];
+} __attribute__((aligned(KEY_ALIGNMENT)));
+
+/** Bucket structure
+ *
+ * Holds RTE_HASH_BUCKET_ENTRIES slots. Slot i is described by
+ * signatures[i] (primary and alternative hash) and key_idx[i] (index of
+ * the entry in the key table).
+ */
+struct rte_hash_bucket {
+       struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES];
+       /* Includes dummy key index that always contains index 0 */
+       uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1];
+       /* NOTE(review): per-slot flag; its meaning is not visible here. */
+       uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
+} __rte_cache_aligned;
+
+/** A hash table structure.
+ *
+ * add_key and multiwriter_lock are set in rte_hash_create() from the
+ * RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD flag and hw_trans_mem_support.
+ */
+struct rte_hash {
+       char name[RTE_HASH_NAMESIZE];   /**< Name of the hash. */
+       uint32_t entries;               /**< Total table entries. */
+       uint32_t num_buckets;           /**< Number of buckets in table. */
+       uint32_t key_len;               /**< Length of hash key. */
+       rte_hash_function hash_func;    /**< Function used to calculate hash. */
+       uint32_t hash_func_init_val;    /**< Init value used by hash_func. */
+       rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
+       /**< Custom function used to compare keys. */
+       enum cmp_jump_table_case cmp_jump_table_idx;
+       /**< Indicates which compare function to use. */
+       uint32_t bucket_bitmask;        /**< Bitmask for getting bucket index
+                                               from hash signature. */
+       uint32_t key_entry_size;         /**< Size of each key entry. */
+
+       struct rte_ring *free_slots;    /**< Ring that stores all indexes
+                                               of the free slots in the key table */
+       void *key_store;                /**< Table storing all keys and data */
+       struct rte_hash_bucket *buckets;        /**< Table with buckets storing all the
+                                                       hash values and key indexes
+                                                       to the key table*/
+       uint8_t hw_trans_mem_support;   /**< Hardware transactional
+                                                       memory support */
+       struct lcore_cache *local_free_slots;
+       /**< Local cache per lcore, storing some indexes of the free slots */
+       enum add_key_case add_key; /**< Multi-writer hash add behavior */
+
+       /* Allocated only when add_key is ADD_KEY_MULTIWRITER, and freed
+        * under the same condition in rte_hash_free().
+        */
+       rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */
+} __rte_cache_aligned;
+
+/*
+ * One node of the breadth-first search over buckets. Each node records the
+ * bucket it stands for and links back to the node and slot it was reached
+ * from.
+ */
+struct queue_node {
+       struct rte_hash_bucket *bkt; /* Current bucket on the bfs search */
+
+       struct queue_node *prev;     /* Parent(bucket) in search path */
+       int prev_slot;               /* Parent(slot) in search path */
+};
+
+#endif
diff --git a/lib/librte_hash/rte_cuckoo_hash_x86.h b/lib/librte_hash/rte_cuckoo_hash_x86.h
new file mode 100644 (file)
index 0000000..fa5630b
--- /dev/null
@@ -0,0 +1,193 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* rte_cuckoo_hash_x86.h
+ * This file holds all x86 specific Cuckoo Hash functions
+ */
+
+/* Try to store a new entry (sig, alt_hash, new_idx) in the first free slot
+ * of one bucket (@prim_bkt), without trying to push buckets around.
+ *
+ * The scan for a free slot and the store run inside one hardware
+ * transaction. An aborted transaction is retried up to
+ * RTE_HASH_TSX_MAX_RETRY times; a committed one is final, whether or not a
+ * free slot was found.
+ *
+ * Returns 0 if the entry was stored, -1 if the bucket had no free slot or
+ * every attempt aborted. The return type is signed so that the failure
+ * value is a real negative number for callers testing "ret >= 0".
+ */
+static inline int
+rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt,
+               hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx)
+{
+       unsigned i, status;
+       unsigned try = 0;
+
+       while (try < RTE_HASH_TSX_MAX_RETRY) {
+               status = rte_xbegin();
+               if (likely(status == RTE_XBEGIN_STARTED)) {
+                       /* Insert new entry if there is room in the primary
+                        * bucket.
+                        */
+                       for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+                               /* Check if slot is available */
+                               if (likely(prim_bkt->signatures[i].sig ==
+                                               NULL_SIGNATURE)) {
+                                       prim_bkt->signatures[i].current = sig;
+                                       prim_bkt->signatures[i].alt = alt_hash;
+                                       prim_bkt->key_idx[i] = new_idx;
+                                       break;
+                               }
+                       }
+                       rte_xend();
+
+                       /* The transaction committed, so the outcome is
+                        * final: i stopped short of the end only if a free
+                        * slot was found and filled. A full bucket is not
+                        * retried.
+                        */
+                       return (i != RTE_HASH_BUCKET_ENTRIES) ? 0 : -1;
+               }
+
+               /* The transaction aborted: pause briefly and try again. */
+               try++;
+               rte_pause();
+       }
+
+       /* Every attempt aborted. */
+       return -1;
+}
+
+/* Shift entries along the provided cuckoo path and fill the path head with
+ * the new entry (sig, alt_hash, new_idx).
+ *
+ * The path is given by its last node (@leaf) and the slot of leaf->bkt to
+ * fill (@leaf_slot). It is walked backwards through queue_node::prev until
+ * the node whose prev is NULL. At each step the parent's entry is copied
+ * into the current slot, which leaves the parent's slot as the next one to
+ * fill. The slot left over at the head of the path receives the new entry.
+ *
+ * @h supplies the bucket table and bucket_bitmask used to re-check the path.
+ *
+ * The whole walk runs inside one hardware transaction, attempted up to
+ * RTE_HASH_TSX_MAX_RETRY times.
+ *
+ * Returns 0 if a transaction committed, -1 if every attempt aborted.
+ */
+static inline int
+rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
+                       struct queue_node *leaf, uint32_t leaf_slot,
+                       hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx)
+{
+       unsigned try = 0;
+       unsigned status;
+       uint32_t prev_alt_bkt_idx;
+
+       /* Walk state, starting at the leaf of the path.
+        * NOTE(review): these are not re-initialised per attempt; presumably
+        * an aborted transaction rolls them back to their values at
+        * rte_xbegin() - confirm.
+        */
+       struct queue_node *prev_node, *curr_node = leaf;
+       struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
+       uint32_t prev_slot, curr_slot = leaf_slot;
+
+       while (try < RTE_HASH_TSX_MAX_RETRY) {
+               status = rte_xbegin();
+               if (likely(status == RTE_XBEGIN_STARTED)) {
+                       while (likely(curr_node->prev != NULL)) {
+                               prev_node = curr_node->prev;
+                               prev_bkt = prev_node->bkt;
+                               prev_slot = curr_node->prev_slot;
+
+                               /* Bucket the parent's entry would move to
+                                * now, recomputed from its alt signature
+                                */
+                               prev_alt_bkt_idx
+                                       = prev_bkt->signatures[prev_slot].alt
+                                           & h->bucket_bitmask;
+
+                               /* The path is stale if that bucket is no
+                                * longer the current one: abort the
+                                * transaction
+                                */
+                               if (unlikely(&h->buckets[prev_alt_bkt_idx]
+                                            != curr_bkt)) {
+                                       rte_xabort(RTE_XABORT_CUCKOO_PATH_INVALIDED);
+                               }
+
+                               /* Need to swap current/alt sig to allow later
+                                * Cuckoo insert to move elements back to its
+                                * primary bucket if available
+                                */
+                               curr_bkt->signatures[curr_slot].alt =
+                                   prev_bkt->signatures[prev_slot].current;
+                               curr_bkt->signatures[curr_slot].current =
+                                   prev_bkt->signatures[prev_slot].alt;
+                               curr_bkt->key_idx[curr_slot]
+                                   = prev_bkt->key_idx[prev_slot];
+
+                               /* The parent's slot is the next one to fill */
+                               curr_slot = prev_slot;
+                               curr_node = prev_node;
+                               curr_bkt = curr_node->bkt;
+                       }
+
+                       /* Head of the path reached: store the new entry */
+                       curr_bkt->signatures[curr_slot].current = sig;
+                       curr_bkt->signatures[curr_slot].alt = alt_hash;
+                       curr_bkt->key_idx[curr_slot] = new_idx;
+
+                       rte_xend();
+
+                       return 0;
+               }
+
+               /* The transaction aborted: pause and try the same path again.
+                * The path is given up (-1 below) only once all
+                * RTE_HASH_TSX_MAX_RETRY attempts have aborted.
+                */
+               try++;
+               rte_pause();
+       }
+
+       return -1;
+}
+
+/*
+ * Make space for a new key, using a bfs Cuckoo Search and the Multi-Writer
+ * safe move/insert above.
+ *
+ * The search starts at @bkt and visits buckets in breadth-first order. For
+ * every slot of the visited bucket:
+ *   - if the slot is free, the path from that slot back to @bkt is handed
+ *     to rte_hash_cuckoo_move_insert_mw_tm(), which shifts the entries along
+ *     it and stores the new entry (sig, alt_hash, new_idx);
+ *   - otherwise, or if that attempt failed, the alternative bucket of the
+ *     slot's entry is queued for a later visit.
+ *
+ * @h supplies the bucket table and bucket_bitmask.
+ *
+ * Returns 0 once the new entry has been stored, -ENOSPC if the queue is
+ * exhausted or out of room before any path could be committed.
+ */
+static inline int
+rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h,
+                       struct rte_hash_bucket *bkt,
+                       hash_sig_t sig, hash_sig_t alt_hash,
+                       uint32_t new_idx)
+{
+       unsigned i;
+       struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
+       struct queue_node *tail, *head;
+       struct rte_hash_bucket *curr_bkt, *alt_bkt;
+
+       /* Seed the queue with the search root: no parent, no parent slot */
+       tail = queue;
+       head = queue + 1;
+       tail->bkt = bkt;
+       tail->prev = NULL;
+       tail->prev_slot = -1;
+
+       /* Cuckoo bfs Search.
+        * One visited bucket can enqueue up to RTE_HASH_BUCKET_ENTRIES nodes,
+        * so only continue while that many still fit in the queue. The
+        * headroom is tied to the constant so the writes through head below
+        * stay inside queue[] whatever the bucket size is.
+        */
+       while (likely(tail != head && head <
+                                       queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
+                                       RTE_HASH_BUCKET_ENTRIES)) {
+               curr_bkt = tail->bkt;
+               for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+                       if (curr_bkt->signatures[i].sig == NULL_SIGNATURE) {
+                               /* Free slot: try to commit the path that
+                                * ends here
+                                */
+                               if (likely(rte_hash_cuckoo_move_insert_mw_tm(h,
+                                               tail, i, sig,
+                                               alt_hash, new_idx) == 0))
+                                       return 0;
+                       }
+
+                       /* Enqueue new node and keep prev node info */
+                       alt_bkt = &(h->buckets[curr_bkt->signatures[i].alt
+                                                   & h->bucket_bitmask]);
+                       head->bkt = alt_bkt;
+                       head->prev = tail;
+                       head->prev_slot = i;
+                       head++;
+               }
+               tail++;
+       }
+
+       return -ENOSPC;
+}
index 724315a..c9612fb 100644 (file)
@@ -60,6 +60,9 @@ extern "C" {
 /** Enable Hardware transactional memory support. */
 #define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT 0x01
 
+/** Switch insertion from default single-writer to multi-writer behavior. */
+#define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02
+
 /** Signature of key that is stored internally. */
 typedef uint32_t hash_sig_t;