#include <rte_compat.h>
#include "rte_hash.h"
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
+#include "rte_cmp_x86.h"
+#endif
+
+#if defined(RTE_ARCH_ARM64)
+#include "rte_cmp_arm64.h"
+#endif
TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
#endif
/* Hash function used if none is specified */
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32)
#include <rte_hash_crc.h>
#define DEFAULT_HASH_FUNC rte_hash_crc
#else
#define KEY_ALIGNMENT 16
-typedef int (*rte_hash_cmp_eq_t)(const void *key1, const void *key2, size_t key_len);
-static int rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len);
-static int rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len);
-static int rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len);
-static int rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len);
-static int rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len);
-static int rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len);
-static int rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len);
-static int rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len);
+#define LCORE_CACHE_SIZE 8
+
+struct lcore_cache {
+ unsigned len; /**< Cache len */
+ void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */
+} __rte_cache_aligned;
/** A hash table structure. */
struct rte_hash {
uint32_t num_buckets; /**< Number of buckets in table. */
uint32_t key_len; /**< Length of hash key. */
rte_hash_function hash_func; /**< Function used to calculate hash. */
- rte_hash_cmp_eq_t rte_hash_cmp_eq; /**< Function used to compare keys. */
uint32_t hash_func_init_val; /**< Init value used by hash_func. */
+ rte_hash_cmp_eq_t rte_hash_cmp_eq; /**< Function used to compare keys. */
uint32_t bucket_bitmask; /**< Bitmask for getting bucket index
from hash signature. */
uint32_t key_entry_size; /**< Size of each key entry. */
struct rte_hash_bucket *buckets; /**< Table with buckets storing all the
hash values and key indexes
to the key table*/
+ uint8_t hw_trans_mem_support; /**< Hardware transactional
+ memory support */
+ struct lcore_cache *local_free_slots;
+ /**< Local cache per lcore, storing some indexes of the free slots */
} __rte_cache_aligned;
/* Structure storing both primary and secondary hashes */
void *pdata;
};
/* Variable key size */
- char key[];
+ char key[0];
} __attribute__((aligned(KEY_ALIGNMENT)));
/** Bucket structure */
return h;
}
+void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
+{
+ h->rte_hash_cmp_eq = func;
+}
+
struct rte_hash *
rte_hash_create(const struct rte_hash_parameters *params)
{
struct rte_hash_list *hash_list;
struct rte_ring *r = NULL;
char hash_name[RTE_HASH_NAMESIZE];
- void *ptr, *k = NULL;
+ void *k = NULL;
void *buckets = NULL;
char ring_name[RTE_RING_NAMESIZE];
+ unsigned num_key_slots;
+ unsigned hw_trans_mem_support = 0;
unsigned i;
hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
return NULL;
}
+ /* Check extra flags field to check extra options. */
+ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
+ hw_trans_mem_support = 1;
+
snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
/* Guarantee there's no existing */
const uint32_t key_entry_size = sizeof(struct rte_hash_key) + params->key_len;
/* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
- const uint64_t key_tbl_size = key_entry_size * (params->entries + 1);
+ if (hw_trans_mem_support)
+ /*
+ * Increase number of slots by total number of indices
+ * that can be stored in the lcore caches
+ * except for the first cache
+ */
+ num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
+ LCORE_CACHE_SIZE + 1;
+ else
+ num_key_slots = params->entries + 1;
+
+ const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
k = rte_zmalloc_socket(NULL, key_tbl_size,
RTE_CACHE_LINE_SIZE, params->socket_id);
goto err;
}
+/*
+ * If x86 architecture is used, select appropriate compare function,
+ * which may use x86 instrinsics, otherwise use memcmp
+ */
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) ||\
+ defined(RTE_ARCH_X86_X32) || defined(RTE_ARCH_ARM64)
/* Select function to compare keys */
switch (params->key_len) {
case 16:
/* If key is not multiple of 16, use generic memcmp */
h->rte_hash_cmp_eq = memcmp;
}
+#else
+ h->rte_hash_cmp_eq = memcmp;
+#endif
snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
- r = rte_ring_lookup(ring_name);
- if (r != NULL) {
- /* clear the free ring */
- while (rte_ring_dequeue(r, &ptr) == 0)
- rte_pause();
- } else
- r = rte_ring_create(ring_name, rte_align32pow2(params->entries + 1),
- params->socket_id, 0);
+ r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
+ params->socket_id, 0);
if (r == NULL) {
RTE_LOG(ERR, HASH, "memory allocation failed\n");
goto err;
}
+ if (hw_trans_mem_support) {
+ h->local_free_slots = rte_zmalloc_socket(NULL,
+ sizeof(struct lcore_cache) * RTE_MAX_LCORE,
+ RTE_CACHE_LINE_SIZE, params->socket_id);
+ }
+
/* Setup hash context */
snprintf(h->name, sizeof(h->name), "%s", params->name);
h->entries = params->entries;
h->buckets = buckets;
h->hash_func = (params->hash_func == NULL) ?
DEFAULT_HASH_FUNC : params->hash_func;
-
h->key_store = k;
h->free_slots = r;
+ h->hw_trans_mem_support = hw_trans_mem_support;
/* populate the free slots ring. Entry zero is reserved for key misses */
for (i = 1; i < params->entries + 1; i++)
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+ if (h->hw_trans_mem_support)
+ rte_free(h->local_free_slots);
+
+ rte_ring_free(h->free_slots);
rte_free(h->key_store);
rte_free(h->buckets);
rte_free(h);
uint32_t tag = primary_hash >> all_bits_shift;
- return (primary_hash ^ ((tag + 1) * alt_bits_xor));
+ return primary_hash ^ ((tag + 1) * alt_bits_xor);
}
void
/* Repopulate the free slots ring. Entry zero is reserved for key misses */
for (i = 1; i < h->entries + 1; i++)
rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
+
+ if (h->hw_trans_mem_support) {
+ /* Reset local caches per lcore */
+ for (i = 0; i < RTE_MAX_LCORE; i++)
+ h->local_free_slots[i].len = 0;
+ }
}
/* Search for an entry that can be pushed to its alternative location */
break;
/* All entries have been pushed, so entry cannot be added */
- if (i == RTE_HASH_BUCKET_ENTRIES) {
- /* Reset flag */
- bkt->flag[i] = 0;
+ if (i == RTE_HASH_BUCKET_ENTRIES)
return -ENOSPC;
- }
/* Set flag to indicate that this entry is going to be pushed */
bkt->flag[i] = 1;
}
+/*
+ * Function called to enqueue back an index in the cache/ring,
+ * as slot has not being used and it can be used in the
+ * next addition attempt.
+ */
+static inline void
+enqueue_slot_back(const struct rte_hash *h,
+ struct lcore_cache *cached_free_slots,
+ void *slot_id)
+{
+ if (h->hw_trans_mem_support) {
+ cached_free_slots->objs[cached_free_slots->len] = slot_id;
+ cached_free_slots->len++;
+ } else
+ rte_ring_sp_enqueue(h->free_slots, slot_id);
+}
+
static inline int32_t
__rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
hash_sig_t sig, void *data)
unsigned i;
struct rte_hash_bucket *prim_bkt, *sec_bkt;
struct rte_hash_key *new_k, *k, *keys = h->key_store;
- void *slot_id;
+ void *slot_id = NULL;
uint32_t new_idx;
int ret;
+ unsigned n_slots;
+ unsigned lcore_id;
+ struct lcore_cache *cached_free_slots = NULL;
prim_bucket_idx = sig & h->bucket_bitmask;
prim_bkt = &h->buckets[prim_bucket_idx];
rte_prefetch0(sec_bkt);
/* Get a new slot for storing the new key */
- if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0)
- return -ENOSPC;
+ if (h->hw_trans_mem_support) {
+ lcore_id = rte_lcore_id();
+ cached_free_slots = &h->local_free_slots[lcore_id];
+ /* Try to get a free slot from the local cache */
+ if (cached_free_slots->len == 0) {
+ /* Need to get another burst of free slots from global ring */
+ n_slots = rte_ring_mc_dequeue_burst(h->free_slots,
+ cached_free_slots->objs, LCORE_CACHE_SIZE);
+ if (n_slots == 0)
+ return -ENOSPC;
+
+ cached_free_slots->len += n_slots;
+ }
+
+ /* Get a free slot from the local cache */
+ cached_free_slots->len--;
+ slot_id = cached_free_slots->objs[cached_free_slots->len];
+ } else {
+ if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0)
+ return -ENOSPC;
+ }
+
new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
rte_prefetch0(new_k);
new_idx = (uint32_t)((uintptr_t) slot_id);
/* Check if key is already inserted in primary location */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
if (prim_bkt->signatures[i].current == sig &&
- prim_bkt->signatures[i].alt == alt_hash) {
+ prim_bkt->signatures[i].alt == alt_hash) {
k = (struct rte_hash_key *) ((char *)keys +
prim_bkt->key_idx[i] * h->key_entry_size);
if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
- rte_ring_sp_enqueue(h->free_slots, &slot_id);
+ /* Enqueue index of free slot back in the ring. */
+ enqueue_slot_back(h, cached_free_slots, slot_id);
/* Update data */
k->pdata = data;
/*
* Return index where key is stored,
* substracting the first dummy index
*/
- return (prim_bkt->key_idx[i] - 1);
+ return prim_bkt->key_idx[i] - 1;
}
}
}
/* Check if key is already inserted in secondary location */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
if (sec_bkt->signatures[i].alt == sig &&
- sec_bkt->signatures[i].current == alt_hash) {
+ sec_bkt->signatures[i].current == alt_hash) {
k = (struct rte_hash_key *) ((char *)keys +
sec_bkt->key_idx[i] * h->key_entry_size);
if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
- rte_ring_sp_enqueue(h->free_slots, &slot_id);
+ /* Enqueue index of free slot back in the ring. */
+ enqueue_slot_back(h, cached_free_slots, slot_id);
/* Update data */
k->pdata = data;
/*
* Return index where key is stored,
* substracting the first dummy index
*/
- return (sec_bkt->key_idx[i] - 1);
+ return sec_bkt->key_idx[i] - 1;
}
}
}
prim_bkt->signatures[ret].current = sig;
prim_bkt->signatures[ret].alt = alt_hash;
prim_bkt->key_idx[ret] = new_idx;
- return (new_idx - 1);
+ return new_idx - 1;
}
/* Error in addition, store new slot back in the ring and return error */
- rte_ring_sp_enqueue(h->free_slots,
- (void *)((uintptr_t) new_idx));
- return ret;
+ enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx));
+ return ret;
}
int32_t
* Return index where key is stored,
* substracting the first dummy index
*/
- return (bkt->key_idx[i] - 1);
+ return bkt->key_idx[i] - 1;
}
}
}
* Return index where key is stored,
* substracting the first dummy index
*/
- return (bkt->key_idx[i] - 1);
+ return bkt->key_idx[i] - 1;
}
}
}
return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
}
+static inline void
+remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
+{
+ unsigned lcore_id, n_slots;
+ struct lcore_cache *cached_free_slots;
+
+ bkt->signatures[i].sig = NULL_SIGNATURE;
+ if (h->hw_trans_mem_support) {
+ lcore_id = rte_lcore_id();
+ cached_free_slots = &h->local_free_slots[lcore_id];
+ /* Cache full, need to free it. */
+ if (cached_free_slots->len == LCORE_CACHE_SIZE) {
+ /* Need to enqueue the free slots in global ring. */
+ n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
+ cached_free_slots->objs,
+ LCORE_CACHE_SIZE);
+ cached_free_slots->len -= n_slots;
+ }
+ /* Put index of new free slot in cache. */
+ cached_free_slots->objs[cached_free_slots->len] =
+ (void *)((uintptr_t)bkt->key_idx[i]);
+ cached_free_slots->len++;
+ } else {
+ rte_ring_sp_enqueue(h->free_slots,
+ (void *)((uintptr_t)bkt->key_idx[i]));
+ }
+}
+
static inline int32_t
__rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
hash_sig_t sig)
k = (struct rte_hash_key *) ((char *)keys +
bkt->key_idx[i] * h->key_entry_size);
if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
- bkt->signatures[i].sig = NULL_SIGNATURE;
- rte_ring_sp_enqueue(h->free_slots,
- (void *)((uintptr_t)bkt->key_idx[i]));
+ remove_entry(h, bkt, i);
+
/*
* Return index where key is stored,
* substracting the first dummy index
*/
- return (bkt->key_idx[i] - 1);
+ return bkt->key_idx[i] - 1;
}
}
}
k = (struct rte_hash_key *) ((char *)keys +
bkt->key_idx[i] * h->key_entry_size);
if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
- bkt->signatures[i].sig = NULL_SIGNATURE;
- rte_ring_sp_enqueue(h->free_slots,
- (void *)((uintptr_t)bkt->key_idx[i]));
+ remove_entry(h, bkt, i);
+
/*
* Return index where key is stored,
* substracting the first dummy index
*/
- return (bkt->key_idx[i] - 1);
+ return bkt->key_idx[i] - 1;
}
}
}
/* Lookup bulk stage 3: Check if key matches, update hit mask and return data */
static inline void
lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * const *keys,
- void *data[], uint64_t *hits, const struct rte_hash *h)
+ const int32_t *positions, void *data[], uint64_t *hits,
+ const struct rte_hash *h)
{
unsigned hit;
+ unsigned key_idx;
hit = !h->rte_hash_cmp_eq(key_slot->key, keys[idx], h->key_len);
if (data != NULL)
data[idx] = key_slot->pdata;
- *hits |= (uint64_t)(hit) << idx;
+ key_idx = positions[idx] + 1;
+ /*
+ * If key index is 0, force hit to be 0, in case key to be looked up
+ * is all zero (as in the dummy slot), which would result in a wrong hit
+ */
+ *hits |= (uint64_t)(hit && !!key_idx) << idx;
}
static inline void
lookup_stage2(idx21, primary_hash21, secondary_hash21,
primary_bkt21, secondary_bkt21, &k_slot21, positions,
&extra_hits_mask, key_store, h);
- lookup_stage3(idx30, k_slot30, keys, data, &hits, h);
- lookup_stage3(idx31, k_slot31, keys, data, &hits, h);
+ lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
+ lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
}
k_slot30 = k_slot20, k_slot31 = k_slot21;
lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
key_store, h);
- lookup_stage3(idx30, k_slot30, keys, data, &hits, h);
- lookup_stage3(idx31, k_slot31, keys, data, &hits, h);
+ lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
+ lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
k_slot30 = k_slot20, k_slot31 = k_slot21;
idx30 = idx20, idx31 = idx21;
lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
key_store, h);
- lookup_stage3(idx30, k_slot30, keys, data, &hits, h);
- lookup_stage3(idx31, k_slot31, keys, data, &hits, h);
+ lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
+ lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
k_slot30 = k_slot20, k_slot31 = k_slot21;
idx30 = idx20, idx31 = idx21;
- lookup_stage3(idx30, k_slot30, keys, data, &hits, h);
- lookup_stage3(idx31, k_slot31, keys, data, &hits, h);
+ lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
+ lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
/* ignore any items we have already found */
extra_hits_mask &= ~hits;
/* Increment iterator */
(*next)++;
- return (position - 1);
-}
-
-/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
-static int
-rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
-{
- const __m128i k1 = _mm_loadu_si128((const __m128i *) key1);
- const __m128i k2 = _mm_loadu_si128((const __m128i *) key2);
- const __m128i x = _mm_xor_si128(k1, k2);
-
- return !_mm_test_all_zeros(x, x);
-}
-
-static int
-rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
- return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
- rte_hash_k16_cmp_eq((const char *) key1 + 16,
- (const char *) key2 + 16, key_len);
-}
-
-static int
-rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
- return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
- rte_hash_k16_cmp_eq((const char *) key1 + 16,
- (const char *) key2 + 16, key_len) ||
- rte_hash_k16_cmp_eq((const char *) key1 + 32,
- (const char *) key2 + 32, key_len);
-}
-
-static int
-rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
- return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
- rte_hash_k32_cmp_eq((const char *) key1 + 32,
- (const char *) key2 + 32, key_len);
-}
-
-static int
-rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
- return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
- rte_hash_k16_cmp_eq((const char *) key1 + 64,
- (const char *) key2 + 64, key_len);
-}
-
-static int
-rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
- return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
- rte_hash_k32_cmp_eq((const char *) key1 + 64,
- (const char *) key2 + 64, key_len);
-}
-
-static int
-rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
- return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
- rte_hash_k32_cmp_eq((const char *) key1 + 64,
- (const char *) key2 + 64, key_len) ||
- rte_hash_k16_cmp_eq((const char *) key1 + 96,
- (const char *) key2 + 96, key_len);
-}
-
-static int
-rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
-{
- return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
- rte_hash_k64_cmp_eq((const char *) key1 + 64,
- (const char *) key2 + 64, key_len);
+ return position - 1;
}