X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_hash%2Frte_cuckoo_hash.c;h=71b5b768be2d4270d2389905076a9baf44cc48e5;hb=67f64f2e121ef855959ed4109b2a31e3cc147f35;hp=00ba3a3cb42ef9a89753cb37cbf4bf3129dd30f4;hpb=6f71544ce28f09e9aae452954b3b72c448aad2ec;p=dpdk.git diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 00ba3a3cb4..71b5b768be 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -59,6 +59,13 @@ #include #include "rte_hash.h" +#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32) +#include "rte_cmp_x86.h" +#endif + +#if defined(RTE_ARCH_ARM64) +#include "rte_cmp_arm64.h" +#endif TAILQ_HEAD(rte_hash_list, rte_tailq_entry); @@ -78,7 +85,7 @@ EAL_REGISTER_TAILQ(rte_hash_tailq) #endif /* Hash function used if none is specified */ -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 +#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32) #include #define DEFAULT_HASH_FUNC rte_hash_crc #else @@ -93,15 +100,12 @@ EAL_REGISTER_TAILQ(rte_hash_tailq) #define KEY_ALIGNMENT 16 -typedef int (*rte_hash_cmp_eq_t)(const void *key1, const void *key2, size_t key_len); -static int rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len); -static int rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len); -static int rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len); -static int rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len); -static int rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len); -static int rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len); -static int rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len); -static int rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len); +#define LCORE_CACHE_SIZE 8 + +struct lcore_cache { + unsigned len; /**< Cache len */ + void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */ +} __rte_cache_aligned; /** A hash table structure. */ struct rte_hash { @@ -110,8 +114,8 @@ struct rte_hash { uint32_t num_buckets; /**< Number of buckets in table. */ uint32_t key_len; /**< Length of hash key. */ rte_hash_function hash_func; /**< Function used to calculate hash. */ - rte_hash_cmp_eq_t rte_hash_cmp_eq; /**< Function used to compare keys. */ uint32_t hash_func_init_val; /**< Init value used by hash_func. */ + rte_hash_cmp_eq_t rte_hash_cmp_eq; /**< Function used to compare keys. */ uint32_t bucket_bitmask; /**< Bitmask for getting bucket index from hash signature. */ uint32_t key_entry_size; /**< Size of each key entry. */ @@ -122,6 +126,10 @@ struct rte_hash { struct rte_hash_bucket *buckets; /**< Table with buckets storing all the hash values and key indexes to the key table*/ + uint8_t hw_trans_mem_support; /**< Hardware transactional + memory support */ + struct lcore_cache *local_free_slots; + /**< Local cache per lcore, storing some indexes of the free slots */ } __rte_cache_aligned; /* Structure storing both primary and secondary hashes */ @@ -177,6 +185,11 @@ rte_hash_find_existing(const char *name) return h; } +void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) +{ + h->rte_hash_cmp_eq = func; +} + struct rte_hash * rte_hash_create(const struct rte_hash_parameters *params) { @@ -185,9 +198,11 @@ rte_hash_create(const struct rte_hash_parameters *params) struct rte_hash_list *hash_list; struct rte_ring *r = NULL; char hash_name[RTE_HASH_NAMESIZE]; - void *ptr, *k = NULL; + void *k = NULL; void *buckets = NULL; char ring_name[RTE_RING_NAMESIZE]; + unsigned num_key_slots; + unsigned hw_trans_mem_support = 0; unsigned i; hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list); @@ -207,6 +222,10 @@ rte_hash_create(const struct rte_hash_parameters *params) return NULL; } + /* Check extra flags field to check extra options. */ + if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT) + hw_trans_mem_support = 1; + snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name); /* Guarantee there's no existing */ @@ -243,7 +262,18 @@ rte_hash_create(const struct rte_hash_parameters *params) const uint32_t key_entry_size = sizeof(struct rte_hash_key) + params->key_len; /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */ - const uint64_t key_tbl_size = key_entry_size * (params->entries + 1); + if (hw_trans_mem_support) + /* + * Increase number of slots by total number of indices + * that can be stored in the lcore caches + * except for the first cache + */ + num_key_slots = params->entries + (RTE_MAX_LCORE - 1) * + LCORE_CACHE_SIZE + 1; + else + num_key_slots = params->entries + 1; + + const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots; k = rte_zmalloc_socket(NULL, key_tbl_size, RTE_CACHE_LINE_SIZE, params->socket_id); @@ -253,6 +283,12 @@ rte_hash_create(const struct rte_hash_parameters *params) goto err; } +/* + * If x86 architecture is used, select appropriate compare function, + * which may use x86 instrinsics, otherwise use memcmp + */ +#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) ||\ + defined(RTE_ARCH_X86_X32) || defined(RTE_ARCH_ARM64) /* Select function to compare keys */ switch (params->key_len) { case 16: @@ -283,21 +319,24 @@ rte_hash_create(const struct rte_hash_parameters *params) /* If key is not multiple of 16, use generic memcmp */ h->rte_hash_cmp_eq = memcmp; } +#else + h->rte_hash_cmp_eq = memcmp; +#endif snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name); - r = rte_ring_lookup(ring_name); - if (r != NULL) { - /* clear the free ring */ - while (rte_ring_dequeue(r, &ptr) == 0) - rte_pause(); - } else - r = rte_ring_create(ring_name, rte_align32pow2(params->entries + 1), - params->socket_id, 0); + r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots), + params->socket_id, 0); if (r == NULL) { RTE_LOG(ERR, HASH, "memory allocation failed\n"); goto err; } + if (hw_trans_mem_support) { + h->local_free_slots = rte_zmalloc_socket(NULL, + sizeof(struct lcore_cache) * RTE_MAX_LCORE, + RTE_CACHE_LINE_SIZE, params->socket_id); + } + /* Setup hash context */ snprintf(h->name, sizeof(h->name), "%s", params->name); h->entries = params->entries; @@ -310,9 +349,9 @@ rte_hash_create(const struct rte_hash_parameters *params) h->buckets = buckets; h->hash_func = (params->hash_func == NULL) ? DEFAULT_HASH_FUNC : params->hash_func; - h->key_store = k; h->free_slots = r; + h->hw_trans_mem_support = hw_trans_mem_support; /* populate the free slots ring. Entry zero is reserved for key misses */ for (i = 1; i < params->entries + 1; i++) @@ -360,6 +399,10 @@ rte_hash_free(struct rte_hash *h) rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + if (h->hw_trans_mem_support) + rte_free(h->local_free_slots); + + rte_ring_free(h->free_slots); rte_free(h->key_store); rte_free(h->buckets); rte_free(h); @@ -382,7 +425,7 @@ rte_hash_secondary_hash(const hash_sig_t primary_hash) uint32_t tag = primary_hash >> all_bits_shift; - return (primary_hash ^ ((tag + 1) * alt_bits_xor)); + return primary_hash ^ ((tag + 1) * alt_bits_xor); } void @@ -404,6 +447,12 @@ rte_hash_reset(struct rte_hash *h) /* Repopulate the free slots ring. Entry zero is reserved for key misses */ for (i = 1; i < h->entries + 1; i++) rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i)); + + if (h->hw_trans_mem_support) { + /* Reset local caches per lcore */ + for (i = 0; i < RTE_MAX_LCORE; i++) + h->local_free_slots[i].len = 0; + } } /* Search for an entry that can be pushed to its alternative location */ @@ -470,6 +519,23 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) } +/* + * Function called to enqueue back an index in the cache/ring, + * as slot has not being used and it can be used in the + * next addition attempt. + */ +static inline void +enqueue_slot_back(const struct rte_hash *h, + struct lcore_cache *cached_free_slots, + void *slot_id) +{ + if (h->hw_trans_mem_support) { + cached_free_slots->objs[cached_free_slots->len] = slot_id; + cached_free_slots->len++; + } else + rte_ring_sp_enqueue(h->free_slots, slot_id); +} + static inline int32_t __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig, void *data) @@ -479,9 +545,12 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, unsigned i; struct rte_hash_bucket *prim_bkt, *sec_bkt; struct rte_hash_key *new_k, *k, *keys = h->key_store; - void *slot_id; + void *slot_id = NULL; uint32_t new_idx; int ret; + unsigned n_slots; + unsigned lcore_id; + struct lcore_cache *cached_free_slots = NULL; prim_bucket_idx = sig & h->bucket_bitmask; prim_bkt = &h->buckets[prim_bucket_idx]; @@ -493,8 +562,28 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, rte_prefetch0(sec_bkt); /* Get a new slot for storing the new key */ - if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) - return -ENOSPC; + if (h->hw_trans_mem_support) { + lcore_id = rte_lcore_id(); + cached_free_slots = &h->local_free_slots[lcore_id]; + /* Try to get a free slot from the local cache */ + if (cached_free_slots->len == 0) { + /* Need to get another burst of free slots from global ring */ + n_slots = rte_ring_mc_dequeue_burst(h->free_slots, + cached_free_slots->objs, LCORE_CACHE_SIZE); + if (n_slots == 0) + return -ENOSPC; + + cached_free_slots->len += n_slots; + } + + /* Get a free slot from the local cache */ + cached_free_slots->len--; + slot_id = cached_free_slots->objs[cached_free_slots->len]; + } else { + if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) + return -ENOSPC; + } + new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size); rte_prefetch0(new_k); new_idx = (uint32_t)((uintptr_t) slot_id); @@ -502,18 +591,19 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is already inserted in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { if (prim_bkt->signatures[i].current == sig && - prim_bkt->signatures[i].alt == alt_hash) { + prim_bkt->signatures[i].alt == alt_hash) { k = (struct rte_hash_key *) ((char *)keys + prim_bkt->key_idx[i] * h->key_entry_size); if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) { - rte_ring_sp_enqueue(h->free_slots, &slot_id); + /* Enqueue index of free slot back in the ring. */ + enqueue_slot_back(h, cached_free_slots, slot_id); /* Update data */ k->pdata = data; /* * Return index where key is stored, * substracting the first dummy index */ - return (prim_bkt->key_idx[i] - 1); + return prim_bkt->key_idx[i] - 1; } } } @@ -521,18 +611,19 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is already inserted in secondary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { if (sec_bkt->signatures[i].alt == sig && - sec_bkt->signatures[i].current == alt_hash) { + sec_bkt->signatures[i].current == alt_hash) { k = (struct rte_hash_key *) ((char *)keys + sec_bkt->key_idx[i] * h->key_entry_size); if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) { - rte_ring_sp_enqueue(h->free_slots, &slot_id); + /* Enqueue index of free slot back in the ring. */ + enqueue_slot_back(h, cached_free_slots, slot_id); /* Update data */ k->pdata = data; /* * Return index where key is stored, * substracting the first dummy index */ - return (sec_bkt->key_idx[i] - 1); + return sec_bkt->key_idx[i] - 1; } } } @@ -564,14 +655,13 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, prim_bkt->signatures[ret].current = sig; prim_bkt->signatures[ret].alt = alt_hash; prim_bkt->key_idx[ret] = new_idx; - return (new_idx - 1); + return new_idx - 1; } /* Error in addition, store new slot back in the ring and return error */ - rte_ring_sp_enqueue(h->free_slots, - (void *)((uintptr_t) new_idx)); - return ret; + enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx)); + return ret; } int32_t @@ -642,7 +732,7 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, * Return index where key is stored, * substracting the first dummy index */ - return (bkt->key_idx[i] - 1); + return bkt->key_idx[i] - 1; } } } @@ -665,7 +755,7 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, * Return index where key is stored, * substracting the first dummy index */ - return (bkt->key_idx[i] - 1); + return bkt->key_idx[i] - 1; } } } @@ -703,6 +793,34 @@ rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data) return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data); } +static inline void +remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) +{ + unsigned lcore_id, n_slots; + struct lcore_cache *cached_free_slots; + + bkt->signatures[i].sig = NULL_SIGNATURE; + if (h->hw_trans_mem_support) { + lcore_id = rte_lcore_id(); + cached_free_slots = &h->local_free_slots[lcore_id]; + /* Cache full, need to free it. */ + if (cached_free_slots->len == LCORE_CACHE_SIZE) { + /* Need to enqueue the free slots in global ring. */ + n_slots = rte_ring_mp_enqueue_burst(h->free_slots, + cached_free_slots->objs, + LCORE_CACHE_SIZE); + cached_free_slots->len -= n_slots; + } + /* Put index of new free slot in cache. */ + cached_free_slots->objs[cached_free_slots->len] = + (void *)((uintptr_t)bkt->key_idx[i]); + cached_free_slots->len++; + } else { + rte_ring_sp_enqueue(h->free_slots, + (void *)((uintptr_t)bkt->key_idx[i])); + } +} + static inline int32_t __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig) @@ -723,14 +841,13 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) { - bkt->signatures[i].sig = NULL_SIGNATURE; - rte_ring_sp_enqueue(h->free_slots, - (void *)((uintptr_t)bkt->key_idx[i])); + remove_entry(h, bkt, i); + /* * Return index where key is stored, * substracting the first dummy index */ - return (bkt->key_idx[i] - 1); + return bkt->key_idx[i] - 1; } } } @@ -747,14 +864,13 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) { - bkt->signatures[i].sig = NULL_SIGNATURE; - rte_ring_sp_enqueue(h->free_slots, - (void *)((uintptr_t)bkt->key_idx[i])); + remove_entry(h, bkt, i); + /* * Return index where key is stored, * substracting the first dummy index */ - return (bkt->key_idx[i] - 1); + return bkt->key_idx[i] - 1; } } } @@ -858,15 +974,22 @@ lookup_stage2(unsigned idx, hash_sig_t prim_hash, hash_sig_t sec_hash, /* Lookup bulk stage 3: Check if key matches, update hit mask and return data */ static inline void lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * const *keys, - void *data[], uint64_t *hits, const struct rte_hash *h) + const int32_t *positions, void *data[], uint64_t *hits, + const struct rte_hash *h) { unsigned hit; + unsigned key_idx; hit = !h->rte_hash_cmp_eq(key_slot->key, keys[idx], h->key_len); if (data != NULL) data[idx] = key_slot->pdata; - *hits |= (uint64_t)(hit) << idx; + key_idx = positions[idx] + 1; + /* + * If key index is 0, force hit to be 0, in case key to be looked up + * is all zero (as in the dummy slot), which would result in a wrong hit + */ + *hits |= (uint64_t)(hit && !!key_idx) << idx; } static inline void @@ -958,8 +1081,8 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, secondary_bkt21, &k_slot21, positions, &extra_hits_mask, key_store, h); - lookup_stage3(idx30, k_slot30, keys, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, data, &hits, h); + lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); + lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); } k_slot30 = k_slot20, k_slot31 = k_slot21; @@ -985,8 +1108,8 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, secondary_bkt21, &k_slot21, positions, &extra_hits_mask, key_store, h); - lookup_stage3(idx30, k_slot30, keys, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, data, &hits, h); + lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); + lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); k_slot30 = k_slot20, k_slot31 = k_slot21; idx30 = idx20, idx31 = idx21; @@ -1006,14 +1129,14 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, secondary_bkt21, &k_slot21, positions, &extra_hits_mask, key_store, h); - lookup_stage3(idx30, k_slot30, keys, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, data, &hits, h); + lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); + lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); k_slot30 = k_slot20, k_slot31 = k_slot21; idx30 = idx20, idx31 = idx21; - lookup_stage3(idx30, k_slot30, keys, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, data, &hits, h); + lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); + lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); /* ignore any items we have already found */ extra_hits_mask &= ~hits; @@ -1116,76 +1239,5 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32 /* Increment iterator */ (*next)++; - return (position - 1); -} - -/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */ -static int -rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) -{ - const __m128i k1 = _mm_loadu_si128((const __m128i *) key1); - const __m128i k2 = _mm_loadu_si128((const __m128i *) key2); - const __m128i x = _mm_xor_si128(k1, k2); - - return !_mm_test_all_zeros(x, x); -} - -static int -rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k16_cmp_eq(key1, key2, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 16, - (const char *) key2 + 16, key_len); -} - -static int -rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k16_cmp_eq(key1, key2, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 16, - (const char *) key2 + 16, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 32, - (const char *) key2 + 32, key_len); -} - -static int -rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k32_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 32, - (const char *) key2 + 32, key_len); -} - -static int -rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); -} - -static int -rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); -} - -static int -rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k32_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len) || - rte_hash_k16_cmp_eq((const char *) key1 + 96, - (const char *) key2 + 96, key_len); -} - -static int -rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len) -{ - return rte_hash_k64_cmp_eq(key1, key2, key_len) || - rte_hash_k64_cmp_eq((const char *) key1 + 64, - (const char *) key2 + 64, key_len); + return position - 1; }