1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
3 * Copyright(c) 2018 Arm Limited
11 #include <sys/queue.h>
13 #include <rte_common.h>
14 #include <rte_memory.h> /* for definition of RTE_CACHE_LINE_SIZE */
16 #include <rte_prefetch.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_malloc.h>
20 #include <rte_eal_memconfig.h>
21 #include <rte_per_lcore.h>
22 #include <rte_errno.h>
23 #include <rte_string_fns.h>
24 #include <rte_cpuflags.h>
25 #include <rte_rwlock.h>
26 #include <rte_spinlock.h>
28 #include <rte_compat.h>
32 #include "rte_cuckoo_hash.h"
34 #define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET) \
35 for (CURRENT_BKT = START_BUCKET; \
36 CURRENT_BKT != NULL; \
37 CURRENT_BKT = CURRENT_BKT->next)
39 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
41 static struct rte_tailq_elem rte_hash_tailq = {
44 EAL_REGISTER_TAILQ(rte_hash_tailq)
47 rte_hash_find_existing(const char *name)
49 struct rte_hash *h = NULL;
50 struct rte_tailq_entry *te;
51 struct rte_hash_list *hash_list;
53 hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
55 rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
56 TAILQ_FOREACH(te, hash_list, next) {
57 h = (struct rte_hash *) te->data;
58 if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
61 rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
70 static inline struct rte_hash_bucket *
71 rte_hash_get_last_bkt(struct rte_hash_bucket *lst_bkt)
73 while (lst_bkt->next != NULL)
74 lst_bkt = lst_bkt->next;
78 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
80 h->cmp_jump_table_idx = KEY_CUSTOM;
81 h->rte_hash_custom_cmp_eq = func;
85 rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
87 if (h->cmp_jump_table_idx == KEY_CUSTOM)
88 return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len);
90 return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
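/*
 * Illustrative sketch (not part of this file): an application can override
 * the jump-table comparator with its own memcmp-like function through
 * rte_hash_set_cmp_func(). The callback must return 0 when the two keys
 * match, mirroring memcmp() semantics. 'my_key_cmp' and 'handle' are
 * placeholder names used only for this example:
 *
 *   static int
 *   my_key_cmp(const void *key1, const void *key2, size_t key_len)
 *   {
 *           return memcmp(key1, key2, key_len);   // 0 on match
 *   }
 *
 *   rte_hash_set_cmp_func(handle, my_key_cmp);
 */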
94 * We use the higher 16 bits of the hash as the signature value stored in
95 * the table. We use the lower bits for the primary bucket
96 * location. Then we XOR the primary bucket location and the signature
97 * to get the secondary bucket location. This is the same scheme as
98 * proposed in Bin Fan, et al.'s paper
99 * "MemC3: Compact and Concurrent MemCache with Dumber Caching and
100 * Smarter Hashing". The benefit of using
101 * XOR is that one can derive the alternative bucket location
102 * using only the current bucket location and the signature.
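/*
 * Worked example (illustrative only): with 64 buckets, bucket_bitmask is
 * 0x3f. For hash = 0xabcd0012:
 *
 *   sig  = 0xabcd0012 >> 16       = 0xabcd  (short signature)
 *   prim = 0xabcd0012 & 0x3f      = 0x12    (primary bucket)
 *   sec  = (0x12 ^ 0xabcd) & 0x3f = 0x1f    (secondary bucket)
 *
 * and (0x1f ^ 0xabcd) & 0x3f gives back 0x12, so either bucket index plus
 * the signature is enough to recover the other bucket.
 */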
104 static inline uint16_t
105 get_short_sig(const hash_sig_t hash)
110 static inline uint32_t
111 get_prim_bucket_index(const struct rte_hash *h, const hash_sig_t hash)
113 return hash & h->bucket_bitmask;
116 static inline uint32_t
117 get_alt_bucket_index(const struct rte_hash *h,
118 uint32_t cur_bkt_idx, uint16_t sig)
120 return (cur_bkt_idx ^ sig) & h->bucket_bitmask;
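/*
 * Illustrative usage sketch (application side, not part of this file):
 * creating a table with lock-free read/write concurrency. 'params' and
 * 'flow_table' are placeholder names for this example.
 *
 *   struct rte_hash_parameters params = {
 *           .name = "flow_table",
 *           .entries = 1 << 16,
 *           .key_len = sizeof(uint32_t),   // e.g. an IPv4 address
 *           .hash_func = rte_jhash,
 *           .hash_func_init_val = 0,
 *           .socket_id = rte_socket_id(),
 *           .extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF,
 *   };
 *   struct rte_hash *flow_table = rte_hash_create(&params);
 *   if (flow_table == NULL)
 *           rte_panic("cannot create hash table\n");
 */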
124 rte_hash_create(const struct rte_hash_parameters *params)
126 struct rte_hash *h = NULL;
127 struct rte_tailq_entry *te = NULL;
128 struct rte_hash_list *hash_list;
129 struct rte_ring *r = NULL;
130 struct rte_ring *r_ext = NULL;
131 char hash_name[RTE_HASH_NAMESIZE];
133 void *buckets = NULL;
134 void *buckets_ext = NULL;
135 char ring_name[RTE_RING_NAMESIZE];
136 char ext_ring_name[RTE_RING_NAMESIZE];
137 unsigned num_key_slots;
139 unsigned int hw_trans_mem_support = 0, use_local_cache = 0;
140 unsigned int ext_table_support = 0;
141 unsigned int readwrite_concur_support = 0;
142 unsigned int writer_takes_lock = 0;
143 unsigned int no_free_on_del = 0;
144 uint32_t *ext_bkt_to_free = NULL;
145 uint32_t *tbl_chng_cnt = NULL;
146 unsigned int readwrite_concur_lf_support = 0;
148 rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
150 hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
152 if (params == NULL) {
153 RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
157 /* Check for valid parameters */
158 if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
159 (params->entries < RTE_HASH_BUCKET_ENTRIES) ||
160 (params->key_len == 0)) {
162 RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
166 /* Validate correct usage of extra options */
167 if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) &&
168 (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)) {
170 RTE_LOG(ERR, HASH, "rte_hash_create: choose rw concurrency or "
171 "rw concurrency lock free\n");
175 /* Check extra flags field to check extra options. */
176 if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
177 hw_trans_mem_support = 1;
179 if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
181 writer_takes_lock = 1;
184 if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
185 readwrite_concur_support = 1;
186 writer_takes_lock = 1;
189 if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
190 ext_table_support = 1;
192 if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)
195 if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {
196 readwrite_concur_lf_support = 1;
197 /* Enable not freeing internal memory/index on delete */
201 /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
204 * Increase the number of slots by the total number of indices
205 * that can be stored in the lcore caches,
206 * excluding the first cache
208 num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
209 (LCORE_CACHE_SIZE - 1) + 1;
211 num_key_slots = params->entries + 1;
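/*
 * Example (illustrative): with entries = 1024 and, say, RTE_MAX_LCORE = 128
 * and LCORE_CACHE_SIZE = 64, the multi-writer case above reserves
 * 1024 + 127 * 63 + 1 = 9026 key slots, so slots parked in per-lcore caches
 * can never exhaust the caller-visible capacity.
 */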
213 snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
214 /* Create ring (Dummy slot index is not enqueued) */
215 r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
216 params->socket_id, 0);
218 RTE_LOG(ERR, HASH, "memory allocation failed\n");
222 const uint32_t num_buckets = rte_align32pow2(params->entries) /
223 RTE_HASH_BUCKET_ENTRIES;
225 /* Create ring for extendable buckets. */
226 if (ext_table_support) {
227 snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
229 r_ext = rte_ring_create(ext_ring_name,
230 rte_align32pow2(num_buckets + 1),
231 params->socket_id, 0);
234 RTE_LOG(ERR, HASH, "ext buckets memory allocation "
240 snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
242 rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
244 /* Guarantee there is no existing entry with the same name: this is
245 * normally already checked by the ring creation above */
246 TAILQ_FOREACH(te, hash_list, next) {
247 h = (struct rte_hash *) te->data;
248 if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
258 te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
260 RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
264 h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
265 RTE_CACHE_LINE_SIZE, params->socket_id);
268 RTE_LOG(ERR, HASH, "memory allocation failed\n");
272 buckets = rte_zmalloc_socket(NULL,
273 num_buckets * sizeof(struct rte_hash_bucket),
274 RTE_CACHE_LINE_SIZE, params->socket_id);
276 if (buckets == NULL) {
277 RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
281 /* Allocate the same number of extendable buckets */
282 if (ext_table_support) {
283 buckets_ext = rte_zmalloc_socket(NULL,
284 num_buckets * sizeof(struct rte_hash_bucket),
285 RTE_CACHE_LINE_SIZE, params->socket_id);
286 if (buckets_ext == NULL) {
287 RTE_LOG(ERR, HASH, "ext buckets memory allocation "
291 /* Populate the ext bkt ring. We reserve 0, similar to the
292 * key-data slot, in case we later want to
293 * use the bucket index for the linked list, with 0 meaning NULL
296 for (i = 1; i <= num_buckets; i++)
297 rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
299 if (readwrite_concur_lf_support) {
300 ext_bkt_to_free = rte_zmalloc(NULL, sizeof(uint32_t) *
302 if (ext_bkt_to_free == NULL) {
303 RTE_LOG(ERR, HASH, "ext bkt to free memory allocation "
310 const uint32_t key_entry_size =
311 RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,
313 const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
315 k = rte_zmalloc_socket(NULL, key_tbl_size,
316 RTE_CACHE_LINE_SIZE, params->socket_id);
319 RTE_LOG(ERR, HASH, "memory allocation failed\n");
323 tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t),
324 RTE_CACHE_LINE_SIZE, params->socket_id);
326 if (tbl_chng_cnt == NULL) {
327 RTE_LOG(ERR, HASH, "memory allocation failed\n");
332 * If x86 architecture is used, select appropriate compare function,
333 * which may use x86 intrinsics, otherwise use memcmp
335 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
336 /* Select function to compare keys */
337 switch (params->key_len) {
339 h->cmp_jump_table_idx = KEY_16_BYTES;
342 h->cmp_jump_table_idx = KEY_32_BYTES;
345 h->cmp_jump_table_idx = KEY_48_BYTES;
348 h->cmp_jump_table_idx = KEY_64_BYTES;
351 h->cmp_jump_table_idx = KEY_80_BYTES;
354 h->cmp_jump_table_idx = KEY_96_BYTES;
357 h->cmp_jump_table_idx = KEY_112_BYTES;
360 h->cmp_jump_table_idx = KEY_128_BYTES;
363 /* If the key length is not a multiple of 16 bytes, use generic memcmp */
364 h->cmp_jump_table_idx = KEY_OTHER_BYTES;
367 h->cmp_jump_table_idx = KEY_OTHER_BYTES;
370 if (use_local_cache) {
371 h->local_free_slots = rte_zmalloc_socket(NULL,
372 sizeof(struct lcore_cache) * RTE_MAX_LCORE,
373 RTE_CACHE_LINE_SIZE, params->socket_id);
376 /* Default hash function */
377 #if defined(RTE_ARCH_X86)
378 default_hash_func = (rte_hash_function)rte_hash_crc;
379 #elif defined(RTE_ARCH_ARM64)
380 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
381 default_hash_func = (rte_hash_function)rte_hash_crc;
383 /* Setup hash context */
384 strlcpy(h->name, params->name, sizeof(h->name));
385 h->entries = params->entries;
386 h->key_len = params->key_len;
387 h->key_entry_size = key_entry_size;
388 h->hash_func_init_val = params->hash_func_init_val;
390 h->num_buckets = num_buckets;
391 h->bucket_bitmask = h->num_buckets - 1;
392 h->buckets = buckets;
393 h->buckets_ext = buckets_ext;
394 h->free_ext_bkts = r_ext;
395 h->hash_func = (params->hash_func == NULL) ?
396 default_hash_func : params->hash_func;
399 h->ext_bkt_to_free = ext_bkt_to_free;
400 h->tbl_chng_cnt = tbl_chng_cnt;
401 *h->tbl_chng_cnt = 0;
402 h->hw_trans_mem_support = hw_trans_mem_support;
403 h->use_local_cache = use_local_cache;
404 h->readwrite_concur_support = readwrite_concur_support;
405 h->ext_table_support = ext_table_support;
406 h->writer_takes_lock = writer_takes_lock;
407 h->no_free_on_del = no_free_on_del;
408 h->readwrite_concur_lf_support = readwrite_concur_lf_support;
410 #if defined(RTE_ARCH_X86)
411 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
412 h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
414 #elif defined(RTE_ARCH_ARM64)
415 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
416 h->sig_cmp_fn = RTE_HASH_COMPARE_NEON;
419 h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
421 /* Writer threads need to take the lock when:
422 * 1) RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is enabled OR
423 * 2) RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD is enabled
425 if (h->writer_takes_lock) {
426 h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
427 RTE_CACHE_LINE_SIZE);
428 if (h->readwrite_lock == NULL)
431 rte_rwlock_init(h->readwrite_lock);
434 /* Populate free slots ring. Entry zero is reserved for key misses. */
435 for (i = 1; i < num_key_slots; i++)
436 rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
438 te->data = (void *) h;
439 TAILQ_INSERT_TAIL(hash_list, te, next);
440 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
444 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
447 rte_ring_free(r_ext);
451 rte_free(buckets_ext);
453 rte_free(tbl_chng_cnt);
454 rte_free(ext_bkt_to_free);
459 rte_hash_free(struct rte_hash *h)
461 struct rte_tailq_entry *te;
462 struct rte_hash_list *hash_list;
467 hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
469 rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
471 /* find out tailq entry */
472 TAILQ_FOREACH(te, hash_list, next) {
473 if (te->data == (void *) h)
478 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
482 TAILQ_REMOVE(hash_list, te, next);
484 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
486 if (h->use_local_cache)
487 rte_free(h->local_free_slots);
488 if (h->writer_takes_lock)
489 rte_free(h->readwrite_lock);
490 rte_ring_free(h->free_slots);
491 rte_ring_free(h->free_ext_bkts);
492 rte_free(h->key_store);
493 rte_free(h->buckets);
494 rte_free(h->buckets_ext);
495 rte_free(h->tbl_chng_cnt);
496 rte_free(h->ext_bkt_to_free);
502 rte_hash_hash(const struct rte_hash *h, const void *key)
504 /* Calculate the hash value of the key */
505 return h->hash_func(key, h->key_len, h->hash_func_init_val);
509 rte_hash_count(const struct rte_hash *h)
511 uint32_t tot_ring_cnt, cached_cnt = 0;
517 if (h->use_local_cache) {
518 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
519 (LCORE_CACHE_SIZE - 1);
520 for (i = 0; i < RTE_MAX_LCORE; i++)
521 cached_cnt += h->local_free_slots[i].len;
523 ret = tot_ring_cnt - rte_ring_count(h->free_slots) -
526 tot_ring_cnt = h->entries;
527 ret = tot_ring_cnt - rte_ring_count(h->free_slots);
532 /* Read-write locks implemented using rte_rwlock */
534 __hash_rw_writer_lock(const struct rte_hash *h)
536 if (h->writer_takes_lock && h->hw_trans_mem_support)
537 rte_rwlock_write_lock_tm(h->readwrite_lock);
538 else if (h->writer_takes_lock)
539 rte_rwlock_write_lock(h->readwrite_lock);
543 __hash_rw_reader_lock(const struct rte_hash *h)
545 if (h->readwrite_concur_support && h->hw_trans_mem_support)
546 rte_rwlock_read_lock_tm(h->readwrite_lock);
547 else if (h->readwrite_concur_support)
548 rte_rwlock_read_lock(h->readwrite_lock);
552 __hash_rw_writer_unlock(const struct rte_hash *h)
554 if (h->writer_takes_lock && h->hw_trans_mem_support)
555 rte_rwlock_write_unlock_tm(h->readwrite_lock);
556 else if (h->writer_takes_lock)
557 rte_rwlock_write_unlock(h->readwrite_lock);
561 __hash_rw_reader_unlock(const struct rte_hash *h)
563 if (h->readwrite_concur_support && h->hw_trans_mem_support)
564 rte_rwlock_read_unlock_tm(h->readwrite_lock);
565 else if (h->readwrite_concur_support)
566 rte_rwlock_read_unlock(h->readwrite_lock);
570 rte_hash_reset(struct rte_hash *h)
573 uint32_t tot_ring_cnt, i;
578 __hash_rw_writer_lock(h);
579 memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
580 memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
581 *h->tbl_chng_cnt = 0;
583 /* clear the free ring */
584 while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
587 /* clear free extendable bucket ring and memory */
588 if (h->ext_table_support) {
589 memset(h->buckets_ext, 0, h->num_buckets *
590 sizeof(struct rte_hash_bucket));
591 while (rte_ring_dequeue(h->free_ext_bkts, &ptr) == 0)
595 /* Repopulate the free slots ring. Entry zero is reserved for key misses */
596 if (h->use_local_cache)
597 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
598 (LCORE_CACHE_SIZE - 1);
600 tot_ring_cnt = h->entries;
602 for (i = 1; i < tot_ring_cnt + 1; i++)
603 rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
605 /* Repopulate the free ext bkt ring. */
606 if (h->ext_table_support) {
607 for (i = 1; i <= h->num_buckets; i++)
608 rte_ring_sp_enqueue(h->free_ext_bkts,
609 (void *)((uintptr_t) i));
612 if (h->use_local_cache) {
613 /* Reset local caches per lcore */
614 for (i = 0; i < RTE_MAX_LCORE; i++)
615 h->local_free_slots[i].len = 0;
617 __hash_rw_writer_unlock(h);
621 * Function called to enqueue back an index into the cache/ring,
622 * as the slot has not been used and can be reused in the
623 * next addition attempt.
626 enqueue_slot_back(const struct rte_hash *h,
627 struct lcore_cache *cached_free_slots,
630 if (h->use_local_cache) {
631 cached_free_slots->objs[cached_free_slots->len] = slot_id;
632 cached_free_slots->len++;
634 rte_ring_sp_enqueue(h->free_slots, slot_id);
637 /* Search for a key in the bucket and update its data.
638 * The writer holds the lock before calling this.
640 static inline int32_t
641 search_and_update(const struct rte_hash *h, void *data, const void *key,
642 struct rte_hash_bucket *bkt, uint16_t sig)
645 struct rte_hash_key *k, *keys = h->key_store;
647 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
648 if (bkt->sig_current[i] == sig) {
649 k = (struct rte_hash_key *) ((char *)keys +
650 bkt->key_idx[i] * h->key_entry_size);
651 if (rte_hash_cmp_eq(key, k->key, h) == 0) {
652 /* 'pdata' acts as the synchronization point
653 * when an existing hash entry is updated.
654 * Key is not updated in this case.
656 __atomic_store_n(&k->pdata,
660 * Return index where key is stored,
661 * subtracting the first dummy index
663 return bkt->key_idx[i] - 1;
670 /* Only tries to insert at one bucket (@prim_bkt) without trying to push
672 * other entries around. Return 1 if the key already exists, return 0 on success, return -1 if there is no empty entry.
675 static inline int32_t
676 rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
677 struct rte_hash_bucket *prim_bkt,
678 struct rte_hash_bucket *sec_bkt,
679 const struct rte_hash_key *key, void *data,
680 uint16_t sig, uint32_t new_idx,
684 struct rte_hash_bucket *cur_bkt;
687 __hash_rw_writer_lock(h);
688 /* Check if key was inserted after last check but before this
689 * protected region in case of inserting duplicated keys.
691 ret = search_and_update(h, data, key, prim_bkt, sig);
693 __hash_rw_writer_unlock(h);
698 FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
699 ret = search_and_update(h, data, key, cur_bkt, sig);
701 __hash_rw_writer_unlock(h);
707 /* Insert new entry if there is room in the primary
710 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
711 /* Check if slot is available */
712 if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
713 prim_bkt->sig_current[i] = sig;
714 /* Key can be of arbitrary length, so it is
715 * not possible to store it atomically.
716 * Hence the new key element's memory stores
717 * (key as well as data) should be complete
718 * before it is referenced.
720 __atomic_store_n(&prim_bkt->key_idx[i],
726 __hash_rw_writer_unlock(h);
728 if (i != RTE_HASH_BUCKET_ENTRIES)
735 /* Shift buckets along the provided cuckoo_path (@leaf and @leaf_slot) and fill
736 * the path head with the new entry (sig, alt_hash, new_idx).
737 * Return 1 if a matching key is found, return -1 if the cuckoo path is invalidated and the insert fails,
738 * return 0 on success.
741 rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
742 struct rte_hash_bucket *bkt,
743 struct rte_hash_bucket *alt_bkt,
744 const struct rte_hash_key *key, void *data,
745 struct queue_node *leaf, uint32_t leaf_slot,
746 uint16_t sig, uint32_t new_idx,
749 uint32_t prev_alt_bkt_idx;
750 struct rte_hash_bucket *cur_bkt;
751 struct queue_node *prev_node, *curr_node = leaf;
752 struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
753 uint32_t prev_slot, curr_slot = leaf_slot;
756 __hash_rw_writer_lock(h);
758 /* In case the empty slot was taken before entering the protected region */
759 if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) {
760 __hash_rw_writer_unlock(h);
764 /* Check if key was inserted after last check but before this
767 ret = search_and_update(h, data, key, bkt, sig);
769 __hash_rw_writer_unlock(h);
774 FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
775 ret = search_and_update(h, data, key, cur_bkt, sig);
777 __hash_rw_writer_unlock(h);
783 while (likely(curr_node->prev != NULL)) {
784 prev_node = curr_node->prev;
785 prev_bkt = prev_node->bkt;
786 prev_slot = curr_node->prev_slot;
788 prev_alt_bkt_idx = get_alt_bucket_index(h,
789 prev_node->cur_bkt_idx,
790 prev_bkt->sig_current[prev_slot]);
792 if (unlikely(&h->buckets[prev_alt_bkt_idx]
794 /* revert it to empty, otherwise we would end up with duplicated keys */
795 __atomic_store_n(&curr_bkt->key_idx[curr_slot],
798 __hash_rw_writer_unlock(h);
802 if (h->readwrite_concur_lf_support) {
803 /* Inform the previous move. The current move need
804 * not be informed now as the current bucket entry
805 * is present in both primary and secondary.
806 * Since there is one writer, load acquires on
807 * tbl_chng_cnt are not required.
809 __atomic_store_n(h->tbl_chng_cnt,
810 *h->tbl_chng_cnt + 1,
812 /* The store to sig_current should not
813 * move above the store to tbl_chng_cnt.
815 __atomic_thread_fence(__ATOMIC_RELEASE);
818 /* Need to swap current/alt sig to allow later
819 * Cuckoo insert to move elements back to its
820 * primary bucket if available
822 curr_bkt->sig_current[curr_slot] =
823 prev_bkt->sig_current[prev_slot];
824 /* Release the updated bucket entry */
825 __atomic_store_n(&curr_bkt->key_idx[curr_slot],
826 prev_bkt->key_idx[prev_slot],
829 curr_slot = prev_slot;
830 curr_node = prev_node;
831 curr_bkt = curr_node->bkt;
834 if (h->readwrite_concur_lf_support) {
835 /* Inform the previous move. The current move need
836 * not be informed now as the current bucket entry
837 * is present in both primary and secondary.
838 * Since there is one writer, load acquires on
839 * tbl_chng_cnt are not required.
841 __atomic_store_n(h->tbl_chng_cnt,
842 *h->tbl_chng_cnt + 1,
844 /* The store to sig_current should not
845 * move above the store to tbl_chng_cnt.
847 __atomic_thread_fence(__ATOMIC_RELEASE);
850 curr_bkt->sig_current[curr_slot] = sig;
851 /* Release the new bucket entry */
852 __atomic_store_n(&curr_bkt->key_idx[curr_slot],
856 __hash_rw_writer_unlock(h);
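/*
 * Conceptual illustration of the displacement performed above (not the exact
 * code): a BFS from the target bucket finds a path ending in an empty slot,
 * then entries are shifted one step back along that path, freeing a slot in
 * the path head for the new key:
 *
 *   before:  [A B C D]    [E F G H]    [I J K _]
 *   after:   [A B C new]  [E F G D]    [I J K H]
 *
 * Each displaced entry moves to its alternative bucket, so the
 * signature/bucket invariant is preserved for every moved key.
 */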
863 * Make space for a new key, using BFS cuckoo search; multi-writer safe
867 rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
868 struct rte_hash_bucket *bkt,
869 struct rte_hash_bucket *sec_bkt,
870 const struct rte_hash_key *key, void *data,
871 uint16_t sig, uint32_t bucket_idx,
872 uint32_t new_idx, int32_t *ret_val)
875 struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
876 struct queue_node *tail, *head;
877 struct rte_hash_bucket *curr_bkt, *alt_bkt;
878 uint32_t cur_idx, alt_idx;
884 tail->prev_slot = -1;
885 tail->cur_bkt_idx = bucket_idx;
887 /* Cuckoo BFS search */
888 while (likely(tail != head && head <
889 queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
890 RTE_HASH_BUCKET_ENTRIES)) {
891 curr_bkt = tail->bkt;
892 cur_idx = tail->cur_bkt_idx;
893 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
894 if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
895 int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
896 bkt, sec_bkt, key, data,
899 if (likely(ret != -1))
903 /* Enqueue new node and keep prev node info */
904 alt_idx = get_alt_bucket_index(h, cur_idx,
905 curr_bkt->sig_current[i]);
906 alt_bkt = &(h->buckets[alt_idx]);
908 head->cur_bkt_idx = alt_idx;
919 static inline int32_t
920 __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
921 hash_sig_t sig, void *data)
924 uint32_t prim_bucket_idx, sec_bucket_idx;
925 struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
926 struct rte_hash_key *new_k, *keys = h->key_store;
927 void *slot_id = NULL;
928 void *ext_bkt_id = NULL;
929 uint32_t new_idx, bkt_id;
934 struct lcore_cache *cached_free_slots = NULL;
936 struct rte_hash_bucket *last;
938 short_sig = get_short_sig(sig);
939 prim_bucket_idx = get_prim_bucket_index(h, sig);
940 sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
941 prim_bkt = &h->buckets[prim_bucket_idx];
942 sec_bkt = &h->buckets[sec_bucket_idx];
943 rte_prefetch0(prim_bkt);
944 rte_prefetch0(sec_bkt);
946 /* Check if key is already inserted in primary location */
947 __hash_rw_writer_lock(h);
948 ret = search_and_update(h, data, key, prim_bkt, short_sig);
950 __hash_rw_writer_unlock(h);
954 /* Check if key is already inserted in secondary location */
955 FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
956 ret = search_and_update(h, data, key, cur_bkt, short_sig);
958 __hash_rw_writer_unlock(h);
963 __hash_rw_writer_unlock(h);
965 /* Did not find a match, so get a new slot for storing the new key */
966 if (h->use_local_cache) {
967 lcore_id = rte_lcore_id();
968 cached_free_slots = &h->local_free_slots[lcore_id];
969 /* Try to get a free slot from the local cache */
970 if (cached_free_slots->len == 0) {
971 /* Need to get another burst of free slots from global ring */
972 n_slots = rte_ring_mc_dequeue_burst(h->free_slots,
973 cached_free_slots->objs,
974 LCORE_CACHE_SIZE, NULL);
979 cached_free_slots->len += n_slots;
982 /* Get a free slot from the local cache */
983 cached_free_slots->len--;
984 slot_id = cached_free_slots->objs[cached_free_slots->len];
986 if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
991 new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
992 new_idx = (uint32_t)((uintptr_t) slot_id);
994 memcpy(new_k->key, key, h->key_len);
995 /* Key can be of arbitrary length, so it is not possible to store
996 * it atomically. Hence the new key element's memory stores
997 * (key as well as data) should be complete before it is referenced.
998 * 'pdata' acts as the synchronization point when an existing hash
1001 __atomic_store_n(&new_k->pdata,
1005 /* Find an empty slot and insert */
1006 ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
1007 short_sig, new_idx, &ret_val);
1010 else if (ret == 1) {
1011 enqueue_slot_back(h, cached_free_slots, slot_id);
1015 /* Primary bucket full, need to make space for new entry */
1016 ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
1017 short_sig, prim_bucket_idx, new_idx, &ret_val);
1020 else if (ret == 1) {
1021 enqueue_slot_back(h, cached_free_slots, slot_id);
1025 /* Also search secondary bucket to get better occupancy */
1026 ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
1027 short_sig, sec_bucket_idx, new_idx, &ret_val);
1031 else if (ret == 1) {
1032 enqueue_slot_back(h, cached_free_slots, slot_id);
1036 /* If the extendable table is not enabled, the insertion has failed */
1037 if (!h->ext_table_support) {
1038 enqueue_slot_back(h, cached_free_slots, slot_id);
1042 /* Now we need to go through the extendable buckets. The lock is needed
1043 * to protect all operations on the extendable buckets.
1045 __hash_rw_writer_lock(h);
1046 /* We check for duplicates again since the key could have been inserted before the lock was taken */
1047 ret = search_and_update(h, data, key, prim_bkt, short_sig);
1049 enqueue_slot_back(h, cached_free_slots, slot_id);
1053 FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1054 ret = search_and_update(h, data, key, cur_bkt, short_sig);
1056 enqueue_slot_back(h, cached_free_slots, slot_id);
1061 /* Search sec and ext buckets to find an empty entry to insert. */
1062 FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1063 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1064 /* Check if slot is available */
1065 if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
1066 cur_bkt->sig_current[i] = short_sig;
1067 /* Store to signature should not leak after
1068 * the store to key_idx
1070 __atomic_store_n(&cur_bkt->key_idx[i],
1073 __hash_rw_writer_unlock(h);
1079 /* Failed to get an empty entry from extendable buckets. Link a new
1080 * extendable bucket. We first get a free bucket from ring.
1082 if (rte_ring_sc_dequeue(h->free_ext_bkts, &ext_bkt_id) != 0) {
1087 bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
1088 /* Use the first location of the new bucket */
1089 (h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
1090 /* Store to signature should not leak after
1091 * the store to key_idx
1093 __atomic_store_n(&(h->buckets_ext[bkt_id]).key_idx[0],
1096 /* Link the new bucket into the secondary bucket's linked list */
1097 last = rte_hash_get_last_bkt(sec_bkt);
1098 last->next = &h->buckets_ext[bkt_id];
1099 __hash_rw_writer_unlock(h);
1103 __hash_rw_writer_unlock(h);
1109 rte_hash_add_key_with_hash(const struct rte_hash *h,
1110 const void *key, hash_sig_t sig)
1112 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1113 return __rte_hash_add_key_with_hash(h, key, sig, 0);
1117 rte_hash_add_key(const struct rte_hash *h, const void *key)
1119 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1120 return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0);
1124 rte_hash_add_key_with_hash_data(const struct rte_hash *h,
1125 const void *key, hash_sig_t sig, void *data)
1129 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1130 ret = __rte_hash_add_key_with_hash(h, key, sig, data);
1138 rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
1142 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1144 ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data);
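/*
 * Illustrative usage sketch (application side): adding a key together with a
 * user data pointer and handling the possible outcomes. 'flow_table', 'key'
 * and 'flow_ctx' are placeholder names for this example.
 *
 *   int ret = rte_hash_add_key_data(flow_table, &key, flow_ctx);
 *   if (ret == 0) {
 *           // added, or an existing entry's data was updated
 *   } else if (ret == -ENOSPC) {
 *           // table full and no extendable bucket available
 *   } else if (ret == -EINVAL) {
 *           // bad parameters
 *   }
 */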
1151 /* Search one bucket to find the matching key - uses the rw lock */
1152 static inline int32_t
1153 search_one_bucket_l(const struct rte_hash *h, const void *key,
1154 uint16_t sig, void **data,
1155 const struct rte_hash_bucket *bkt)
1158 struct rte_hash_key *k, *keys = h->key_store;
1160 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1161 if (bkt->sig_current[i] == sig &&
1162 bkt->key_idx[i] != EMPTY_SLOT) {
1163 k = (struct rte_hash_key *) ((char *)keys +
1164 bkt->key_idx[i] * h->key_entry_size);
1166 if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1170 * Return index where key is stored,
1171 * subtracting the first dummy index
1173 return bkt->key_idx[i] - 1;
1180 /* Search one bucket to find the matching key - lock-free version */
1181 static inline int32_t
1182 search_one_bucket_lf(const struct rte_hash *h, const void *key, uint16_t sig,
1183 void **data, const struct rte_hash_bucket *bkt)
1188 struct rte_hash_key *k, *keys = h->key_store;
1190 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1191 key_idx = __atomic_load_n(&bkt->key_idx[i],
1193 if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
1194 k = (struct rte_hash_key *) ((char *)keys +
1195 key_idx * h->key_entry_size);
1196 pdata = __atomic_load_n(&k->pdata,
1199 if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1203 * Return index where key is stored,
1204 * subtracting the first dummy index
1213 static inline int32_t
1214 __rte_hash_lookup_with_hash_l(const struct rte_hash *h, const void *key,
1215 hash_sig_t sig, void **data)
1217 uint32_t prim_bucket_idx, sec_bucket_idx;
1218 struct rte_hash_bucket *bkt, *cur_bkt;
1222 short_sig = get_short_sig(sig);
1223 prim_bucket_idx = get_prim_bucket_index(h, sig);
1224 sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1226 bkt = &h->buckets[prim_bucket_idx];
1228 __hash_rw_reader_lock(h);
1230 /* Check if key is in primary location */
1231 ret = search_one_bucket_l(h, key, short_sig, data, bkt);
1233 __hash_rw_reader_unlock(h);
1236 /* Get the secondary bucket */
1237 bkt = &h->buckets[sec_bucket_idx];
1239 /* Check if key is in secondary location */
1240 FOR_EACH_BUCKET(cur_bkt, bkt) {
1241 ret = search_one_bucket_l(h, key, short_sig,
1244 __hash_rw_reader_unlock(h);
1249 __hash_rw_reader_unlock(h);
1254 static inline int32_t
1255 __rte_hash_lookup_with_hash_lf(const struct rte_hash *h, const void *key,
1256 hash_sig_t sig, void **data)
1258 uint32_t prim_bucket_idx, sec_bucket_idx;
1259 struct rte_hash_bucket *bkt, *cur_bkt;
1260 uint32_t cnt_b, cnt_a;
1264 short_sig = get_short_sig(sig);
1265 prim_bucket_idx = get_prim_bucket_index(h, sig);
1266 sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1269 /* Load the table change counter before the lookup
1270 * starts. Acquire semantics will make sure that
1271 * loads in search_one_bucket are not hoisted.
1273 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1276 /* Check if key is in primary location */
1277 bkt = &h->buckets[prim_bucket_idx];
1278 ret = search_one_bucket_lf(h, key, short_sig, data, bkt);
1280 __hash_rw_reader_unlock(h);
1283 /* Get the secondary bucket */
1284 bkt = &h->buckets[sec_bucket_idx];
1286 /* Check if key is in secondary location */
1287 FOR_EACH_BUCKET(cur_bkt, bkt) {
1288 ret = search_one_bucket_lf(h, key, short_sig,
1291 __hash_rw_reader_unlock(h);
1296 /* The loads of sig_current in search_one_bucket
1297 * should not move below the load from tbl_chng_cnt.
1299 __atomic_thread_fence(__ATOMIC_ACQUIRE);
1300 /* Re-read the table change counter to check if the
1301 * table has changed during search. If yes, re-do
1303 * This load should not get hoisted. The load
1304 * acquires on cnt_b, key index in primary bucket
1305 * and key index in secondary bucket will make sure
1306 * that it does not get hoisted.
1308 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
1310 } while (cnt_b != cnt_a);
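/*
 * The loop above follows a seqlock-like pattern; in outline (illustrative
 * pseudo-code, not the exact implementation):
 *
 *   do {
 *           cnt_b = load-acquire(tbl_chng_cnt);   // snapshot the counter
 *           search primary, secondary and ext buckets;
 *           cnt_a = load-acquire(tbl_chng_cnt);   // re-read the counter
 *   } while (cnt_b != cnt_a);                     // retry if a move happened
 *
 * A key can only be missed transiently while a cuckoo move displaces it
 * between its two buckets; the counter mismatch forces the reader to retry
 * during exactly that window.
 */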
1315 static inline int32_t
1316 __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
1317 hash_sig_t sig, void **data)
1319 if (h->readwrite_concur_lf_support)
1320 return __rte_hash_lookup_with_hash_lf(h, key, sig, data);
1322 return __rte_hash_lookup_with_hash_l(h, key, sig, data);
1326 rte_hash_lookup_with_hash(const struct rte_hash *h,
1327 const void *key, hash_sig_t sig)
1329 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1330 return __rte_hash_lookup_with_hash(h, key, sig, NULL);
1334 rte_hash_lookup(const struct rte_hash *h, const void *key)
1336 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1337 return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL);
1341 rte_hash_lookup_with_hash_data(const struct rte_hash *h,
1342 const void *key, hash_sig_t sig, void **data)
1344 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1345 return __rte_hash_lookup_with_hash(h, key, sig, data);
1349 rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
1351 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1352 return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
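/*
 * Illustrative usage sketch: looking a key back up and retrieving the stored
 * data pointer. 'flow_table' and 'key' are placeholder names, as above.
 *
 *   void *data;
 *   int ret = rte_hash_lookup_data(flow_table, &key, &data);
 *   if (ret >= 0) {
 *           // hit: 'data' now holds the pointer stored at add time
 *   } else if (ret == -ENOENT) {
 *           // miss
 *   }
 */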
1356 remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
1358 unsigned lcore_id, n_slots;
1359 struct lcore_cache *cached_free_slots;
1361 if (h->use_local_cache) {
1362 lcore_id = rte_lcore_id();
1363 cached_free_slots = &h->local_free_slots[lcore_id];
1364 /* Local cache full; flush a burst of slots back to the global ring. */
1365 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1366 /* Need to enqueue the free slots in global ring. */
1367 n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1368 cached_free_slots->objs,
1369 LCORE_CACHE_SIZE, NULL);
1370 ERR_IF_TRUE((n_slots == 0),
1371 "%s: could not enqueue free slots in global ring\n",
1373 cached_free_slots->len -= n_slots;
1375 /* Put index of new free slot in cache. */
1376 cached_free_slots->objs[cached_free_slots->len] =
1377 (void *)((uintptr_t)bkt->key_idx[i]);
1378 cached_free_slots->len++;
1380 rte_ring_sp_enqueue(h->free_slots,
1381 (void *)((uintptr_t)bkt->key_idx[i]));
1385 /* Compact the linked list by moving the key in the last entry of the linked list into the slot that was just emptied */
1389 __rte_hash_compact_ll(const struct rte_hash *h,
1390 struct rte_hash_bucket *cur_bkt, int pos) {
1392 struct rte_hash_bucket *last_bkt;
1397 last_bkt = rte_hash_get_last_bkt(cur_bkt);
1399 for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) {
1400 if (last_bkt->key_idx[i] != EMPTY_SLOT) {
1401 cur_bkt->sig_current[pos] = last_bkt->sig_current[i];
1402 __atomic_store_n(&cur_bkt->key_idx[pos],
1403 last_bkt->key_idx[i],
1405 if (h->readwrite_concur_lf_support) {
1406 /* Inform the readers that the table has changed
1407 * Since there is one writer, load acquire on
1408 * tbl_chng_cnt is not required.
1410 __atomic_store_n(h->tbl_chng_cnt,
1411 *h->tbl_chng_cnt + 1,
1413 /* The store to sig_current should
1414 * not move above the store to tbl_chng_cnt.
1416 __atomic_thread_fence(__ATOMIC_RELEASE);
1418 last_bkt->sig_current[i] = NULL_SIGNATURE;
1419 __atomic_store_n(&last_bkt->key_idx[i],
1427 /* Search one bucket and remove the matching key.
1428 * The writer is expected to hold the lock while calling this.
1431 static inline int32_t
1432 search_and_remove(const struct rte_hash *h, const void *key,
1433 struct rte_hash_bucket *bkt, uint16_t sig, int *pos)
1435 struct rte_hash_key *k, *keys = h->key_store;
1439 /* Check if key is in bucket */
1440 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1441 key_idx = __atomic_load_n(&bkt->key_idx[i],
1443 if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
1444 k = (struct rte_hash_key *) ((char *)keys +
1445 key_idx * h->key_entry_size);
1446 if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1447 bkt->sig_current[i] = NULL_SIGNATURE;
1448 /* Free the key store index if
1449 * no_free_on_del is disabled.
1451 if (!h->no_free_on_del)
1452 remove_entry(h, bkt, i);
1454 __atomic_store_n(&bkt->key_idx[i],
1460 * Return index where key is stored,
1461 * subtracting the first dummy index
1470 static inline int32_t
1471 __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
1474 uint32_t prim_bucket_idx, sec_bucket_idx;
1475 struct rte_hash_bucket *prim_bkt, *sec_bkt, *prev_bkt, *last_bkt;
1476 struct rte_hash_bucket *cur_bkt;
1481 short_sig = get_short_sig(sig);
1482 prim_bucket_idx = get_prim_bucket_index(h, sig);
1483 sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1484 prim_bkt = &h->buckets[prim_bucket_idx];
1486 __hash_rw_writer_lock(h);
1487 /* look for key in primary bucket */
1488 ret = search_and_remove(h, key, prim_bkt, short_sig, &pos);
1490 __rte_hash_compact_ll(h, prim_bkt, pos);
1491 last_bkt = prim_bkt->next;
1492 prev_bkt = prim_bkt;
1496 /* Get the secondary bucket */
1497 sec_bkt = &h->buckets[sec_bucket_idx];
1499 FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1500 ret = search_and_remove(h, key, cur_bkt, short_sig, &pos);
1502 __rte_hash_compact_ll(h, cur_bkt, pos);
1503 last_bkt = sec_bkt->next;
1509 __hash_rw_writer_unlock(h);
1512 /* Check whether the last bucket in the chain is empty and can be recycled */
1515 __hash_rw_writer_unlock(h);
1518 while (last_bkt->next) {
1519 prev_bkt = last_bkt;
1520 last_bkt = last_bkt->next;
1523 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1524 if (last_bkt->key_idx[i] != EMPTY_SLOT)
1527 /* Found an empty bucket; recycle it */
1528 if (i == RTE_HASH_BUCKET_ENTRIES) {
1529 prev_bkt->next = NULL;
1530 uint32_t index = last_bkt - h->buckets_ext + 1;
1531 /* Recycle the empty bkt if
1532 * no_free_on_del is disabled.
1534 if (h->no_free_on_del)
1535 /* Store index of an empty ext bkt to be recycled
1536 * on calling rte_hash_del_xxx APIs.
1537 * When lock free read-write concurrency is enabled,
1538 * an empty ext bkt cannot be put into free list
1539 * immediately (as readers might be using it still).
1540 * Hence freeing of the ext bkt is piggy-backed to
1541 * freeing of the key index.
1543 h->ext_bkt_to_free[ret] = index;
1545 rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
1547 __hash_rw_writer_unlock(h);
1552 rte_hash_del_key_with_hash(const struct rte_hash *h,
1553 const void *key, hash_sig_t sig)
1555 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1556 return __rte_hash_del_key_with_hash(h, key, sig);
1560 rte_hash_del_key(const struct rte_hash *h, const void *key)
1562 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1563 return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
1567 rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
1570 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1572 struct rte_hash_key *k, *keys = h->key_store;
1573 k = (struct rte_hash_key *) ((char *) keys + (position + 1) *
1578 __rte_hash_lookup_with_hash(h, *key, rte_hash_hash(h, *key),
1586 int __rte_experimental
1587 rte_hash_free_key_with_position(const struct rte_hash *h,
1588 const int32_t position)
1590 RETURN_IF_TRUE(((h == NULL) || (position == EMPTY_SLOT)), -EINVAL);
1592 unsigned int lcore_id, n_slots;
1593 struct lcore_cache *cached_free_slots;
1594 const int32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
1597 if (position >= total_entries)
1599 if (h->ext_table_support && h->readwrite_concur_lf_support) {
1600 uint32_t index = h->ext_bkt_to_free[position];
1602 /* Recycle empty ext bkt to free list. */
1603 rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
1604 h->ext_bkt_to_free[position] = 0;
1608 if (h->use_local_cache) {
1609 lcore_id = rte_lcore_id();
1610 cached_free_slots = &h->local_free_slots[lcore_id];
1611 /* Local cache full; flush a burst of slots back to the global ring. */
1612 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1613 /* Need to enqueue the free slots in global ring. */
1614 n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1615 cached_free_slots->objs,
1616 LCORE_CACHE_SIZE, NULL);
1617 RETURN_IF_TRUE((n_slots == 0), -EFAULT);
1618 cached_free_slots->len -= n_slots;
1620 /* Put index of new free slot in cache. */
1621 cached_free_slots->objs[cached_free_slots->len] =
1622 (void *)((uintptr_t)position);
1623 cached_free_slots->len++;
1625 rte_ring_sp_enqueue(h->free_slots,
1626 (void *)((uintptr_t)position));
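/*
 * Illustrative sketch of the deferred-free workflow used with
 * RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL (the lock-free flag enables it
 * implicitly): the writer deletes the key, waits until no reader can still
 * be referencing it, and only then returns the slot. 'flow_table', 'key' and
 * 'wait_for_readers_to_quiesce' are placeholders; the quiescence mechanism
 * is application-specific.
 *
 *   int32_t pos = rte_hash_del_key(flow_table, &key);
 *   if (pos >= 0) {
 *           wait_for_readers_to_quiesce();
 *           rte_hash_free_key_with_position(flow_table, pos);
 *   }
 */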
1633 compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
1634 const struct rte_hash_bucket *prim_bkt,
1635 const struct rte_hash_bucket *sec_bkt,
1637 enum rte_hash_sig_compare_function sig_cmp_fn)
1641 /* In the match mask, the first bit of every two-bit pair indicates a match */
1642 switch (sig_cmp_fn) {
1643 #if defined(RTE_MACHINE_CPUFLAG_SSE2)
1644 case RTE_HASH_COMPARE_SSE:
1645 /* Compare all signatures in the bucket */
1646 *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1648 (__m128i const *)prim_bkt->sig_current),
1649 _mm_set1_epi16(sig)));
1650 /* Compare all signatures in the bucket */
1651 *sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1653 (__m128i const *)sec_bkt->sig_current),
1654 _mm_set1_epi16(sig)));
1656 #elif defined(RTE_MACHINE_CPUFLAG_NEON)
1657 case RTE_HASH_COMPARE_NEON: {
1658 uint16x8_t vmat, vsig, x;
1660 int16x8_t shift = {-15, -13, -11, -9, -7, -5, -3, -1};
1662 vsig = vld1q_dup_u16((uint16_t const *)&sig);
1663 /* Compare all signatures in the primary bucket */
1664 vmat = vceqq_u16(vsig,
1665 vld1q_u16((uint16_t const *)prim_bkt->sig_current));
1666 x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), shift);
1667 x64 = vpaddlq_u32(vpaddlq_u16(x));
1668 *prim_hash_matches = (uint32_t)(vgetq_lane_u64(x64, 0) +
1669 vgetq_lane_u64(x64, 1));
1670 /* Compare all signatures in the secondary bucket */
1671 vmat = vceqq_u16(vsig,
1672 vld1q_u16((uint16_t const *)sec_bkt->sig_current));
1673 x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), shift);
1674 x64 = vpaddlq_u32(vpaddlq_u16(x));
1675 *sec_hash_matches = (uint32_t)(vgetq_lane_u64(x64, 0) +
1676 vgetq_lane_u64(x64, 1)); }
1680 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1681 *prim_hash_matches |=
1682 ((sig == prim_bkt->sig_current[i]) << (i << 1));
1683 *sec_hash_matches |=
1684 ((sig == sec_bkt->sig_current[i]) << (i << 1));
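/*
 * Example of the resulting mask layout (illustrative): with
 * RTE_HASH_BUCKET_ENTRIES assumed to be 8, matches on entries 0 and 3 yield
 * the mask 0x41 (bit 0 for entry 0, bit 6 for entry 3); i.e. the low bit of
 * each two-bit pair marks the match, as noted above.
 */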
1689 #define PREFETCH_OFFSET 4
1691 __rte_hash_lookup_bulk_l(const struct rte_hash *h, const void **keys,
1692 int32_t num_keys, int32_t *positions,
1693 uint64_t *hit_mask, void *data[])
1698 uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
1699 uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
1700 uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
1701 uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
1702 const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1703 const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1704 uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1705 uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1706 struct rte_hash_bucket *cur_bkt, *next_bkt;
1708 /* Prefetch first keys */
1709 for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
1710 rte_prefetch0(keys[i]);
1713 * Prefetch the rest of the keys, calculate the primary and
1714 * secondary buckets and prefetch them
1716 for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
1717 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
1719 prim_hash[i] = rte_hash_hash(h, keys[i]);
1721 sig[i] = get_short_sig(prim_hash[i]);
1722 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1723 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1725 primary_bkt[i] = &h->buckets[prim_index[i]];
1726 secondary_bkt[i] = &h->buckets[sec_index[i]];
1728 rte_prefetch0(primary_bkt[i]);
1729 rte_prefetch0(secondary_bkt[i]);
1732 /* Calculate and prefetch rest of the buckets */
1733 for (; i < num_keys; i++) {
1734 prim_hash[i] = rte_hash_hash(h, keys[i]);
1736 sig[i] = get_short_sig(prim_hash[i]);
1737 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1738 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1740 primary_bkt[i] = &h->buckets[prim_index[i]];
1741 secondary_bkt[i] = &h->buckets[sec_index[i]];
1743 rte_prefetch0(primary_bkt[i]);
1744 rte_prefetch0(secondary_bkt[i]);
1747 __hash_rw_reader_lock(h);
1749 /* Compare signatures and prefetch key slot of first hit */
1750 for (i = 0; i < num_keys; i++) {
1751 compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1752 primary_bkt[i], secondary_bkt[i],
1753 sig[i], h->sig_cmp_fn);
1755 if (prim_hitmask[i]) {
1756 uint32_t first_hit =
1757 __builtin_ctzl(prim_hitmask[i])
1760 primary_bkt[i]->key_idx[first_hit];
1761 const struct rte_hash_key *key_slot =
1762 (const struct rte_hash_key *)(
1763 (const char *)h->key_store +
1764 key_idx * h->key_entry_size);
1765 rte_prefetch0(key_slot);
1769 if (sec_hitmask[i]) {
1770 uint32_t first_hit =
1771 __builtin_ctzl(sec_hitmask[i])
1774 secondary_bkt[i]->key_idx[first_hit];
1775 const struct rte_hash_key *key_slot =
1776 (const struct rte_hash_key *)(
1777 (const char *)h->key_store +
1778 key_idx * h->key_entry_size);
1779 rte_prefetch0(key_slot);
1783 /* Compare keys, checking hits in the primary bucket first */
1784 for (i = 0; i < num_keys; i++) {
1785 positions[i] = -ENOENT;
1786 while (prim_hitmask[i]) {
1787 uint32_t hit_index =
1788 __builtin_ctzl(prim_hitmask[i])
1791 primary_bkt[i]->key_idx[hit_index];
1792 const struct rte_hash_key *key_slot =
1793 (const struct rte_hash_key *)(
1794 (const char *)h->key_store +
1795 key_idx * h->key_entry_size);
1798 * If key index is 0, do not compare key,
1799 * as it is checking the dummy slot
1803 key_slot->key, keys[i], h)) {
1805 data[i] = key_slot->pdata;
1808 positions[i] = key_idx - 1;
1811 prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
1814 while (sec_hitmask[i]) {
1815 uint32_t hit_index =
1816 __builtin_ctzl(sec_hitmask[i])
1819 secondary_bkt[i]->key_idx[hit_index];
1820 const struct rte_hash_key *key_slot =
1821 (const struct rte_hash_key *)(
1822 (const char *)h->key_store +
1823 key_idx * h->key_entry_size);
1826 * If key index is 0, do not compare key,
1827 * as it is checking the dummy slot
1832 key_slot->key, keys[i], h)) {
1834 data[i] = key_slot->pdata;
1837 positions[i] = key_idx - 1;
1840 sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
1846 /* All keys found; no need to go through the ext buckets */
1847 if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
1848 if (hit_mask != NULL)
1850 __hash_rw_reader_unlock(h);
1854 /* need to check ext buckets for match */
1855 for (i = 0; i < num_keys; i++) {
1856 if ((hits & (1ULL << i)) != 0)
1858 next_bkt = secondary_bkt[i]->next;
1859 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
1861 ret = search_one_bucket_l(h, keys[i],
1862 sig[i], &data[i], cur_bkt);
1864 ret = search_one_bucket_l(h, keys[i],
1865 sig[i], NULL, cur_bkt);
1874 __hash_rw_reader_unlock(h);
1876 if (hit_mask != NULL)
1881 __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
1882 int32_t num_keys, int32_t *positions,
1883 uint64_t *hit_mask, void *data[])
1888 uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
1889 uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
1890 uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
1891 uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
1892 const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1893 const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1894 uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1895 uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1896 struct rte_hash_bucket *cur_bkt, *next_bkt;
1897 void *pdata[RTE_HASH_LOOKUP_BULK_MAX];
1898 uint32_t cnt_b, cnt_a;
1900 /* Prefetch first keys */
1901 for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
1902 rte_prefetch0(keys[i]);
1905 * Prefetch the rest of the keys, calculate the primary and
1906 * secondary buckets and prefetch them
1908 for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
1909 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
1911 prim_hash[i] = rte_hash_hash(h, keys[i]);
1913 sig[i] = get_short_sig(prim_hash[i]);
1914 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1915 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1917 primary_bkt[i] = &h->buckets[prim_index[i]];
1918 secondary_bkt[i] = &h->buckets[sec_index[i]];
1920 rte_prefetch0(primary_bkt[i]);
1921 rte_prefetch0(secondary_bkt[i]);
1924 /* Calculate and prefetch rest of the buckets */
1925 for (; i < num_keys; i++) {
1926 prim_hash[i] = rte_hash_hash(h, keys[i]);
1928 sig[i] = get_short_sig(prim_hash[i]);
1929 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1930 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1932 primary_bkt[i] = &h->buckets[prim_index[i]];
1933 secondary_bkt[i] = &h->buckets[sec_index[i]];
1935 rte_prefetch0(primary_bkt[i]);
1936 rte_prefetch0(secondary_bkt[i]);
1939 for (i = 0; i < num_keys; i++)
1940 positions[i] = -ENOENT;
1943 /* Load the table change counter before the lookup
1944 * starts. Acquire semantics will make sure that
1945 * loads in compare_signatures are not hoisted.
1947 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1950 /* Compare signatures and prefetch key slot of first hit */
1951 for (i = 0; i < num_keys; i++) {
1952 compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1953 primary_bkt[i], secondary_bkt[i],
1954 sig[i], h->sig_cmp_fn);
1956 if (prim_hitmask[i]) {
1957 uint32_t first_hit =
1958 __builtin_ctzl(prim_hitmask[i])
1961 primary_bkt[i]->key_idx[first_hit];
1962 const struct rte_hash_key *key_slot =
1963 (const struct rte_hash_key *)(
1964 (const char *)h->key_store +
1965 key_idx * h->key_entry_size);
1966 rte_prefetch0(key_slot);
1970 if (sec_hitmask[i]) {
1971 uint32_t first_hit =
1972 __builtin_ctzl(sec_hitmask[i])
1975 secondary_bkt[i]->key_idx[first_hit];
1976 const struct rte_hash_key *key_slot =
1977 (const struct rte_hash_key *)(
1978 (const char *)h->key_store +
1979 key_idx * h->key_entry_size);
1980 rte_prefetch0(key_slot);
1984 /* Compare keys, checking hits in the primary bucket first */
1985 for (i = 0; i < num_keys; i++) {
1986 while (prim_hitmask[i]) {
1987 uint32_t hit_index =
1988 __builtin_ctzl(prim_hitmask[i])
1992 &primary_bkt[i]->key_idx[hit_index],
1994 const struct rte_hash_key *key_slot =
1995 (const struct rte_hash_key *)(
1996 (const char *)h->key_store +
1997 key_idx * h->key_entry_size);
1999 if (key_idx != EMPTY_SLOT)
2000 pdata[i] = __atomic_load_n(
2004 * If key index is 0, do not compare key,
2005 * as it is checking the dummy slot
2009 key_slot->key, keys[i], h)) {
2014 positions[i] = key_idx - 1;
2017 prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
2020 while (sec_hitmask[i]) {
2021 uint32_t hit_index =
2022 __builtin_ctzl(sec_hitmask[i])
2026 &secondary_bkt[i]->key_idx[hit_index],
2028 const struct rte_hash_key *key_slot =
2029 (const struct rte_hash_key *)(
2030 (const char *)h->key_store +
2031 key_idx * h->key_entry_size);
2033 if (key_idx != EMPTY_SLOT)
2034 pdata[i] = __atomic_load_n(
2038 * If key index is 0, do not compare key,
2039 * as it is checking the dummy slot
2044 key_slot->key, keys[i], h)) {
2049 positions[i] = key_idx - 1;
2052 sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
2058 /* All keys found; no need to go through the ext buckets */
2059 if (hits == ((1ULL << num_keys) - 1)) {
2060 if (hit_mask != NULL)
2064 /* need to check ext buckets for match */
2065 if (h->ext_table_support) {
2066 for (i = 0; i < num_keys; i++) {
2067 if ((hits & (1ULL << i)) != 0)
2069 next_bkt = secondary_bkt[i]->next;
2070 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
2072 ret = search_one_bucket_lf(h,
2076 ret = search_one_bucket_lf(h,
2087 /* The loads of sig_current in compare_signatures
2088 * should not move below the load from tbl_chng_cnt.
2090 __atomic_thread_fence(__ATOMIC_ACQUIRE);
2091 /* Re-read the table change counter to check if the
2092 * table has changed during search. If yes, re-do
2094 * This load should not get hoisted. The load
2095 * acquires on cnt_b, primary key index and secondary
2096 * key index will make sure that it does not get
2099 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
2101 } while (cnt_b != cnt_a);
2103 if (hit_mask != NULL)
2108 __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
2109 int32_t num_keys, int32_t *positions,
2110 uint64_t *hit_mask, void *data[])
2112 if (h->readwrite_concur_lf_support)
2113 __rte_hash_lookup_bulk_lf(h, keys, num_keys, positions,
2116 __rte_hash_lookup_bulk_l(h, keys, num_keys, positions,
2121 rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
2122 uint32_t num_keys, int32_t *positions)
2124 RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
2125 (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2126 (positions == NULL)), -EINVAL);
2128 __rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL);
2133 rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
2134 uint32_t num_keys, uint64_t *hit_mask, void *data[])
2136 RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
2137 (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2138 (hit_mask == NULL)), -EINVAL);
2140 int32_t positions[num_keys];
2142 __rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);
2144 /* Return number of hits */
2145 return __builtin_popcountl(*hit_mask);
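/*
 * Illustrative usage sketch of the bulk API: looking up a burst of keys in a
 * single call. 'flow_table', 'keys', 'data', 'handle_hit' and BURST (which
 * must not exceed RTE_HASH_LOOKUP_BULK_MAX) are placeholder names for this
 * example.
 *
 *   const void *keys[BURST];
 *   void *data[BURST];
 *   uint64_t hit_mask;
 *
 *   rte_hash_lookup_bulk_data(flow_table, keys, BURST, &hit_mask, data);
 *   for (unsigned int i = 0; i < BURST; i++)
 *           if (hit_mask & (1ULL << i))
 *                   handle_hit(keys[i], data[i]);
 */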
2149 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
2151 uint32_t bucket_idx, idx, position;
2152 struct rte_hash_key *next_key;
2154 RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
2156 const uint32_t total_entries_main = h->num_buckets *
2157 RTE_HASH_BUCKET_ENTRIES;
2158 const uint32_t total_entries = total_entries_main << 1;
2160 /* Out of bounds of all buckets (both main table and ext table) */
2161 if (*next >= total_entries_main)
2164 /* Calculate bucket and index of current iterator */
2165 bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
2166 idx = *next % RTE_HASH_BUCKET_ENTRIES;
2168 /* If current position is empty, go to the next one */
2169 while ((position = __atomic_load_n(&h->buckets[bucket_idx].key_idx[idx],
2170 __ATOMIC_ACQUIRE)) == EMPTY_SLOT) {
2173 if (*next == total_entries_main)
2175 bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
2176 idx = *next % RTE_HASH_BUCKET_ENTRIES;
2179 __hash_rw_reader_lock(h);
2180 next_key = (struct rte_hash_key *) ((char *)h->key_store +
2181 position * h->key_entry_size);
2182 /* Return key and data */
2183 *key = next_key->key;
2184 *data = next_key->pdata;
2186 __hash_rw_reader_unlock(h);
2188 /* Increment iterator */
2191 return position - 1;
2193 /* Begin to iterate extendable buckets */
2195 /* Out of the total bounds, or the ext bucket feature is not enabled */
2196 if (*next >= total_entries || !h->ext_table_support)
2199 bucket_idx = (*next - total_entries_main) / RTE_HASH_BUCKET_ENTRIES;
2200 idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
2202 while ((position = h->buckets_ext[bucket_idx].key_idx[idx]) == EMPTY_SLOT) {
2204 if (*next == total_entries)
2206 bucket_idx = (*next - total_entries_main) /
2207 RTE_HASH_BUCKET_ENTRIES;
2208 idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
2210 __hash_rw_reader_lock(h);
2211 next_key = (struct rte_hash_key *) ((char *)h->key_store +
2212 position * h->key_entry_size);
2213 /* Return key and data */
2214 *key = next_key->key;
2215 *data = next_key->pdata;
2217 __hash_rw_reader_unlock(h);
2219 /* Increment iterator */
2221 return position - 1;
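/*
 * Illustrative usage sketch of the iterator: walking every stored entry.
 * 'flow_table' and 'dump_entry' are placeholder names; 'next' must start at
 * 0 and is advanced by the library between calls.
 *
 *   const void *key;
 *   void *data;
 *   uint32_t next = 0;
 *
 *   while (rte_hash_iterate(flow_table, &key, &data, &next) >= 0)
 *           dump_entry(key, data);
 */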