1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
3 * Copyright(c) 2018 Arm Limited
11 #include <sys/queue.h>
13 #include <rte_common.h>
14 #include <rte_memory.h> /* for definition of RTE_CACHE_LINE_SIZE */
16 #include <rte_prefetch.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_malloc.h>
20 #include <rte_eal_memconfig.h>
21 #include <rte_per_lcore.h>
22 #include <rte_errno.h>
23 #include <rte_string_fns.h>
24 #include <rte_cpuflags.h>
25 #include <rte_rwlock.h>
26 #include <rte_spinlock.h>
28 #include <rte_compat.h>
32 #include "rte_cuckoo_hash.h"
/* Walk a bucket and all of its chained extendable buckets via ->next. */
34 #define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET) \
35 for (CURRENT_BKT = START_BUCKET; \
36 CURRENT_BKT != NULL; \
37 CURRENT_BKT = CURRENT_BKT->next)
/* Global EAL tailq of all rte_hash instances; registered at init so
 * tables can be found by name (see rte_hash_find_existing).
 * NOTE(review): the initializer body of rte_hash_tailq is elided here.
 */
39 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
41 static struct rte_tailq_elem rte_hash_tailq = {
44 EAL_REGISTER_TAILQ(rte_hash_tailq)
/* Look up an existing hash table by its name in the EAL tailq.
 * Scans under the tailq read lock; strncmp bounds the comparison to
 * RTE_HASH_NAMESIZE. NOTE(review): the trailing not-found handling and
 * return statement fall outside this view.
 */
47 rte_hash_find_existing(const char *name)
49 struct rte_hash *h = NULL;
50 struct rte_tailq_entry *te;
51 struct rte_hash_list *hash_list;
53 hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
55 rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
56 TAILQ_FOREACH(te, hash_list, next) {
57 h = (struct rte_hash *) te->data;
58 if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
61 rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
/* Follow the ->next chain of extendable buckets to its final element.
 * Used when appending a freshly linked extendable bucket.
 */
70 static inline struct rte_hash_bucket *
71 rte_hash_get_last_bkt(struct rte_hash_bucket *lst_bkt)
73 while (lst_bkt->next != NULL)
74 lst_bkt = lst_bkt->next;
/* Install a user-supplied key comparison function; rte_hash_cmp_eq()
 * dispatches to it when cmp_jump_table_idx == KEY_CUSTOM.
 */
78 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
80 h->cmp_jump_table_idx = KEY_CUSTOM;
81 h->rte_hash_custom_cmp_eq = func;
/* Compare two keys of h->key_len bytes. Returns 0 on equality
 * (memcmp-style). Uses the custom comparator if one was installed,
 * otherwise dispatches through the size-specialized jump table.
 */
85 rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
87 if (h->cmp_jump_table_idx == KEY_CUSTOM)
88 return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len);
90 return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
94 * We use higher 16 bits of hash as the signature value stored in table.
95 * We use the lower bits for the primary bucket
96 * location. Then we XOR primary bucket location and the signature
97 * to get the secondary bucket location. This is same as
98 * proposed in Bin Fan, et al's paper
99 * "MemC3: Compact and Concurrent MemCache with Dumber Caching and
100 * Smarter Hashing". The benefit to use
101 * XOR is that one could derive the alternative bucket location
102 * by only using the current bucket location and the signature.
/* Extract the 16-bit short signature stored in the bucket from a full
 * 32-bit hash. Per the design comment above, this is the higher 16 bits
 * of the hash. NOTE(review): the return expression is elided from this view.
 */
104 static inline uint16_t
105 get_short_sig(const hash_sig_t hash)
/* Primary bucket index: low bits of the hash, masked to the (power-of-2)
 * bucket count via bucket_bitmask.
 */
110 static inline uint32_t
111 get_prim_bucket_index(const struct rte_hash *h, const hash_sig_t hash)
113 return hash & h->bucket_bitmask;
/* Alternative (secondary) bucket index: XOR of the current bucket index
 * with the short signature, masked to the table size. XOR makes the
 * mapping an involution, so the same call maps sec -> prim as well.
 */
116 static inline uint32_t
117 get_alt_bucket_index(const struct rte_hash *h,
118 uint32_t cur_bkt_idx, uint16_t sig)
120 return (cur_bkt_idx ^ sig) & h->bucket_bitmask;
/* Create a new cuckoo hash table from @params.
 * Allocates: free-slot ring, optional extendable-bucket ring, the
 * rte_hash handle, primary buckets, optional extendable buckets, the
 * key store, and the table-change counter; then registers the table on
 * the global tailq. NOTE(review): many lines (goto error paths, closing
 * braces, some assignments) are elided from this view; error unwinding
 * at the bottom frees everything allocated so far.
 */
124 rte_hash_create(const struct rte_hash_parameters *params)
126 struct rte_hash *h = NULL;
127 struct rte_tailq_entry *te = NULL;
128 struct rte_hash_list *hash_list;
129 struct rte_ring *r = NULL;
130 struct rte_ring *r_ext = NULL;
131 char hash_name[RTE_HASH_NAMESIZE];
133 void *buckets = NULL;
134 void *buckets_ext = NULL;
135 char ring_name[RTE_RING_NAMESIZE];
136 char ext_ring_name[RTE_RING_NAMESIZE];
137 unsigned num_key_slots;
139 unsigned int hw_trans_mem_support = 0, use_local_cache = 0;
140 unsigned int ext_table_support = 0;
141 unsigned int readwrite_concur_support = 0;
142 unsigned int writer_takes_lock = 0;
143 unsigned int no_free_on_del = 0;
144 uint32_t *ext_bkt_to_free = NULL;
145 uint32_t *tbl_chng_cnt = NULL;
146 unsigned int readwrite_concur_lf_support = 0;
148 rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
150 hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
152 if (params == NULL) {
153 RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
157 /* Check for valid parameters */
158 if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
159 (params->entries < RTE_HASH_BUCKET_ENTRIES) ||
160 (params->key_len == 0)) {
162 RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
166 /* Validate correct usage of extra options */
167 if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) &&
168 (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)) {
170 RTE_LOG(ERR, HASH, "rte_hash_create: choose rw concurrency or "
171 "rw concurrency lock free\n");
175 /* Check extra flags field to check extra options. */
176 if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
177 hw_trans_mem_support = 1;
179 if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
181 writer_takes_lock = 1;
184 if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
185 readwrite_concur_support = 1;
186 writer_takes_lock = 1;
189 if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
190 ext_table_support = 1;
192 if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)
195 if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {
196 readwrite_concur_lf_support = 1;
197 /* Enable not freeing internal memory/index on delete */
201 /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
204 * Increase number of slots by total number of indices
205 * that can be stored in the lcore caches
206 * except for the first cache
208 num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
209 (LCORE_CACHE_SIZE - 1) + 1;
211 num_key_slots = params->entries + 1;
213 snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
214 /* Create ring (Dummy slot index is not enqueued) */
215 r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
216 params->socket_id, 0);
218 RTE_LOG(ERR, HASH, "memory allocation failed\n");
222 const uint32_t num_buckets = rte_align32pow2(params->entries) /
223 RTE_HASH_BUCKET_ENTRIES;
225 /* Create ring for extendable buckets. */
226 if (ext_table_support) {
227 snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
229 r_ext = rte_ring_create(ext_ring_name,
230 rte_align32pow2(num_buckets + 1),
231 params->socket_id, 0);
234 RTE_LOG(ERR, HASH, "ext buckets memory allocation "
240 snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
242 rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
244 /* guarantee there's no existing: this is normally already checked
245 * by ring creation above */
246 TAILQ_FOREACH(te, hash_list, next) {
247 h = (struct rte_hash *) te->data;
248 if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
258 te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
260 RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
264 h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
265 RTE_CACHE_LINE_SIZE, params->socket_id);
268 RTE_LOG(ERR, HASH, "memory allocation failed\n");
272 buckets = rte_zmalloc_socket(NULL,
273 num_buckets * sizeof(struct rte_hash_bucket),
274 RTE_CACHE_LINE_SIZE, params->socket_id);
276 if (buckets == NULL) {
277 RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
281 /* Allocate same number of extendable buckets */
282 if (ext_table_support) {
283 buckets_ext = rte_zmalloc_socket(NULL,
284 num_buckets * sizeof(struct rte_hash_bucket),
285 RTE_CACHE_LINE_SIZE, params->socket_id);
286 if (buckets_ext == NULL) {
287 RTE_LOG(ERR, HASH, "ext buckets memory allocation "
291 /* Populate ext bkt ring. We reserve 0 similar to the
292 * key-data slot, just in case in future we want to
293 * use bucket index for the linked list and 0 means NULL
296 for (i = 1; i <= num_buckets; i++)
297 rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
299 if (readwrite_concur_lf_support) {
300 ext_bkt_to_free = rte_zmalloc(NULL, sizeof(uint32_t) *
302 if (ext_bkt_to_free == NULL) {
303 RTE_LOG(ERR, HASH, "ext bkt to free memory allocation "
310 const uint32_t key_entry_size =
311 RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,
313 const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
315 k = rte_zmalloc_socket(NULL, key_tbl_size,
316 RTE_CACHE_LINE_SIZE, params->socket_id);
319 RTE_LOG(ERR, HASH, "memory allocation failed\n");
323 tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t),
324 RTE_CACHE_LINE_SIZE, params->socket_id);
326 if (tbl_chng_cnt == NULL) {
327 RTE_LOG(ERR, HASH, "memory allocation failed\n");
332 * If x86 architecture is used, select appropriate compare function,
333 * which may use x86 intrinsics, otherwise use memcmp
335 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
336 /* Select function to compare keys */
337 switch (params->key_len) {
339 h->cmp_jump_table_idx = KEY_16_BYTES;
342 h->cmp_jump_table_idx = KEY_32_BYTES;
345 h->cmp_jump_table_idx = KEY_48_BYTES;
348 h->cmp_jump_table_idx = KEY_64_BYTES;
351 h->cmp_jump_table_idx = KEY_80_BYTES;
354 h->cmp_jump_table_idx = KEY_96_BYTES;
357 h->cmp_jump_table_idx = KEY_112_BYTES;
360 h->cmp_jump_table_idx = KEY_128_BYTES;
363 /* If key is not multiple of 16, use generic memcmp */
364 h->cmp_jump_table_idx = KEY_OTHER_BYTES;
367 h->cmp_jump_table_idx = KEY_OTHER_BYTES;
370 if (use_local_cache) {
371 h->local_free_slots = rte_zmalloc_socket(NULL,
372 sizeof(struct lcore_cache) * RTE_MAX_LCORE,
373 RTE_CACHE_LINE_SIZE, params->socket_id);
376 /* Default hash function */
377 #if defined(RTE_ARCH_X86)
378 default_hash_func = (rte_hash_function)rte_hash_crc;
379 #elif defined(RTE_ARCH_ARM64)
380 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
381 default_hash_func = (rte_hash_function)rte_hash_crc;
383 /* Setup hash context */
384 strlcpy(h->name, params->name, sizeof(h->name));
385 h->entries = params->entries;
386 h->key_len = params->key_len;
387 h->key_entry_size = key_entry_size;
388 h->hash_func_init_val = params->hash_func_init_val;
390 h->num_buckets = num_buckets;
391 h->bucket_bitmask = h->num_buckets - 1;
392 h->buckets = buckets;
393 h->buckets_ext = buckets_ext;
394 h->free_ext_bkts = r_ext;
395 h->hash_func = (params->hash_func == NULL) ?
396 default_hash_func : params->hash_func;
399 h->ext_bkt_to_free = ext_bkt_to_free;
400 h->tbl_chng_cnt = tbl_chng_cnt;
401 *h->tbl_chng_cnt = 0;
402 h->hw_trans_mem_support = hw_trans_mem_support;
403 h->use_local_cache = use_local_cache;
404 h->readwrite_concur_support = readwrite_concur_support;
405 h->ext_table_support = ext_table_support;
406 h->writer_takes_lock = writer_takes_lock;
407 h->no_free_on_del = no_free_on_del;
408 h->readwrite_concur_lf_support = readwrite_concur_lf_support;
410 #if defined(RTE_ARCH_X86)
411 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
412 h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
414 #elif defined(RTE_ARCH_ARM64)
415 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
416 h->sig_cmp_fn = RTE_HASH_COMPARE_NEON;
419 h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
421 /* Writer threads need to take the lock when:
422 * 1) RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is enabled OR
423 * 2) RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD is enabled
425 if (h->writer_takes_lock) {
426 h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
427 RTE_CACHE_LINE_SIZE);
428 if (h->readwrite_lock == NULL)
431 rte_rwlock_init(h->readwrite_lock);
434 /* Populate free slots ring. Entry zero is reserved for key misses. */
435 for (i = 1; i < num_key_slots; i++)
436 rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
438 te->data = (void *) h;
439 TAILQ_INSERT_TAIL(hash_list, te, next);
440 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
444 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
447 rte_ring_free(r_ext);
451 rte_free(buckets_ext);
453 rte_free(tbl_chng_cnt);
454 rte_free(ext_bkt_to_free);
/* Destroy a hash table: unlink its tailq entry under the write lock,
 * then free all owned resources (local caches, rw lock, both rings, key
 * store, both bucket arrays, change counter, lf free-list).
 * NOTE(review): the NULL-check on h, the not-found early return, and the
 * final rte_free(h)/rte_free(te) lines are elided from this view.
 */
459 rte_hash_free(struct rte_hash *h)
461 struct rte_tailq_entry *te;
462 struct rte_hash_list *hash_list;
467 hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
469 rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
471 /* find out tailq entry */
472 TAILQ_FOREACH(te, hash_list, next) {
473 if (te->data == (void *) h)
478 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
482 TAILQ_REMOVE(hash_list, te, next);
484 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
486 if (h->use_local_cache)
487 rte_free(h->local_free_slots);
488 if (h->writer_takes_lock)
489 rte_free(h->readwrite_lock);
490 rte_ring_free(h->free_slots);
491 rte_ring_free(h->free_ext_bkts);
492 rte_free(h->key_store);
493 rte_free(h->buckets);
494 rte_free(h->buckets_ext);
495 rte_free(h->tbl_chng_cnt);
496 rte_free(h->ext_bkt_to_free);
/* Compute the 32-bit hash of @key with the table's configured hash
 * function and init value.
 */
502 rte_hash_hash(const struct rte_hash *h, const void *key)
504 /* calc hash result by key */
505 return h->hash_func(key, h->key_len, h->hash_func_init_val)
/* Return the number of keys currently stored: total slot capacity minus
 * free slots in the global ring and, when lcore caches are enabled,
 * minus the slots parked in each lcore cache.
 * NOTE(review): the final subtraction of cached_cnt and the return
 * statement are partly elided from this view.
 */
509 rte_hash_count(const struct rte_hash *h)
511 uint32_t tot_ring_cnt, cached_cnt = 0;
517 if (h->use_local_cache) {
518 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
519 (LCORE_CACHE_SIZE - 1);
520 for (i = 0; i < RTE_MAX_LCORE; i++)
521 cached_cnt += h->local_free_slots[i].len;
523 ret = tot_ring_cnt - rte_ring_count(h->free_slots) -
526 tot_ring_cnt = h->entries;
527 ret = tot_ring_cnt - rte_ring_count(h->free_slots);
532 /* Read write locks implemented using rte_rwlock */
/* Take the writer lock if this table needs one; prefer the
 * transactional-memory variant when HTM support was requested.
 * No-op when writer_takes_lock is clear (incl. lock-free mode).
 */
534 __hash_rw_writer_lock(const struct rte_hash *h)
536 if (h->writer_takes_lock && h->hw_trans_mem_support)
537 rte_rwlock_write_lock_tm(h->readwrite_lock);
538 else if (h->writer_takes_lock)
539 rte_rwlock_write_lock(h->readwrite_lock);
/* Take the reader lock only when RW concurrency (locked flavor) is on;
 * HTM variant preferred when available. No-op otherwise.
 */
543 __hash_rw_reader_lock(const struct rte_hash *h)
545 if (h->readwrite_concur_support && h->hw_trans_mem_support)
546 rte_rwlock_read_lock_tm(h->readwrite_lock);
547 else if (h->readwrite_concur_support)
548 rte_rwlock_read_lock(h->readwrite_lock);
/* Release the writer lock; mirrors __hash_rw_writer_lock's HTM choice. */
552 __hash_rw_writer_unlock(const struct rte_hash *h)
554 if (h->writer_takes_lock && h->hw_trans_mem_support)
555 rte_rwlock_write_unlock_tm(h->readwrite_lock);
556 else if (h->writer_takes_lock)
557 rte_rwlock_write_unlock(h->readwrite_lock);
/* Release the reader lock; mirrors __hash_rw_reader_lock's HTM choice. */
561 __hash_rw_reader_unlock(const struct rte_hash *h)
563 if (h->readwrite_concur_support && h->hw_trans_mem_support)
564 rte_rwlock_read_unlock_tm(h->readwrite_lock);
565 else if (h->readwrite_concur_support)
566 rte_rwlock_read_unlock(h->readwrite_lock);
/* Empty the table in place under the writer lock: zero buckets, key
 * store and change counter, drain both free rings, then repopulate them
 * with all slot/bucket indices (0 stays reserved) and clear the per-lcore
 * caches. NOTE(review): the NULL-check on h and some closing braces are
 * elided from this view.
 */
570 rte_hash_reset(struct rte_hash *h)
573 uint32_t tot_ring_cnt, i;
578 __hash_rw_writer_lock(h);
579 memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
580 memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
581 *h->tbl_chng_cnt = 0;
583 /* clear the free ring */
584 while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
587 /* clear free extendable bucket ring and memory */
588 if (h->ext_table_support) {
589 memset(h->buckets_ext, 0, h->num_buckets *
590 sizeof(struct rte_hash_bucket));
591 while (rte_ring_dequeue(h->free_ext_bkts, &ptr) == 0)
595 /* Repopulate the free slots ring. Entry zero is reserved for key misses */
596 if (h->use_local_cache)
597 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
598 (LCORE_CACHE_SIZE - 1);
600 tot_ring_cnt = h->entries;
602 for (i = 1; i < tot_ring_cnt + 1; i++)
603 rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
605 /* Repopulate the free ext bkt ring. */
606 if (h->ext_table_support) {
607 for (i = 1; i <= h->num_buckets; i++)
608 rte_ring_sp_enqueue(h->free_ext_bkts,
609 (void *)((uintptr_t) i));
612 if (h->use_local_cache) {
613 /* Reset local caches per lcore */
614 for (i = 0; i < RTE_MAX_LCORE; i++)
615 h->local_free_slots[i].len = 0;
617 __hash_rw_writer_unlock(h);
621 * Function called to enqueue back an index in the cache/ring,
622 * as slot has not being used and it can be used in the
623 * next addition attempt.
/* Return an unused key-slot index to the lcore cache (if enabled) or
 * straight to the global free-slots ring. Called when an insertion
 * attempt did not consume the slot it reserved.
 */
626 enqueue_slot_back(const struct rte_hash *h,
627 struct lcore_cache *cached_free_slots,
630 if (h->use_local_cache) {
631 cached_free_slots->objs[cached_free_slots->len] = slot_id;
632 cached_free_slots->len++;
634 rte_ring_sp_enqueue(h->free_slots, slot_id);
637 /* Search a key from bucket and update its data.
638 * Writer holds the lock before calling this.
/* Scan one bucket for @key (matching short signature first); if found,
 * atomically update its pdata to @data and return the key's external
 * index (internal index minus the dummy slot 0). Caller must hold the
 * writer lock. NOTE(review): the release memory-order argument of the
 * __atomic_store_n and the not-found return are elided from this view.
 */
640 static inline int32_t
641 search_and_update(const struct rte_hash *h, void *data, const void *key,
642 struct rte_hash_bucket *bkt, uint16_t sig)
645 struct rte_hash_key *k, *keys = h->key_store;
647 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
648 if (bkt->sig_current[i] == sig) {
649 k = (struct rte_hash_key *) ((char *)keys +
650 bkt->key_idx[i] * h->key_entry_size);
651 if (rte_hash_cmp_eq(key, k->key, h) == 0) {
652 /* 'pdata' acts as the synchronization point
653 * when an existing hash entry is updated.
654 * Key is not updated in this case.
656 __atomic_store_n(&k->pdata,
660 * Return index where key is stored,
661 * subtracting the first dummy index
663 return bkt->key_idx[i] - 1;
670 /* Only tries to insert at one bucket (@prim_bkt) without trying to push
672 * return 1 if matching existing key, return 0 if succeeds, return -1 for no
/* Try to place (sig, new_idx) directly into @prim_bkt without cuckoo
 * displacement. Under the writer lock: first re-check both bucket
 * chains for a duplicate key inserted since the caller's unlocked
 * check (result reported via *ret_val), then claim the first EMPTY_SLOT
 * with a release store of key_idx so readers see the complete entry.
 * Returns 1 on duplicate, 0 on success, -1 when the bucket is full.
 */
675 static inline int32_t
676 rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
677 struct rte_hash_bucket *prim_bkt,
678 struct rte_hash_bucket *sec_bkt,
679 const struct rte_hash_key *key, void *data,
680 uint16_t sig, uint32_t new_idx,
684 struct rte_hash_bucket *cur_bkt;
687 __hash_rw_writer_lock(h);
688 /* Check if key was inserted after last check but before this
689 * protected region in case of inserting duplicated keys.
691 ret = search_and_update(h, data, key, prim_bkt, sig);
693 __hash_rw_writer_unlock(h);
698 FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
699 ret = search_and_update(h, data, key, cur_bkt, sig);
701 __hash_rw_writer_unlock(h);
707 /* Insert new entry if there is room in the primary
710 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
711 /* Check if slot is available */
712 if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
713 prim_bkt->sig_current[i] = sig;
714 /* Key can be of arbitrary length, so it is
715 * not possible to store it atomically.
716 * Hence the new key element's memory stores
717 * (key as well as data) should be complete
718 * before it is referenced.
720 __atomic_store_n(&prim_bkt->key_idx[i],
726 __hash_rw_writer_unlock(h);
728 if (i != RTE_HASH_BUCKET_ENTRIES)
735 /* Shift buckets along provided cuckoo_path (@leaf and @leaf_slot) and fill
736 * the path head with new entry (sig, alt_hash, new_idx)
737 * return 1 if matched key found, return -1 if cuckoo path invalided and fail,
738 * return 0 if succeeds.
/* Commit a BFS-discovered cuckoo path: walk from the empty leaf back to
 * the path head, shifting each entry into its alternative bucket, then
 * install the new entry (sig, new_idx) at the head slot. All shifts are
 * re-validated under the writer lock; in lock-free mode each move bumps
 * tbl_chng_cnt (release) so concurrent readers can detect and retry.
 * Returns 1 if the key turned out to already exist, -1 if the path was
 * invalidated, 0 on success.
 */
741 rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
742 struct rte_hash_bucket *bkt,
743 struct rte_hash_bucket *alt_bkt,
744 const struct rte_hash_key *key, void *data,
745 struct queue_node *leaf, uint32_t leaf_slot,
746 uint16_t sig, uint32_t new_idx,
749 uint32_t prev_alt_bkt_idx;
750 struct rte_hash_bucket *cur_bkt;
751 struct queue_node *prev_node, *curr_node = leaf;
752 struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
753 uint32_t prev_slot, curr_slot = leaf_slot;
756 __hash_rw_writer_lock(h);
758 /* In case empty slot was gone before entering protected region */
759 if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) {
760 __hash_rw_writer_unlock(h);
764 /* Check if key was inserted after last check but before this
767 ret = search_and_update(h, data, key, bkt, sig);
769 __hash_rw_writer_unlock(h);
774 FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
775 ret = search_and_update(h, data, key, cur_bkt, sig);
777 __hash_rw_writer_unlock(h);
783 while (likely(curr_node->prev != NULL)) {
784 prev_node = curr_node->prev;
785 prev_bkt = prev_node->bkt;
786 prev_slot = curr_node->prev_slot;
788 prev_alt_bkt_idx = get_alt_bucket_index(h,
789 prev_node->cur_bkt_idx,
790 prev_bkt->sig_current[prev_slot]);
792 if (unlikely(&h->buckets[prev_alt_bkt_idx]
794 /* revert it to empty, otherwise duplicated keys */
795 __atomic_store_n(&curr_bkt->key_idx[curr_slot],
798 __hash_rw_writer_unlock(h);
802 if (h->readwrite_concur_lf_support) {
803 /* Inform the previous move. The current move need
804 * not be informed now as the current bucket entry
805 * is present in both primary and secondary.
806 * Since there is one writer, load acquires on
807 * tbl_chng_cnt are not required.
809 __atomic_store_n(h->tbl_chng_cnt,
810 *h->tbl_chng_cnt + 1,
812 /* The store to sig_current should not
813 * move above the store to tbl_chng_cnt.
815 __atomic_thread_fence(__ATOMIC_RELEASE);
818 /* Need to swap current/alt sig to allow later
819 * Cuckoo insert to move elements back to its
820 * primary bucket if available
822 curr_bkt->sig_current[curr_slot] =
823 prev_bkt->sig_current[prev_slot];
824 /* Release the updated bucket entry */
825 __atomic_store_n(&curr_bkt->key_idx[curr_slot],
826 prev_bkt->key_idx[prev_slot],
829 curr_slot = prev_slot;
830 curr_node = prev_node;
831 curr_bkt = curr_node->bkt;
834 if (h->readwrite_concur_lf_support) {
835 /* Inform the previous move. The current move need
836 * not be informed now as the current bucket entry
837 * is present in both primary and secondary.
838 * Since there is one writer, load acquires on
839 * tbl_chng_cnt are not required.
841 __atomic_store_n(h->tbl_chng_cnt,
842 *h->tbl_chng_cnt + 1,
844 /* The store to sig_current should not
845 * move above the store to tbl_chng_cnt.
847 __atomic_thread_fence(__ATOMIC_RELEASE);
850 curr_bkt->sig_current[curr_slot] = sig;
851 /* Release the new bucket entry */
852 __atomic_store_n(&curr_bkt->key_idx[curr_slot],
856 __hash_rw_writer_unlock(h);
863 * Make space for new key, using bfs Cuckoo Search and Multi-Writer safe
/* Breadth-first search for a cuckoo displacement path starting at @bkt.
 * Expands buckets into a bounded queue (RTE_HASH_BFS_QUEUE_MAX_LEN);
 * on finding an EMPTY_SLOT, delegates the actual shifting/insert to
 * rte_hash_cuckoo_move_insert_mw. NOTE(review): queue head/tail
 * initialization, node enqueue tail, and the exhausted-search return
 * are elided from this view.
 */
867 rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
868 struct rte_hash_bucket *bkt,
869 struct rte_hash_bucket *sec_bkt,
870 const struct rte_hash_key *key, void *data,
871 uint16_t sig, uint32_t bucket_idx,
872 uint32_t new_idx, int32_t *ret_val)
875 struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
876 struct queue_node *tail, *head;
877 struct rte_hash_bucket *curr_bkt, *alt_bkt;
878 uint32_t cur_idx, alt_idx;
884 tail->prev_slot = -1;
885 tail->cur_bkt_idx = bucket_idx;
887 /* Cuckoo bfs Search */
888 while (likely(tail != head && head <
889 queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
890 RTE_HASH_BUCKET_ENTRIES)) {
891 curr_bkt = tail->bkt;
892 cur_idx = tail->cur_bkt_idx;
893 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
894 if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
895 int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
896 bkt, sec_bkt, key, data,
899 if (likely(ret != -1))
903 /* Enqueue new node and keep prev node info */
904 alt_idx = get_alt_bucket_index(h, cur_idx,
905 curr_bkt->sig_current[i]);
906 alt_bkt = &(h->buckets[alt_idx]);
908 head->cur_bkt_idx = alt_idx;
/* Core add/update path. Order of attempts:
 *  1) locked duplicate check in primary and secondary chains (update in
 *     place if found);
 *  2) reserve a key slot (lcore cache or global ring), copy key and
 *     release-store pdata;
 *  3) direct insert into primary, then cuckoo displacement from primary
 *     and from secondary;
 *  4) if extendable tables are enabled, re-check for duplicates under
 *     the lock, probe sec+ext chain for an empty slot, else link a new
 *     extendable bucket from the free-ext-bkt ring.
 * On any failure after reservation the slot is returned via
 * enqueue_slot_back. Returns the key index (>=0), or negative on error.
 * NOTE(review): several return statements and error branches are elided
 * from this view.
 */
919 static inline int32_t
920 __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
921 hash_sig_t sig, void *data)
924 uint32_t prim_bucket_idx, sec_bucket_idx;
925 struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
926 struct rte_hash_key *new_k, *keys = h->key_store;
927 void *slot_id = NULL;
928 void *ext_bkt_id = NULL;
929 uint32_t new_idx, bkt_id;
934 struct lcore_cache *cached_free_slots = NULL;
936 struct rte_hash_bucket *last;
938 short_sig = get_short_sig(sig);
939 prim_bucket_idx = get_prim_bucket_index(h, sig);
940 sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
941 prim_bkt = &h->buckets[prim_bucket_idx];
942 sec_bkt = &h->buckets[sec_bucket_idx];
943 rte_prefetch0(prim_bkt);
944 rte_prefetch0(sec_bkt);
946 /* Check if key is already inserted in primary location */
947 __hash_rw_writer_lock(h);
948 ret = search_and_update(h, data, key, prim_bkt, short_sig);
950 __hash_rw_writer_unlock(h);
954 /* Check if key is already inserted in secondary location */
955 FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
956 ret = search_and_update(h, data, key, cur_bkt, short_sig);
958 __hash_rw_writer_unlock(h);
963 __hash_rw_writer_unlock(h);
965 /* Did not find a match, so get a new slot for storing the new key */
966 if (h->use_local_cache) {
967 lcore_id = rte_lcore_id();
968 cached_free_slots = &h->local_free_slots[lcore_id];
969 /* Try to get a free slot from the local cache */
970 if (cached_free_slots->len == 0) {
971 /* Need to get another burst of free slots from global ring */
972 n_slots = rte_ring_mc_dequeue_burst(h->free_slots,
973 cached_free_slots->objs,
974 LCORE_CACHE_SIZE, NULL);
979 cached_free_slots->len += n_slots;
982 /* Get a free slot from the local cache */
983 cached_free_slots->len--;
984 slot_id = cached_free_slots->objs[cached_free_slots->len];
986 if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
991 new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
992 new_idx = (uint32_t)((uintptr_t) slot_id);
994 memcpy(new_k->key, key, h->key_len);
995 /* Key can be of arbitrary length, so it is not possible to store
996 * it atomically. Hence the new key element's memory stores
997 * (key as well as data) should be complete before it is referenced.
998 * 'pdata' acts as the synchronization point when an existing hash
1001 __atomic_store_n(&new_k->pdata,
1005 /* Find an empty slot and insert */
1006 ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
1007 short_sig, new_idx, &ret_val);
1010 else if (ret == 1) {
1011 enqueue_slot_back(h, cached_free_slots, slot_id);
1015 /* Primary bucket full, need to make space for new entry */
1016 ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
1017 short_sig, prim_bucket_idx, new_idx, &ret_val);
1020 else if (ret == 1) {
1021 enqueue_slot_back(h, cached_free_slots, slot_id);
1025 /* Also search secondary bucket to get better occupancy */
1026 ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
1027 short_sig, sec_bucket_idx, new_idx, &ret_val);
1031 else if (ret == 1) {
1032 enqueue_slot_back(h, cached_free_slots, slot_id);
1036 /* if ext table not enabled, we failed the insertion */
1037 if (!h->ext_table_support) {
1038 enqueue_slot_back(h, cached_free_slots, slot_id);
1042 /* Now we need to go through the extendable bucket. Protection is needed
1043 * to protect all extendable bucket processes.
1045 __hash_rw_writer_lock(h);
1046 /* We check for duplicates again since could be inserted before the lock */
1047 ret = search_and_update(h, data, key, prim_bkt, short_sig);
1049 enqueue_slot_back(h, cached_free_slots, slot_id);
1053 FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1054 ret = search_and_update(h, data, key, cur_bkt, short_sig);
1056 enqueue_slot_back(h, cached_free_slots, slot_id);
1061 /* Search sec and ext buckets to find an empty entry to insert. */
1062 FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1063 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1064 /* Check if slot is available */
1065 if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
1066 cur_bkt->sig_current[i] = short_sig;
1067 /* Store to signature should not leak after
1068 * the store to key_idx
1070 __atomic_store_n(&cur_bkt->key_idx[i],
1073 __hash_rw_writer_unlock(h);
1079 /* Failed to get an empty entry from extendable buckets. Link a new
1080 * extendable bucket. We first get a free bucket from ring.
1082 if (rte_ring_sc_dequeue(h->free_ext_bkts, &ext_bkt_id) != 0) {
1087 bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
1088 /* Use the first location of the new bucket */
1089 (h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
1090 /* Store to signature should not leak after
1091 * the store to key_idx
1093 __atomic_store_n(&(h->buckets_ext[bkt_id]).key_idx[0],
1096 /* Link the new bucket to sec bucket linked list */
1097 last = rte_hash_get_last_bkt(sec_bkt);
1098 last->next = &h->buckets_ext[bkt_id];
1099 __hash_rw_writer_unlock(h);
1103 __hash_rw_writer_unlock(h);
/* Public add with precomputed hash, no user data (data = 0). */
1109 rte_hash_add_key_with_hash(const struct rte_hash *h,
1110 const void *key, hash_sig_t sig)
1112 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1113 return __rte_hash_add_key_with_hash(h, key, sig, 0);
/* Public add; computes the hash internally, no user data. */
1117 rte_hash_add_key(const struct rte_hash *h, const void *key)
1119 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1120 return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0);
/* Public add/update with precomputed hash and attached user data.
 * NOTE(review): the translation of the internal index into the 0/-error
 * return convention is elided from this view.
 */
1124 rte_hash_add_key_with_hash_data(const struct rte_hash *h,
1125 const void *key, hash_sig_t sig, void *data)
1129 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1130 ret = __rte_hash_add_key_with_hash(h, key, sig, data);
/* Public add/update with attached user data; hash computed internally.
 * NOTE(review): the return-value mapping after this call is elided.
 */
1138 rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
1142 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1144 ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data);
1151 /* Search one bucket to find the match key - uses rw lock */
/* Locked-path bucket probe: match short signature and non-empty slot,
 * then full key compare; on hit return the external key index.
 * Caller holds the reader lock. NOTE(review): the *data assignment on
 * match and the not-found return are elided from this view.
 */
1152 static inline int32_t
1153 search_one_bucket_l(const struct rte_hash *h, const void *key,
1154 uint16_t sig, void **data,
1155 const struct rte_hash_bucket *bkt)
1158 struct rte_hash_key *k, *keys = h->key_store;
1160 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1161 if (bkt->sig_current[i] == sig &&
1162 bkt->key_idx[i] != EMPTY_SLOT) {
1163 k = (struct rte_hash_key *) ((char *)keys +
1164 bkt->key_idx[i] * h->key_entry_size);
1166 if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1170 * Return index where key is stored,
1171 * subtracting the first dummy index
1173 return bkt->key_idx[i] - 1;
1180 /* Search one bucket to find the match key */
/* Lock-free bucket probe: key_idx and pdata are read with atomic loads
 * (acquire ordering expected, arguments partly elided here) so a
 * concurrent writer's release stores are observed consistently.
 * NOTE(review): the *data assignment and both return statements are
 * elided from this view.
 */
1181 static inline int32_t
1182 search_one_bucket_lf(const struct rte_hash *h, const void *key, uint16_t sig,
1183 void **data, const struct rte_hash_bucket *bkt)
1188 struct rte_hash_key *k, *keys = h->key_store;
1190 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1191 key_idx = __atomic_load_n(&bkt->key_idx[i],
1193 if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
1194 k = (struct rte_hash_key *) ((char *)keys +
1195 key_idx * h->key_entry_size);
1196 pdata = __atomic_load_n(&k->pdata,
1199 if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1203 * Return index where key is stored,
1204 * subtracting the first dummy index
/* Locked lookup: probe the primary bucket, then the secondary bucket
 * and its extendable chain, all under the reader lock.
 * NOTE(review): hit-return lines and the final miss return (-ENOENT in
 * the reference implementation — confirm) are elided from this view.
 */
1213 static inline int32_t
1214 __rte_hash_lookup_with_hash_l(const struct rte_hash *h, const void *key,
1215 hash_sig_t sig, void **data)
1217 uint32_t prim_bucket_idx, sec_bucket_idx;
1218 struct rte_hash_bucket *bkt, *cur_bkt;
1222 short_sig = get_short_sig(sig);
1223 prim_bucket_idx = get_prim_bucket_index(h, sig);
1224 sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1226 bkt = &h->buckets[prim_bucket_idx];
1228 __hash_rw_reader_lock(h);
1230 /* Check if key is in primary location */
1231 ret = search_one_bucket_l(h, key, short_sig, data, bkt);
1233 __hash_rw_reader_unlock(h);
1236 /* Calculate secondary hash */
1237 bkt = &h->buckets[sec_bucket_idx];
1239 /* Check if key is in secondary location */
1240 FOR_EACH_BUCKET(cur_bkt, bkt) {
1241 ret = search_one_bucket_l(h, key, short_sig,
1244 __hash_rw_reader_unlock(h);
1249 __hash_rw_reader_unlock(h);
1254 static inline int32_t
1255 __rte_hash_lookup_with_hash_lf(const struct rte_hash *h, const void *key,
1256 hash_sig_t sig, void **data)
1258 uint32_t prim_bucket_idx, sec_bucket_idx;
1259 struct rte_hash_bucket *bkt, *cur_bkt;
1260 uint32_t cnt_b, cnt_a;
1264 short_sig = get_short_sig(sig);
1265 prim_bucket_idx = get_prim_bucket_index(h, sig);
1266 sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1269 /* Load the table change counter before the lookup
1270 * starts. Acquire semantics will make sure that
1271 * loads in search_one_bucket are not hoisted.
1273 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1276 /* Check if key is in primary location */
1277 bkt = &h->buckets[prim_bucket_idx];
1278 ret = search_one_bucket_lf(h, key, short_sig, data, bkt);
1280 __hash_rw_reader_unlock(h);
1283 /* Calculate secondary hash */
1284 bkt = &h->buckets[sec_bucket_idx];
1286 /* Check if key is in secondary location */
1287 FOR_EACH_BUCKET(cur_bkt, bkt) {
1288 ret = search_one_bucket_lf(h, key, short_sig,
1291 __hash_rw_reader_unlock(h);
1296 /* The loads of sig_current in search_one_bucket
1297 * should not move below the load from tbl_chng_cnt.
1299 __atomic_thread_fence(__ATOMIC_ACQUIRE);
1300 /* Re-read the table change counter to check if the
1301 * table has changed during search. If yes, re-do
1303 * This load should not get hoisted. The load
1304 * acquires on cnt_b, key index in primary bucket
1305 * and key index in secondary bucket will make sure
1306 * that it does not get hoisted.
1308 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
1310 } while (cnt_b != cnt_a);
/* Dispatch lookup to the lock-free or locked implementation based on
 * how the table was created.
 */
1315 static inline int32_t
1316 __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
1317 hash_sig_t sig, void **data)
1319 if (h->readwrite_concur_lf_support)
1320 return __rte_hash_lookup_with_hash_lf(h, key, sig, data);
1322 return __rte_hash_lookup_with_hash_l(h, key, sig, data);
/* Public API: single-key lookup with a caller-precomputed hash signature.
 * NOTE(review): RETURN_IF_TRUE is a project macro; presumably it compiles
 * away unless hash debug is enabled — verify in rte_cuckoo_hash.h before
 * relying on the NULL check in production builds.
 */
1326 rte_hash_lookup_with_hash(const struct rte_hash *h,
1327 const void *key, hash_sig_t sig)
1329 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1330 return __rte_hash_lookup_with_hash(h, key, sig, NULL);
/* Public API: single-key lookup; computes the hash internally via
 * rte_hash_hash() and delegates to the common lookup dispatcher. */
1334 rte_hash_lookup(const struct rte_hash *h, const void *key)
1336 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1337 return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL);
/* Public API: lookup with precomputed signature, also returning the
 * stored data pointer through *data on a hit. */
1341 rte_hash_lookup_with_hash_data(const struct rte_hash *h,
1342 const void *key, hash_sig_t sig, void **data)
1344 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1345 return __rte_hash_lookup_with_hash(h, key, sig, data);
/* Public API: lookup that hashes internally and returns the stored data
 * pointer through *data on a hit. */
1349 rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
1351 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1352 return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
/* Return the key-store slot held by bkt->key_idx[i] to the free pool.
 * With per-lcore caching enabled the slot goes into this lcore's cache,
 * spilling a full cache back to the global ring first; otherwise it is
 * enqueued straight onto h->free_slots.
 * NOTE(review): lines are elided in this listing — the else arm pairing
 * the plain rte_ring_sp_enqueue() path (line 1380) is not fully visible.
 */
1356 remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
1358 unsigned lcore_id, n_slots;
1359 struct lcore_cache *cached_free_slots;
1361 if (h->use_local_cache) {
1362 lcore_id = rte_lcore_id();
1363 cached_free_slots = &h->local_free_slots[lcore_id];
1364 /* Cache full, need to free it. */
1365 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1366 /* Need to enqueue the free slots in global ring. */
/* Multi-producer enqueue: several lcores may spill concurrently. */
1367 n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1368 cached_free_slots->objs,
1369 LCORE_CACHE_SIZE, NULL);
1370 ERR_IF_TRUE((n_slots == 0),
1371 "%s: could not enqueue free slots in global ring\n",
1373 cached_free_slots->len -= n_slots;
1375 /* Put index of new free slot in cache. */
1376 cached_free_slots->objs[cached_free_slots->len] =
1377 (void *)((uintptr_t)bkt->key_idx[i]);
1378 cached_free_slots->len++;
/* No local cache: single-producer enqueue (one writer). */
1380 rte_ring_sp_enqueue(h->free_slots,
1381 (void *)((uintptr_t)bkt->key_idx[i]));
1385 /* Compact the linked list by moving key from last entry in linked list to the
/* (continuation of the original header: ...vacated slot `pos` of cur_bkt,
 * keeping chained extendable buckets dense after a delete.) */
1389 __rte_hash_compact_ll(const struct rte_hash *h,
1390 struct rte_hash_bucket *cur_bkt, int pos) {
1392 struct rte_hash_bucket *last_bkt;
1397 last_bkt = rte_hash_get_last_bkt(cur_bkt);
/* Scan the last bucket from the highest slot down for an occupied entry
 * to relocate into the hole at cur_bkt[pos]. */
1399 for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) {
1400 if (last_bkt->key_idx[i] != EMPTY_SLOT) {
/* Publish sig before key_idx: readers match on sig first, and the
 * release store on key_idx (elided ordering arg) makes both visible. */
1401 cur_bkt->sig_current[pos] = last_bkt->sig_current[i];
1402 __atomic_store_n(&cur_bkt->key_idx[pos],
1403 last_bkt->key_idx[i],
1405 if (h->readwrite_concur_lf_support) {
1406 /* Inform the readers that the table has changed
1407 * Since there is one writer, load acquire on
1408 * tbl_chng_cnt is not required.
1410 __atomic_store_n(h->tbl_chng_cnt,
1411 *h->tbl_chng_cnt + 1,
1413 /* The store to sig_current should
1414 * not move above the store to tbl_chng_cnt.
1416 __atomic_thread_fence(__ATOMIC_RELEASE);
/* Now clear the source slot in the (old) last bucket. */
1418 last_bkt->sig_current[i] = NULL_SIGNATURE;
1419 __atomic_store_n(&last_bkt->key_idx[i],
1427 /* Search one bucket and remove the matched key.
1428 * Writer is expected to hold the lock while calling this
/* On a match: clears the signature, optionally frees the key-store slot
 * (unless no_free_on_del), clears key_idx with a release store (elided),
 * records the slot number in *pos, and returns key_idx - 1 (the public
 * position, skipping the dummy index 0). */
1431 static inline int32_t
1432 search_and_remove(const struct rte_hash *h, const void *key,
1433 struct rte_hash_bucket *bkt, uint16_t sig, int *pos)
1435 struct rte_hash_key *k, *keys = h->key_store;
1439 /* Check if key is in bucket */
1440 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
/* Acquire load pairs with the writer's release store of key_idx. */
1441 key_idx = __atomic_load_n(&bkt->key_idx[i],
1443 if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
1444 k = (struct rte_hash_key *) ((char *)keys +
1445 key_idx * h->key_entry_size)
1446 if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1447 bkt->sig_current[i] = NULL_SIGNATURE;
1448 /* Free the key store index if
1449 * no_free_on_del is disabled.
1451 if (!h->no_free_on_del)
1452 remove_entry(h, bkt, i);
1454 __atomic_store_n(&bkt->key_idx[i],
1460 * Return index where key is stored,
1461 * subtracting the first dummy index
/* Delete a key: search-and-remove in the primary bucket, then in the
 * secondary bucket chain; compact the ext-bucket linked list around the
 * freed slot; finally, if the last chained ext bucket became empty,
 * recycle it (immediately, or deferred when lock-free readers may still
 * hold references — see the no_free_on_del branch near the end).
 * NOTE(review): listing is elided — early-return paths after the primary
 * and secondary hits, and the "no ext table" bail-out, are partly cut.
 */
1470 static inline int32_t
1471 __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
1474 uint32_t prim_bucket_idx, sec_bucket_idx;
1475 struct rte_hash_bucket *prim_bkt, *sec_bkt, *prev_bkt, *last_bkt;
1476 struct rte_hash_bucket *cur_bkt;
1481 short_sig = get_short_sig(sig);
1482 prim_bucket_idx = get_prim_bucket_index(h, sig);
1483 sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1484 prim_bkt = &h->buckets[prim_bucket_idx];
1486 __hash_rw_writer_lock(h);
1487 /* look for key in primary bucket */
1488 ret = search_and_remove(h, key, prim_bkt, short_sig, &pos);
1490 __rte_hash_compact_ll(h, prim_bkt, pos);
1491 last_bkt = prim_bkt->next;
1492 prev_bkt = prim_bkt;
1496 /* Calculate secondary hash */
1497 sec_bkt = &h->buckets[sec_bucket_idx];
1499 FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1500 ret = search_and_remove(h, key, cur_bkt, short_sig, &pos);
1502 __rte_hash_compact_ll(h, cur_bkt, pos);
1503 last_bkt = sec_bkt->next;
1509 __hash_rw_writer_unlock(h);
1512 /* Search last bucket to see if empty to be recycled */
1515 __hash_rw_writer_unlock(h);
/* Walk to the true tail of the chain, remembering its predecessor so the
 * tail can be unlinked if empty. */
1518 while (last_bkt->next) {
1519 prev_bkt = last_bkt;
1520 last_bkt = last_bkt->next;
1523 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1524 if (last_bkt->key_idx[i] != EMPTY_SLOT)
1527 /* found empty bucket and recycle */
1528 if (i == RTE_HASH_BUCKET_ENTRIES) {
1529 prev_bkt->next = NULL;
/* ext-bucket indices are 1-based in the free ring (0 is "none"). */
1530 uint32_t index = last_bkt - h->buckets_ext + 1;
1531 /* Recycle the empty bkt if
1532 * no_free_on_del is disabled.
1534 if (h->no_free_on_del)
1535 /* Store index of an empty ext bkt to be recycled
1536 * on calling rte_hash_del_xxx APIs.
1537 * When lock free read-write concurrency is enabled,
1538 * an empty ext bkt cannot be put into free list
1539 * immediately (as readers might be using it still).
1540 * Hence freeing of the ext bkt is piggy-backed to
1541 * freeing of the key index.
1543 h->ext_bkt_to_free[ret] = index;
1545 rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
1547 __hash_rw_writer_unlock(h);
/* Public API: delete a key using a caller-precomputed hash signature. */
1552 rte_hash_del_key_with_hash(const struct rte_hash *h,
1553 const void *key, hash_sig_t sig)
1555 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1556 return __rte_hash_del_key_with_hash(h, key, sig);
/* Public API: delete a key, hashing it internally first. */
1560 rte_hash_del_key(const struct rte_hash *h, const void *key)
1562 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1563 return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
/* Public API: recover the key stored at a previously returned position.
 * position+1 converts the public position back to the internal key-store
 * index (index 0 is a dummy slot). The elided tail re-looks the key up
 * and (presumably) returns -ENOENT if it no longer maps to this position
 * — TODO confirm against the full source.
 */
1567 rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
1570 RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1572 struct rte_hash_key *k, *keys = h->key_store;
1573 k = (struct rte_hash_key *) ((char *) keys + (position + 1) *
1578 __rte_hash_lookup_with_hash(h, *key, rte_hash_hash(h, *key),
/* Public (experimental) API: free a key-store slot that was kept alive by
 * RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL after rte_hash_del_*().
 * Also performs the deferred recycling of an empty extendable bucket
 * recorded in h->ext_bkt_to_free[] by the delete path (lock-free mode).
 */
1586 int __rte_experimental
1587 rte_hash_free_key_with_position(const struct rte_hash *h,
1588 const int32_t position)
1590 /* Key index where key is stored, adding the first dummy index */
1591 uint32_t key_idx = position + 1;
1593 RETURN_IF_TRUE(((h == NULL) || (key_idx == EMPTY_SLOT)), -EINVAL);
1595 unsigned int lcore_id, n_slots;
1596 struct lcore_cache *cached_free_slots;
/* With local caches, slots beyond h->entries may exist transiently in
 * per-lcore caches; widen the bound accordingly. */
1597 const uint32_t total_entries = h->use_local_cache ?
1598 h->entries + (RTE_MAX_LCORE - 1) * (LCORE_CACHE_SIZE - 1) + 1
1602 if (key_idx >= total_entries)
/* Deferred ext-bucket recycle (see __rte_hash_del_key_with_hash). */
1604 if (h->ext_table_support && h->readwrite_concur_lf_support) {
1605 uint32_t index = h->ext_bkt_to_free[position];
1607 /* Recycle empty ext bkt to free list. */
1608 rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
1609 h->ext_bkt_to_free[position] = 0;
/* Return the key-store slot itself, mirroring remove_entry()'s
 * cached vs. global-ring logic. */
1613 if (h->use_local_cache) {
1614 lcore_id = rte_lcore_id();
1615 cached_free_slots = &h->local_free_slots[lcore_id];
1616 /* Cache full, need to free it. */
1617 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1618 /* Need to enqueue the free slots in global ring. */
1619 n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1620 cached_free_slots->objs,
1621 LCORE_CACHE_SIZE, NULL);
1622 RETURN_IF_TRUE((n_slots == 0), -EFAULT);
1623 cached_free_slots->len -= n_slots;
1625 /* Put index of new free slot in cache. */
1626 cached_free_slots->objs[cached_free_slots->len] =
1627 (void *)((uintptr_t)key_idx);
1628 cached_free_slots->len++;
1630 rte_ring_sp_enqueue(h->free_slots,
1631 (void *)((uintptr_t)key_idx));
/* Compare the 16-bit signature `sig` against all RTE_HASH_BUCKET_ENTRIES
 * signatures of the primary and secondary buckets at once, producing one
 * hit bitmask per bucket. Implementation is selected at runtime by
 * sig_cmp_fn: SSE2 or NEON vector compare where compiled in, scalar loop
 * otherwise (the default/scalar case's switch labels are elided here).
 */
1638 compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
1639 const struct rte_hash_bucket *prim_bkt,
1640 const struct rte_hash_bucket *sec_bkt,
1642 enum rte_hash_sig_compare_function sig_cmp_fn)
1646 /* For match mask the first bit of every two bits indicates the match */
1647 switch (sig_cmp_fn) {
1648 #if defined(RTE_MACHINE_CPUFLAG_SSE2)
1649 case RTE_HASH_COMPARE_SSE:
1650 /* Compare all signatures in the bucket */
/* _mm_movemask_epi8 yields one bit per byte, hence two bits per
 * uint16 signature — matching the "first bit of every two" layout. */
1651 *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1653 (__m128i const *)prim_bkt->sig_current),
1654 _mm_set1_epi16(sig)));
1655 /* Compare all signatures in the bucket */
1656 *sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1658 (__m128i const *)sec_bkt->sig_current),
1659 _mm_set1_epi16(sig)));
1661 #elif defined(RTE_MACHINE_CPUFLAG_NEON)
1662 case RTE_HASH_COMPARE_NEON: {
1663 uint16x8_t vmat, vsig, x;
/* Negative shift counts = right shifts; each lane's 0x8000 match bit
 * lands on bit position 2*lane+1's neighbor so vaddvq_u16 packs the
 * per-lane bits into the same two-bits-per-entry mask as the SSE path. */
1664 int16x8_t shift = {-15, -13, -11, -9, -7, -5, -3, -1};
1666 vsig = vld1q_dup_u16((uint16_t const *)&sig);
1667 /* Compare all signatures in the primary bucket */
1668 vmat = vceqq_u16(vsig,
1669 vld1q_u16((uint16_t const *)prim_bkt->sig_current));
1670 x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), shift);
1671 *prim_hash_matches = (uint32_t)(vaddvq_u16(x));
1672 /* Compare all signatures in the secondary bucket */
1673 vmat = vceqq_u16(vsig,
1674 vld1q_u16((uint16_t const *)sec_bkt->sig_current));
1675 x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), shift);
1676 *sec_hash_matches = (uint32_t)(vaddvq_u16(x));
/* Scalar fallback: set bit 2*i for each matching entry i. */
1681 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1682 *prim_hash_matches |=
1683 ((sig == prim_bkt->sig_current[i]) << (i << 1));
1684 *sec_hash_matches |=
1685 ((sig == sec_bkt->sig_current[i]) << (i << 1));
/* Software-pipelining depth for the bulk-lookup prefetch loops below. */
1690 #define PREFETCH_OFFSET 4
/* Lock-based bulk lookup: for up to RTE_HASH_LOOKUP_BULK_MAX keys,
 * prefetch keys and both candidate buckets, vector-compare signatures,
 * then compare full keys under the reader lock. positions[i] gets the
 * stored index (or -ENOENT); data[i] (if requested) the value; hit bits
 * accumulate in `hits` (declaration elided) and are written to *hit_mask.
 */
1692 __rte_hash_lookup_bulk_l(const struct rte_hash *h, const void **keys,
1693 int32_t num_keys, int32_t *positions,
1694 uint64_t *hit_mask, void *data[])
1699 uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
1700 uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
1701 uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
1702 uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
1703 const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1704 const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1705 uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1706 uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1707 struct rte_hash_bucket *cur_bkt, *next_bkt;
1709 /* Prefetch first keys */
1710 for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
1711 rte_prefetch0(keys[i]);
1714 * Prefetch rest of the keys, calculate primary and
1715 * secondary bucket and prefetch them
1717 for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
1718 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
1720 prim_hash[i] = rte_hash_hash(h, keys[i]);
1722 sig[i] = get_short_sig(prim_hash[i]);
1723 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1724 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1726 primary_bkt[i] = &h->buckets[prim_index[i]];
1727 secondary_bkt[i] = &h->buckets[sec_index[i]];
1729 rte_prefetch0(primary_bkt[i]);
1730 rte_prefetch0(secondary_bkt[i]);
1733 /* Calculate and prefetch rest of the buckets */
/* Tail of the pipeline: last PREFETCH_OFFSET keys, no lookahead left. */
1734 for (; i < num_keys; i++) {
1735 prim_hash[i] = rte_hash_hash(h, keys[i]);
1737 sig[i] = get_short_sig(prim_hash[i]);
1738 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1739 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1741 primary_bkt[i] = &h->buckets[prim_index[i]];
1742 secondary_bkt[i] = &h->buckets[sec_index[i]];
1744 rte_prefetch0(primary_bkt[i]);
1745 rte_prefetch0(secondary_bkt[i]);
1748 __hash_rw_reader_lock(h);
1750 /* Compare signatures and prefetch key slot of first hit */
1751 for (i = 0; i < num_keys; i++) {
1752 compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1753 primary_bkt[i], secondary_bkt[i],
1754 sig[i], h->sig_cmp_fn);
1756 if (prim_hitmask[i]) {
/* Hitmasks use two bits per entry; ctzl>>1 (shift elided) gives
 * the entry index of the lowest set match. */
1757 uint32_t first_hit =
1758 __builtin_ctzl(prim_hitmask[i])
1761 primary_bkt[i]->key_idx[first_hit];
1762 const struct rte_hash_key *key_slot =
1763 (const struct rte_hash_key *)(
1764 (const char *)h->key_store +
1765 key_idx * h->key_entry_size);
1766 rte_prefetch0(key_slot);
1770 if (sec_hitmask[i]) {
1771 uint32_t first_hit =
1772 __builtin_ctzl(sec_hitmask[i])
1775 secondary_bkt[i]->key_idx[first_hit];
1776 const struct rte_hash_key *key_slot =
1777 (const struct rte_hash_key *)(
1778 (const char *)h->key_store +
1779 key_idx * h->key_entry_size);
1780 rte_prefetch0(key_slot);
1784 /* Compare keys, first hits in primary first */
1785 for (i = 0; i < num_keys; i++) {
1786 positions[i] = -ENOENT;
1787 while (prim_hitmask[i]) {
1788 uint32_t hit_index =
1789 __builtin_ctzl(prim_hitmask[i])
1792 primary_bkt[i]->key_idx[hit_index];
1793 const struct rte_hash_key *key_slot =
1794 (const struct rte_hash_key *)(
1795 (const char *)h->key_store +
1796 key_idx * h->key_entry_size);
1799 * If key index is 0, do not compare key,
1800 * as it is checking the dummy slot
1804 key_slot->key, keys[i], h)) {
1806 data[i] = key_slot->pdata;
1809 positions[i] = key_idx - 1;
/* Clear this entry's two mask bits and try the next candidate. */
1812 prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
1815 while (sec_hitmask[i]) {
1816 uint32_t hit_index =
1817 __builtin_ctzl(sec_hitmask[i])
1820 secondary_bkt[i]->key_idx[hit_index];
1821 const struct rte_hash_key *key_slot =
1822 (const struct rte_hash_key *)(
1823 (const char *)h->key_store +
1824 key_idx * h->key_entry_size);
1827 * If key index is 0, do not compare key,
1828 * as it is checking the dummy slot
1833 key_slot->key, keys[i], h)) {
1835 data[i] = key_slot->pdata;
1838 positions[i] = key_idx - 1;
1841 sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
1847 /* all found, do not need to go through ext bkt */
1848 if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
1849 if (hit_mask != NULL)
1851 __hash_rw_reader_unlock(h);
1855 /* need to check ext buckets for match */
/* Slow path: linearly probe chained extendable buckets for misses. */
1856 for (i = 0; i < num_keys; i++) {
1857 if ((hits & (1ULL << i)) != 0)
1859 next_bkt = secondary_bkt[i]->next;
1860 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
1862 ret = search_one_bucket_l(h, keys[i],
1863 sig[i], &data[i], cur_bkt);
1865 ret = search_one_bucket_l(h, keys[i],
1866 sig[i], NULL, cur_bkt);
1875 __hash_rw_reader_unlock(h);
1877 if (hit_mask != NULL)
/* Lock-free bulk lookup: same pipeline as the _l variant (prefetch,
 * signature compare, key compare, ext-bucket probe) but with no reader
 * lock; instead the whole search is wrapped in a tbl_chng_cnt retry loop
 * (seqlock-style, do{} opener elided) and key_idx/pdata are read with
 * acquire loads so readers never see a torn displacement.
 */
1882 __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
1883 int32_t num_keys, int32_t *positions,
1884 uint64_t *hit_mask, void *data[])
1889 uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
1890 uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
1891 uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
1892 uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
1893 const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1894 const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1895 uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1896 uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1897 struct rte_hash_bucket *cur_bkt, *next_bkt;
/* pdata is staged locally so data[i] can be published together with the
 * hit bit after the key compare succeeds (stores elided below). */
1898 void *pdata[RTE_HASH_LOOKUP_BULK_MAX];
1899 uint32_t cnt_b, cnt_a;
1901 /* Prefetch first keys */
1902 for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
1903 rte_prefetch0(keys[i]);
1906 * Prefetch rest of the keys, calculate primary and
1907 * secondary bucket and prefetch them
1909 for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
1910 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
1912 prim_hash[i] = rte_hash_hash(h, keys[i]);
1914 sig[i] = get_short_sig(prim_hash[i]);
1915 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1916 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1918 primary_bkt[i] = &h->buckets[prim_index[i]];
1919 secondary_bkt[i] = &h->buckets[sec_index[i]];
1921 rte_prefetch0(primary_bkt[i]);
1922 rte_prefetch0(secondary_bkt[i]);
1925 /* Calculate and prefetch rest of the buckets */
1926 for (; i < num_keys; i++) {
1927 prim_hash[i] = rte_hash_hash(h, keys[i]);
1929 sig[i] = get_short_sig(prim_hash[i]);
1930 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1931 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1933 primary_bkt[i] = &h->buckets[prim_index[i]];
1934 secondary_bkt[i] = &h->buckets[sec_index[i]];
1936 rte_prefetch0(primary_bkt[i]);
1937 rte_prefetch0(secondary_bkt[i]);
/* Pre-init all results to miss before entering the retry loop. */
1940 for (i = 0; i < num_keys; i++)
1941 positions[i] = -ENOENT;
1944 /* Load the table change counter before the lookup
1945 * starts. Acquire semantics will make sure that
1946 * loads in compare_signatures are not hoisted.
1948 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1951 /* Compare signatures and prefetch key slot of first hit */
1952 for (i = 0; i < num_keys; i++) {
1953 compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1954 primary_bkt[i], secondary_bkt[i],
1955 sig[i], h->sig_cmp_fn);
1957 if (prim_hitmask[i]) {
1958 uint32_t first_hit =
1959 __builtin_ctzl(prim_hitmask[i])
1962 primary_bkt[i]->key_idx[first_hit];
1963 const struct rte_hash_key *key_slot =
1964 (const struct rte_hash_key *)(
1965 (const char *)h->key_store +
1966 key_idx * h->key_entry_size);
1967 rte_prefetch0(key_slot);
1971 if (sec_hitmask[i]) {
1972 uint32_t first_hit =
1973 __builtin_ctzl(sec_hitmask[i])
1976 secondary_bkt[i]->key_idx[first_hit];
1977 const struct rte_hash_key *key_slot =
1978 (const struct rte_hash_key *)(
1979 (const char *)h->key_store +
1980 key_idx * h->key_entry_size);
1981 rte_prefetch0(key_slot);
1985 /* Compare keys, first hits in primary first */
1986 for (i = 0; i < num_keys; i++) {
1987 while (prim_hitmask[i]) {
1988 uint32_t hit_index =
1989 __builtin_ctzl(prim_hitmask[i])
/* Acquire load of key_idx pairs with the writer's release
 * store — unlike the _l variant's plain read under the lock. */
1993 &primary_bkt[i]->key_idx[hit_index],
1995 const struct rte_hash_key *key_slot =
1996 (const struct rte_hash_key *)(
1997 (const char *)h->key_store +
1998 key_idx * h->key_entry_size);
2000 if (key_idx != EMPTY_SLOT)
2001 pdata[i] = __atomic_load_n(
2005 * If key index is 0, do not compare key,
2006 * as it is checking the dummy slot
2010 key_slot->key, keys[i], h)) {
2015 positions[i] = key_idx - 1;
2018 prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
2021 while (sec_hitmask[i]) {
2022 uint32_t hit_index =
2023 __builtin_ctzl(sec_hitmask[i])
2027 &secondary_bkt[i]->key_idx[hit_index],
2029 const struct rte_hash_key *key_slot =
2030 (const struct rte_hash_key *)(
2031 (const char *)h->key_store +
2032 key_idx * h->key_entry_size);
2034 if (key_idx != EMPTY_SLOT)
2035 pdata[i] = __atomic_load_n(
2039 * If key index is 0, do not compare key,
2040 * as it is checking the dummy slot
2045 key_slot->key, keys[i], h)) {
2050 positions[i] = key_idx - 1;
2053 sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
2059 /* all found, do not need to go through ext bkt */
2060 if (hits == ((1ULL << num_keys) - 1)) {
2061 if (hit_mask != NULL)
2065 /* need to check ext buckets for match */
2066 if (h->ext_table_support) {
2067 for (i = 0; i < num_keys; i++) {
2068 if ((hits & (1ULL << i)) != 0)
2070 next_bkt = secondary_bkt[i]->next;
2071 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
2073 ret = search_one_bucket_lf(h,
2077 ret = search_one_bucket_lf(h,
2088 /* The loads of sig_current in compare_signatures
2089 * should not move below the load from tbl_chng_cnt.
2091 __atomic_thread_fence(__ATOMIC_ACQUIRE);
2092 /* Re-read the table change counter to check if the
2093 * table has changed during search. If yes, re-do
2095 * This load should not get hoisted. The load
2096 * acquires on cnt_b, primary key index and secondary
2097 * key index will make sure that it does not get
2100 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
/* Retry whole batch if the writer displaced entries mid-search. */
2102 } while (cnt_b != cnt_a);
2104 if (hit_mask != NULL)
2109 __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
2110 int32_t num_keys, int32_t *positions,
2111 uint64_t *hit_mask, void *data[])
2113 if (h->readwrite_concur_lf_support)
2114 __rte_hash_lookup_bulk_lf(h, keys, num_keys, positions,
2117 __rte_hash_lookup_bulk_l(h, keys, num_keys, positions,
/* Public API: bulk lookup filling positions[] only (no data pointers,
 * no hit mask). Input validation bounds the batch at
 * RTE_HASH_LOOKUP_BULK_MAX; the return statement is elided here. */
2122 rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
2123 uint32_t num_keys, int32_t *positions)
2125 RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
2126 (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2127 (positions == NULL)), -EINVAL);
2129 __rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL);
/* Public API: bulk lookup returning data pointers and a hit bitmask;
 * returns the number of hits (popcount of *hit_mask).
 * NOTE(review): positions[] is a VLA — safe only because num_keys is
 * validated to be <= RTE_HASH_LOOKUP_BULK_MAX just above. */
2134 rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
2135 uint32_t num_keys, uint64_t *hit_mask, void *data[])
2137 RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
2138 (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2139 (hit_mask == NULL)), -EINVAL);
2141 int32_t positions[num_keys];
2143 __rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);
2145 /* Return number of hits */
2146 return __builtin_popcountl(*hit_mask);
/* Public API: iterate over all stored entries. *next is the iterator
 * cursor, a flat slot index spanning first the main table
 * (num_buckets * RTE_HASH_BUCKET_ENTRIES slots) and then, when the ext
 * table is enabled, the same number of extendable-bucket slots.
 * Returns the entry's position and sets *key/*data; the "iteration
 * finished" return and the *next increments are on elided lines. */
2150 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
2152 uint32_t bucket_idx, idx, position;
2153 struct rte_hash_key *next_key;
2155 RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
2157 const uint32_t total_entries_main = h->num_buckets *
2158 RTE_HASH_BUCKET_ENTRIES;
/* << 1: main-table slots plus an equal number of ext-table slots. */
2159 const uint32_t total_entries = total_entries_main << 1;
2161 /* Out of bounds of all buckets (both main table and ext table) */
2162 if (*next >= total_entries_main)
2165 /* Calculate bucket and index of current iterator */
2166 bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
2167 idx = *next % RTE_HASH_BUCKET_ENTRIES;
2169 /* If current position is empty, go to the next one */
/* Acquire load: pairs with the writer's release store of key_idx so a
 * concurrent lock-free writer's insert is seen consistently. */
2170 while ((position = __atomic_load_n(&h->buckets[bucket_idx].key_idx[idx],
2171 __ATOMIC_ACQUIRE)) == EMPTY_SLOT) {
2174 if (*next == total_entries_main)
2176 bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
2177 idx = *next % RTE_HASH_BUCKET_ENTRIES;
2180 __hash_rw_reader_lock(h);
2181 next_key = (struct rte_hash_key *) ((char *)h->key_store +
2182 position * h->key_entry_size);
2183 /* Return key and data */
2184 *key = next_key->key;
2185 *data = next_key->pdata;
2187 __hash_rw_reader_unlock(h);
2189 /* Increment iterator */
/* position is a key-store index; -1 converts to the public position. */
2192 return position - 1;
2194 /* Begin to iterate extendable buckets */
2196 /* Out of total bound or if ext bucket feature is not enabled */
2197 if (*next >= total_entries || !h->ext_table_support)
2200 bucket_idx = (*next - total_entries_main) / RTE_HASH_BUCKET_ENTRIES;
2201 idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
/* NOTE(review): unlike the main-table scan above, this key_idx read is
 * a plain (non-atomic) load — verify whether lock-free readers need an
 * __atomic_load_n(..., __ATOMIC_ACQUIRE) here as well. */
2203 while ((position = h->buckets_ext[bucket_idx].key_idx[idx]) == EMPTY_SLOT) {
2205 if (*next == total_entries)
2207 bucket_idx = (*next - total_entries_main) /
2208 RTE_HASH_BUCKET_ENTRIES;
2209 idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
2211 __hash_rw_reader_lock(h);
2212 next_key = (struct rte_hash_key *) ((char *)h->key_store +
2213 position * h->key_entry_size);
2214 /* Return key and data */
2215 *key = next_key->key;
2216 *data = next_key->pdata;
2218 __hash_rw_reader_unlock(h);
2220 /* Increment iterator */
2222 return position - 1;