hash: add extendable bucket feature
[dpdk.git] / lib / librte_hash / rte_cuckoo_hash.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <string.h>
6 #include <stdint.h>
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdarg.h>
10 #include <sys/queue.h>
11
12 #include <rte_common.h>
13 #include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
14 #include <rte_log.h>
15 #include <rte_memcpy.h>
16 #include <rte_prefetch.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_malloc.h>
19 #include <rte_eal.h>
20 #include <rte_eal_memconfig.h>
21 #include <rte_per_lcore.h>
22 #include <rte_errno.h>
23 #include <rte_string_fns.h>
24 #include <rte_cpuflags.h>
25 #include <rte_rwlock.h>
26 #include <rte_spinlock.h>
27 #include <rte_ring.h>
28 #include <rte_compat.h>
29 #include <rte_pause.h>
30
31 #include "rte_hash.h"
32 #include "rte_cuckoo_hash.h"
33
34 #define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET)                            \
35         for (CURRENT_BKT = START_BUCKET;                                      \
36                 CURRENT_BKT != NULL;                                          \
37                 CURRENT_BKT = CURRENT_BKT->next)
38
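/*
 * Illustrative usage sketch (editorial, not part of the original file):
 * the macro walks a primary/secondary bucket and every extendable
 * bucket chained behind it through the ->next pointer.
 *
 *	struct rte_hash_bucket *cur_bkt;
 *	FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
 *		... inspect cur_bkt->key_idx[] / cur_bkt->sig_current[] ...
 *	}
 */
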
39 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
40
41 static struct rte_tailq_elem rte_hash_tailq = {
42         .name = "RTE_HASH",
43 };
44 EAL_REGISTER_TAILQ(rte_hash_tailq)
45
46 struct rte_hash *
47 rte_hash_find_existing(const char *name)
48 {
49         struct rte_hash *h = NULL;
50         struct rte_tailq_entry *te;
51         struct rte_hash_list *hash_list;
52
53         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
54
55         rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
56         TAILQ_FOREACH(te, hash_list, next) {
57                 h = (struct rte_hash *) te->data;
58                 if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
59                         break;
60         }
61         rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
62
63         if (te == NULL) {
64                 rte_errno = ENOENT;
65                 return NULL;
66         }
67         return h;
68 }
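
/*
 * Example (minimal sketch, not from the original source): a process can
 * attach to a table created elsewhere by name. "flow_table" is a
 * hypothetical name chosen for the example.
 *
 *	struct rte_hash *h = rte_hash_find_existing("flow_table");
 *	if (h == NULL && rte_errno == ENOENT)
 *		;	// no table of that name exists (yet)
 */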
69
70 static inline struct rte_hash_bucket *
71 rte_hash_get_last_bkt(struct rte_hash_bucket *lst_bkt)
72 {
73         while (lst_bkt->next != NULL)
74                 lst_bkt = lst_bkt->next;
75         return lst_bkt;
76 }
77
78 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
79 {
80         h->cmp_jump_table_idx = KEY_CUSTOM;
81         h->rte_hash_custom_cmp_eq = func;
82 }
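
/*
 * Example (illustrative sketch): overriding the key comparison. The
 * callback must follow the rte_hash_cmp_eq_t contract and return 0 on
 * match, non-zero otherwise; flow_key_cmp below is hypothetical.
 *
 *	static int
 *	flow_key_cmp(const void *key1, const void *key2, size_t key_len)
 *	{
 *		return memcmp(key1, key2, key_len);	// 0 == equal
 *	}
 *
 *	rte_hash_set_cmp_func(h, flow_key_cmp);
 *
 * Note this sets cmp_jump_table_idx to KEY_CUSTOM, so the optimized
 * fixed-length compare paths are bypassed.
 */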
83
84 static inline int
85 rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
86 {
87         if (h->cmp_jump_table_idx == KEY_CUSTOM)
88                 return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len);
89         else
90                 return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
91 }
92
93 struct rte_hash *
94 rte_hash_create(const struct rte_hash_parameters *params)
95 {
96         struct rte_hash *h = NULL;
97         struct rte_tailq_entry *te = NULL;
98         struct rte_hash_list *hash_list;
99         struct rte_ring *r = NULL;
100         struct rte_ring *r_ext = NULL;
101         char hash_name[RTE_HASH_NAMESIZE];
102         void *k = NULL;
103         void *buckets = NULL;
104         void *buckets_ext = NULL;
105         char ring_name[RTE_RING_NAMESIZE];
106         char ext_ring_name[RTE_RING_NAMESIZE];
107         unsigned num_key_slots;
108         unsigned i;
109         unsigned int hw_trans_mem_support = 0, multi_writer_support = 0;
110         unsigned int ext_table_support = 0;
111         unsigned int readwrite_concur_support = 0;
112
113         rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
114
115         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
116
117         if (params == NULL) {
118                 RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
119                 return NULL;
120         }
121
122         /* Check for valid parameters */
123         if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
124                         (params->entries < RTE_HASH_BUCKET_ENTRIES) ||
125                         (params->key_len == 0)) {
126                 rte_errno = EINVAL;
127                 RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
128                 return NULL;
129         }
130
131         /* Check extra flags field to check extra options. */
132         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
133                 hw_trans_mem_support = 1;
134
135         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD)
136                 multi_writer_support = 1;
137
138         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
139                 readwrite_concur_support = 1;
140                 multi_writer_support = 1;
141         }
142
143         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
144                 ext_table_support = 1;
145
146         /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
147         if (multi_writer_support)
148                 /*
149                  * Increase number of slots by total number of indices
150                  * that can be stored in the lcore caches
151                  * except for the first cache
152                  */
153                 num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
154                                         (LCORE_CACHE_SIZE - 1) + 1;
155         else
156                 num_key_slots = params->entries + 1;
157
158         snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
159         /* Create ring (Dummy slot index is not enqueued) */
160         r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
161                         params->socket_id, 0);
162         if (r == NULL) {
163                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
164                 goto err;
165         }
166
167         const uint32_t num_buckets = rte_align32pow2(params->entries) /
168                                                 RTE_HASH_BUCKET_ENTRIES;
169
170         /* Create ring for extendable buckets. */
171         if (ext_table_support) {
172                 snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
173                                                                 params->name);
174                 r_ext = rte_ring_create(ext_ring_name,
175                                 rte_align32pow2(num_buckets + 1),
176                                 params->socket_id, 0);
177
178                 if (r_ext == NULL) {
179                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
180                                                                 "failed\n");
181                         goto err;
182                 }
183         }
184
185         snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
186
187         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
188
189         /* guarantee there's no existing entry with this name; this is
190          * normally already ensured by the ring creation above */
191         TAILQ_FOREACH(te, hash_list, next) {
192                 h = (struct rte_hash *) te->data;
193                 if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
194                         break;
195         }
196         h = NULL;
197         if (te != NULL) {
198                 rte_errno = EEXIST;
199                 te = NULL;
200                 goto err_unlock;
201         }
202
203         te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
204         if (te == NULL) {
205                 RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
206                 goto err_unlock;
207         }
208
209         h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
210                                         RTE_CACHE_LINE_SIZE, params->socket_id);
211
212         if (h == NULL) {
213                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
214                 goto err_unlock;
215         }
216
217         buckets = rte_zmalloc_socket(NULL,
218                                 num_buckets * sizeof(struct rte_hash_bucket),
219                                 RTE_CACHE_LINE_SIZE, params->socket_id);
220
221         if (buckets == NULL) {
222                 RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
223                 goto err_unlock;
224         }
225
226         /* Allocate same number of extendable buckets */
227         if (ext_table_support) {
228                 buckets_ext = rte_zmalloc_socket(NULL,
229                                 num_buckets * sizeof(struct rte_hash_bucket),
230                                 RTE_CACHE_LINE_SIZE, params->socket_id);
231                 if (buckets_ext == NULL) {
232                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
233                                                         "failed\n");
234                         goto err_unlock;
235                 }
236                 /* Populate ext bkt ring. We reserve 0 similar to the
237                  * key-data slot, just in case in future we want to
238                  * use bucket index for the linked list and 0 means NULL
239                  * for next bucket
240                  */
241                 for (i = 1; i <= num_buckets; i++)
242                         rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
243         }
244
245         const uint32_t key_entry_size = sizeof(struct rte_hash_key) + params->key_len;
246         const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
247
248         k = rte_zmalloc_socket(NULL, key_tbl_size,
249                         RTE_CACHE_LINE_SIZE, params->socket_id);
250
251         if (k == NULL) {
252                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
253                 goto err_unlock;
254         }
255
256 /*
257  * On x86 or ARM64, select an appropriate compare function, which may
258  * use architecture-specific intrinsics; otherwise use memcmp
259  */
260 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
261         /* Select function to compare keys */
262         switch (params->key_len) {
263         case 16:
264                 h->cmp_jump_table_idx = KEY_16_BYTES;
265                 break;
266         case 32:
267                 h->cmp_jump_table_idx = KEY_32_BYTES;
268                 break;
269         case 48:
270                 h->cmp_jump_table_idx = KEY_48_BYTES;
271                 break;
272         case 64:
273                 h->cmp_jump_table_idx = KEY_64_BYTES;
274                 break;
275         case 80:
276                 h->cmp_jump_table_idx = KEY_80_BYTES;
277                 break;
278         case 96:
279                 h->cmp_jump_table_idx = KEY_96_BYTES;
280                 break;
281         case 112:
282                 h->cmp_jump_table_idx = KEY_112_BYTES;
283                 break;
284         case 128:
285                 h->cmp_jump_table_idx = KEY_128_BYTES;
286                 break;
287         default:
288                 /* If key is not multiple of 16, use generic memcmp */
289                 h->cmp_jump_table_idx = KEY_OTHER_BYTES;
290         }
291 #else
292         h->cmp_jump_table_idx = KEY_OTHER_BYTES;
293 #endif
294
295         if (multi_writer_support) {
296                 h->local_free_slots = rte_zmalloc_socket(NULL,
297                                 sizeof(struct lcore_cache) * RTE_MAX_LCORE,
298                                 RTE_CACHE_LINE_SIZE, params->socket_id);
299                 if (h->local_free_slots == NULL) {
300                         RTE_LOG(ERR, HASH, "local free slots memory allocation failed\n");
301                         goto err_unlock;
302                 }
303         }
300
301         /* Default hash function */
302 #if defined(RTE_ARCH_X86)
303         default_hash_func = (rte_hash_function)rte_hash_crc;
304 #elif defined(RTE_ARCH_ARM64)
305         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
306                 default_hash_func = (rte_hash_function)rte_hash_crc;
307 #endif
308         /* Setup hash context */
309         snprintf(h->name, sizeof(h->name), "%s", params->name);
310         h->entries = params->entries;
311         h->key_len = params->key_len;
312         h->key_entry_size = key_entry_size;
313         h->hash_func_init_val = params->hash_func_init_val;
314
315         h->num_buckets = num_buckets;
316         h->bucket_bitmask = h->num_buckets - 1;
317         h->buckets = buckets;
318         h->buckets_ext = buckets_ext;
319         h->free_ext_bkts = r_ext;
320         h->hash_func = (params->hash_func == NULL) ?
321                 default_hash_func : params->hash_func;
322         h->key_store = k;
323         h->free_slots = r;
324         h->hw_trans_mem_support = hw_trans_mem_support;
325         h->multi_writer_support = multi_writer_support;
326         h->readwrite_concur_support = readwrite_concur_support;
327         h->ext_table_support = ext_table_support;
328
329 #if defined(RTE_ARCH_X86)
330         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
331                 h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2;
332         else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
333                 h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
334         else
335 #endif
336                 h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
337
338         /* Allocate the read-write lock used to synchronize writers (and,
339          * when read-write concurrency is enabled, readers).
340          */
341         if (h->multi_writer_support) {
342                 h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
343                                                 RTE_CACHE_LINE_SIZE);
344                 if (h->readwrite_lock == NULL)
345                         goto err_unlock;
346
347                 rte_rwlock_init(h->readwrite_lock);
348         }
349
350         /* Populate free slots ring. Entry zero is reserved for key misses. */
351         for (i = 1; i < num_key_slots; i++)
352                 rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
353
354         te->data = (void *) h;
355         TAILQ_INSERT_TAIL(hash_list, te, next);
356         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
357
358         return h;
359 err_unlock:
360         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
361 err:
362         rte_ring_free(r);
363         rte_ring_free(r_ext);
364         rte_free(te);
365         rte_free(h);
366         rte_free(buckets);
367         rte_free(buckets_ext);
368         rte_free(k);
369         return NULL;
370 }
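
/*
 * Example (minimal sketch, assuming a hypothetical flow_key type):
 * creating a table with the extendable bucket feature added by this
 * patch. With RTE_HASH_EXTRA_FLAGS_EXT_TABLE set, inserts that fail
 * cuckoo displacement fall back to linked extendable buckets, so the
 * table can hold the full `entries` count of keys.
 *
 *	struct rte_hash_parameters params = {
 *		.name = "flow_table",
 *		.entries = 1024,
 *		.key_len = sizeof(struct flow_key),
 *		.hash_func = rte_jhash,
 *		.hash_func_init_val = 0,
 *		.socket_id = rte_socket_id(),
 *		.extra_flag = RTE_HASH_EXTRA_FLAGS_EXT_TABLE,
 *	};
 *	struct rte_hash *h = rte_hash_create(&params);
 *	if (h == NULL)
 *		printf("create failed: %d\n", rte_errno);
 */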
371
372 void
373 rte_hash_free(struct rte_hash *h)
374 {
375         struct rte_tailq_entry *te;
376         struct rte_hash_list *hash_list;
377
378         if (h == NULL)
379                 return;
380
381         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
382
383         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
384
385         /* find out tailq entry */
386         TAILQ_FOREACH(te, hash_list, next) {
387                 if (te->data == (void *) h)
388                         break;
389         }
390
391         if (te == NULL) {
392                 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
393                 return;
394         }
395
396         TAILQ_REMOVE(hash_list, te, next);
397
398         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
399
400         if (h->multi_writer_support) {
401                 rte_free(h->local_free_slots);
402                 rte_free(h->readwrite_lock);
403         }
404         rte_ring_free(h->free_slots);
405         rte_ring_free(h->free_ext_bkts);
406         rte_free(h->key_store);
407         rte_free(h->buckets);
408         rte_free(h->buckets_ext);
409         rte_free(h);
410         rte_free(te);
411 }
412
413 hash_sig_t
414 rte_hash_hash(const struct rte_hash *h, const void *key)
415 {
416         /* Calculate the hash of the key */
417         return h->hash_func(key, h->key_len, h->hash_func_init_val);
418 }
419
420 /* Calc the secondary hash value from the primary hash value of a given key */
421 static inline hash_sig_t
422 rte_hash_secondary_hash(const hash_sig_t primary_hash)
423 {
424         static const unsigned all_bits_shift = 12;
425         static const unsigned alt_bits_xor = 0x5bd1e995;
426
427         uint32_t tag = primary_hash >> all_bits_shift;
428
429         return primary_hash ^ ((tag + 1) * alt_bits_xor);
430 }
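
/*
 * Illustrative sketch of how both candidate buckets derive from one
 * hash value (num_buckets is a power of two, so bucket_bitmask keeps
 * the low bits):
 *
 *	hash_sig_t sig = rte_hash_hash(h, key);
 *	uint32_t prim_idx = sig & h->bucket_bitmask;
 *	uint32_t sec_idx = rte_hash_secondary_hash(sig) & h->bucket_bitmask;
 *
 * A key lives in its primary bucket, its secondary bucket, or (with
 * ext table support) in a bucket chained off the secondary one.
 */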
431
432 int32_t
433 rte_hash_count(const struct rte_hash *h)
434 {
435         uint32_t tot_ring_cnt, cached_cnt = 0;
436         uint32_t i, ret;
437
438         if (h == NULL)
439                 return -EINVAL;
440
441         if (h->multi_writer_support) {
442                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
443                                         (LCORE_CACHE_SIZE - 1);
444                 for (i = 0; i < RTE_MAX_LCORE; i++)
445                         cached_cnt += h->local_free_slots[i].len;
446
447                 ret = tot_ring_cnt - rte_ring_count(h->free_slots) -
448                                                                 cached_cnt;
449         } else {
450                 tot_ring_cnt = h->entries;
451                 ret = tot_ring_cnt - rte_ring_count(h->free_slots);
452         }
453         return ret;
454 }
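
/*
 * Worked example (illustrative): with entries = 1024, multi-writer
 * support off and 1000 indices left in free_slots, rte_hash_count()
 * reports 1024 - 1000 = 24 keys currently stored.
 */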
455
456 /* Read write locks implemented using rte_rwlock */
457 static inline void
458 __hash_rw_writer_lock(const struct rte_hash *h)
459 {
460         if (h->multi_writer_support && h->hw_trans_mem_support)
461                 rte_rwlock_write_lock_tm(h->readwrite_lock);
462         else if (h->multi_writer_support)
463                 rte_rwlock_write_lock(h->readwrite_lock);
464 }
465
466 static inline void
467 __hash_rw_reader_lock(const struct rte_hash *h)
468 {
469         if (h->readwrite_concur_support && h->hw_trans_mem_support)
470                 rte_rwlock_read_lock_tm(h->readwrite_lock);
471         else if (h->readwrite_concur_support)
472                 rte_rwlock_read_lock(h->readwrite_lock);
473 }
474
475 static inline void
476 __hash_rw_writer_unlock(const struct rte_hash *h)
477 {
478         if (h->multi_writer_support && h->hw_trans_mem_support)
479                 rte_rwlock_write_unlock_tm(h->readwrite_lock);
480         else if (h->multi_writer_support)
481                 rte_rwlock_write_unlock(h->readwrite_lock);
482 }
483
484 static inline void
485 __hash_rw_reader_unlock(const struct rte_hash *h)
486 {
487         if (h->readwrite_concur_support && h->hw_trans_mem_support)
488                 rte_rwlock_read_unlock_tm(h->readwrite_lock);
489         else if (h->readwrite_concur_support)
490                 rte_rwlock_read_unlock(h->readwrite_lock);
491 }
492
493 void
494 rte_hash_reset(struct rte_hash *h)
495 {
496         void *ptr;
497         uint32_t tot_ring_cnt, i;
498
499         if (h == NULL)
500                 return;
501
502         __hash_rw_writer_lock(h);
503         memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
504         memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
505
506         /* clear the free ring */
507         while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
508                 rte_pause();
509
510         /* clear free extendable bucket ring and memory */
511         if (h->ext_table_support) {
512                 memset(h->buckets_ext, 0, h->num_buckets *
513                                                 sizeof(struct rte_hash_bucket));
514                 while (rte_ring_dequeue(h->free_ext_bkts, &ptr) == 0)
515                         rte_pause();
516         }
517
518         /* Repopulate the free slots ring. Entry zero is reserved for key misses */
519         if (h->multi_writer_support)
520                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
521                                         (LCORE_CACHE_SIZE - 1);
522         else
523                 tot_ring_cnt = h->entries;
524
525         for (i = 1; i < tot_ring_cnt + 1; i++)
526                 rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
527
528         /* Repopulate the free ext bkt ring. */
529         if (h->ext_table_support) {
530                 for (i = 1; i <= h->num_buckets; i++)
531                         rte_ring_sp_enqueue(h->free_ext_bkts,
532                                                 (void *)((uintptr_t) i));
533         }
534
535         if (h->multi_writer_support) {
536                 /* Reset local caches per lcore */
537                 for (i = 0; i < RTE_MAX_LCORE; i++)
538                         h->local_free_slots[i].len = 0;
539         }
540         __hash_rw_writer_unlock(h);
541 }
542
543 /*
544  * Function called to enqueue an index back into the cache/ring,
545  * as the slot has not been used and can be reused in the
546  * next addition attempt.
547  */
548 static inline void
549 enqueue_slot_back(const struct rte_hash *h,
550                 struct lcore_cache *cached_free_slots,
551                 void *slot_id)
552 {
553         if (h->multi_writer_support) {
554                 cached_free_slots->objs[cached_free_slots->len] = slot_id;
555                 cached_free_slots->len++;
556         } else
557                 rte_ring_sp_enqueue(h->free_slots, slot_id);
558 }
559
560 /* Search a key from bucket and update its data */
561 static inline int32_t
562 search_and_update(const struct rte_hash *h, void *data, const void *key,
563         struct rte_hash_bucket *bkt, hash_sig_t sig, hash_sig_t alt_hash)
564 {
565         int i;
566         struct rte_hash_key *k, *keys = h->key_store;
567
568         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
569                 if (bkt->sig_current[i] == sig &&
570                                 bkt->sig_alt[i] == alt_hash) {
571                         k = (struct rte_hash_key *) ((char *)keys +
572                                         bkt->key_idx[i] * h->key_entry_size);
573                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
574                                 /* Update data */
575                                 k->pdata = data;
576                                 /*
577                                  * Return index where key is stored,
578                                  * subtracting the first dummy index
579                                  */
580                                 return bkt->key_idx[i] - 1;
581                         }
582                 }
583         }
584         return -1;
585 }
586
587 /* Only tries to insert at one bucket (@prim_bkt) without trying to push
588  * buckets around.
589  * return 1 if a matching existing key is found, return 0 on success,
590  * return -1 if there is no empty entry.
591  */
592 static inline int32_t
593 rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
594                 struct rte_hash_bucket *prim_bkt,
595                 struct rte_hash_bucket *sec_bkt,
596                 const struct rte_hash_key *key, void *data,
597                 hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx,
598                 int32_t *ret_val)
599 {
600         unsigned int i;
601         struct rte_hash_bucket *cur_bkt;
602         int32_t ret;
603
604         __hash_rw_writer_lock(h);
605         /* Check if key was inserted after last check but before this
606          * protected region, to avoid inserting a duplicated key.
607          */
608         ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
609         if (ret != -1) {
610                 __hash_rw_writer_unlock(h);
611                 *ret_val = ret;
612                 return 1;
613         }
614
615         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
616                 ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
617                 if (ret != -1) {
618                         __hash_rw_writer_unlock(h);
619                         *ret_val = ret;
620                         return 1;
621                 }
622         }
623
624         /* Insert new entry if there is room in the primary
625          * bucket.
626          */
627         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
628                 /* Check if slot is available */
629                 if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
630                         prim_bkt->sig_current[i] = sig;
631                         prim_bkt->sig_alt[i] = alt_hash;
632                         prim_bkt->key_idx[i] = new_idx;
633                         break;
634                 }
635         }
636         __hash_rw_writer_unlock(h);
637
638         if (i != RTE_HASH_BUCKET_ENTRIES)
639                 return 0;
640
641         /* no empty entry */
642         return -1;
643 }
644
645 /* Shift buckets along provided cuckoo_path (@leaf and @leaf_slot) and fill
646  * the path head with new entry (sig, alt_hash, new_idx)
647  * return 1 if a matching key is found, return -1 if the cuckoo path is
648  * invalidated and the insert fails, return 0 on success.
649  */
650 static inline int
651 rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
652                         struct rte_hash_bucket *bkt,
653                         struct rte_hash_bucket *alt_bkt,
654                         const struct rte_hash_key *key, void *data,
655                         struct queue_node *leaf, uint32_t leaf_slot,
656                         hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx,
657                         int32_t *ret_val)
658 {
659         uint32_t prev_alt_bkt_idx;
660         struct rte_hash_bucket *cur_bkt;
661         struct queue_node *prev_node, *curr_node = leaf;
662         struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
663         uint32_t prev_slot, curr_slot = leaf_slot;
664         int32_t ret;
665
666         __hash_rw_writer_lock(h);
667
668         /* In case empty slot was gone before entering protected region */
669         if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) {
670                 __hash_rw_writer_unlock(h);
671                 return -1;
672         }
673
674         /* Check if key was inserted after last check but before this
675          * protected region.
676          */
677         ret = search_and_update(h, data, key, bkt, sig, alt_hash);
678         if (ret != -1) {
679                 __hash_rw_writer_unlock(h);
680                 *ret_val = ret;
681                 return 1;
682         }
683
684         FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
685                 ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
686                 if (ret != -1) {
687                         __hash_rw_writer_unlock(h);
688                         *ret_val = ret;
689                         return 1;
690                 }
691         }
692
693         while (likely(curr_node->prev != NULL)) {
694                 prev_node = curr_node->prev;
695                 prev_bkt = prev_node->bkt;
696                 prev_slot = curr_node->prev_slot;
697
698                 prev_alt_bkt_idx =
699                         prev_bkt->sig_alt[prev_slot] & h->bucket_bitmask;
700
701                 if (unlikely(&h->buckets[prev_alt_bkt_idx]
702                                 != curr_bkt)) {
703                         /* revert it to empty, otherwise duplicated keys */
704                         curr_bkt->key_idx[curr_slot] = EMPTY_SLOT;
705                         __hash_rw_writer_unlock(h);
706                         return -1;
707                 }
708
709                 /* Need to swap current/alt sig to allow later
710                  * Cuckoo insert to move elements back to its
711                  * primary bucket if available
712                  */
713                 curr_bkt->sig_alt[curr_slot] =
714                          prev_bkt->sig_current[prev_slot];
715                 curr_bkt->sig_current[curr_slot] =
716                         prev_bkt->sig_alt[prev_slot];
717                 curr_bkt->key_idx[curr_slot] =
718                         prev_bkt->key_idx[prev_slot];
719
720                 curr_slot = prev_slot;
721                 curr_node = prev_node;
722                 curr_bkt = curr_node->bkt;
723         }
724
725         curr_bkt->sig_current[curr_slot] = sig;
726         curr_bkt->sig_alt[curr_slot] = alt_hash;
727         curr_bkt->key_idx[curr_slot] = new_idx;
728
729         __hash_rw_writer_unlock(h);
730
731         return 0;
732
733 }
734
735 /*
736  * Make space for a new key, using BFS cuckoo search and multi-writer
737  * safe cuckoo displacement
738  */
739 static inline int
740 rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
741                         struct rte_hash_bucket *bkt,
742                         struct rte_hash_bucket *sec_bkt,
743                         const struct rte_hash_key *key, void *data,
744                         hash_sig_t sig, hash_sig_t alt_hash,
745                         uint32_t new_idx, int32_t *ret_val)
746 {
747         unsigned int i;
748         struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
749         struct queue_node *tail, *head;
750         struct rte_hash_bucket *curr_bkt, *alt_bkt;
751
752         tail = queue;
753         head = queue + 1;
754         tail->bkt = bkt;
755         tail->prev = NULL;
756         tail->prev_slot = -1;
757
758         /* Cuckoo bfs Search */
759         while (likely(tail != head && head <
760                                         queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
761                                         RTE_HASH_BUCKET_ENTRIES)) {
762                 curr_bkt = tail->bkt;
763                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
764                         if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
765                                 int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
766                                                 bkt, sec_bkt, key, data,
767                                                 tail, i, sig, alt_hash,
768                                                 new_idx, ret_val);
769                                 if (likely(ret != -1))
770                                         return ret;
771                         }
772
773                         /* Enqueue new node and keep prev node info */
774                         alt_bkt = &(h->buckets[curr_bkt->sig_alt[i]
775                                                     & h->bucket_bitmask]);
776                         head->bkt = alt_bkt;
777                         head->prev = tail;
778                         head->prev_slot = i;
779                         head++;
780                 }
781                 tail++;
782         }
783
784         return -ENOSPC;
785 }
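
/*
 * Note on the BFS bound above (illustrative): each expanded tail node
 * can push up to RTE_HASH_BUCKET_ENTRIES children, so the search stops
 * while head still has room for one full expansion:
 *
 *	head < queue + RTE_HASH_BFS_QUEUE_MAX_LEN - RTE_HASH_BUCKET_ENTRIES
 *
 * which guarantees the fixed-size queue[] can never overflow.
 */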
786
787 static inline int32_t
788 __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
789                                                 hash_sig_t sig, void *data)
790 {
791         hash_sig_t alt_hash;
792         uint32_t prim_bucket_idx, sec_bucket_idx;
793         struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
794         struct rte_hash_key *new_k, *keys = h->key_store;
795         void *slot_id = NULL;
796         void *ext_bkt_id = NULL;
797         uint32_t new_idx, bkt_id;
798         int ret;
799         unsigned n_slots;
800         unsigned lcore_id;
801         unsigned int i;
802         struct lcore_cache *cached_free_slots = NULL;
803         int32_t ret_val;
804         struct rte_hash_bucket *last;
805
806         prim_bucket_idx = sig & h->bucket_bitmask;
807         prim_bkt = &h->buckets[prim_bucket_idx];
808         rte_prefetch0(prim_bkt);
809
810         alt_hash = rte_hash_secondary_hash(sig);
811         sec_bucket_idx = alt_hash & h->bucket_bitmask;
812         sec_bkt = &h->buckets[sec_bucket_idx];
813         rte_prefetch0(sec_bkt);
814
815         /* Check if key is already inserted in primary location */
816         __hash_rw_writer_lock(h);
817         ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
818         if (ret != -1) {
819                 __hash_rw_writer_unlock(h);
820                 return ret;
821         }
822
823         /* Check if key is already inserted in secondary location */
824         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
825                 ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
826                 if (ret != -1) {
827                         __hash_rw_writer_unlock(h);
828                         return ret;
829                 }
830         }
831         __hash_rw_writer_unlock(h);
832
833         /* Did not find a match, so get a new slot for storing the new key */
834         if (h->multi_writer_support) {
835                 lcore_id = rte_lcore_id();
836                 cached_free_slots = &h->local_free_slots[lcore_id];
837                 /* Try to get a free slot from the local cache */
838                 if (cached_free_slots->len == 0) {
839                         /* Need to get another burst of free slots from global ring */
840                         n_slots = rte_ring_mc_dequeue_burst(h->free_slots,
841                                         cached_free_slots->objs,
842                                         LCORE_CACHE_SIZE, NULL);
843                         if (n_slots == 0) {
844                                 return -ENOSPC;
845                         }
846
847                         cached_free_slots->len += n_slots;
848                 }
849
850                 /* Get a free slot from the local cache */
851                 cached_free_slots->len--;
852                 slot_id = cached_free_slots->objs[cached_free_slots->len];
853         } else {
854                 if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
855                         return -ENOSPC;
856                 }
857         }
858
859         new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
860         new_idx = (uint32_t)((uintptr_t) slot_id);
861         /* Copy key */
862         rte_memcpy(new_k->key, key, h->key_len);
863         new_k->pdata = data;
864
866         /* Find an empty slot and insert */
867         ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
868                                         sig, alt_hash, new_idx, &ret_val);
869         if (ret == 0)
870                 return new_idx - 1;
871         else if (ret == 1) {
872                 enqueue_slot_back(h, cached_free_slots, slot_id);
873                 return ret_val;
874         }
875
876         /* Primary bucket full, need to make space for new entry */
877         ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
878                                         sig, alt_hash, new_idx, &ret_val);
879         if (ret == 0)
880                 return new_idx - 1;
881         else if (ret == 1) {
882                 enqueue_slot_back(h, cached_free_slots, slot_id);
883                 return ret_val;
884         }
885
886         /* Also search secondary bucket to get better occupancy */
887         ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
888                                         alt_hash, sig, new_idx, &ret_val);
889
890         if (ret == 0)
891                 return new_idx - 1;
892         else if (ret == 1) {
893                 enqueue_slot_back(h, cached_free_slots, slot_id);
894                 return ret_val;
895         }
896
897         /* if the ext table is not enabled, the insertion has failed */
898         if (!h->ext_table_support) {
899                 enqueue_slot_back(h, cached_free_slots, slot_id);
900                 return ret;
901         }
902
903         /* Now we need to go through the extendable buckets. The write lock
904          * is needed to protect all operations on the extendable buckets.
905          */
906         __hash_rw_writer_lock(h);
907         /* Check again for duplicates that may have been inserted before the lock */
908         ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
909         if (ret != -1) {
910                 enqueue_slot_back(h, cached_free_slots, slot_id);
911                 goto failure;
912         }
913
914         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
915                 ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
916                 if (ret != -1) {
917                         enqueue_slot_back(h, cached_free_slots, slot_id);
918                         goto failure;
919                 }
920         }
921
922         /* Search sec and ext buckets to find an empty entry to insert. */
923         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
924                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
925                         /* Check if slot is available */
926                         if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
927                                 cur_bkt->sig_current[i] = alt_hash;
928                                 cur_bkt->sig_alt[i] = sig;
929                                 cur_bkt->key_idx[i] = new_idx;
930                                 __hash_rw_writer_unlock(h);
931                                 return new_idx - 1;
932                         }
933                 }
934         }
935
936         /* Failed to get an empty entry from extendable buckets. Link a new
937          * extendable bucket. We first get a free bucket from ring.
938          */
939         if (rte_ring_sc_dequeue(h->free_ext_bkts, &ext_bkt_id) != 0) {
940                 ret = -ENOSPC;
941                 goto failure;
942         }
943
944         bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
945         /* Use the first location of the new bucket */
946         (h->buckets_ext[bkt_id]).sig_current[0] = alt_hash;
947         (h->buckets_ext[bkt_id]).sig_alt[0] = sig;
948         (h->buckets_ext[bkt_id]).key_idx[0] = new_idx;
949         /* Link the new bucket to sec bucket linked list */
950         last = rte_hash_get_last_bkt(sec_bkt);
951         last->next = &h->buckets_ext[bkt_id];
952         __hash_rw_writer_unlock(h);
953         return new_idx - 1;
954
955 failure:
956         __hash_rw_writer_unlock(h);
957         return ret;
958
959 }
960
961 int32_t
962 rte_hash_add_key_with_hash(const struct rte_hash *h,
963                         const void *key, hash_sig_t sig)
964 {
965         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
966         return __rte_hash_add_key_with_hash(h, key, sig, 0);
967 }
968
969 int32_t
970 rte_hash_add_key(const struct rte_hash *h, const void *key)
971 {
972         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
973         return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0);
974 }
975
976 int
977 rte_hash_add_key_with_hash_data(const struct rte_hash *h,
978                         const void *key, hash_sig_t sig, void *data)
979 {
980         int ret;
981
982         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
983         ret = __rte_hash_add_key_with_hash(h, key, sig, data);
984         if (ret >= 0)
985                 return 0;
986         else
987                 return ret;
988 }
989
990 int
991 rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
992 {
993         int ret;
994
995         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
996
997         ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data);
998         if (ret >= 0)
999                 return 0;
1000         else
1001                 return ret;
1002 }
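
/*
 * Example (minimal sketch): storing and retrieving a user data pointer.
 * flow_key and flow_stats are hypothetical caller-side types.
 *
 *	struct flow_key k;
 *	struct flow_stats *stats;		// caller-owned object
 *
 *	if (rte_hash_add_key_data(h, &k, stats) < 0)
 *		;	// -ENOSPC: no free slot, even via ext buckets
 *
 *	void *p;
 *	if (rte_hash_lookup_data(h, &k, &p) >= 0)
 *		stats = p;			// same pointer returned
 */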
1003
1004 /* Search one bucket to find the match key */
1005 static inline int32_t
1006 search_one_bucket(const struct rte_hash *h, const void *key, hash_sig_t sig,
1007                         void **data, const struct rte_hash_bucket *bkt)
1008 {
1009         int i;
1010         struct rte_hash_key *k, *keys = h->key_store;
1011
1012         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1013                 if (bkt->sig_current[i] == sig &&
1014                                 bkt->key_idx[i] != EMPTY_SLOT) {
1015                         k = (struct rte_hash_key *) ((char *)keys +
1016                                         bkt->key_idx[i] * h->key_entry_size);
1017                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1018                                 if (data != NULL)
1019                                         *data = k->pdata;
1020                                 /*
1021                                  * Return index where key is stored,
1022                                  * subtracting the first dummy index
1023                                  */
1024                                 return bkt->key_idx[i] - 1;
1025                         }
1026                 }
1027         }
1028         return -1;
1029 }
1030
1031 static inline int32_t
1032 __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
1033                                         hash_sig_t sig, void **data)
1034 {
1035         uint32_t bucket_idx;
1036         hash_sig_t alt_hash;
1037         struct rte_hash_bucket *bkt, *cur_bkt;
1038         int ret;
1039
1040         bucket_idx = sig & h->bucket_bitmask;
1041         bkt = &h->buckets[bucket_idx];
1042
1043         __hash_rw_reader_lock(h);
1044
1045         /* Check if key is in primary location */
1046         ret = search_one_bucket(h, key, sig, data, bkt);
1047         if (ret != -1) {
1048                 __hash_rw_reader_unlock(h);
1049                 return ret;
1050         }
1051         /* Calculate secondary hash */
1052         alt_hash = rte_hash_secondary_hash(sig);
1053         bucket_idx = alt_hash & h->bucket_bitmask;
1054         bkt = &h->buckets[bucket_idx];
1055
1056         /* Check if key is in secondary location */
1057         FOR_EACH_BUCKET(cur_bkt, bkt) {
1058                 ret = search_one_bucket(h, key, alt_hash, data, cur_bkt);
1059                 if (ret != -1) {
1060                         __hash_rw_reader_unlock(h);
1061                         return ret;
1062                 }
1063         }
1064         __hash_rw_reader_unlock(h);
1065         return -ENOENT;
1066 }
1067
1068 int32_t
1069 rte_hash_lookup_with_hash(const struct rte_hash *h,
1070                         const void *key, hash_sig_t sig)
1071 {
1072         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1073         return __rte_hash_lookup_with_hash(h, key, sig, NULL);
1074 }
1075
1076 int32_t
1077 rte_hash_lookup(const struct rte_hash *h, const void *key)
1078 {
1079         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1080         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL);
1081 }
1082
1083 int
1084 rte_hash_lookup_with_hash_data(const struct rte_hash *h,
1085                         const void *key, hash_sig_t sig, void **data)
1086 {
1087         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1088         return __rte_hash_lookup_with_hash(h, key, sig, data);
1089 }
1090
1091 int
1092 rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
1093 {
1094         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1095         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
1096 }
1097
1098 static inline void
1099 remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
1100 {
1101         unsigned lcore_id, n_slots;
1102         struct lcore_cache *cached_free_slots;
1103
1104         bkt->sig_current[i] = NULL_SIGNATURE;
1105         bkt->sig_alt[i] = NULL_SIGNATURE;
1106         if (h->multi_writer_support) {
1107                 lcore_id = rte_lcore_id();
1108                 cached_free_slots = &h->local_free_slots[lcore_id];
1109                 /* Cache full, need to free it. */
1110                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1111                         /* Need to enqueue the free slots in global ring. */
1112                         n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1113                                                 cached_free_slots->objs,
1114                                                 LCORE_CACHE_SIZE, NULL);
1115                         cached_free_slots->len -= n_slots;
1116                 }
1117                 /* Put index of new free slot in cache. */
1118                 cached_free_slots->objs[cached_free_slots->len] =
1119                                 (void *)((uintptr_t)bkt->key_idx[i]);
1120                 cached_free_slots->len++;
1121         } else {
1122                 rte_ring_sp_enqueue(h->free_slots,
1123                                 (void *)((uintptr_t)bkt->key_idx[i]));
1124         }
1125 }
1126
1127 /* Compact the linked list by moving the key from the last entry in the
1128  * linked list into the empty slot.
1129  */
1130 static inline void
1131 __rte_hash_compact_ll(struct rte_hash_bucket *cur_bkt, int pos)
1132 {
1132         int i;
1133         struct rte_hash_bucket *last_bkt;
1134
1135         if (!cur_bkt->next)
1136                 return;
1137
1138         last_bkt = rte_hash_get_last_bkt(cur_bkt);
1139
1140         for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) {
1141                 if (last_bkt->key_idx[i] != EMPTY_SLOT) {
1142                         cur_bkt->key_idx[pos] = last_bkt->key_idx[i];
1143                         cur_bkt->sig_current[pos] = last_bkt->sig_current[i];
1144                         cur_bkt->sig_alt[pos] = last_bkt->sig_alt[i];
1145                         last_bkt->sig_current[i] = NULL_SIGNATURE;
1146                         last_bkt->sig_alt[i] = NULL_SIGNATURE;
1147                         last_bkt->key_idx[i] = EMPTY_SLOT;
1148                         return;
1149                 }
1150         }
1151 }
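
/*
 * Worked example (illustrative): with a chain B0 -> B1 -> B2, deleting
 * a key from slot `pos` of B0 moves the last occupied entry of B2 into
 * B0[pos]. Only the tail bucket ever becomes sparse, so an empty tail
 * can later be unlinked and returned to free_ext_bkts.
 */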
1152
1153 /* Search one bucket and remove the matched key */
1154 static inline int32_t
1155 search_and_remove(const struct rte_hash *h, const void *key,
1156                         struct rte_hash_bucket *bkt, hash_sig_t sig, int *pos)
1157 {
1158         struct rte_hash_key *k, *keys = h->key_store;
1159         unsigned int i;
1160         int32_t ret;
1161
1162         /* Check if key is in bucket */
1163         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1164                 if (bkt->sig_current[i] == sig &&
1165                                 bkt->key_idx[i] != EMPTY_SLOT) {
1166                         k = (struct rte_hash_key *) ((char *)keys +
1167                                         bkt->key_idx[i] * h->key_entry_size);
1168                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1169                                 remove_entry(h, bkt, i);
1170
1171                                 /* Return index where key is stored,
1172                                  * subtracting the first dummy index
1173                                  */
1174                                 ret = bkt->key_idx[i] - 1;
1175                                 bkt->key_idx[i] = EMPTY_SLOT;
1176                                 *pos = i;
1177                                 return ret;
1178                         }
1179                 }
1180         }
1181         return -1;
1182 }
1183
1184 static inline int32_t
1185 __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
1186                                                 hash_sig_t sig)
1187 {
1188         uint32_t bucket_idx;
1189         hash_sig_t alt_hash;
1190         struct rte_hash_bucket *prim_bkt, *sec_bkt, *prev_bkt, *last_bkt;
1191         struct rte_hash_bucket *cur_bkt;
1192         int pos;
1193         int32_t ret, i;
1194
1195         bucket_idx = sig & h->bucket_bitmask;
1196         prim_bkt = &h->buckets[bucket_idx];
1197
1198         __hash_rw_writer_lock(h);
1199         /* look for key in primary bucket */
1200         ret = search_and_remove(h, key, prim_bkt, sig, &pos);
1201         if (ret != -1) {
1202                 __rte_hash_compact_ll(prim_bkt, pos);
1203                 last_bkt = prim_bkt->next;
1204                 prev_bkt = prim_bkt;
1205                 goto return_bkt;
1206         }
1207
1208         /* Calculate secondary hash */
1209         alt_hash = rte_hash_secondary_hash(sig);
1210         bucket_idx = alt_hash & h->bucket_bitmask;
1211         sec_bkt = &h->buckets[bucket_idx];
1212
1213         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1214                 ret = search_and_remove(h, key, cur_bkt, alt_hash, &pos);
1215                 if (ret != -1) {
1216                         __rte_hash_compact_ll(cur_bkt, pos);
1217                         last_bkt = sec_bkt->next;
1218                         prev_bkt = sec_bkt;
1219                         goto return_bkt;
1220                 }
1221         }
1222
1223         __hash_rw_writer_unlock(h);
1224         return -ENOENT;
1225
1226 /* Search the last bucket to see if it is empty and can be recycled */
1227 return_bkt:
1228         if (!last_bkt) {
1229                 __hash_rw_writer_unlock(h);
1230                 return ret;
1231         }
1232         while (last_bkt->next) {
1233                 prev_bkt = last_bkt;
1234                 last_bkt = last_bkt->next;
1235         }
1236
1237         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1238                 if (last_bkt->key_idx[i] != EMPTY_SLOT)
1239                         break;
1240         }
1241         /* found an empty tail bucket: unlink and recycle it */
1242         if (i == RTE_HASH_BUCKET_ENTRIES) {
1243                 prev_bkt->next = last_bkt->next = NULL;
1244                 uint32_t index = last_bkt - h->buckets_ext + 1;
1245                 rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
1246         }
1247
1248         __hash_rw_writer_unlock(h);
1249         return ret;
1250 }
1251
1252 int32_t
1253 rte_hash_del_key_with_hash(const struct rte_hash *h,
1254                         const void *key, hash_sig_t sig)
1255 {
1256         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1257         return __rte_hash_del_key_with_hash(h, key, sig);
1258 }
1259
1260 int32_t
1261 rte_hash_del_key(const struct rte_hash *h, const void *key)
1262 {
1263         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1264         return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
1265 }
1266
1267 int
1268 rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
1269                                void **key)
1270 {
1271         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1272
1273         struct rte_hash_key *k, *keys = h->key_store;
1274         k = (struct rte_hash_key *) ((char *) keys + (position + 1) *
1275                                      h->key_entry_size);
1276         *key = k->key;
1277
1278         if (position !=
1279             __rte_hash_lookup_with_hash(h, *key, rte_hash_hash(h, *key),
1280                                         NULL)) {
1281                 return -ENOENT;
1282         }
1283
1284         return 0;
1285 }
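
/*
 * Example (minimal sketch): mapping a returned position back to the
 * stored key.
 *
 *	int32_t pos = rte_hash_lookup(h, &k);
 *	if (pos >= 0) {
 *		void *stored_key;
 *		if (rte_hash_get_key_with_position(h, pos, &stored_key) == 0)
 *			;	// stored_key points at the table's copy
 *	}
 */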
1286
1287 static inline void
1288 compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
1289                         const struct rte_hash_bucket *prim_bkt,
1290                         const struct rte_hash_bucket *sec_bkt,
1291                         hash_sig_t prim_hash, hash_sig_t sec_hash,
1292                         enum rte_hash_sig_compare_function sig_cmp_fn)
1293 {
1294         unsigned int i;
1295
1296         switch (sig_cmp_fn) {
1297 #ifdef RTE_MACHINE_CPUFLAG_AVX2
1298         case RTE_HASH_COMPARE_AVX2:
1299                 *prim_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
1300                                 _mm256_load_si256(
1301                                         (__m256i const *)prim_bkt->sig_current),
1302                                 _mm256_set1_epi32(prim_hash)));
1303                 *sec_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
1304                                 _mm256_load_si256(
1305                                         (__m256i const *)sec_bkt->sig_current),
1306                                 _mm256_set1_epi32(sec_hash)));
1307                 break;
1308 #endif
1309 #ifdef RTE_MACHINE_CPUFLAG_SSE2
1310         case RTE_HASH_COMPARE_SSE:
1311                 /* Compare the first 4 signatures in the bucket */
1312                 *prim_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
1313                                 _mm_load_si128(
1314                                         (__m128i const *)prim_bkt->sig_current),
1315                                 _mm_set1_epi32(prim_hash)));
1316                 *prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
1317                                 _mm_load_si128(
1318                                         (__m128i const *)&prim_bkt->sig_current[4]),
1319                                 _mm_set1_epi32(prim_hash)))) << 4;
1320                 /* Compare the first 4 signatures in the bucket */
1321                 *sec_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
1322                                 _mm_load_si128(
1323                                         (__m128i const *)sec_bkt->sig_current),
1324                                 _mm_set1_epi32(sec_hash)));
1325                 *sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
1326                                 _mm_load_si128(
1327                                         (__m128i const *)&sec_bkt->sig_current[4]),
1328                                 _mm_set1_epi32(sec_hash)))) << 4;
1329                 break;
1330 #endif
1331         default:
1332                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1333                         *prim_hash_matches |=
1334                                 ((prim_hash == prim_bkt->sig_current[i]) << i);
1335                         *sec_hash_matches |=
1336                                 ((sec_hash == sec_bkt->sig_current[i]) << i);
1337                 }
1338         }
1339
1340 }
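
/*
 * Worked example (illustrative): with RTE_HASH_BUCKET_ENTRIES == 8, a
 * returned hit mask of 0x12 (binary 00010010) means entries 1 and 4 of
 * the bucket carry a matching signature. Signatures can collide, so
 * the bulk lookup below still compares the full keys for each set bit
 * before declaring a hit.
 */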
1341
1342 #define PREFETCH_OFFSET 4
1343 static inline void
1344 __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
1345                         int32_t num_keys, int32_t *positions,
1346                         uint64_t *hit_mask, void *data[])
1347 {
1348         uint64_t hits = 0;
1349         int32_t i;
1350         int32_t ret;
1351         uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
1352         uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX];
1353         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1354         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1355         uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1356         uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1357         struct rte_hash_bucket *cur_bkt, *next_bkt;
1358
1359         /* Prefetch first keys */
1360         for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
1361                 rte_prefetch0(keys[i]);
1362
1363         /*
1364          * Prefetch rest of the keys, calculate primary and
1365          * secondary bucket and prefetch them
1366          */
1367         for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
1368                 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
1369
1370                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1371                 sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
1372
1373                 primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
1374                 secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];
1375
1376                 rte_prefetch0(primary_bkt[i]);
1377                 rte_prefetch0(secondary_bkt[i]);
1378         }
1379
1380         /* Calculate and prefetch rest of the buckets */
1381         for (; i < num_keys; i++) {
1382                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1383                 sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
1384
1385                 primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
1386                 secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];
1387
1388                 rte_prefetch0(primary_bkt[i]);
1389                 rte_prefetch0(secondary_bkt[i]);
1390         }
1391
1392         __hash_rw_reader_lock(h);
1393         /* Compare signatures and prefetch key slot of first hit */
1394         for (i = 0; i < num_keys; i++) {
1395                 compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1396                                 primary_bkt[i], secondary_bkt[i],
1397                                 prim_hash[i], sec_hash[i], h->sig_cmp_fn);
1398
1399                 if (prim_hitmask[i]) {
1400                         uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]);
1401                         uint32_t key_idx = primary_bkt[i]->key_idx[first_hit];
1402                         const struct rte_hash_key *key_slot =
1403                                 (const struct rte_hash_key *)(
1404                                 (const char *)h->key_store +
1405                                 key_idx * h->key_entry_size);
1406                         rte_prefetch0(key_slot);
1407                         continue;
1408                 }
1409
1410                 if (sec_hitmask[i]) {
1411                         uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]);
1412                         uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit];
1413                         const struct rte_hash_key *key_slot =
1414                                 (const struct rte_hash_key *)(
1415                                 (const char *)h->key_store +
1416                                 key_idx * h->key_entry_size);
1417                         rte_prefetch0(key_slot);
1418                 }
1419         }
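
        /*
         * Only the first hit of each key is prefetched here; further
         * candidate slots within a bucket are rare and are fetched on
         * demand by the key-comparison loop below.
         */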
1420
        /* Compare keys, walking each key's primary-bucket hits first */
1422         for (i = 0; i < num_keys; i++) {
1423                 positions[i] = -ENOENT;
1424                 while (prim_hitmask[i]) {
1425                         uint32_t hit_index = __builtin_ctzl(prim_hitmask[i]);
1426
1427                         uint32_t key_idx = primary_bkt[i]->key_idx[hit_index];
1428                         const struct rte_hash_key *key_slot =
1429                                 (const struct rte_hash_key *)(
1430                                 (const char *)h->key_store +
1431                                 key_idx * h->key_entry_size);
                        /*
                         * key_idx 0 is the dummy slot and never matches; the
                         * bitwise & (not &&) deliberately avoids a branch, and
                         * reading the dummy slot is harmless.
                         */
1436                         if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
1437                                 if (data != NULL)
1438                                         data[i] = key_slot->pdata;
1439
1440                                 hits |= 1ULL << i;
1441                                 positions[i] = key_idx - 1;
1442                                 goto next_key;
1443                         }
1444                         prim_hitmask[i] &= ~(1 << (hit_index));
1445                 }
1446
1447                 while (sec_hitmask[i]) {
1448                         uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]);
1449
1450                         uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index];
1451                         const struct rte_hash_key *key_slot =
1452                                 (const struct rte_hash_key *)(
1453                                 (const char *)h->key_store +
1454                                 key_idx * h->key_entry_size);
                        /*
                         * key_idx 0 is the dummy slot and never matches; the
                         * bitwise & (not &&) deliberately avoids a branch, and
                         * reading the dummy slot is harmless.
                         */
1460                         if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
1461                                 if (data != NULL)
1462                                         data[i] = key_slot->pdata;
1463
1464                                 hits |= 1ULL << i;
1465                                 positions[i] = key_idx - 1;
1466                                 goto next_key;
1467                         }
1468                         sec_hitmask[i] &= ~(1 << (hit_index));
1469                 }
1470
1471 next_key:
1472                 continue;
1473         }
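
        /*
         * Internal key indexes are offset by one because index 0 is the
         * dummy slot, hence positions[i] = key_idx - 1 above.
         */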
1474
        /*
         * All keys found, or no extendable buckets to search. The all-ones
         * mask is built with a right shift because 1ULL << num_keys is
         * undefined when num_keys == RTE_HASH_LOOKUP_BULK_MAX (64).
         */
        if ((hits == (~0ULL >> (64 - num_keys))) || !h->ext_table_support) {
1477                 if (hit_mask != NULL)
1478                         *hit_mask = hits;
1479                 __hash_rw_reader_unlock(h);
1480                 return;
1481         }
1482
        /* Search the extendable buckets for the keys not yet found */
1484         for (i = 0; i < num_keys; i++) {
1485                 if ((hits & (1ULL << i)) != 0)
1486                         continue;
1487                 next_bkt = secondary_bkt[i]->next;
1488                 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
1489                         if (data != NULL)
1490                                 ret = search_one_bucket(h, keys[i],
1491                                                 sec_hash[i], &data[i], cur_bkt);
1492                         else
1493                                 ret = search_one_bucket(h, keys[i],
1494                                                 sec_hash[i], NULL, cur_bkt);
1495                         if (ret != -1) {
1496                                 positions[i] = ret;
1497                                 hits |= 1ULL << i;
1498                                 break;
1499                         }
1500                 }
1501         }
1502
1503         __hash_rw_reader_unlock(h);
1504
1505         if (hit_mask != NULL)
1506                 *hit_mask = hits;
1507 }
1508
1509 int
1510 rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
1511                       uint32_t num_keys, int32_t *positions)
1512 {
1513         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
1514                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
1515                         (positions == NULL)), -EINVAL);
1516
1517         __rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL);
1518         return 0;
1519 }
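
/*
 * Usage sketch for the bulk lookup API. Illustrative only: the function and
 * variable names below are hypothetical and not part of the library.
 */
static __rte_unused void
example_lookup_bulk(const struct rte_hash *h, const void **keys,
                        uint32_t num_keys)
{
        int32_t positions[RTE_HASH_LOOKUP_BULK_MAX];
        uint32_t i;

        if (rte_hash_lookup_bulk(h, keys, num_keys, positions) != 0)
                return; /* invalid parameters */

        for (i = 0; i < num_keys; i++) {
                if (positions[i] >= 0) {
                        /* keys[i] found; positions[i] is its key index */
                } else {
                        /* positions[i] == -ENOENT: keys[i] is not stored */
                }
        }
}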
1520
1521 int
1522 rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
1523                       uint32_t num_keys, uint64_t *hit_mask, void *data[])
1524 {
1525         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
1526                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
1527                         (hit_mask == NULL)), -EINVAL);
1528
1529         int32_t positions[num_keys];
1530
1531         __rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);
1532
        /*
         * Return number of hits; use popcountll since hit_mask is 64-bit
         * even on targets where long is 32-bit
         */
        return __builtin_popcountll(*hit_mask);
1535 }
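
/*
 * Usage sketch for the data variant. Illustrative only: names below are
 * hypothetical. Bit i of the returned hit mask is set when data[i] holds
 * the value stored with keys[i].
 */
static __rte_unused void
example_lookup_bulk_data(const struct rte_hash *h, const void **keys,
                        uint32_t num_keys)
{
        void *data[RTE_HASH_LOOKUP_BULK_MAX];
        uint64_t hit_mask = 0;
        uint32_t i;

        if (rte_hash_lookup_bulk_data(h, keys, num_keys,
                                        &hit_mask, data) < 0)
                return; /* invalid parameters */

        for (i = 0; i < num_keys; i++) {
                if (hit_mask & (1ULL << i)) {
                        /* data[i] is the value stored with keys[i] */
                }
        }
}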
1536
1537 int32_t
1538 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
1539 {
1540         uint32_t bucket_idx, idx, position;
1541         struct rte_hash_key *next_key;
1542
        RETURN_IF_TRUE(((h == NULL) || (key == NULL) || (data == NULL) ||
                        (next == NULL)), -EINVAL);
1544
1545         const uint32_t total_entries_main = h->num_buckets *
1546                                                         RTE_HASH_BUCKET_ENTRIES;
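        /* The ext table holds as many buckets as the main table */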
1547         const uint32_t total_entries = total_entries_main << 1;
1548
        /* Past the end of the main table: continue in the ext table */
        if (*next >= total_entries_main)
1551                 goto extend_table;
1552
1553         /* Calculate bucket and index of current iterator */
1554         bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
1555         idx = *next % RTE_HASH_BUCKET_ENTRIES;
1556
        /*
         * Hold the reader lock while scanning so that a slot cannot be
         * freed between reading its key index and dereferencing the key
         */
        __hash_rw_reader_lock(h);
        /* If current position is empty, go to the next one */
        while ((position = h->buckets[bucket_idx].key_idx[idx]) == EMPTY_SLOT) {
                (*next)++;
                /* End of table */
                if (*next == total_entries_main) {
                        __hash_rw_reader_unlock(h);
                        goto extend_table;
                }
                bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
                idx = *next % RTE_HASH_BUCKET_ENTRIES;
        }

        next_key = (struct rte_hash_key *) ((char *)h->key_store +
                                position * h->key_entry_size);
1570         /* Return key and data */
1571         *key = next_key->key;
1572         *data = next_key->pdata;
1573
1574         __hash_rw_reader_unlock(h);
1575
1576         /* Increment iterator */
1577         (*next)++;
1578
1579         return position - 1;
1580
1581 /* Begin to iterate extendable buckets */
1582 extend_table:
        /* Out of total bounds, or the ext-bucket feature is not enabled */
1584         if (*next >= total_entries || !h->ext_table_support)
1585                 return -ENOENT;
1586
1587         bucket_idx = (*next - total_entries_main) / RTE_HASH_BUCKET_ENTRIES;
1588         idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
1589
        /* As above, hold the reader lock across the scan and the key read */
        __hash_rw_reader_lock(h);
        while ((position = h->buckets_ext[bucket_idx].key_idx[idx]) == EMPTY_SLOT) {
                (*next)++;
                if (*next == total_entries) {
                        __hash_rw_reader_unlock(h);
                        return -ENOENT;
                }
                bucket_idx = (*next - total_entries_main) /
                                                RTE_HASH_BUCKET_ENTRIES;
                idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
        }
        next_key = (struct rte_hash_key *) ((char *)h->key_store +
                                position * h->key_entry_size);
1600                                 position * h->key_entry_size);
1601         /* Return key and data */
1602         *key = next_key->key;
1603         *data = next_key->pdata;
1604
1605         __hash_rw_reader_unlock(h);
1606
1607         /* Increment iterator */
1608         (*next)++;
1609         return position - 1;
1610 }
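
/*
 * Usage sketch for the iterator. Illustrative only: names below are
 * hypothetical. Starting from next == 0, each call returns one occupied
 * entry until -ENOENT marks the end of the main and ext tables.
 */
static __rte_unused void
example_iterate_all(const struct rte_hash *h)
{
        const void *key;
        void *data;
        uint32_t next = 0;

        while (rte_hash_iterate(h, &key, &data, &next) >= 0) {
                /* key and data refer to one stored entry */
        }
}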