hash: separate lock-free and r/w lock lookup
[dpdk.git] / lib / librte_hash / rte_cuckoo_hash.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  * Copyright(c) 2018 Arm Limited
4  */
5
6 #include <string.h>
7 #include <stdint.h>
8 #include <errno.h>
9 #include <stdio.h>
10 #include <stdarg.h>
11 #include <sys/queue.h>
12
13 #include <rte_common.h>
14 #include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
15 #include <rte_log.h>
16 #include <rte_memcpy.h>
17 #include <rte_prefetch.h>
18 #include <rte_branch_prediction.h>
19 #include <rte_malloc.h>
20 #include <rte_eal.h>
21 #include <rte_eal_memconfig.h>
22 #include <rte_per_lcore.h>
23 #include <rte_errno.h>
24 #include <rte_string_fns.h>
25 #include <rte_cpuflags.h>
26 #include <rte_rwlock.h>
27 #include <rte_spinlock.h>
28 #include <rte_ring.h>
29 #include <rte_compat.h>
30
31 #include "rte_hash.h"
32 #include "rte_cuckoo_hash.h"
33
34 #define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET)                            \
35         for (CURRENT_BKT = START_BUCKET;                                      \
36                 CURRENT_BKT != NULL;                                          \
37                 CURRENT_BKT = CURRENT_BKT->next)
38
39 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
40
41 static struct rte_tailq_elem rte_hash_tailq = {
42         .name = "RTE_HASH",
43 };
44 EAL_REGISTER_TAILQ(rte_hash_tailq)
45
46 struct rte_hash *
47 rte_hash_find_existing(const char *name)
48 {
49         struct rte_hash *h = NULL;
50         struct rte_tailq_entry *te;
51         struct rte_hash_list *hash_list;
52
53         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
54
55         rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
56         TAILQ_FOREACH(te, hash_list, next) {
57                 h = (struct rte_hash *) te->data;
58                 if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
59                         break;
60         }
61         rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
62
63         if (te == NULL) {
64                 rte_errno = ENOENT;
65                 return NULL;
66         }
67         return h;
68 }
69
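/*
 * Usage sketch (editor's addition, not part of the library): attaching to a
 * table created elsewhere, e.g. from a secondary process. The table name
 * "example_flow_table" is an arbitrary example value.
 */
static struct rte_hash *
example_attach_flow_table(void)
{
        struct rte_hash *h = rte_hash_find_existing("example_flow_table");

        /* rte_errno is set to ENOENT when no table with that name exists */
        if (h == NULL && rte_errno == ENOENT)
                return NULL;
        return h;
}
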
70 static inline struct rte_hash_bucket *
71 rte_hash_get_last_bkt(struct rte_hash_bucket *lst_bkt)
72 {
73         while (lst_bkt->next != NULL)
74                 lst_bkt = lst_bkt->next;
75         return lst_bkt;
76 }
77
78 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
79 {
80         h->cmp_jump_table_idx = KEY_CUSTOM;
81         h->rte_hash_custom_cmp_eq = func;
82 }
83
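/*
 * Usage sketch (editor's addition, not part of the library): a custom key
 * comparison callback registered via rte_hash_set_cmp_func(). It must
 * follow memcmp() semantics, i.e. return 0 when the keys are equal. The
 * function name below is an arbitrary example value.
 */
static int
example_cmp_key(const void *key1, const void *key2, size_t key_len)
{
        /* plain byte-wise comparison; a real callback might e.g. mask out
         * fields that should not take part in the match
         */
        return memcmp(key1, key2, key_len);
}
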
84 static inline int
85 rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
86 {
87         if (h->cmp_jump_table_idx == KEY_CUSTOM)
88                 return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len);
89         else
90                 return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
91 }
92
93 /*
94  * We use the higher 16 bits of the hash as the signature value stored in
95  * the table. We use the lower bits for the primary bucket
96  * location. Then we XOR the primary bucket location and the signature
97  * to get the secondary bucket location. This is the same scheme as
98  * proposed in Bin Fan, et al's paper
99  * "MemC3: Compact and Concurrent MemCache with Dumber Caching and
100  * Smarter Hashing". The benefit of using
101  * XOR is that one can derive the alternative bucket location
102  * using only the current bucket location and the signature.
103  */
104 static inline uint16_t
105 get_short_sig(const hash_sig_t hash)
106 {
107         return hash >> 16;
108 }
109
110 static inline uint32_t
111 get_prim_bucket_index(const struct rte_hash *h, const hash_sig_t hash)
112 {
113         return hash & h->bucket_bitmask;
114 }
115
116 static inline uint32_t
117 get_alt_bucket_index(const struct rte_hash *h,
118                         uint32_t cur_bkt_idx, uint16_t sig)
119 {
120         return (cur_bkt_idx ^ sig) & h->bucket_bitmask;
121 }
122
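/*
 * Worked example (editor's addition, not part of the library): with 256
 * buckets (bucket_bitmask == 0xff) and hash == 0xabcd1234, the short
 * signature is 0xabcd, the primary bucket is 0xabcd1234 & 0xff == 0x34,
 * and the alternative bucket is (0x34 ^ 0xabcd) & 0xff == 0xf9. XOR-ing
 * again with the same signature recovers the primary bucket:
 * (0xf9 ^ 0xabcd) & 0xff == 0x34.
 */
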
123 struct rte_hash *
124 rte_hash_create(const struct rte_hash_parameters *params)
125 {
126         struct rte_hash *h = NULL;
127         struct rte_tailq_entry *te = NULL;
128         struct rte_hash_list *hash_list;
129         struct rte_ring *r = NULL;
130         struct rte_ring *r_ext = NULL;
131         char hash_name[RTE_HASH_NAMESIZE];
132         void *k = NULL;
133         void *buckets = NULL;
134         void *buckets_ext = NULL;
135         char ring_name[RTE_RING_NAMESIZE];
136         char ext_ring_name[RTE_RING_NAMESIZE];
137         unsigned num_key_slots;
138         unsigned i;
139         unsigned int hw_trans_mem_support = 0, use_local_cache = 0;
140         unsigned int ext_table_support = 0;
141         unsigned int readwrite_concur_support = 0;
142         unsigned int writer_takes_lock = 0;
143         unsigned int no_free_on_del = 0;
144         uint32_t *tbl_chng_cnt = NULL;
145         unsigned int readwrite_concur_lf_support = 0;
146
147         rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
148
149         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
150
151         if (params == NULL) {
152                 RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
153                 return NULL;
154         }
155
156         /* Check for valid parameters */
157         if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
158                         (params->entries < RTE_HASH_BUCKET_ENTRIES) ||
159                         (params->key_len == 0)) {
160                 rte_errno = EINVAL;
161                 RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
162                 return NULL;
163         }
164
165         /* Validate correct usage of extra options */
166         if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) &&
167             (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)) {
168                 rte_errno = EINVAL;
169                 RTE_LOG(ERR, HASH, "rte_hash_create: choose rw concurrency or "
170                         "rw concurrency lock free\n");
171                 return NULL;
172         }
173
174         if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&
175             (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)) {
176                 rte_errno = EINVAL;
177                 RTE_LOG(ERR, HASH, "rte_hash_create: extendable bucket "
178                         "feature not supported with rw concurrency "
179                         "lock free\n");
180                 return NULL;
181         }
182
183         /* Check extra flags field to check extra options. */
184         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
185                 hw_trans_mem_support = 1;
186
187         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
188                 use_local_cache = 1;
189                 writer_takes_lock = 1;
190         }
191
192         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
193                 readwrite_concur_support = 1;
194                 writer_takes_lock = 1;
195         }
196
197         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
198                 ext_table_support = 1;
199
200         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)
201                 no_free_on_del = 1;
202
203         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {
204                 readwrite_concur_lf_support = 1;
205                 /* Enable not freeing internal memory/index on delete */
206                 no_free_on_del = 1;
207         }
208
209         /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
210         if (use_local_cache)
211                 /*
212                  * Increase number of slots by total number of indices
213                  * that can be stored in the lcore caches
214                  * except for the first cache
215                  */
216                 num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
217                                         (LCORE_CACHE_SIZE - 1) + 1;
218         else
219                 num_key_slots = params->entries + 1;
220
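        /*
         * Worked example (editor's addition, not part of the library):
         * assuming RTE_MAX_LCORE == 128 and LCORE_CACHE_SIZE == 64, a table
         * created with 1024 entries and the local cache enabled gets
         * 1024 + 127 * 63 + 1 == 9026 key slots; without the cache it gets
         * 1024 + 1 == 1025.
         */
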
221         snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
222         /* Create ring (Dummy slot index is not enqueued) */
223         r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
224                         params->socket_id, 0);
225         if (r == NULL) {
226                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
227                 goto err;
228         }
229
230         const uint32_t num_buckets = rte_align32pow2(params->entries) /
231                                                 RTE_HASH_BUCKET_ENTRIES;
232
233         /* Create ring for extendable buckets. */
234         if (ext_table_support) {
235                 snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
236                                                                 params->name);
237                 r_ext = rte_ring_create(ext_ring_name,
238                                 rte_align32pow2(num_buckets + 1),
239                                 params->socket_id, 0);
240
241                 if (r_ext == NULL) {
242                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
243                                                                 "failed\n");
244                         goto err;
245                 }
246         }
247
248         snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
249
250         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
251
252         /* guarantee there's no existing entry with this name; this is
253          * normally already checked by the ring creation above */
254         TAILQ_FOREACH(te, hash_list, next) {
255                 h = (struct rte_hash *) te->data;
256                 if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
257                         break;
258         }
259         h = NULL;
260         if (te != NULL) {
261                 rte_errno = EEXIST;
262                 te = NULL;
263                 goto err_unlock;
264         }
265
266         te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
267         if (te == NULL) {
268                 RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
269                 goto err_unlock;
270         }
271
272         h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
273                                         RTE_CACHE_LINE_SIZE, params->socket_id);
274
275         if (h == NULL) {
276                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
277                 goto err_unlock;
278         }
279
280         buckets = rte_zmalloc_socket(NULL,
281                                 num_buckets * sizeof(struct rte_hash_bucket),
282                                 RTE_CACHE_LINE_SIZE, params->socket_id);
283
284         if (buckets == NULL) {
285                 RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
286                 goto err_unlock;
287         }
288
289         /* Allocate same number of extendable buckets */
290         if (ext_table_support) {
291                 buckets_ext = rte_zmalloc_socket(NULL,
292                                 num_buckets * sizeof(struct rte_hash_bucket),
293                                 RTE_CACHE_LINE_SIZE, params->socket_id);
294                 if (buckets_ext == NULL) {
295                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
296                                                         "failed\n");
297                         goto err_unlock;
298                 }
299                 /* Populate the ext bkt ring. Index 0 is reserved, as for
300                  * the key-data slot, in case we later want to use the
301                  * bucket index for the linked list, with 0 meaning NULL
302                  * for the next bucket.
303                  */
304                 for (i = 1; i <= num_buckets; i++)
305                         rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
306         }
307
308         const uint32_t key_entry_size =
309                 RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,
310                           KEY_ALIGNMENT);
311         const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
312
313         k = rte_zmalloc_socket(NULL, key_tbl_size,
314                         RTE_CACHE_LINE_SIZE, params->socket_id);
315
316         if (k == NULL) {
317                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
318                 goto err_unlock;
319         }
320
321         tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t),
322                         RTE_CACHE_LINE_SIZE, params->socket_id);
323
324         if (tbl_chng_cnt == NULL) {
325                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
326                 goto err_unlock;
327         }
328
329 /*
330  * If the x86 or Arm64 architecture is used, select an appropriate compare
331  * function, which may use architecture intrinsics; otherwise use memcmp
332  */
333 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
334         /* Select function to compare keys */
335         switch (params->key_len) {
336         case 16:
337                 h->cmp_jump_table_idx = KEY_16_BYTES;
338                 break;
339         case 32:
340                 h->cmp_jump_table_idx = KEY_32_BYTES;
341                 break;
342         case 48:
343                 h->cmp_jump_table_idx = KEY_48_BYTES;
344                 break;
345         case 64:
346                 h->cmp_jump_table_idx = KEY_64_BYTES;
347                 break;
348         case 80:
349                 h->cmp_jump_table_idx = KEY_80_BYTES;
350                 break;
351         case 96:
352                 h->cmp_jump_table_idx = KEY_96_BYTES;
353                 break;
354         case 112:
355                 h->cmp_jump_table_idx = KEY_112_BYTES;
356                 break;
357         case 128:
358                 h->cmp_jump_table_idx = KEY_128_BYTES;
359                 break;
360         default:
361                 /* If key is not multiple of 16, use generic memcmp */
362                 h->cmp_jump_table_idx = KEY_OTHER_BYTES;
363         }
364 #else
365         h->cmp_jump_table_idx = KEY_OTHER_BYTES;
366 #endif
367
368         if (use_local_cache) {
369                 h->local_free_slots = rte_zmalloc_socket(NULL,
370                                 sizeof(struct lcore_cache) * RTE_MAX_LCORE,
371                                 RTE_CACHE_LINE_SIZE, params->socket_id);
372         }
373
374         /* Default hash function */
375 #if defined(RTE_ARCH_X86)
376         default_hash_func = (rte_hash_function)rte_hash_crc;
377 #elif defined(RTE_ARCH_ARM64)
378         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
379                 default_hash_func = (rte_hash_function)rte_hash_crc;
380 #endif
381         /* Setup hash context */
382         snprintf(h->name, sizeof(h->name), "%s", params->name);
383         h->entries = params->entries;
384         h->key_len = params->key_len;
385         h->key_entry_size = key_entry_size;
386         h->hash_func_init_val = params->hash_func_init_val;
387
388         h->num_buckets = num_buckets;
389         h->bucket_bitmask = h->num_buckets - 1;
390         h->buckets = buckets;
391         h->buckets_ext = buckets_ext;
392         h->free_ext_bkts = r_ext;
393         h->hash_func = (params->hash_func == NULL) ?
394                 default_hash_func : params->hash_func;
395         h->key_store = k;
396         h->free_slots = r;
397         h->tbl_chng_cnt = tbl_chng_cnt;
398         *h->tbl_chng_cnt = 0;
399         h->hw_trans_mem_support = hw_trans_mem_support;
400         h->use_local_cache = use_local_cache;
401         h->readwrite_concur_support = readwrite_concur_support;
402         h->ext_table_support = ext_table_support;
403         h->writer_takes_lock = writer_takes_lock;
404         h->no_free_on_del = no_free_on_del;
405         h->readwrite_concur_lf_support = readwrite_concur_lf_support;
406
407 #if defined(RTE_ARCH_X86)
408         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
409                 h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
410         else
411 #endif
412                 h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
413
414         /* Writer threads need to take the lock when:
415          * 1) RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is enabled OR
416          * 2) RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD is enabled
417          */
418         if (h->writer_takes_lock) {
419                 h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
420                                                 RTE_CACHE_LINE_SIZE);
421                 if (h->readwrite_lock == NULL)
422                         goto err_unlock;
423
424                 rte_rwlock_init(h->readwrite_lock);
425         }
426
427         /* Populate free slots ring. Entry zero is reserved for key misses. */
428         for (i = 1; i < num_key_slots; i++)
429                 rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
430
431         te->data = (void *) h;
432         TAILQ_INSERT_TAIL(hash_list, te, next);
433         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
434
435         return h;
436 err_unlock:
437         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
438 err:
439         rte_ring_free(r);
440         rte_ring_free(r_ext);
441         rte_free(te);
442         rte_free(h);
443         rte_free(buckets);
444         rte_free(buckets_ext);
445         rte_free(k);
446         rte_free(tbl_chng_cnt);
447         return NULL;
448 }
449
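/*
 * Usage sketch (editor's addition, not part of the library): a minimal
 * rte_hash_create() call. The table name, entry count and key type below
 * are arbitrary example values.
 */
static struct rte_hash *
example_create_flow_table(void)
{
        struct rte_hash_parameters params = {
                .name = "example_flow_table",
                .entries = 1024,                /* >= RTE_HASH_BUCKET_ENTRIES */
                .key_len = sizeof(uint32_t),    /* e.g. an IPv4 address */
                .hash_func = rte_jhash,
                .hash_func_init_val = 0,
                .socket_id = rte_socket_id(),
                .extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF,
        };

        /* returns NULL on failure; rte_errno may be set, e.g. EINVAL or EEXIST */
        return rte_hash_create(&params);
}
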
450 void
451 rte_hash_free(struct rte_hash *h)
452 {
453         struct rte_tailq_entry *te;
454         struct rte_hash_list *hash_list;
455
456         if (h == NULL)
457                 return;
458
459         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
460
461         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
462
463         /* find out tailq entry */
464         TAILQ_FOREACH(te, hash_list, next) {
465                 if (te->data == (void *) h)
466                         break;
467         }
468
469         if (te == NULL) {
470                 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
471                 return;
472         }
473
474         TAILQ_REMOVE(hash_list, te, next);
475
476         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
477
478         if (h->use_local_cache)
479                 rte_free(h->local_free_slots);
480         if (h->writer_takes_lock)
481                 rte_free(h->readwrite_lock);
482         rte_ring_free(h->free_slots);
483         rte_ring_free(h->free_ext_bkts);
484         rte_free(h->key_store);
485         rte_free(h->buckets);
486         rte_free(h->buckets_ext);
487         rte_free(h->tbl_chng_cnt);
488         rte_free(h);
489         rte_free(te);
490 }
491
492 hash_sig_t
493 rte_hash_hash(const struct rte_hash *h, const void *key)
494 {
495         /* calc hash result by key */
496         return h->hash_func(key, h->key_len, h->hash_func_init_val);
497 }
498
499 int32_t
500 rte_hash_count(const struct rte_hash *h)
501 {
502         uint32_t tot_ring_cnt, cached_cnt = 0;
503         uint32_t i, ret;
504
505         if (h == NULL)
506                 return -EINVAL;
507
508         if (h->use_local_cache) {
509                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
510                                         (LCORE_CACHE_SIZE - 1);
511                 for (i = 0; i < RTE_MAX_LCORE; i++)
512                         cached_cnt += h->local_free_slots[i].len;
513
514                 ret = tot_ring_cnt - rte_ring_count(h->free_slots) -
515                                                                 cached_cnt;
516         } else {
517                 tot_ring_cnt = h->entries;
518                 ret = tot_ring_cnt - rte_ring_count(h->free_slots);
519         }
520         return ret;
521 }
522
523 /* Read write locks implemented using rte_rwlock */
524 static inline void
525 __hash_rw_writer_lock(const struct rte_hash *h)
526 {
527         if (h->writer_takes_lock && h->hw_trans_mem_support)
528                 rte_rwlock_write_lock_tm(h->readwrite_lock);
529         else if (h->writer_takes_lock)
530                 rte_rwlock_write_lock(h->readwrite_lock);
531 }
532
533 static inline void
534 __hash_rw_reader_lock(const struct rte_hash *h)
535 {
536         if (h->readwrite_concur_support && h->hw_trans_mem_support)
537                 rte_rwlock_read_lock_tm(h->readwrite_lock);
538         else if (h->readwrite_concur_support)
539                 rte_rwlock_read_lock(h->readwrite_lock);
540 }
541
542 static inline void
543 __hash_rw_writer_unlock(const struct rte_hash *h)
544 {
545         if (h->writer_takes_lock && h->hw_trans_mem_support)
546                 rte_rwlock_write_unlock_tm(h->readwrite_lock);
547         else if (h->writer_takes_lock)
548                 rte_rwlock_write_unlock(h->readwrite_lock);
549 }
550
551 static inline void
552 __hash_rw_reader_unlock(const struct rte_hash *h)
553 {
554         if (h->readwrite_concur_support && h->hw_trans_mem_support)
555                 rte_rwlock_read_unlock_tm(h->readwrite_lock);
556         else if (h->readwrite_concur_support)
557                 rte_rwlock_read_unlock(h->readwrite_lock);
558 }
559
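/*
 * Editor's note on the helpers above: writers take the lock only when
 * RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD or RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY
 * is set (writer_takes_lock), and readers take it only when
 * RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is set (readwrite_concur_support).
 * With RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF the reader helpers are no-ops;
 * readers rely on the atomics and the tbl_chng_cnt counter instead.
 */
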
560 void
561 rte_hash_reset(struct rte_hash *h)
562 {
563         void *ptr;
564         uint32_t tot_ring_cnt, i;
565
566         if (h == NULL)
567                 return;
568
569         __hash_rw_writer_lock(h);
570         memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
571         memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
572         *h->tbl_chng_cnt = 0;
573
574         /* clear the free ring */
575         while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
576                 continue;
577
578         /* clear free extendable bucket ring and memory */
579         if (h->ext_table_support) {
580                 memset(h->buckets_ext, 0, h->num_buckets *
581                                                 sizeof(struct rte_hash_bucket));
582                 while (rte_ring_dequeue(h->free_ext_bkts, &ptr) == 0)
583                         continue;
584         }
585
586         /* Repopulate the free slots ring. Entry zero is reserved for key misses */
587         if (h->use_local_cache)
588                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
589                                         (LCORE_CACHE_SIZE - 1);
590         else
591                 tot_ring_cnt = h->entries;
592
593         for (i = 1; i < tot_ring_cnt + 1; i++)
594                 rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
595
596         /* Repopulate the free ext bkt ring. */
597         if (h->ext_table_support) {
598                 for (i = 1; i <= h->num_buckets; i++)
599                         rte_ring_sp_enqueue(h->free_ext_bkts,
600                                                 (void *)((uintptr_t) i));
601         }
602
603         if (h->use_local_cache) {
604                 /* Reset local caches per lcore */
605                 for (i = 0; i < RTE_MAX_LCORE; i++)
606                         h->local_free_slots[i].len = 0;
607         }
608         __hash_rw_writer_unlock(h);
609 }
610
611 /*
612  * Function called to enqueue back an index in the cache/ring,
613  * as the slot has not been used and can be reused in the
614  * next addition attempt.
615  */
616 static inline void
617 enqueue_slot_back(const struct rte_hash *h,
618                 struct lcore_cache *cached_free_slots,
619                 void *slot_id)
620 {
621         if (h->use_local_cache) {
622                 cached_free_slots->objs[cached_free_slots->len] = slot_id;
623                 cached_free_slots->len++;
624         } else
625                 rte_ring_sp_enqueue(h->free_slots, slot_id);
626 }
627
628 /* Search for a key in the bucket and update its data.
629  * The writer holds the lock before calling this.
630  */
631 static inline int32_t
632 search_and_update(const struct rte_hash *h, void *data, const void *key,
633         struct rte_hash_bucket *bkt, uint16_t sig)
634 {
635         int i;
636         struct rte_hash_key *k, *keys = h->key_store;
637
638         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
639                 if (bkt->sig_current[i] == sig) {
640                         k = (struct rte_hash_key *) ((char *)keys +
641                                         bkt->key_idx[i] * h->key_entry_size);
642                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
643                                 /* 'pdata' acts as the synchronization point
644                                  * when an existing hash entry is updated.
645                                  * Key is not updated in this case.
646                                  */
647                                 __atomic_store_n(&k->pdata,
648                                         data,
649                                         __ATOMIC_RELEASE);
650                                 /*
651                                  * Return index where key is stored,
652                                  * subtracting the first dummy index
653                                  */
654                                 return bkt->key_idx[i] - 1;
655                         }
656                 }
657         }
658         return -1;
659 }
660
661 /* Only tries to insert at one bucket (@prim_bkt) without trying to push
662  * buckets around.
663  * Return 1 if a matching existing key was found, 0 on successful insertion,
664  * -1 if there is no empty entry.
665  */
666 static inline int32_t
667 rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
668                 struct rte_hash_bucket *prim_bkt,
669                 struct rte_hash_bucket *sec_bkt,
670                 const struct rte_hash_key *key, void *data,
671                 uint16_t sig, uint32_t new_idx,
672                 int32_t *ret_val)
673 {
674         unsigned int i;
675         struct rte_hash_bucket *cur_bkt;
676         int32_t ret;
677
678         __hash_rw_writer_lock(h);
679         /* Check if key was inserted after last check but before this
680          * protected region, to avoid inserting duplicate keys.
681          */
682         ret = search_and_update(h, data, key, prim_bkt, sig);
683         if (ret != -1) {
684                 __hash_rw_writer_unlock(h);
685                 *ret_val = ret;
686                 return 1;
687         }
688
689         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
690                 ret = search_and_update(h, data, key, cur_bkt, sig);
691                 if (ret != -1) {
692                         __hash_rw_writer_unlock(h);
693                         *ret_val = ret;
694                         return 1;
695                 }
696         }
697
698         /* Insert new entry if there is room in the primary
699          * bucket.
700          */
701         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
702                 /* Check if slot is available */
703                 if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
704                         prim_bkt->sig_current[i] = sig;
705                         /* Key can be of arbitrary length, so it is
706                          * not possible to store it atomically.
707                          * Hence the new key element's memory stores
708                          * (key as well as data) should be complete
709                          * before it is referenced.
710                          */
711                         __atomic_store_n(&prim_bkt->key_idx[i],
712                                          new_idx,
713                                          __ATOMIC_RELEASE);
714                         break;
715                 }
716         }
717         __hash_rw_writer_unlock(h);
718
719         if (i != RTE_HASH_BUCKET_ENTRIES)
720                 return 0;
721
722         /* no empty entry */
723         return -1;
724 }
725
726 /* Shift buckets along provided cuckoo_path (@leaf and @leaf_slot) and fill
727  * the path head with new entry (sig, alt_hash, new_idx)
728  * Return 1 if a matching key was found, -1 if the cuckoo path was
729  * invalidated and the insertion fails, 0 on success.
730  */
731 static inline int
732 rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
733                         struct rte_hash_bucket *bkt,
734                         struct rte_hash_bucket *alt_bkt,
735                         const struct rte_hash_key *key, void *data,
736                         struct queue_node *leaf, uint32_t leaf_slot,
737                         uint16_t sig, uint32_t new_idx,
738                         int32_t *ret_val)
739 {
740         uint32_t prev_alt_bkt_idx;
741         struct rte_hash_bucket *cur_bkt;
742         struct queue_node *prev_node, *curr_node = leaf;
743         struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
744         uint32_t prev_slot, curr_slot = leaf_slot;
745         int32_t ret;
746
747         __hash_rw_writer_lock(h);
748
749         /* In case the empty slot was taken before entering the protected region */
750         if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) {
751                 __hash_rw_writer_unlock(h);
752                 return -1;
753         }
754
755         /* Check if key was inserted after last check but before this
756          * protected region.
757          */
758         ret = search_and_update(h, data, key, bkt, sig);
759         if (ret != -1) {
760                 __hash_rw_writer_unlock(h);
761                 *ret_val = ret;
762                 return 1;
763         }
764
765         FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
766                 ret = search_and_update(h, data, key, cur_bkt, sig);
767                 if (ret != -1) {
768                         __hash_rw_writer_unlock(h);
769                         *ret_val = ret;
770                         return 1;
771                 }
772         }
773
774         while (likely(curr_node->prev != NULL)) {
775                 prev_node = curr_node->prev;
776                 prev_bkt = prev_node->bkt;
777                 prev_slot = curr_node->prev_slot;
778
779                 prev_alt_bkt_idx = get_alt_bucket_index(h,
780                                         prev_node->cur_bkt_idx,
781                                         prev_bkt->sig_current[prev_slot]);
782
783                 if (unlikely(&h->buckets[prev_alt_bkt_idx]
784                                 != curr_bkt)) {
785                         /* revert it to empty, otherwise duplicated keys */
786                         __atomic_store_n(&curr_bkt->key_idx[curr_slot],
787                                 EMPTY_SLOT,
788                                 __ATOMIC_RELEASE);
789                         __hash_rw_writer_unlock(h);
790                         return -1;
791                 }
792
793                 if (h->readwrite_concur_lf_support) {
794                         /* Inform the previous move. The current move need
795                          * not be informed now as the current bucket entry
796                          * is present in both primary and secondary.
797                          * Since there is one writer, load acquires on
798                          * tbl_chng_cnt are not required.
799                          */
800                         __atomic_store_n(h->tbl_chng_cnt,
801                                          *h->tbl_chng_cnt + 1,
802                                          __ATOMIC_RELEASE);
803                         /* The stores to sig_alt and sig_current should not
804                          * move above the store to tbl_chng_cnt.
805                          */
806                         __atomic_thread_fence(__ATOMIC_RELEASE);
807                 }
808
809                 /* Need to swap current/alt sig to allow later
810                  * Cuckoo insert to move elements back to its
811                  * primary bucket if available
812                  */
813                 curr_bkt->sig_current[curr_slot] =
814                         prev_bkt->sig_current[prev_slot];
815                 /* Release the updated bucket entry */
816                 __atomic_store_n(&curr_bkt->key_idx[curr_slot],
817                         prev_bkt->key_idx[prev_slot],
818                         __ATOMIC_RELEASE);
819
820                 curr_slot = prev_slot;
821                 curr_node = prev_node;
822                 curr_bkt = curr_node->bkt;
823         }
824
825         if (h->readwrite_concur_lf_support) {
826                 /* Inform the previous move. The current move need
827                  * not be informed now as the current bucket entry
828                  * is present in both primary and secondary.
829                  * Since there is one writer, load acquires on
830                  * tbl_chng_cnt are not required.
831                  */
832                 __atomic_store_n(h->tbl_chng_cnt,
833                                  *h->tbl_chng_cnt + 1,
834                                  __ATOMIC_RELEASE);
835                 /* The stores to sig_alt and sig_current should not
836                  * move above the store to tbl_chng_cnt.
837                  */
838                 __atomic_thread_fence(__ATOMIC_RELEASE);
839         }
840
841         curr_bkt->sig_current[curr_slot] = sig;
842         /* Release the new bucket entry */
843         __atomic_store_n(&curr_bkt->key_idx[curr_slot],
844                          new_idx,
845                          __ATOMIC_RELEASE);
846
847         __hash_rw_writer_unlock(h);
848
849         return 0;
850
851 }
852
853 /*
854  * Make space for a new key, using BFS cuckoo search and multi-writer
855  * safe cuckoo moves
856  */
857 static inline int
858 rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
859                         struct rte_hash_bucket *bkt,
860                         struct rte_hash_bucket *sec_bkt,
861                         const struct rte_hash_key *key, void *data,
862                         uint16_t sig, uint32_t bucket_idx,
863                         uint32_t new_idx, int32_t *ret_val)
864 {
865         unsigned int i;
866         struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
867         struct queue_node *tail, *head;
868         struct rte_hash_bucket *curr_bkt, *alt_bkt;
869         uint32_t cur_idx, alt_idx;
870
871         tail = queue;
872         head = queue + 1;
873         tail->bkt = bkt;
874         tail->prev = NULL;
875         tail->prev_slot = -1;
876         tail->cur_bkt_idx = bucket_idx;
877
878         /* Cuckoo BFS search (the bound on head prevents queue overflow) */
879         while (likely(tail != head && head <
880                                         queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
881                                         RTE_HASH_BUCKET_ENTRIES)) {
882                 curr_bkt = tail->bkt;
883                 cur_idx = tail->cur_bkt_idx;
884                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
885                         if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
886                                 int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
887                                                 bkt, sec_bkt, key, data,
888                                                 tail, i, sig,
889                                                 new_idx, ret_val);
890                                 if (likely(ret != -1))
891                                         return ret;
892                         }
893
894                         /* Enqueue new node and keep prev node info */
895                         alt_idx = get_alt_bucket_index(h, cur_idx,
896                                                 curr_bkt->sig_current[i]);
897                         alt_bkt = &(h->buckets[alt_idx]);
898                         head->bkt = alt_bkt;
899                         head->cur_bkt_idx = alt_idx;
900                         head->prev = tail;
901                         head->prev_slot = i;
902                         head++;
903                 }
904                 tail++;
905         }
906
907         return -ENOSPC;
908 }
909
910 static inline int32_t
911 __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
912                                                 hash_sig_t sig, void *data)
913 {
914         uint16_t short_sig;
915         uint32_t prim_bucket_idx, sec_bucket_idx;
916         struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
917         struct rte_hash_key *new_k, *keys = h->key_store;
918         void *slot_id = NULL;
919         void *ext_bkt_id = NULL;
920         uint32_t new_idx, bkt_id;
921         int ret;
922         unsigned n_slots;
923         unsigned lcore_id;
924         unsigned int i;
925         struct lcore_cache *cached_free_slots = NULL;
926         int32_t ret_val;
927         struct rte_hash_bucket *last;
928
929         short_sig = get_short_sig(sig);
930         prim_bucket_idx = get_prim_bucket_index(h, sig);
931         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
932         prim_bkt = &h->buckets[prim_bucket_idx];
933         sec_bkt = &h->buckets[sec_bucket_idx];
934         rte_prefetch0(prim_bkt);
935         rte_prefetch0(sec_bkt);
936
937         /* Check if key is already inserted in primary location */
938         __hash_rw_writer_lock(h);
939         ret = search_and_update(h, data, key, prim_bkt, short_sig);
940         if (ret != -1) {
941                 __hash_rw_writer_unlock(h);
942                 return ret;
943         }
944
945         /* Check if key is already inserted in secondary location */
946         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
947                 ret = search_and_update(h, data, key, cur_bkt, short_sig);
948                 if (ret != -1) {
949                         __hash_rw_writer_unlock(h);
950                         return ret;
951                 }
952         }
953
954         __hash_rw_writer_unlock(h);
955
956         /* Did not find a match, so get a new slot for storing the new key */
957         if (h->use_local_cache) {
958                 lcore_id = rte_lcore_id();
959                 cached_free_slots = &h->local_free_slots[lcore_id];
960                 /* Try to get a free slot from the local cache */
961                 if (cached_free_slots->len == 0) {
962                         /* Need to get another burst of free slots from global ring */
963                         n_slots = rte_ring_mc_dequeue_burst(h->free_slots,
964                                         cached_free_slots->objs,
965                                         LCORE_CACHE_SIZE, NULL);
966                         if (n_slots == 0) {
967                                 return -ENOSPC;
968                         }
969
970                         cached_free_slots->len += n_slots;
971                 }
972
973                 /* Get a free slot from the local cache */
974                 cached_free_slots->len--;
975                 slot_id = cached_free_slots->objs[cached_free_slots->len];
976         } else {
977                 if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
978                         return -ENOSPC;
979                 }
980         }
981
982         new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
983         new_idx = (uint32_t)((uintptr_t) slot_id);
984         /* Copy key */
985         rte_memcpy(new_k->key, key, h->key_len);
986         /* Key can be of arbitrary length, so it is not possible to store
987          * it atomically. Hence the new key element's memory stores
988          * (key as well as data) should be complete before it is referenced.
989          * 'pdata' acts as the synchronization point when an existing hash
990          * entry is updated.
991          */
992         __atomic_store_n(&new_k->pdata,
993                 data,
994                 __ATOMIC_RELEASE);
995
996         /* Find an empty slot and insert */
997         ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
998                                         short_sig, new_idx, &ret_val);
999         if (ret == 0)
1000                 return new_idx - 1;
1001         else if (ret == 1) {
1002                 enqueue_slot_back(h, cached_free_slots, slot_id);
1003                 return ret_val;
1004         }
1005
1006         /* Primary bucket full, need to make space for new entry */
1007         ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
1008                                 short_sig, prim_bucket_idx, new_idx, &ret_val);
1009         if (ret == 0)
1010                 return new_idx - 1;
1011         else if (ret == 1) {
1012                 enqueue_slot_back(h, cached_free_slots, slot_id);
1013                 return ret_val;
1014         }
1015
1016         /* Also search secondary bucket to get better occupancy */
1017         ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
1018                                 short_sig, sec_bucket_idx, new_idx, &ret_val);
1019
1020         if (ret == 0)
1021                 return new_idx - 1;
1022         else if (ret == 1) {
1023                 enqueue_slot_back(h, cached_free_slots, slot_id);
1024                 return ret_val;
1025         }
1026
1027         /* if ext table not enabled, we failed the insertion */
1028         if (!h->ext_table_support) {
1029                 enqueue_slot_back(h, cached_free_slots, slot_id);
1030                 return ret;
1031         }
1032
1033         /* Now we need to go through the extendable buckets. The lock is
1034          * needed to protect all operations on the extendable buckets.
1035          */
1036         __hash_rw_writer_lock(h);
1037         /* Check for duplicates again; the key may have been inserted before the lock */
1038         ret = search_and_update(h, data, key, prim_bkt, short_sig);
1039         if (ret != -1) {
1040                 enqueue_slot_back(h, cached_free_slots, slot_id);
1041                 goto failure;
1042         }
1043
1044         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1045                 ret = search_and_update(h, data, key, cur_bkt, short_sig);
1046                 if (ret != -1) {
1047                         enqueue_slot_back(h, cached_free_slots, slot_id);
1048                         goto failure;
1049                 }
1050         }
1051
1052         /* Search sec and ext buckets to find an empty entry to insert. */
1053         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1054                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1055                         /* Check if slot is available */
1056                         if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
1057                                 cur_bkt->sig_current[i] = short_sig;
1058                                 cur_bkt->key_idx[i] = new_idx;
1059                                 __hash_rw_writer_unlock(h);
1060                                 return new_idx - 1;
1061                         }
1062                 }
1063         }
1064
1065         /* Failed to get an empty entry from extendable buckets. Link a new
1066          * extendable bucket. We first get a free bucket from ring.
1067          */
1068         if (rte_ring_sc_dequeue(h->free_ext_bkts, &ext_bkt_id) != 0) {
1069                 ret = -ENOSPC;
1070                 goto failure;
1071         }
1072
1073         bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
1074         /* Use the first location of the new bucket */
1075         (h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
1076         (h->buckets_ext[bkt_id]).key_idx[0] = new_idx;
1077         /* Link the new bucket to sec bucket linked list */
1078         last = rte_hash_get_last_bkt(sec_bkt);
1079         last->next = &h->buckets_ext[bkt_id];
1080         __hash_rw_writer_unlock(h);
1081         return new_idx - 1;
1082
1083 failure:
1084         __hash_rw_writer_unlock(h);
1085         return ret;
1086
1087 }
1088
1089 int32_t
1090 rte_hash_add_key_with_hash(const struct rte_hash *h,
1091                         const void *key, hash_sig_t sig)
1092 {
1093         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1094         return __rte_hash_add_key_with_hash(h, key, sig, 0);
1095 }
1096
1097 int32_t
1098 rte_hash_add_key(const struct rte_hash *h, const void *key)
1099 {
1100         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1101         return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0);
1102 }
1103
1104 int
1105 rte_hash_add_key_with_hash_data(const struct rte_hash *h,
1106                         const void *key, hash_sig_t sig, void *data)
1107 {
1108         int ret;
1109
1110         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1111         ret = __rte_hash_add_key_with_hash(h, key, sig, data);
1112         if (ret >= 0)
1113                 return 0;
1114         else
1115                 return ret;
1116 }
1117
1118 int
1119 rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
1120 {
1121         int ret;
1122
1123         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1124
1125         ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data);
1126         if (ret >= 0)
1127                 return 0;
1128         else
1129                 return ret;
1130 }
1131
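/*
 * Usage sketch (editor's addition, not part of the library): adding a key
 * together with a user data pointer; the key type and names are arbitrary
 * example values and assume the table was created with
 * key_len == sizeof(uint32_t).
 */
static int
example_add_flow(struct rte_hash *h, uint32_t ipv4_addr, void *flow)
{
        /* 0 on success, -ENOSPC if the table is full, -EINVAL on bad input */
        return rte_hash_add_key_data(h, &ipv4_addr, flow);
}
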
1132 /* Search one bucket to find the matching key - uses the r/w lock */
1133 static inline int32_t
1134 search_one_bucket_l(const struct rte_hash *h, const void *key,
1135                 uint16_t sig, void **data,
1136                 const struct rte_hash_bucket *bkt)
1137 {
1138         int i;
1139         struct rte_hash_key *k, *keys = h->key_store;
1140
1141         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1142                 if (bkt->sig_current[i] == sig &&
1143                                 bkt->key_idx[i] != EMPTY_SLOT) {
1144                         k = (struct rte_hash_key *) ((char *)keys +
1145                                         bkt->key_idx[i] * h->key_entry_size);
1146
1147                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1148                                 if (data != NULL)
1149                                         *data = k->pdata;
1150                                 /*
1151                                  * Return index where key is stored,
1152                                  * subtracting the first dummy index
1153                                  */
1154                                 return bkt->key_idx[i] - 1;
1155                         }
1156                 }
1157         }
1158         return -1;
1159 }
1160
1161 /* Search one bucket to find the matching key - lock-free version */
1162 static inline int32_t
1163 search_one_bucket_lf(const struct rte_hash *h, const void *key, uint16_t sig,
1164                         void **data, const struct rte_hash_bucket *bkt)
1165 {
1166         int i;
1167         uint32_t key_idx;
1168         void *pdata;
1169         struct rte_hash_key *k, *keys = h->key_store;
1170
1171         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1172                 key_idx = __atomic_load_n(&bkt->key_idx[i],
1173                                           __ATOMIC_ACQUIRE);
1174                 if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
1175                         k = (struct rte_hash_key *) ((char *)keys +
1176                                         key_idx * h->key_entry_size);
1177                         pdata = __atomic_load_n(&k->pdata,
1178                                         __ATOMIC_ACQUIRE);
1179
1180                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1181                                 if (data != NULL)
1182                                         *data = pdata;
1183                                 /*
1184                                  * Return index where key is stored,
1185                                  * subtracting the first dummy index
1186                                  */
1187                                 return key_idx - 1;
1188                         }
1189                 }
1190         }
1191         return -1;
1192 }
1193
1194 static inline int32_t
1195 __rte_hash_lookup_with_hash_l(const struct rte_hash *h, const void *key,
1196                                 hash_sig_t sig, void **data)
1197 {
1198         uint32_t prim_bucket_idx, sec_bucket_idx;
1199         struct rte_hash_bucket *bkt, *cur_bkt;
1200         int ret;
1201         uint16_t short_sig;
1202
1203         short_sig = get_short_sig(sig);
1204         prim_bucket_idx = get_prim_bucket_index(h, sig);
1205         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1206
1207         bkt = &h->buckets[prim_bucket_idx];
1208
1209         __hash_rw_reader_lock(h);
1210
1211         /* Check if key is in primary location */
1212         ret = search_one_bucket_l(h, key, short_sig, data, bkt);
1213         if (ret != -1) {
1214                 __hash_rw_reader_unlock(h);
1215                 return ret;
1216         }
1217         /* Calculate secondary hash */
1218         bkt = &h->buckets[sec_bucket_idx];
1219
1220         /* Check if key is in secondary location */
1221         FOR_EACH_BUCKET(cur_bkt, bkt) {
1222                 ret = search_one_bucket_l(h, key, short_sig,
1223                                         data, cur_bkt);
1224                 if (ret != -1) {
1225                         __hash_rw_reader_unlock(h);
1226                         return ret;
1227                 }
1228         }
1229
1230         __hash_rw_reader_unlock(h);
1231
1232         return -ENOENT;
1233 }
1234
1235 static inline int32_t
1236 __rte_hash_lookup_with_hash_lf(const struct rte_hash *h, const void *key,
1237                                         hash_sig_t sig, void **data)
1238 {
1239         uint32_t prim_bucket_idx, sec_bucket_idx;
1240         struct rte_hash_bucket *bkt, *cur_bkt;
1241         uint32_t cnt_b, cnt_a;
1242         int ret;
1243         uint16_t short_sig;
1244
1245         short_sig = get_short_sig(sig);
1246         prim_bucket_idx = get_prim_bucket_index(h, sig);
1247         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1248
1249         do {
1250                 /* Load the table change counter before the lookup
1251                  * starts. Acquire semantics will make sure that
1252                  * loads in search_one_bucket are not hoisted.
1253                  */
1254                 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1255                                 __ATOMIC_ACQUIRE);
1256
1257                 /* Check if key is in primary location */
1258                 bkt = &h->buckets[prim_bucket_idx];
1259                 ret = search_one_bucket_lf(h, key, short_sig, data, bkt);
1260                 if (ret != -1) {
1261                         __hash_rw_reader_unlock(h);
1262                         return ret;
1263                 }
1264                 /* Calculate secondary hash */
1265                 bkt = &h->buckets[sec_bucket_idx];
1266
1267                 /* Check if key is in secondary location */
1268                 FOR_EACH_BUCKET(cur_bkt, bkt) {
1269                         ret = search_one_bucket_lf(h, key, short_sig,
1270                                                 data, cur_bkt);
1271                         if (ret != -1) {
1272                                 __hash_rw_reader_unlock(h);
1273                                 return ret;
1274                         }
1275                 }
1276
1277                 /* The loads of sig_current in search_one_bucket
1278                  * should not move below the load from tbl_chng_cnt.
1279                  */
1280                 __atomic_thread_fence(__ATOMIC_ACQUIRE);
1281                 /* Re-read the table change counter to check if the
1282                  * table has changed during search. If yes, re-do
1283                  * the search.
1284                  * This load should not get hoisted. The load
1285                  * acquires on cnt_b, key index in primary bucket
1286                  * and key index in secondary bucket will make sure
1287                  * that it does not get hoisted.
1288                  */
1289                 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
1290                                         __ATOMIC_ACQUIRE);
1291         } while (cnt_b != cnt_a);
1292
1293         return -ENOENT;
1294 }
1295
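/*
 * Editor's note: the lock-free lookup above follows a seqcount-style retry
 * protocol around tbl_chng_cnt. In outline:
 *
 *	do {
 *		cnt_b = load-acquire(tbl_chng_cnt);	before the search
 *		search primary bucket, then secondary bucket + ext chain
 *		if found: return the entry
 *		acquire fence
 *		cnt_a = load-acquire(tbl_chng_cnt);	after the search
 *	} while (cnt_a != cnt_b);	a writer moved entries: search again
 *
 * The writer increments tbl_chng_cnt (with release semantics) before moving
 * an entry in rte_hash_cuckoo_move_insert_mw(), so a reader that raced with
 * a cuckoo displacement repeats the search instead of missing the key.
 */
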
1296 static inline int32_t
1297 __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
1298                                         hash_sig_t sig, void **data)
1299 {
1300         if (h->readwrite_concur_lf_support)
1301                 return __rte_hash_lookup_with_hash_lf(h, key, sig, data);
1302         else
1303                 return __rte_hash_lookup_with_hash_l(h, key, sig, data);
1304 }
1305
1306 int32_t
1307 rte_hash_lookup_with_hash(const struct rte_hash *h,
1308                         const void *key, hash_sig_t sig)
1309 {
1310         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1311         return __rte_hash_lookup_with_hash(h, key, sig, NULL);
1312 }
1313
1314 int32_t
1315 rte_hash_lookup(const struct rte_hash *h, const void *key)
1316 {
1317         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1318         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL);
1319 }
1320
1321 int
1322 rte_hash_lookup_with_hash_data(const struct rte_hash *h,
1323                         const void *key, hash_sig_t sig, void **data)
1324 {
1325         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1326         return __rte_hash_lookup_with_hash(h, key, sig, data);
1327 }
1328
1329 int
1330 rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
1331 {
1332         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1333         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
1334 }
1335
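/*
 * Usage sketch (editor's addition, not part of the library): retrieving the
 * user data stored with a key; names and key type are arbitrary example
 * values and assume the table was created with key_len == sizeof(uint32_t).
 */
static void *
example_lookup_flow(const struct rte_hash *h, uint32_t ipv4_addr)
{
        void *flow = NULL;

        /* negative return means failure, -ENOENT if the key is not present */
        if (rte_hash_lookup_data(h, &ipv4_addr, &flow) < 0)
                return NULL;
        return flow;
}
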
1336 static inline void
1337 remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
1338 {
1339         unsigned lcore_id, n_slots;
1340         struct lcore_cache *cached_free_slots;
1341
1342         if (h->use_local_cache) {
1343                 lcore_id = rte_lcore_id();
1344                 cached_free_slots = &h->local_free_slots[lcore_id];
1345                 /* Cache full, need to free it. */
1346                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1347                         /* Need to enqueue the free slots in global ring. */
1348                         n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1349                                                 cached_free_slots->objs,
1350                                                 LCORE_CACHE_SIZE, NULL);
1351                         cached_free_slots->len -= n_slots;
1352                 }
1353                 /* Put index of new free slot in cache. */
1354                 cached_free_slots->objs[cached_free_slots->len] =
1355                                 (void *)((uintptr_t)bkt->key_idx[i]);
1356                 cached_free_slots->len++;
1357         } else {
1358                 rte_ring_sp_enqueue(h->free_slots,
1359                                 (void *)((uintptr_t)bkt->key_idx[i]));
1360         }
1361 }
1362
1363 /* Compact the linked list by moving the key from the last entry in the
1364  * linked list to the empty slot.
1365  */
1366 static inline void
1367 __rte_hash_compact_ll(struct rte_hash_bucket *cur_bkt, int pos) {
1368         int i;
1369         struct rte_hash_bucket *last_bkt;
1370
1371         if (!cur_bkt->next)
1372                 return;
1373
1374         last_bkt = rte_hash_get_last_bkt(cur_bkt);
1375
1376         for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) {
1377                 if (last_bkt->key_idx[i] != EMPTY_SLOT) {
1378                         cur_bkt->key_idx[pos] = last_bkt->key_idx[i];
1379                         cur_bkt->sig_current[pos] = last_bkt->sig_current[i];
1380                         last_bkt->sig_current[i] = NULL_SIGNATURE;
1381                         last_bkt->key_idx[i] = EMPTY_SLOT;
1382                         return;
1383                 }
1384         }
1385 }
1386
1387 /* Search one bucket and remove the matched key.
1388  * Writer is expected to hold the lock while calling this
1389  * function.
1390  */
1391 static inline int32_t
1392 search_and_remove(const struct rte_hash *h, const void *key,
1393                         struct rte_hash_bucket *bkt, uint16_t sig, int *pos)
1394 {
1395         struct rte_hash_key *k, *keys = h->key_store;
1396         unsigned int i;
1397         uint32_t key_idx;
1398
1399         /* Check if key is in bucket */
1400         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1401                 key_idx = __atomic_load_n(&bkt->key_idx[i],
1402                                           __ATOMIC_ACQUIRE);
1403                 if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
1404                         k = (struct rte_hash_key *) ((char *)keys +
1405                                         key_idx * h->key_entry_size);
1406                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1407                                 bkt->sig_current[i] = NULL_SIGNATURE;
1408                                 /* Free the key store index if
1409                                  * no_free_on_del is disabled.
1410                                  */
1411                                 if (!h->no_free_on_del)
1412                                         remove_entry(h, bkt, i);
1413
1414                                 __atomic_store_n(&bkt->key_idx[i],
1415                                                  EMPTY_SLOT,
1416                                                  __ATOMIC_RELEASE);
1417
1418                                 *pos = i;
1419                                 /*
1420                                  * Return index where key is stored,
1421                                  * subtracting the first dummy index
1422                                  */
1423                                 return key_idx - 1;
1424                         }
1425                 }
1426         }
1427         return -1;
1428 }
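
/*
 * Note: key_idx[] is read with acquire and cleared with release semantics
 * even though this function runs under the writer lock, because lock-free
 * readers (the lock-free lookup paths in this file) synchronize on
 * key_idx[] rather than on the lock.
 */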
1429
1430 static inline int32_t
1431 __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
1432                                                 hash_sig_t sig)
1433 {
1434         uint32_t prim_bucket_idx, sec_bucket_idx;
1435         struct rte_hash_bucket *prim_bkt, *sec_bkt, *prev_bkt, *last_bkt;
1436         struct rte_hash_bucket *cur_bkt;
1437         int pos;
1438         int32_t ret, i;
1439         uint16_t short_sig;
1440
1441         short_sig = get_short_sig(sig);
1442         prim_bucket_idx = get_prim_bucket_index(h, sig);
1443         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1444         prim_bkt = &h->buckets[prim_bucket_idx];
1445
1446         __hash_rw_writer_lock(h);
1447         /* look for key in primary bucket */
1448         ret = search_and_remove(h, key, prim_bkt, short_sig, &pos);
1449         if (ret != -1) {
1450                 __rte_hash_compact_ll(prim_bkt, pos);
1451                 last_bkt = prim_bkt->next;
1452                 prev_bkt = prim_bkt;
1453                 goto return_bkt;
1454         }
1455
1456         /* look for key in secondary bucket and its linked ext buckets */
1457         sec_bkt = &h->buckets[sec_bucket_idx];
1458
1459         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1460                 ret = search_and_remove(h, key, cur_bkt, short_sig, &pos);
1461                 if (ret != -1) {
1462                         __rte_hash_compact_ll(cur_bkt, pos);
1463                         last_bkt = sec_bkt->next;
1464                         prev_bkt = sec_bkt;
1465                         goto return_bkt;
1466                 }
1467         }
1468
1469         __hash_rw_writer_unlock(h);
1470         return -ENOENT;
1471
1472 /* Check if the last ext bucket is empty and can be recycled */
1473 return_bkt:
1474         if (!last_bkt) {
1475                 __hash_rw_writer_unlock(h);
1476                 return ret;
1477         }
1478         while (last_bkt->next) {
1479                 prev_bkt = last_bkt;
1480                 last_bkt = last_bkt->next;
1481         }
1482
1483         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1484                 if (last_bkt->key_idx[i] != EMPTY_SLOT)
1485                         break;
1486         }
1487         /* Last ext bucket is empty: unlink and recycle it */
1488         if (i == RTE_HASH_BUCKET_ENTRIES) {
1489                 prev_bkt->next = last_bkt->next = NULL;
1490                 uint32_t index = last_bkt - h->buckets_ext + 1;
1491                 rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
1492         }
1493
1494         __hash_rw_writer_unlock(h);
1495         return ret;
1496 }
1497
1498 int32_t
1499 rte_hash_del_key_with_hash(const struct rte_hash *h,
1500                         const void *key, hash_sig_t sig)
1501 {
1502         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1503         return __rte_hash_del_key_with_hash(h, key, sig);
1504 }
1505
1506 int32_t
1507 rte_hash_del_key(const struct rte_hash *h, const void *key)
1508 {
1509         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1510         return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
1511 }
1512
1513 int
1514 rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
1515                                void **key)
1516 {
1517         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1518
1519         struct rte_hash_key *k, *keys = h->key_store;
1520         k = (struct rte_hash_key *) ((char *) keys + (position + 1) *
1521                                      h->key_entry_size);
1522         *key = k->key;
1523
1524         if (position !=
1525             __rte_hash_lookup_with_hash(h, *key, rte_hash_hash(h, *key),
1526                                         NULL)) {
1527                 return -ENOENT;
1528         }
1529
1530         return 0;
1531 }
1532
1533 int __rte_experimental
1534 rte_hash_free_key_with_position(const struct rte_hash *h,
1535                                 const int32_t position)
1536 {
1537         RETURN_IF_TRUE(((h == NULL) || (position == EMPTY_SLOT)), -EINVAL);
1538
1539         unsigned int lcore_id, n_slots;
1540         struct lcore_cache *cached_free_slots;
1541         const int32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
1542
1543         /* Out of bounds */
1544         if (position >= total_entries)
1545                 return -EINVAL;
1546
1547         if (h->use_local_cache) {
1548                 lcore_id = rte_lcore_id();
1549                 cached_free_slots = &h->local_free_slots[lcore_id];
1550                 /* Local cache is full: make room for the new free slot. */
1551                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1552                         /* Flush the cached free slots to the global ring. */
1553                         n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1554                                                 cached_free_slots->objs,
1555                                                 LCORE_CACHE_SIZE, NULL);
1556                         cached_free_slots->len -= n_slots;
1557                 }
1558                 /* Put index of new free slot in cache. */
1559                 cached_free_slots->objs[cached_free_slots->len] =
1560                                         (void *)((uintptr_t)position);
1561                 cached_free_slots->len++;
1562         } else {
1563                 rte_ring_sp_enqueue(h->free_slots,
1564                                 (void *)((uintptr_t)position));
1565         }
1566
1567         return 0;
1568 }
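
/*
 * Illustrative usage sketch (not part of the library): when the hash table
 * was created with RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL, deletion only
 * unlinks the key, and the key-store slot must be returned explicitly once
 * no reader can still be referencing it. The grace-period mechanism (e.g.
 * RCU) and wait_for_readers_to_quiesce() are application-defined
 * placeholders.
 *
 *	int32_t pos = rte_hash_del_key(h, &key);
 *	if (pos >= 0) {
 *		wait_for_readers_to_quiesce();
 *		rte_hash_free_key_with_position(h, pos);
 *	}
 */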
1569
1570 static inline void
1571 compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
1572                         const struct rte_hash_bucket *prim_bkt,
1573                         const struct rte_hash_bucket *sec_bkt,
1574                         uint16_t sig,
1575                         enum rte_hash_sig_compare_function sig_cmp_fn)
1576 {
1577         unsigned int i;
1578
1579         /* The low bit of each 2-bit pair in the match mask indicates a match */
1580         switch (sig_cmp_fn) {
1581 #ifdef RTE_MACHINE_CPUFLAG_SSE2
1582         case RTE_HASH_COMPARE_SSE:
1583                 /* Compare all signatures in the primary bucket */
1584                 *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1585                                 _mm_load_si128(
1586                                         (__m128i const *)prim_bkt->sig_current),
1587                                 _mm_set1_epi16(sig)));
1588                 /* Compare all signatures in the secondary bucket */
1589                 *sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1590                                 _mm_load_si128(
1591                                         (__m128i const *)sec_bkt->sig_current),
1592                                 _mm_set1_epi16(sig)));
1593                 break;
1594 #endif
1595         default:
1596                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1597                         *prim_hash_matches |=
1598                                 ((sig == prim_bkt->sig_current[i]) << (i << 1));
1599                         *sec_hash_matches |=
1600                                 ((sig == sec_bkt->sig_current[i]) << (i << 1));
1601                 }
1602         }
1603 }
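
/*
 * A worked example of the match-mask layout produced above, assuming 8
 * entries per bucket. Each entry owns two bits of the mask: the vector
 * path (movemask over 16-bit compares) sets both bits of a matching pair,
 * while the scalar fallback sets only the lower bit. Callers rely only on
 * the lower bit and recover the entry index with ctz(mask) >> 1:
 *
 *	entry 3 matches:  vector mask 0x00C0, scalar mask 0x0040
 *	__builtin_ctzl(0x00C0) >> 1 == 3,  __builtin_ctzl(0x0040) >> 1 == 3
 */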
1604
1605 #define PREFETCH_OFFSET 4
1606 static inline void
1607 __rte_hash_lookup_bulk_l(const struct rte_hash *h, const void **keys,
1608                         int32_t num_keys, int32_t *positions,
1609                         uint64_t *hit_mask, void *data[])
1610 {
1611         uint64_t hits = 0;
1612         int32_t i;
1613         int32_t ret;
1614         uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
1615         uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
1616         uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
1617         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
1618         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1619         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1620         uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1621         uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1622         struct rte_hash_bucket *cur_bkt, *next_bkt;
1623
1624         /* Prefetch first keys */
1625         for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
1626                 rte_prefetch0(keys[i]);
1627
1628         /*
1629          * Prefetch the rest of the keys, calculate their primary and
1630          * secondary buckets and prefetch those buckets
1631          */
1632         for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
1633                 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
1634
1635                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1636
1637                 sig[i] = get_short_sig(prim_hash[i]);
1638                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1639                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1640
1641                 primary_bkt[i] = &h->buckets[prim_index[i]];
1642                 secondary_bkt[i] = &h->buckets[sec_index[i]];
1643
1644                 rte_prefetch0(primary_bkt[i]);
1645                 rte_prefetch0(secondary_bkt[i]);
1646         }
1647
1648         /* Calculate and prefetch rest of the buckets */
1649         for (; i < num_keys; i++) {
1650                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1651
1652                 sig[i] = get_short_sig(prim_hash[i]);
1653                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1654                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1655
1656                 primary_bkt[i] = &h->buckets[prim_index[i]];
1657                 secondary_bkt[i] = &h->buckets[sec_index[i]];
1658
1659                 rte_prefetch0(primary_bkt[i]);
1660                 rte_prefetch0(secondary_bkt[i]);
1661         }
1662
1663         __hash_rw_reader_lock(h);
1664
1665         /* Compare signatures and prefetch key slot of first hit */
1666         for (i = 0; i < num_keys; i++) {
1667                 compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1668                         primary_bkt[i], secondary_bkt[i],
1669                         sig[i], h->sig_cmp_fn);
1670
1671                 if (prim_hitmask[i]) {
1672                         uint32_t first_hit =
1673                                         __builtin_ctzl(prim_hitmask[i])
1674                                         >> 1;
1675                         uint32_t key_idx =
1676                                 primary_bkt[i]->key_idx[first_hit];
1677                         const struct rte_hash_key *key_slot =
1678                                 (const struct rte_hash_key *)(
1679                                 (const char *)h->key_store +
1680                                 key_idx * h->key_entry_size);
1681                         rte_prefetch0(key_slot);
1682                         continue;
1683                 }
1684
1685                 if (sec_hitmask[i]) {
1686                         uint32_t first_hit =
1687                                         __builtin_ctzl(sec_hitmask[i])
1688                                         >> 1;
1689                         uint32_t key_idx =
1690                                 secondary_bkt[i]->key_idx[first_hit];
1691                         const struct rte_hash_key *key_slot =
1692                                 (const struct rte_hash_key *)(
1693                                 (const char *)h->key_store +
1694                                 key_idx * h->key_entry_size);
1695                         rte_prefetch0(key_slot);
1696                 }
1697         }
1698
1699         /* Compare keys, checking hits in the primary bucket first */
1700         for (i = 0; i < num_keys; i++) {
1701                 positions[i] = -ENOENT;
1702                 while (prim_hitmask[i]) {
1703                         uint32_t hit_index =
1704                                         __builtin_ctzl(prim_hitmask[i])
1705                                         >> 1;
1706                         uint32_t key_idx =
1707                                 primary_bkt[i]->key_idx[hit_index];
1708                         const struct rte_hash_key *key_slot =
1709                                 (const struct rte_hash_key *)(
1710                                 (const char *)h->key_store +
1711                                 key_idx * h->key_entry_size);
1712
1713                         /*
1714                          * If key index is 0, do not compare key,
1715                          * as it is checking the dummy slot
1716                          */
1717                         if (!!key_idx &
1718                                 !rte_hash_cmp_eq(
1719                                         key_slot->key, keys[i], h)) {
1720                                 if (data != NULL)
1721                                         data[i] = key_slot->pdata;
1722
1723                                 hits |= 1ULL << i;
1724                                 positions[i] = key_idx - 1;
1725                                 goto next_key;
1726                         }
1727                         prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
1728                 }
1729
1730                 while (sec_hitmask[i]) {
1731                         uint32_t hit_index =
1732                                         __builtin_ctzl(sec_hitmask[i])
1733                                         >> 1;
1734                         uint32_t key_idx =
1735                                 secondary_bkt[i]->key_idx[hit_index];
1736                         const struct rte_hash_key *key_slot =
1737                                 (const struct rte_hash_key *)(
1738                                 (const char *)h->key_store +
1739                                 key_idx * h->key_entry_size);
1740
1741                         /*
1742                          * If key index is 0, do not compare key,
1743                          * as it is checking the dummy slot
1744                          */
1745
1746                         if (!!key_idx &
1747                                 !rte_hash_cmp_eq(
1748                                         key_slot->key, keys[i], h)) {
1749                                 if (data != NULL)
1750                                         data[i] = key_slot->pdata;
1751
1752                                 hits |= 1ULL << i;
1753                                 positions[i] = key_idx - 1;
1754                                 goto next_key;
1755                         }
1756                         sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
1757                 }
1758 next_key:
1759                 continue;
1760         }
1761
1762         /* all found, do not need to go through ext bkt */
1763         if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
1764                 if (hit_mask != NULL)
1765                         *hit_mask = hits;
1766                 __hash_rw_reader_unlock(h);
1767                 return;
1768         }
1769
1770         /* need to check ext buckets for match */
1771         for (i = 0; i < num_keys; i++) {
1772                 if ((hits & (1ULL << i)) != 0)
1773                         continue;
1774                 next_bkt = secondary_bkt[i]->next;
1775                 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
1776                         if (data != NULL)
1777                                 ret = search_one_bucket_l(h, keys[i],
1778                                                 sig[i], &data[i], cur_bkt);
1779                         else
1780                                 ret = search_one_bucket_l(h, keys[i],
1781                                                 sig[i], NULL, cur_bkt);
1782                         if (ret != -1) {
1783                                 positions[i] = ret;
1784                                 hits |= 1ULL << i;
1785                                 break;
1786                         }
1787                 }
1788         }
1789
1790         __hash_rw_reader_unlock(h);
1791
1792         if (hit_mask != NULL)
1793                 *hit_mask = hits;
1794 }
1795
1796 static inline void
1797 __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
1798                         int32_t num_keys, int32_t *positions,
1799                         uint64_t *hit_mask, void *data[])
1800 {
1801         uint64_t hits = 0;
1802         int32_t i;
1803         int32_t ret;
1804         uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
1805         uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
1806         uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
1807         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
1808         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1809         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1810         uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1811         uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1812         struct rte_hash_bucket *cur_bkt, *next_bkt;
1813         void *pdata[RTE_HASH_LOOKUP_BULK_MAX];
1814         uint32_t cnt_b, cnt_a;
1815
1816         /* Prefetch first keys */
1817         for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
1818                 rte_prefetch0(keys[i]);
1819
1820         /*
1821          * Prefetch the rest of the keys, calculate their primary and
1822          * secondary buckets and prefetch those buckets
1823          */
1824         for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
1825                 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
1826
1827                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1828
1829                 sig[i] = get_short_sig(prim_hash[i]);
1830                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1831                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1832
1833                 primary_bkt[i] = &h->buckets[prim_index[i]];
1834                 secondary_bkt[i] = &h->buckets[sec_index[i]];
1835
1836                 rte_prefetch0(primary_bkt[i]);
1837                 rte_prefetch0(secondary_bkt[i]);
1838         }
1839
1840         /* Calculate and prefetch rest of the buckets */
1841         for (; i < num_keys; i++) {
1842                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1843
1844                 sig[i] = get_short_sig(prim_hash[i]);
1845                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1846                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1847
1848                 primary_bkt[i] = &h->buckets[prim_index[i]];
1849                 secondary_bkt[i] = &h->buckets[sec_index[i]];
1850
1851                 rte_prefetch0(primary_bkt[i]);
1852                 rte_prefetch0(secondary_bkt[i]);
1853         }
1854
1855         do {
1856                 /* Load the table change counter before the lookup
1857                  * starts. Acquire semantics will make sure that
1858                  * loads in compare_signatures are not hoisted.
1859                  */
1860                 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1861                                         __ATOMIC_ACQUIRE);
1862
1863                 /* Compare signatures and prefetch key slot of first hit */
1864                 for (i = 0; i < num_keys; i++) {
1865                         compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1866                                 primary_bkt[i], secondary_bkt[i],
1867                                 sig[i], h->sig_cmp_fn);
1868
1869                         if (prim_hitmask[i]) {
1870                                 uint32_t first_hit =
1871                                                 __builtin_ctzl(prim_hitmask[i])
1872                                                 >> 1;
1873                                 uint32_t key_idx =
1874                                         primary_bkt[i]->key_idx[first_hit];
1875                                 const struct rte_hash_key *key_slot =
1876                                         (const struct rte_hash_key *)(
1877                                         (const char *)h->key_store +
1878                                         key_idx * h->key_entry_size);
1879                                 rte_prefetch0(key_slot);
1880                                 continue;
1881                         }
1882
1883                         if (sec_hitmask[i]) {
1884                                 uint32_t first_hit =
1885                                                 __builtin_ctzl(sec_hitmask[i])
1886                                                 >> 1;
1887                                 uint32_t key_idx =
1888                                         secondary_bkt[i]->key_idx[first_hit];
1889                                 const struct rte_hash_key *key_slot =
1890                                         (const struct rte_hash_key *)(
1891                                         (const char *)h->key_store +
1892                                         key_idx * h->key_entry_size);
1893                                 rte_prefetch0(key_slot);
1894                         }
1895                 }
1896
1897                 /* Compare keys, checking hits in the primary bucket first */
1898                 for (i = 0; i < num_keys; i++) {
1899                         positions[i] = -ENOENT;
1900                         while (prim_hitmask[i]) {
1901                                 uint32_t hit_index =
1902                                                 __builtin_ctzl(prim_hitmask[i])
1903                                                 >> 1;
1904                                 uint32_t key_idx =
1905                                 __atomic_load_n(
1906                                         &primary_bkt[i]->key_idx[hit_index],
1907                                         __ATOMIC_ACQUIRE);
1908                                 const struct rte_hash_key *key_slot =
1909                                         (const struct rte_hash_key *)(
1910                                         (const char *)h->key_store +
1911                                         key_idx * h->key_entry_size);
1912
1913                                 if (key_idx != EMPTY_SLOT)
1914                                         pdata[i] = __atomic_load_n(
1915                                                         &key_slot->pdata,
1916                                                         __ATOMIC_ACQUIRE);
1917                                 /*
1918                                  * If key index is 0, do not compare key,
1919                                  * as it is checking the dummy slot
1920                                  */
1921                                 if (!!key_idx &
1922                                         !rte_hash_cmp_eq(
1923                                                 key_slot->key, keys[i], h)) {
1924                                         if (data != NULL)
1925                                                 data[i] = pdata[i];
1926
1927                                         hits |= 1ULL << i;
1928                                         positions[i] = key_idx - 1;
1929                                         goto next_key;
1930                                 }
1931                                 prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
1932                         }
1933
1934                         while (sec_hitmask[i]) {
1935                                 uint32_t hit_index =
1936                                                 __builtin_ctzl(sec_hitmask[i])
1937                                                 >> 1;
1938                                 uint32_t key_idx =
1939                                 __atomic_load_n(
1940                                         &secondary_bkt[i]->key_idx[hit_index],
1941                                         __ATOMIC_ACQUIRE);
1942                                 const struct rte_hash_key *key_slot =
1943                                         (const struct rte_hash_key *)(
1944                                         (const char *)h->key_store +
1945                                         key_idx * h->key_entry_size);
1946
1947                                 if (key_idx != EMPTY_SLOT)
1948                                         pdata[i] = __atomic_load_n(
1949                                                         &key_slot->pdata,
1950                                                         __ATOMIC_ACQUIRE);
1951                                 /*
1952                                  * If key index is 0, do not compare key,
1953                                  * as it is checking the dummy slot
1954                                  */
1955
1956                                 if (!!key_idx &
1957                                         !rte_hash_cmp_eq(
1958                                                 key_slot->key, keys[i], h)) {
1959                                         if (data != NULL)
1960                                                 data[i] = pdata[i];
1961
1962                                         hits |= 1ULL << i;
1963                                         positions[i] = key_idx - 1;
1964                                         goto next_key;
1965                                 }
1966                                 sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
1967                         }
1968 next_key:
1969                         continue;
1970                 }
1971
1972                 /* The loads of sig_current in compare_signatures
1973                  * should not move below the load from tbl_chng_cnt.
1974                  */
1975                 __atomic_thread_fence(__ATOMIC_ACQUIRE);
1976                 /* Re-read the table change counter to check if the
1977                  * table has changed during search. If yes, re-do
1978                  * the search.
1979                  * This load should not get hoisted. The acquire loads
1980                  * of cnt_b and of the primary and secondary key
1981                  * indexes make sure that it does not get
1982                  * hoisted.
1983                  */
1984                 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
1985                                         __ATOMIC_ACQUIRE);
1986         } while (cnt_b != cnt_a);
1987
1988         /* all found, do not need to go through ext bkt */
1989         if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
1990                 if (hit_mask != NULL)
1991                         *hit_mask = hits;
1992                 /* No reader lock is taken in the lock-free path */
1993                 return;
1994         }
1995
1996         /* need to check ext buckets for match */
1997         for (i = 0; i < num_keys; i++) {
1998                 if ((hits & (1ULL << i)) != 0)
1999                         continue;
2000                 next_bkt = secondary_bkt[i]->next;
2001                 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
2002                         if (data != NULL)
2003                                 ret = search_one_bucket_lf(h, keys[i],
2004                                                 sig[i], &data[i], cur_bkt);
2005                         else
2006                                 ret = search_one_bucket_lf(h, keys[i],
2007                                                 sig[i], NULL, cur_bkt);
2008                         if (ret != -1) {
2009                                 positions[i] = ret;
2010                                 hits |= 1ULL << i;
2011                                 break;
2012                         }
2013                 }
2014         }
2015
2016         if (hit_mask != NULL)
2017                 *hit_mask = hits;
2018 }
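
/*
 * The retry protocol above, sketched from the reader's point of view (the
 * matching writer-side counter increments live in the insert/displacement
 * path elsewhere in this file):
 *
 *	do {
 *		cnt_b = load-acquire(tbl_chng_cnt);
 *		compare signatures, compare keys;
 *		cnt_a = load-acquire(tbl_chng_cnt);
 *	} while (cnt_a != cnt_b);
 *
 * A writer bumps tbl_chng_cnt whenever it relocates a key during cuckoo
 * displacement, so a reader that raced with a move simply redoes the
 * lookup instead of taking a lock.
 */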
2019
2020 static inline void
2021 __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
2022                         int32_t num_keys, int32_t *positions,
2023                         uint64_t *hit_mask, void *data[])
2024 {
2025         if (h->readwrite_concur_lf_support)
2026                 return __rte_hash_lookup_bulk_lf(h, keys, num_keys,
2027                                                 positions, hit_mask, data);
2028         else
2029                 return __rte_hash_lookup_bulk_l(h, keys, num_keys,
2030                                                 positions, hit_mask, data);
2031 }
2032
2033 int
2034 rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
2035                       uint32_t num_keys, int32_t *positions)
2036 {
2037         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
2038                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2039                         (positions == NULL)), -EINVAL);
2040
2041         __rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL);
2042         return 0;
2043 }
2044
2045 int
2046 rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
2047                       uint32_t num_keys, uint64_t *hit_mask, void *data[])
2048 {
2049         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
2050                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2051                         (hit_mask == NULL)), -EINVAL);
2052
2053         int32_t positions[num_keys];
2054
2055         __rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);
2056
2057         /* Return number of hits */
2058         return __builtin_popcountl(*hit_mask);
2059 }
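
/*
 * Illustrative usage sketch (not part of the library; BURST and the key
 * array are application-defined, with BURST <= RTE_HASH_LOOKUP_BULK_MAX):
 * looking up a burst of keys and retrieving the stored data pointers.
 *
 *	const void *keys[BURST];
 *	void *data[BURST];
 *	uint64_t hit_mask;
 *	int i, nb_hits = rte_hash_lookup_bulk_data(h, keys, BURST,
 *						&hit_mask, data);
 *	for (i = 0; nb_hits > 0 && i < BURST; i++)
 *		if (hit_mask & (1ULL << i))
 *			handle_hit(data[i]);
 */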
2060
2061 int32_t
2062 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
2063 {
2064         uint32_t bucket_idx, idx, position;
2065         struct rte_hash_key *next_key;
2066
2067         RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
2068
2069         const uint32_t total_entries_main = h->num_buckets *
2070                                                         RTE_HASH_BUCKET_ENTRIES;
2071         const uint32_t total_entries = total_entries_main << 1;
2072
2073         /* Past the last entry of the main table: continue in the ext table */
2074         if (*next >= total_entries_main)
2075                 goto extend_table;
2076
2077         /* Calculate bucket and index of current iterator */
2078         bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
2079         idx = *next % RTE_HASH_BUCKET_ENTRIES;
2080
2081         /* If current position is empty, go to the next one */
2082         while ((position = __atomic_load_n(&h->buckets[bucket_idx].key_idx[idx],
2083                                         __ATOMIC_ACQUIRE)) == EMPTY_SLOT) {
2084                 (*next)++;
2085                 /* End of table */
2086                 if (*next == total_entries_main)
2087                         goto extend_table;
2088                 bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
2089                 idx = *next % RTE_HASH_BUCKET_ENTRIES;
2090         }
2091
2092         __hash_rw_reader_lock(h);
2093         next_key = (struct rte_hash_key *) ((char *)h->key_store +
2094                                 position * h->key_entry_size);
2095         /* Return key and data */
2096         *key = next_key->key;
2097         *data = next_key->pdata;
2098
2099         __hash_rw_reader_unlock(h);
2100
2101         /* Increment iterator */
2102         (*next)++;
2103
2104         return position - 1;
2105
2106 /* Begin to iterate extendable buckets */
2107 extend_table:
2108         /* Past the end of the whole table, or ext buckets are not enabled */
2109         if (*next >= total_entries || !h->ext_table_support)
2110                 return -ENOENT;
2111
2112         bucket_idx = (*next - total_entries_main) / RTE_HASH_BUCKET_ENTRIES;
2113         idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
2114
2115         while ((position = h->buckets_ext[bucket_idx].key_idx[idx]) == EMPTY_SLOT) {
2116                 (*next)++;
2117                 if (*next == total_entries)
2118                         return -ENOENT;
2119                 bucket_idx = (*next - total_entries_main) /
2120                                                 RTE_HASH_BUCKET_ENTRIES;
2121                 idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
2122         }
2123         __hash_rw_reader_lock(h);
2124         next_key = (struct rte_hash_key *) ((char *)h->key_store +
2125                                 position * h->key_entry_size);
2126         /* Return key and data */
2127         *key = next_key->key;
2128         *data = next_key->pdata;
2129
2130         __hash_rw_reader_unlock(h);
2131
2132         /* Increment iterator */
2133         (*next)++;
2134         return position - 1;
2135 }
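
/*
 * Illustrative usage sketch (not part of the library): walking the whole
 * table with the iterator above. The cursor starts at zero and the loop
 * ends when -ENOENT is returned; dump_entry() is a placeholder.
 *
 *	const void *key;
 *	void *data;
 *	uint32_t iter = 0;
 *	while (rte_hash_iterate(h, &key, &data, &iter) >= 0)
 *		dump_entry(key, data);
 */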