1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  * Copyright(c) 2018 Arm Limited
4  */
5
6 #include <string.h>
7 #include <stdint.h>
8 #include <errno.h>
9 #include <stdio.h>
10 #include <stdarg.h>
11 #include <sys/queue.h>
12
13 #include <rte_common.h>
14 #include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
15 #include <rte_log.h>
16 #include <rte_prefetch.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_malloc.h>
19 #include <rte_eal.h>
20 #include <rte_eal_memconfig.h>
21 #include <rte_per_lcore.h>
22 #include <rte_errno.h>
23 #include <rte_string_fns.h>
24 #include <rte_cpuflags.h>
25 #include <rte_rwlock.h>
26 #include <rte_spinlock.h>
27 #include <rte_ring.h>
28 #include <rte_compat.h>
29 #include <rte_vect.h>
30
31 #include "rte_hash.h"
32 #include "rte_cuckoo_hash.h"
33
34 #define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET)                            \
35         for (CURRENT_BKT = START_BUCKET;                                      \
36                 CURRENT_BKT != NULL;                                          \
37                 CURRENT_BKT = CURRENT_BKT->next)
38
39 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
40
41 static struct rte_tailq_elem rte_hash_tailq = {
42         .name = "RTE_HASH",
43 };
44 EAL_REGISTER_TAILQ(rte_hash_tailq)
45
46 struct rte_hash *
47 rte_hash_find_existing(const char *name)
48 {
49         struct rte_hash *h = NULL;
50         struct rte_tailq_entry *te;
51         struct rte_hash_list *hash_list;
52
53         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
54
55         rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
56         TAILQ_FOREACH(te, hash_list, next) {
57                 h = (struct rte_hash *) te->data;
58                 if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
59                         break;
60         }
61         rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
62
63         if (te == NULL) {
64                 rte_errno = ENOENT;
65                 return NULL;
66         }
67         return h;
68 }
69
70 static inline struct rte_hash_bucket *
71 rte_hash_get_last_bkt(struct rte_hash_bucket *lst_bkt)
72 {
73         while (lst_bkt->next != NULL)
74                 lst_bkt = lst_bkt->next;
75         return lst_bkt;
76 }
77
78 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
79 {
80         h->cmp_jump_table_idx = KEY_CUSTOM;
81         h->rte_hash_custom_cmp_eq = func;
82 }
83
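/*
 * Illustrative sketch (not part of the library, hence kept under "#if 0"):
 * a custom key compare callback for rte_hash_set_cmp_func(). It follows
 * the rte_hash_cmp_eq_t convention used below, i.e. it returns 0 when the
 * keys are equal and non-zero otherwise. The function name and the fixed
 * 8-byte key assumption are hypothetical.
 */
#if 0
static int
example_cmp_8byte_keys(const void *key1, const void *key2, size_t key_len)
{
        RTE_SET_USED(key_len);
        /* Compare two 8-byte keys directly instead of calling memcmp() */
        return *(const uint64_t *)key1 != *(const uint64_t *)key2;
}
/* An application would then call:
 * rte_hash_set_cmp_func(h, example_cmp_8byte_keys);
 */
#endif
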
84 static inline int
85 rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
86 {
87         if (h->cmp_jump_table_idx == KEY_CUSTOM)
88                 return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len);
89         else
90                 return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
91 }
92
93 /*
94  * We use the upper 16 bits of the hash as the signature value stored
95  * in the table, and the lower bits for the primary bucket location.
96  * The secondary bucket location is obtained by XOR-ing the primary
97  * bucket location with the signature. This is the same scheme as
98  * proposed in Bin Fan et al.'s paper
99  * "MemC3: Compact and Concurrent MemCache with Dumber Caching and
100  * Smarter Hashing". The benefit of using
101  * XOR is that one can derive the alternative bucket location
102  * using only the current bucket location and the signature.
103  */
104 static inline uint16_t
105 get_short_sig(const hash_sig_t hash)
106 {
107         return hash >> 16;
108 }
109
110 static inline uint32_t
111 get_prim_bucket_index(const struct rte_hash *h, const hash_sig_t hash)
112 {
113         return hash & h->bucket_bitmask;
114 }
115
116 static inline uint32_t
117 get_alt_bucket_index(const struct rte_hash *h,
118                         uint32_t cur_bkt_idx, uint16_t sig)
119 {
120         return (cur_bkt_idx ^ sig) & h->bucket_bitmask;
121 }
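
/*
 * Worked example for the three helpers above (illustrative values only,
 * assuming a table of 1024 buckets, i.e. bucket_bitmask == 0x3FF):
 * for hash == 0xABCD1234 the stored signature is 0xABCD (hash >> 16),
 * the primary bucket index is 0x1234 & 0x3FF == 0x234 and the
 * alternative bucket index is (0x234 ^ 0xABCD) & 0x3FF == 0x1F9.
 * XOR-ing the alternative index with the same signature gives back
 * (0x1F9 ^ 0xABCD) & 0x3FF == 0x234, so either bucket can be derived
 * from the other one plus the signature.
 */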
122
123 struct rte_hash *
124 rte_hash_create(const struct rte_hash_parameters *params)
125 {
126         struct rte_hash *h = NULL;
127         struct rte_tailq_entry *te = NULL;
128         struct rte_hash_list *hash_list;
129         struct rte_ring *r = NULL;
130         struct rte_ring *r_ext = NULL;
131         char hash_name[RTE_HASH_NAMESIZE];
132         void *k = NULL;
133         void *buckets = NULL;
134         void *buckets_ext = NULL;
135         char ring_name[RTE_RING_NAMESIZE];
136         char ext_ring_name[RTE_RING_NAMESIZE];
137         unsigned num_key_slots;
138         unsigned i;
139         unsigned int hw_trans_mem_support = 0, use_local_cache = 0;
140         unsigned int ext_table_support = 0;
141         unsigned int readwrite_concur_support = 0;
142         unsigned int writer_takes_lock = 0;
143         unsigned int no_free_on_del = 0;
144         uint32_t *ext_bkt_to_free = NULL;
145         uint32_t *tbl_chng_cnt = NULL;
146         unsigned int readwrite_concur_lf_support = 0;
147
148         rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
149
150         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
151
152         if (params == NULL) {
153                 RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
154                 return NULL;
155         }
156
157         /* Check for valid parameters */
158         if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
159                         (params->entries < RTE_HASH_BUCKET_ENTRIES) ||
160                         (params->key_len == 0)) {
161                 rte_errno = EINVAL;
162                 RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
163                 return NULL;
164         }
165
166         /* Validate correct usage of extra options */
167         if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) &&
168             (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)) {
169                 rte_errno = EINVAL;
170                 RTE_LOG(ERR, HASH, "rte_hash_create: choose rw concurrency or "
171                         "rw concurrency lock free\n");
172                 return NULL;
173         }
174
175         /* Check extra flags field to check extra options. */
176         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
177                 hw_trans_mem_support = 1;
178
179         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
180                 use_local_cache = 1;
181                 writer_takes_lock = 1;
182         }
183
184         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
185                 readwrite_concur_support = 1;
186                 writer_takes_lock = 1;
187         }
188
189         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
190                 ext_table_support = 1;
191
192         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)
193                 no_free_on_del = 1;
194
195         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {
196                 readwrite_concur_lf_support = 1;
197                 /* Enable not freeing internal memory/index on delete */
198                 no_free_on_del = 1;
199         }
200
201         /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
202         if (use_local_cache)
203                 /*
204                  * Increase number of slots by total number of indices
205                  * that can be stored in the lcore caches
206                  * except for the first cache
207                  */
208                 num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
209                                         (LCORE_CACHE_SIZE - 1) + 1;
210         else
211                 num_key_slots = params->entries + 1;
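
        /*
         * Illustrative sizing (values assumed for this example only): with
         * params->entries == 1024, RTE_MAX_LCORE == 128 and
         * LCORE_CACHE_SIZE == 64, the local-cache case above reserves
         * 1024 + 127 * 63 + 1 == 9026 key slots, while the plain case
         * reserves 1024 + 1 == 1025 (the extra slot is the dummy entry).
         */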
212
213         snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
214         /* Create ring (Dummy slot index is not enqueued) */
215         r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
216                         params->socket_id, 0);
217         if (r == NULL) {
218                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
219                 goto err;
220         }
221
222         const uint32_t num_buckets = rte_align32pow2(params->entries) /
223                                                 RTE_HASH_BUCKET_ENTRIES;
224
225         /* Create ring for extendable buckets. */
226         if (ext_table_support) {
227                 snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
228                                                                 params->name);
229                 r_ext = rte_ring_create(ext_ring_name,
230                                 rte_align32pow2(num_buckets + 1),
231                                 params->socket_id, 0);
232
233                 if (r_ext == NULL) {
234                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
235                                                                 "failed\n");
236                         goto err;
237                 }
238         }
239
240         snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
241
242         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
243
244         /* Guarantee the name is not already in use: this is normally
245          * already caught by the ring creation above. */
246         TAILQ_FOREACH(te, hash_list, next) {
247                 h = (struct rte_hash *) te->data;
248                 if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
249                         break;
250         }
251         h = NULL;
252         if (te != NULL) {
253                 rte_errno = EEXIST;
254                 te = NULL;
255                 goto err_unlock;
256         }
257
258         te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
259         if (te == NULL) {
260                 RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
261                 goto err_unlock;
262         }
263
264         h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
265                                         RTE_CACHE_LINE_SIZE, params->socket_id);
266
267         if (h == NULL) {
268                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
269                 goto err_unlock;
270         }
271
272         buckets = rte_zmalloc_socket(NULL,
273                                 num_buckets * sizeof(struct rte_hash_bucket),
274                                 RTE_CACHE_LINE_SIZE, params->socket_id);
275
276         if (buckets == NULL) {
277                 RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
278                 goto err_unlock;
279         }
280
281         /* Allocate same number of extendable buckets */
282         if (ext_table_support) {
283                 buckets_ext = rte_zmalloc_socket(NULL,
284                                 num_buckets * sizeof(struct rte_hash_bucket),
285                                 RTE_CACHE_LINE_SIZE, params->socket_id);
286                 if (buckets_ext == NULL) {
287                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
288                                                         "failed\n");
289                         goto err_unlock;
290                 }
291                 /* Populate ext bkt ring. We reserve 0, similarly to the
292                  * key-data slot, in case we later want to use the bucket
293                  * index for the linked list, where 0 would mean NULL for
294                  * the next bucket.
295                  */
296                 for (i = 1; i <= num_buckets; i++)
297                         rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
298
299                 if (readwrite_concur_lf_support) {
300                         ext_bkt_to_free = rte_zmalloc(NULL, sizeof(uint32_t) *
301                                                                 num_key_slots, 0);
302                         if (ext_bkt_to_free == NULL) {
303                                 RTE_LOG(ERR, HASH, "ext bkt to free memory allocation "
304                                                                 "failed\n");
305                                 goto err_unlock;
306                         }
307                 }
308         }
309
310         const uint32_t key_entry_size =
311                 RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,
312                           KEY_ALIGNMENT);
313         const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
314
315         k = rte_zmalloc_socket(NULL, key_tbl_size,
316                         RTE_CACHE_LINE_SIZE, params->socket_id);
317
318         if (k == NULL) {
319                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
320                 goto err_unlock;
321         }
322
323         tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t),
324                         RTE_CACHE_LINE_SIZE, params->socket_id);
325
326         if (tbl_chng_cnt == NULL) {
327                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
328                 goto err_unlock;
329         }
330
331 /*
332  * On x86 or ARM64, select an appropriate key compare function, which
333  * may use vector intrinsics; otherwise fall back to memcmp.
334  */
335 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
336         /* Select function to compare keys */
337         switch (params->key_len) {
338         case 16:
339                 h->cmp_jump_table_idx = KEY_16_BYTES;
340                 break;
341         case 32:
342                 h->cmp_jump_table_idx = KEY_32_BYTES;
343                 break;
344         case 48:
345                 h->cmp_jump_table_idx = KEY_48_BYTES;
346                 break;
347         case 64:
348                 h->cmp_jump_table_idx = KEY_64_BYTES;
349                 break;
350         case 80:
351                 h->cmp_jump_table_idx = KEY_80_BYTES;
352                 break;
353         case 96:
354                 h->cmp_jump_table_idx = KEY_96_BYTES;
355                 break;
356         case 112:
357                 h->cmp_jump_table_idx = KEY_112_BYTES;
358                 break;
359         case 128:
360                 h->cmp_jump_table_idx = KEY_128_BYTES;
361                 break;
362         default:
363                 /* If key length is not a multiple of 16 bytes, use generic memcmp */
364                 h->cmp_jump_table_idx = KEY_OTHER_BYTES;
365         }
366 #else
367         h->cmp_jump_table_idx = KEY_OTHER_BYTES;
368 #endif
369
370         if (use_local_cache) {
371                 h->local_free_slots = rte_zmalloc_socket(NULL,
372                                 sizeof(struct lcore_cache) * RTE_MAX_LCORE,
373                                 RTE_CACHE_LINE_SIZE, params->socket_id);
374         }
375
376         /* Default hash function */
377 #if defined(RTE_ARCH_X86)
378         default_hash_func = (rte_hash_function)rte_hash_crc;
379 #elif defined(RTE_ARCH_ARM64)
380         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
381                 default_hash_func = (rte_hash_function)rte_hash_crc;
382 #endif
383         /* Setup hash context */
384         strlcpy(h->name, params->name, sizeof(h->name));
385         h->entries = params->entries;
386         h->key_len = params->key_len;
387         h->key_entry_size = key_entry_size;
388         h->hash_func_init_val = params->hash_func_init_val;
389
390         h->num_buckets = num_buckets;
391         h->bucket_bitmask = h->num_buckets - 1;
392         h->buckets = buckets;
393         h->buckets_ext = buckets_ext;
394         h->free_ext_bkts = r_ext;
395         h->hash_func = (params->hash_func == NULL) ?
396                 default_hash_func : params->hash_func;
397         h->key_store = k;
398         h->free_slots = r;
399         h->ext_bkt_to_free = ext_bkt_to_free;
400         h->tbl_chng_cnt = tbl_chng_cnt;
401         *h->tbl_chng_cnt = 0;
402         h->hw_trans_mem_support = hw_trans_mem_support;
403         h->use_local_cache = use_local_cache;
404         h->readwrite_concur_support = readwrite_concur_support;
405         h->ext_table_support = ext_table_support;
406         h->writer_takes_lock = writer_takes_lock;
407         h->no_free_on_del = no_free_on_del;
408         h->readwrite_concur_lf_support = readwrite_concur_lf_support;
409
410 #if defined(RTE_ARCH_X86)
411         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
412                 h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
413         else
414 #elif defined(RTE_ARCH_ARM64)
415         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
416                 h->sig_cmp_fn = RTE_HASH_COMPARE_NEON;
417         else
418 #endif
419                 h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
420
421         /* Writer threads need to take the lock when:
422          * 1) RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is enabled OR
423          * 2) RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD is enabled
424          */
425         if (h->writer_takes_lock) {
426                 h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
427                                                 RTE_CACHE_LINE_SIZE);
428                 if (h->readwrite_lock == NULL)
429                         goto err_unlock;
430
431                 rte_rwlock_init(h->readwrite_lock);
432         }
433
434         /* Populate free slots ring. Entry zero is reserved for key misses. */
435         for (i = 1; i < num_key_slots; i++)
436                 rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
437
438         te->data = (void *) h;
439         TAILQ_INSERT_TAIL(hash_list, te, next);
440         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
441
442         return h;
443 err_unlock:
444         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
445 err:
446         rte_ring_free(r);
447         rte_ring_free(r_ext);
448         rte_free(te);
449         rte_free(h);
450         rte_free(buckets);
451         rte_free(buckets_ext);
452         rte_free(k);
453         rte_free(tbl_chng_cnt);
454         rte_free(ext_bkt_to_free);
455         return NULL;
456 }
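
/*
 * Illustrative usage sketch (not part of the library, hence kept under
 * "#if 0"): how an application might create a small table, insert a key
 * and look it up again. The table name, sizes and SOCKET_ID_ANY are
 * example choices only.
 */
#if 0
static int
example_hash_usage(void)
{
        struct rte_hash_parameters params = {
                .name = "example_flow_table",
                .entries = 1024,
                .key_len = sizeof(uint32_t),
                .hash_func = NULL,      /* fall back to the default hash */
                .hash_func_init_val = 0,
                .socket_id = SOCKET_ID_ANY,
                .extra_flag = 0,
        };
        struct rte_hash *h = rte_hash_create(&params);
        uint32_t key = 42;
        void *data = NULL;

        if (h == NULL)
                return -rte_errno;
        /* Store the value 1 for key 42, then read it back */
        if (rte_hash_add_key_data(h, &key, (void *)(uintptr_t)1) == 0 &&
                        rte_hash_lookup_data(h, &key, &data) >= 0)
                RTE_LOG(INFO, HASH, "found data %p\n", data);
        rte_hash_free(h);
        return 0;
}
#endif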
457
458 void
459 rte_hash_free(struct rte_hash *h)
460 {
461         struct rte_tailq_entry *te;
462         struct rte_hash_list *hash_list;
463
464         if (h == NULL)
465                 return;
466
467         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
468
469         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
470
471         /* find out tailq entry */
472         TAILQ_FOREACH(te, hash_list, next) {
473                 if (te->data == (void *) h)
474                         break;
475         }
476
477         if (te == NULL) {
478                 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
479                 return;
480         }
481
482         TAILQ_REMOVE(hash_list, te, next);
483
484         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
485
486         if (h->use_local_cache)
487                 rte_free(h->local_free_slots);
488         if (h->writer_takes_lock)
489                 rte_free(h->readwrite_lock);
490         rte_ring_free(h->free_slots);
491         rte_ring_free(h->free_ext_bkts);
492         rte_free(h->key_store);
493         rte_free(h->buckets);
494         rte_free(h->buckets_ext);
495         rte_free(h->tbl_chng_cnt);
496         rte_free(h->ext_bkt_to_free);
497         rte_free(h);
498         rte_free(te);
499 }
500
501 hash_sig_t
502 rte_hash_hash(const struct rte_hash *h, const void *key)
503 {
504         /* calc hash result by key */
505         return h->hash_func(key, h->key_len, h->hash_func_init_val);
506 }
507
508 int32_t
509 rte_hash_count(const struct rte_hash *h)
510 {
511         uint32_t tot_ring_cnt, cached_cnt = 0;
512         uint32_t i, ret;
513
514         if (h == NULL)
515                 return -EINVAL;
516
517         if (h->use_local_cache) {
518                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
519                                         (LCORE_CACHE_SIZE - 1);
520                 for (i = 0; i < RTE_MAX_LCORE; i++)
521                         cached_cnt += h->local_free_slots[i].len;
522
523                 ret = tot_ring_cnt - rte_ring_count(h->free_slots) -
524                                                                 cached_cnt;
525         } else {
526                 tot_ring_cnt = h->entries;
527                 ret = tot_ring_cnt - rte_ring_count(h->free_slots);
528         }
529         return ret;
530 }
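
/*
 * Illustrative example for rte_hash_count() above (numbers assumed for the
 * example only): with h->entries == 1024, RTE_MAX_LCORE == 128 and
 * LCORE_CACHE_SIZE == 64, tot_ring_cnt == 1024 + 127 * 63 == 9025 in the
 * local-cache case; if the free-slot ring holds 9000 entries and the
 * per-lcore caches hold 20 more, the function reports
 * 9025 - 9000 - 20 == 5 keys currently stored.
 */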
531
532 /* Read write locks implemented using rte_rwlock */
533 static inline void
534 __hash_rw_writer_lock(const struct rte_hash *h)
535 {
536         if (h->writer_takes_lock && h->hw_trans_mem_support)
537                 rte_rwlock_write_lock_tm(h->readwrite_lock);
538         else if (h->writer_takes_lock)
539                 rte_rwlock_write_lock(h->readwrite_lock);
540 }
541
542 static inline void
543 __hash_rw_reader_lock(const struct rte_hash *h)
544 {
545         if (h->readwrite_concur_support && h->hw_trans_mem_support)
546                 rte_rwlock_read_lock_tm(h->readwrite_lock);
547         else if (h->readwrite_concur_support)
548                 rte_rwlock_read_lock(h->readwrite_lock);
549 }
550
551 static inline void
552 __hash_rw_writer_unlock(const struct rte_hash *h)
553 {
554         if (h->writer_takes_lock && h->hw_trans_mem_support)
555                 rte_rwlock_write_unlock_tm(h->readwrite_lock);
556         else if (h->writer_takes_lock)
557                 rte_rwlock_write_unlock(h->readwrite_lock);
558 }
559
560 static inline void
561 __hash_rw_reader_unlock(const struct rte_hash *h)
562 {
563         if (h->readwrite_concur_support && h->hw_trans_mem_support)
564                 rte_rwlock_read_unlock_tm(h->readwrite_lock);
565         else if (h->readwrite_concur_support)
566                 rte_rwlock_read_unlock(h->readwrite_lock);
567 }
568
569 void
570 rte_hash_reset(struct rte_hash *h)
571 {
572         void *ptr;
573         uint32_t tot_ring_cnt, i;
574
575         if (h == NULL)
576                 return;
577
578         __hash_rw_writer_lock(h);
579         memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
580         memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
581         *h->tbl_chng_cnt = 0;
582
583         /* clear the free ring */
584         while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
585                 continue;
586
587         /* clear free extendable bucket ring and memory */
588         if (h->ext_table_support) {
589                 memset(h->buckets_ext, 0, h->num_buckets *
590                                                 sizeof(struct rte_hash_bucket));
591                 while (rte_ring_dequeue(h->free_ext_bkts, &ptr) == 0)
592                         continue;
593         }
594
595         /* Repopulate the free slots ring. Entry zero is reserved for key misses */
596         if (h->use_local_cache)
597                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
598                                         (LCORE_CACHE_SIZE - 1);
599         else
600                 tot_ring_cnt = h->entries;
601
602         for (i = 1; i < tot_ring_cnt + 1; i++)
603                 rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
604
605         /* Repopulate the free ext bkt ring. */
606         if (h->ext_table_support) {
607                 for (i = 1; i <= h->num_buckets; i++)
608                         rte_ring_sp_enqueue(h->free_ext_bkts,
609                                                 (void *)((uintptr_t) i));
610         }
611
612         if (h->use_local_cache) {
613                 /* Reset local caches per lcore */
614                 for (i = 0; i < RTE_MAX_LCORE; i++)
615                         h->local_free_slots[i].len = 0;
616         }
617         __hash_rw_writer_unlock(h);
618 }
619
620 /*
621  * Function called to enqueue back an index into the cache/ring,
622  * as the slot has not been used and can be reused in the next
623  * addition attempt.
624  */
625 static inline void
626 enqueue_slot_back(const struct rte_hash *h,
627                 struct lcore_cache *cached_free_slots,
628                 void *slot_id)
629 {
630         if (h->use_local_cache) {
631                 cached_free_slots->objs[cached_free_slots->len] = slot_id;
632                 cached_free_slots->len++;
633         } else
634                 rte_ring_sp_enqueue(h->free_slots, slot_id);
635 }
636
637 /* Search for a key in the bucket and update its data.
638  * The writer holds the lock before calling this.
639  */
640 static inline int32_t
641 search_and_update(const struct rte_hash *h, void *data, const void *key,
642         struct rte_hash_bucket *bkt, uint16_t sig)
643 {
644         int i;
645         struct rte_hash_key *k, *keys = h->key_store;
646
647         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
648                 if (bkt->sig_current[i] == sig) {
649                         k = (struct rte_hash_key *) ((char *)keys +
650                                         bkt->key_idx[i] * h->key_entry_size);
651                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
652                                 /* 'pdata' acts as the synchronization point
653                                  * when an existing hash entry is updated.
654                                  * Key is not updated in this case.
655                                  */
656                                 __atomic_store_n(&k->pdata,
657                                         data,
658                                         __ATOMIC_RELEASE);
659                                 /*
660                                  * Return index where key is stored,
661                                  * subtracting the first dummy index
662                                  */
663                                 return bkt->key_idx[i] - 1;
664                         }
665                 }
666         }
667         return -1;
668 }
669
670 /* Only try to insert into one bucket (@prim_bkt) without pushing other
671  * entries around.
672  * Return 1 if the key already exists (data is updated), 0 on success, -1
673  * if there is no empty entry.
674  */
675 static inline int32_t
676 rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
677                 struct rte_hash_bucket *prim_bkt,
678                 struct rte_hash_bucket *sec_bkt,
679                 const struct rte_hash_key *key, void *data,
680                 uint16_t sig, uint32_t new_idx,
681                 int32_t *ret_val)
682 {
683         unsigned int i;
684         struct rte_hash_bucket *cur_bkt;
685         int32_t ret;
686
687         __hash_rw_writer_lock(h);
688         /* Check if the key was inserted after the last check but before
689          * this protected region, to avoid inserting duplicate keys.
690          */
691         ret = search_and_update(h, data, key, prim_bkt, sig);
692         if (ret != -1) {
693                 __hash_rw_writer_unlock(h);
694                 *ret_val = ret;
695                 return 1;
696         }
697
698         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
699                 ret = search_and_update(h, data, key, cur_bkt, sig);
700                 if (ret != -1) {
701                         __hash_rw_writer_unlock(h);
702                         *ret_val = ret;
703                         return 1;
704                 }
705         }
706
707         /* Insert new entry if there is room in the primary
708          * bucket.
709          */
710         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
711                 /* Check if slot is available */
712                 if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
713                         prim_bkt->sig_current[i] = sig;
714                         /* Key can be of arbitrary length, so it is
715                          * not possible to store it atomically.
716                          * Hence the new key element's memory stores
717                          * (key as well as data) should be complete
718                          * before it is referenced.
719                          */
720                         __atomic_store_n(&prim_bkt->key_idx[i],
721                                          new_idx,
722                                          __ATOMIC_RELEASE);
723                         break;
724                 }
725         }
726         __hash_rw_writer_unlock(h);
727
728         if (i != RTE_HASH_BUCKET_ENTRIES)
729                 return 0;
730
731         /* no empty entry */
732         return -1;
733 }
734
735 /* Shift entries along the provided cuckoo path (@leaf and @leaf_slot) and
736  * fill the path head with the new entry (sig, new_idx).
737  * Return 1 if a matching key was found, -1 if the cuckoo path was
738  * invalidated and the move failed, 0 on success.
739  */
740 static inline int
741 rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
742                         struct rte_hash_bucket *bkt,
743                         struct rte_hash_bucket *alt_bkt,
744                         const struct rte_hash_key *key, void *data,
745                         struct queue_node *leaf, uint32_t leaf_slot,
746                         uint16_t sig, uint32_t new_idx,
747                         int32_t *ret_val)
748 {
749         uint32_t prev_alt_bkt_idx;
750         struct rte_hash_bucket *cur_bkt;
751         struct queue_node *prev_node, *curr_node = leaf;
752         struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
753         uint32_t prev_slot, curr_slot = leaf_slot;
754         int32_t ret;
755
756         __hash_rw_writer_lock(h);
757
758         /* In case the empty slot was taken before entering the protected region */
759         if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) {
760                 __hash_rw_writer_unlock(h);
761                 return -1;
762         }
763
764         /* Check if key was inserted after last check but before this
765          * protected region.
766          */
767         ret = search_and_update(h, data, key, bkt, sig);
768         if (ret != -1) {
769                 __hash_rw_writer_unlock(h);
770                 *ret_val = ret;
771                 return 1;
772         }
773
774         FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
775                 ret = search_and_update(h, data, key, cur_bkt, sig);
776                 if (ret != -1) {
777                         __hash_rw_writer_unlock(h);
778                         *ret_val = ret;
779                         return 1;
780                 }
781         }
782
783         while (likely(curr_node->prev != NULL)) {
784                 prev_node = curr_node->prev;
785                 prev_bkt = prev_node->bkt;
786                 prev_slot = curr_node->prev_slot;
787
788                 prev_alt_bkt_idx = get_alt_bucket_index(h,
789                                         prev_node->cur_bkt_idx,
790                                         prev_bkt->sig_current[prev_slot]);
791
792                 if (unlikely(&h->buckets[prev_alt_bkt_idx]
793                                 != curr_bkt)) {
794                         /* revert it to empty, otherwise duplicated keys */
795                         __atomic_store_n(&curr_bkt->key_idx[curr_slot],
796                                 EMPTY_SLOT,
797                                 __ATOMIC_RELEASE);
798                         __hash_rw_writer_unlock(h);
799                         return -1;
800                 }
801
802                 if (h->readwrite_concur_lf_support) {
803                         /* Inform the previous move. The current move need
804                          * not be informed now as the current bucket entry
805                          * is present in both primary and secondary.
806                          * Since there is one writer, load acquires on
807                          * tbl_chng_cnt are not required.
808                          */
809                         __atomic_store_n(h->tbl_chng_cnt,
810                                          *h->tbl_chng_cnt + 1,
811                                          __ATOMIC_RELEASE);
812                         /* The store to sig_current should not
813                          * move above the store to tbl_chng_cnt.
814                          */
815                         __atomic_thread_fence(__ATOMIC_RELEASE);
816                 }
817
818                 /* Need to swap current/alt sig to allow later
819                  * Cuckoo insert to move elements back to its
820                  * primary bucket if available
821                  */
822                 curr_bkt->sig_current[curr_slot] =
823                         prev_bkt->sig_current[prev_slot];
824                 /* Release the updated bucket entry */
825                 __atomic_store_n(&curr_bkt->key_idx[curr_slot],
826                         prev_bkt->key_idx[prev_slot],
827                         __ATOMIC_RELEASE);
828
829                 curr_slot = prev_slot;
830                 curr_node = prev_node;
831                 curr_bkt = curr_node->bkt;
832         }
833
834         if (h->readwrite_concur_lf_support) {
835                 /* Inform the previous move. The current move need
836                  * not be informed now as the current bucket entry
837                  * is present in both primary and secondary.
838                  * Since there is one writer, load acquires on
839                  * tbl_chng_cnt are not required.
840                  */
841                 __atomic_store_n(h->tbl_chng_cnt,
842                                  *h->tbl_chng_cnt + 1,
843                                  __ATOMIC_RELEASE);
844                 /* The store to sig_current should not
845                  * move above the store to tbl_chng_cnt.
846                  */
847                 __atomic_thread_fence(__ATOMIC_RELEASE);
848         }
849
850         curr_bkt->sig_current[curr_slot] = sig;
851         /* Release the new bucket entry */
852         __atomic_store_n(&curr_bkt->key_idx[curr_slot],
853                          new_idx,
854                          __ATOMIC_RELEASE);
855
856         __hash_rw_writer_unlock(h);
857
858         return 0;
859
860 }
861
862 /*
863  * Make space for the new key, using a BFS cuckoo search and multi-writer
864  * safe cuckoo displacement.
865  */
866 static inline int
867 rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
868                         struct rte_hash_bucket *bkt,
869                         struct rte_hash_bucket *sec_bkt,
870                         const struct rte_hash_key *key, void *data,
871                         uint16_t sig, uint32_t bucket_idx,
872                         uint32_t new_idx, int32_t *ret_val)
873 {
874         unsigned int i;
875         struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
876         struct queue_node *tail, *head;
877         struct rte_hash_bucket *curr_bkt, *alt_bkt;
878         uint32_t cur_idx, alt_idx;
879
880         tail = queue;
881         head = queue + 1;
882         tail->bkt = bkt;
883         tail->prev = NULL;
884         tail->prev_slot = -1;
885         tail->cur_bkt_idx = bucket_idx;
886
887         /* Cuckoo BFS search */
888         while (likely(tail != head && head <
889                                         queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
890                                         RTE_HASH_BUCKET_ENTRIES)) {
891                 curr_bkt = tail->bkt;
892                 cur_idx = tail->cur_bkt_idx;
893                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
894                         if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
895                                 int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
896                                                 bkt, sec_bkt, key, data,
897                                                 tail, i, sig,
898                                                 new_idx, ret_val);
899                                 if (likely(ret != -1))
900                                         return ret;
901                         }
902
903                         /* Enqueue new node and keep prev node info */
904                         alt_idx = get_alt_bucket_index(h, cur_idx,
905                                                 curr_bkt->sig_current[i]);
906                         alt_bkt = &(h->buckets[alt_idx]);
907                         head->bkt = alt_bkt;
908                         head->cur_bkt_idx = alt_idx;
909                         head->prev = tail;
910                         head->prev_slot = i;
911                         head++;
912                 }
913                 tail++;
914         }
915
916         return -ENOSPC;
917 }
918
919 static inline int32_t
920 __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
921                                                 hash_sig_t sig, void *data)
922 {
923         uint16_t short_sig;
924         uint32_t prim_bucket_idx, sec_bucket_idx;
925         struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
926         struct rte_hash_key *new_k, *keys = h->key_store;
927         void *slot_id = NULL;
928         void *ext_bkt_id = NULL;
929         uint32_t new_idx, bkt_id;
930         int ret;
931         unsigned n_slots;
932         unsigned lcore_id;
933         unsigned int i;
934         struct lcore_cache *cached_free_slots = NULL;
935         int32_t ret_val;
936         struct rte_hash_bucket *last;
937
938         short_sig = get_short_sig(sig);
939         prim_bucket_idx = get_prim_bucket_index(h, sig);
940         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
941         prim_bkt = &h->buckets[prim_bucket_idx];
942         sec_bkt = &h->buckets[sec_bucket_idx];
943         rte_prefetch0(prim_bkt);
944         rte_prefetch0(sec_bkt);
945
946         /* Check if key is already inserted in primary location */
947         __hash_rw_writer_lock(h);
948         ret = search_and_update(h, data, key, prim_bkt, short_sig);
949         if (ret != -1) {
950                 __hash_rw_writer_unlock(h);
951                 return ret;
952         }
953
954         /* Check if key is already inserted in secondary location */
955         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
956                 ret = search_and_update(h, data, key, cur_bkt, short_sig);
957                 if (ret != -1) {
958                         __hash_rw_writer_unlock(h);
959                         return ret;
960                 }
961         }
962
963         __hash_rw_writer_unlock(h);
964
965         /* Did not find a match, so get a new slot for storing the new key */
966         if (h->use_local_cache) {
967                 lcore_id = rte_lcore_id();
968                 cached_free_slots = &h->local_free_slots[lcore_id];
969                 /* Try to get a free slot from the local cache */
970                 if (cached_free_slots->len == 0) {
971                         /* Need to get another burst of free slots from global ring */
972                         n_slots = rte_ring_mc_dequeue_burst(h->free_slots,
973                                         cached_free_slots->objs,
974                                         LCORE_CACHE_SIZE, NULL);
975                         if (n_slots == 0) {
976                                 return -ENOSPC;
977                         }
978
979                         cached_free_slots->len += n_slots;
980                 }
981
982                 /* Get a free slot from the local cache */
983                 cached_free_slots->len--;
984                 slot_id = cached_free_slots->objs[cached_free_slots->len];
985         } else {
986                 if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
987                         return -ENOSPC;
988                 }
989         }
990
991         new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
992         new_idx = (uint32_t)((uintptr_t) slot_id);
993         /* Copy key */
994         memcpy(new_k->key, key, h->key_len);
995         /* Key can be of arbitrary length, so it is not possible to store
996          * it atomically. Hence the new key element's memory stores
997          * (key as well as data) should be complete before it is referenced.
998          * 'pdata' acts as the synchronization point when an existing hash
999          * entry is updated.
1000          */
1001         __atomic_store_n(&new_k->pdata,
1002                 data,
1003                 __ATOMIC_RELEASE);
1004
1005         /* Find an empty slot and insert */
1006         ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
1007                                         short_sig, new_idx, &ret_val);
1008         if (ret == 0)
1009                 return new_idx - 1;
1010         else if (ret == 1) {
1011                 enqueue_slot_back(h, cached_free_slots, slot_id);
1012                 return ret_val;
1013         }
1014
1015         /* Primary bucket full, need to make space for new entry */
1016         ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
1017                                 short_sig, prim_bucket_idx, new_idx, &ret_val);
1018         if (ret == 0)
1019                 return new_idx - 1;
1020         else if (ret == 1) {
1021                 enqueue_slot_back(h, cached_free_slots, slot_id);
1022                 return ret_val;
1023         }
1024
1025         /* Also search secondary bucket to get better occupancy */
1026         ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
1027                                 short_sig, sec_bucket_idx, new_idx, &ret_val);
1028
1029         if (ret == 0)
1030                 return new_idx - 1;
1031         else if (ret == 1) {
1032                 enqueue_slot_back(h, cached_free_slots, slot_id);
1033                 return ret_val;
1034         }
1035
1036         /* if ext table not enabled, we failed the insertion */
1037         if (!h->ext_table_support) {
1038                 enqueue_slot_back(h, cached_free_slots, slot_id);
1039                 return ret;
1040         }
1041
1042         /* Now we need to go through the extendable buckets. The lock is
1043          * needed to protect all extendable bucket processing.
1044          */
1045         __hash_rw_writer_lock(h);
1046         /* Check for duplicates again, since the key could have been inserted before the lock was taken */
1047         ret = search_and_update(h, data, key, prim_bkt, short_sig);
1048         if (ret != -1) {
1049                 enqueue_slot_back(h, cached_free_slots, slot_id);
1050                 goto failure;
1051         }
1052
1053         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1054                 ret = search_and_update(h, data, key, cur_bkt, short_sig);
1055                 if (ret != -1) {
1056                         enqueue_slot_back(h, cached_free_slots, slot_id);
1057                         goto failure;
1058                 }
1059         }
1060
1061         /* Search sec and ext buckets to find an empty entry to insert. */
1062         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1063                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1064                         /* Check if slot is available */
1065                         if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
1066                                 cur_bkt->sig_current[i] = short_sig;
1067                                 /* Store to signature should not leak after
1068                                  * the store to key_idx
1069                                  */
1070                                 __atomic_store_n(&cur_bkt->key_idx[i],
1071                                                  new_idx,
1072                                                  __ATOMIC_RELEASE);
1073                                 __hash_rw_writer_unlock(h);
1074                                 return new_idx - 1;
1075                         }
1076                 }
1077         }
1078
1079         /* Failed to get an empty entry from extendable buckets. Link a new
1080          * extendable bucket. We first get a free bucket from ring.
1081          */
1082         if (rte_ring_sc_dequeue(h->free_ext_bkts, &ext_bkt_id) != 0) {
1083                 ret = -ENOSPC;
1084                 goto failure;
1085         }
1086
1087         bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
1088         /* Use the first location of the new bucket */
1089         (h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
1090         /* Store to signature should not leak after
1091          * the store to key_idx
1092          */
1093         __atomic_store_n(&(h->buckets_ext[bkt_id]).key_idx[0],
1094                          new_idx,
1095                          __ATOMIC_RELEASE);
1096         /* Link the new bucket to sec bucket linked list */
1097         last = rte_hash_get_last_bkt(sec_bkt);
1098         last->next = &h->buckets_ext[bkt_id];
1099         __hash_rw_writer_unlock(h);
1100         return new_idx - 1;
1101
1102 failure:
1103         __hash_rw_writer_unlock(h);
1104         return ret;
1105
1106 }
1107
1108 int32_t
1109 rte_hash_add_key_with_hash(const struct rte_hash *h,
1110                         const void *key, hash_sig_t sig)
1111 {
1112         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1113         return __rte_hash_add_key_with_hash(h, key, sig, 0);
1114 }
1115
1116 int32_t
1117 rte_hash_add_key(const struct rte_hash *h, const void *key)
1118 {
1119         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1120         return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0);
1121 }
1122
1123 int
1124 rte_hash_add_key_with_hash_data(const struct rte_hash *h,
1125                         const void *key, hash_sig_t sig, void *data)
1126 {
1127         int ret;
1128
1129         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1130         ret = __rte_hash_add_key_with_hash(h, key, sig, data);
1131         if (ret >= 0)
1132                 return 0;
1133         else
1134                 return ret;
1135 }
1136
1137 int
1138 rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
1139 {
1140         int ret;
1141
1142         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1143
1144         ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data);
1145         if (ret >= 0)
1146                 return 0;
1147         else
1148                 return ret;
1149 }
1150
1151 /* Search one bucket to find the matching key - uses rw lock */
1152 static inline int32_t
1153 search_one_bucket_l(const struct rte_hash *h, const void *key,
1154                 uint16_t sig, void **data,
1155                 const struct rte_hash_bucket *bkt)
1156 {
1157         int i;
1158         struct rte_hash_key *k, *keys = h->key_store;
1159
1160         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1161                 if (bkt->sig_current[i] == sig &&
1162                                 bkt->key_idx[i] != EMPTY_SLOT) {
1163                         k = (struct rte_hash_key *) ((char *)keys +
1164                                         bkt->key_idx[i] * h->key_entry_size);
1165
1166                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1167                                 if (data != NULL)
1168                                         *data = k->pdata;
1169                                 /*
1170                                  * Return index where key is stored,
1171                                  * subtracting the first dummy index
1172                                  */
1173                                 return bkt->key_idx[i] - 1;
1174                         }
1175                 }
1176         }
1177         return -1;
1178 }
1179
1180 /* Search one bucket to find the matching key */
1181 static inline int32_t
1182 search_one_bucket_lf(const struct rte_hash *h, const void *key, uint16_t sig,
1183                         void **data, const struct rte_hash_bucket *bkt)
1184 {
1185         int i;
1186         uint32_t key_idx;
1187         void *pdata;
1188         struct rte_hash_key *k, *keys = h->key_store;
1189
1190         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1191                 key_idx = __atomic_load_n(&bkt->key_idx[i],
1192                                           __ATOMIC_ACQUIRE);
1193                 if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
1194                         k = (struct rte_hash_key *) ((char *)keys +
1195                                         key_idx * h->key_entry_size);
1196                         pdata = __atomic_load_n(&k->pdata,
1197                                         __ATOMIC_ACQUIRE);
1198
1199                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1200                                 if (data != NULL)
1201                                         *data = pdata;
1202                                 /*
1203                                  * Return index where key is stored,
1204                                  * subtracting the first dummy index
1205                                  */
1206                                 return key_idx - 1;
1207                         }
1208                 }
1209         }
1210         return -1;
1211 }
1212
1213 static inline int32_t
1214 __rte_hash_lookup_with_hash_l(const struct rte_hash *h, const void *key,
1215                                 hash_sig_t sig, void **data)
1216 {
1217         uint32_t prim_bucket_idx, sec_bucket_idx;
1218         struct rte_hash_bucket *bkt, *cur_bkt;
1219         int ret;
1220         uint16_t short_sig;
1221
1222         short_sig = get_short_sig(sig);
1223         prim_bucket_idx = get_prim_bucket_index(h, sig);
1224         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1225
1226         bkt = &h->buckets[prim_bucket_idx];
1227
1228         __hash_rw_reader_lock(h);
1229
1230         /* Check if key is in primary location */
1231         ret = search_one_bucket_l(h, key, short_sig, data, bkt);
1232         if (ret != -1) {
1233                 __hash_rw_reader_unlock(h);
1234                 return ret;
1235         }
1236         /* Calculate secondary hash */
1237         bkt = &h->buckets[sec_bucket_idx];
1238
1239         /* Check if key is in secondary location */
1240         FOR_EACH_BUCKET(cur_bkt, bkt) {
1241                 ret = search_one_bucket_l(h, key, short_sig,
1242                                         data, cur_bkt);
1243                 if (ret != -1) {
1244                         __hash_rw_reader_unlock(h);
1245                         return ret;
1246                 }
1247         }
1248
1249         __hash_rw_reader_unlock(h);
1250
1251         return -ENOENT;
1252 }
1253
1254 static inline int32_t
1255 __rte_hash_lookup_with_hash_lf(const struct rte_hash *h, const void *key,
1256                                         hash_sig_t sig, void **data)
1257 {
1258         uint32_t prim_bucket_idx, sec_bucket_idx;
1259         struct rte_hash_bucket *bkt, *cur_bkt;
1260         uint32_t cnt_b, cnt_a;
1261         int ret;
1262         uint16_t short_sig;
1263
1264         short_sig = get_short_sig(sig);
1265         prim_bucket_idx = get_prim_bucket_index(h, sig);
1266         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1267
1268         do {
1269                 /* Load the table change counter before the lookup
1270                  * starts. Acquire semantics will make sure that
1271                  * loads in search_one_bucket are not hoisted.
1272                  */
1273                 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1274                                 __ATOMIC_ACQUIRE);
1275
1276                 /* Check if key is in primary location */
1277                 bkt = &h->buckets[prim_bucket_idx];
1278                 ret = search_one_bucket_lf(h, key, short_sig, data, bkt);
1279                 if (ret != -1) {
1280                         __hash_rw_reader_unlock(h);
1281                         return ret;
1282                 }
1283                 /* Calculate secondary hash */
1284                 bkt = &h->buckets[sec_bucket_idx];
1285
1286                 /* Check if key is in secondary location */
1287                 FOR_EACH_BUCKET(cur_bkt, bkt) {
1288                         ret = search_one_bucket_lf(h, key, short_sig,
1289                                                 data, cur_bkt);
1290                         if (ret != -1) {
1291                                 __hash_rw_reader_unlock(h);
1292                                 return ret;
1293                         }
1294                 }
1295
1296                 /* The loads of sig_current in search_one_bucket
1297                  * should not move below the load from tbl_chng_cnt.
1298                  */
1299                 __atomic_thread_fence(__ATOMIC_ACQUIRE);
1300                 /* Re-read the table change counter to check if the
1301                  * table has changed during search. If yes, re-do
1302                  * the search.
1303                  * This load should not get hoisted. The load
1304                  * acquires on cnt_b, key index in primary bucket
1305                  * and key index in secondary bucket will make sure
1306                  * that it does not get hoisted.
1307                  */
1308                 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
1309                                         __ATOMIC_ACQUIRE);
1310         } while (cnt_b != cnt_a);
1311
1312         return -ENOENT;
1313 }
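
/*
 * Illustrative interleaving for the retry loop above (counter values made
 * up for the example): a reader loads cnt_b == 7 and starts scanning; the
 * writer then moves an entry and bumps tbl_chng_cnt to 8; when the reader
 * re-reads the counter it sees cnt_a == 8 != cnt_b and restarts the
 * search, so a key that was moved mid-scan is not reported as missing.
 */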
1314
1315 static inline int32_t
1316 __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
1317                                         hash_sig_t sig, void **data)
1318 {
1319         if (h->readwrite_concur_lf_support)
1320                 return __rte_hash_lookup_with_hash_lf(h, key, sig, data);
1321         else
1322                 return __rte_hash_lookup_with_hash_l(h, key, sig, data);
1323 }
1324
1325 int32_t
1326 rte_hash_lookup_with_hash(const struct rte_hash *h,
1327                         const void *key, hash_sig_t sig)
1328 {
1329         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1330         return __rte_hash_lookup_with_hash(h, key, sig, NULL);
1331 }
1332
1333 int32_t
1334 rte_hash_lookup(const struct rte_hash *h, const void *key)
1335 {
1336         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1337         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL);
1338 }
1339
1340 int
1341 rte_hash_lookup_with_hash_data(const struct rte_hash *h,
1342                         const void *key, hash_sig_t sig, void **data)
1343 {
1344         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1345         return __rte_hash_lookup_with_hash(h, key, sig, data);
1346 }
1347
1348 int
1349 rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
1350 {
1351         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1352         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
1353 }
1354
1355 static inline void
1356 remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
1357 {
1358         unsigned lcore_id, n_slots;
1359         struct lcore_cache *cached_free_slots;
1360
1361         if (h->use_local_cache) {
1362                 lcore_id = rte_lcore_id();
1363                 cached_free_slots = &h->local_free_slots[lcore_id];
1364                 /* Cache full, need to spill it back to the global free-slot ring. */
1365                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1366                         /* Need to enqueue the free slots in global ring. */
1367                         n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1368                                                 cached_free_slots->objs,
1369                                                 LCORE_CACHE_SIZE, NULL);
1370                         ERR_IF_TRUE((n_slots == 0),
1371                                 "%s: could not enqueue free slots in global ring\n",
1372                                 __func__);
1373                         cached_free_slots->len -= n_slots;
1374                 }
1375                 /* Put index of new free slot in cache. */
1376                 cached_free_slots->objs[cached_free_slots->len] =
1377                                 (void *)((uintptr_t)bkt->key_idx[i]);
1378                 cached_free_slots->len++;
1379         } else {
1380                 rte_ring_sp_enqueue(h->free_slots,
1381                                 (void *)((uintptr_t)bkt->key_idx[i]));
1382         }
1383 }
1384
1385 /* Compact the linked list by moving the key from the last entry in the
1386  * linked list to the empty slot.
1387  */
1388 static inline void
1389 __rte_hash_compact_ll(const struct rte_hash *h,
1390                         struct rte_hash_bucket *cur_bkt, int pos) {
1391         int i;
1392         struct rte_hash_bucket *last_bkt;
1393
1394         if (!cur_bkt->next)
1395                 return;
1396
1397         last_bkt = rte_hash_get_last_bkt(cur_bkt);
1398
1399         for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) {
1400                 if (last_bkt->key_idx[i] != EMPTY_SLOT) {
1401                         cur_bkt->sig_current[pos] = last_bkt->sig_current[i];
1402                         __atomic_store_n(&cur_bkt->key_idx[pos],
1403                                          last_bkt->key_idx[i],
1404                                          __ATOMIC_RELEASE);
1405                         if (h->readwrite_concur_lf_support) {
1406                                 /* Inform the readers that the table has changed.
1407                                  * Since there is only one writer, a load-acquire
1408                                  * on tbl_chng_cnt is not required.
1409                                  */
1410                                 __atomic_store_n(h->tbl_chng_cnt,
1411                                          *h->tbl_chng_cnt + 1,
1412                                          __ATOMIC_RELEASE);
1413                                 /* The store to sig_current should
1414                                  * not move above the store to tbl_chng_cnt.
1415                                  */
1416                                 __atomic_thread_fence(__ATOMIC_RELEASE);
1417                         }
1418                         last_bkt->sig_current[i] = NULL_SIGNATURE;
1419                         __atomic_store_n(&last_bkt->key_idx[i],
1420                                          EMPTY_SLOT,
1421                                          __ATOMIC_RELEASE);
1422                         return;
1423                 }
1424         }
1425 }
1426
1427 /* Search one bucket and remove the matched key.
1428  * Writer is expected to hold the lock while calling this
1429  * function.
1430  */
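/* On a match the signature is cleared, the key-store index is returned to
 * the free list unless no_free_on_del is set, key_idx is release-stored as
 * EMPTY_SLOT, the matching entry position is written to *pos and the
 * key-store index (minus the dummy slot) is returned; -1 means the key is
 * not in this bucket.
 */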
1431 static inline int32_t
1432 search_and_remove(const struct rte_hash *h, const void *key,
1433                         struct rte_hash_bucket *bkt, uint16_t sig, int *pos)
1434 {
1435         struct rte_hash_key *k, *keys = h->key_store;
1436         unsigned int i;
1437         uint32_t key_idx;
1438
1439         /* Check if key is in bucket */
1440         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1441                 key_idx = __atomic_load_n(&bkt->key_idx[i],
1442                                           __ATOMIC_ACQUIRE);
1443                 if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
1444                         k = (struct rte_hash_key *) ((char *)keys +
1445                                         key_idx * h->key_entry_size);
1446                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1447                                 bkt->sig_current[i] = NULL_SIGNATURE;
1448                                 /* Free the key store index if
1449                                  * no_free_on_del is disabled.
1450                                  */
1451                                 if (!h->no_free_on_del)
1452                                         remove_entry(h, bkt, i);
1453
1454                                 __atomic_store_n(&bkt->key_idx[i],
1455                                                  EMPTY_SLOT,
1456                                                  __ATOMIC_RELEASE);
1457
1458                                 *pos = i;
1459                                 /*
1460                                  * Return index where key is stored,
1461                                  * subtracting the first dummy index
1462                                  */
1463                                 return key_idx - 1;
1464                         }
1465                 }
1466         }
1467         return -1;
1468 }
1469
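/* Delete flow: take the writer lock, try to remove the key from the primary
 * bucket and then from the secondary bucket and its extendable chain. After
 * a hit the chain is compacted and, if the last extendable bucket ends up
 * empty, it is unlinked and recycled, either immediately or deferred until
 * the key index is freed.
 */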
1470 static inline int32_t
1471 __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
1472                                                 hash_sig_t sig)
1473 {
1474         uint32_t prim_bucket_idx, sec_bucket_idx;
1475         struct rte_hash_bucket *prim_bkt, *sec_bkt, *prev_bkt, *last_bkt;
1476         struct rte_hash_bucket *cur_bkt;
1477         int pos;
1478         int32_t ret, i;
1479         uint16_t short_sig;
1480
1481         short_sig = get_short_sig(sig);
1482         prim_bucket_idx = get_prim_bucket_index(h, sig);
1483         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1484         prim_bkt = &h->buckets[prim_bucket_idx];
1485
1486         __hash_rw_writer_lock(h);
1487         /* look for key in primary bucket */
1488         ret = search_and_remove(h, key, prim_bkt, short_sig, &pos);
1489         if (ret != -1) {
1490                 __rte_hash_compact_ll(h, prim_bkt, pos);
1491                 last_bkt = prim_bkt->next;
1492                 prev_bkt = prim_bkt;
1493                 goto return_bkt;
1494         }
1495
1496         /* Key not in the primary bucket, search the secondary bucket chain */
1497         sec_bkt = &h->buckets[sec_bucket_idx];
1498
1499         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1500                 ret = search_and_remove(h, key, cur_bkt, short_sig, &pos);
1501                 if (ret != -1) {
1502                         __rte_hash_compact_ll(h, cur_bkt, pos);
1503                         last_bkt = sec_bkt->next;
1504                         prev_bkt = sec_bkt;
1505                         goto return_bkt;
1506                 }
1507         }
1508
1509         __hash_rw_writer_unlock(h);
1510         return -ENOENT;
1511
1512 /* Walk to the last extendable bucket and recycle it if it is empty */
1513 return_bkt:
1514         if (!last_bkt) {
1515                 __hash_rw_writer_unlock(h);
1516                 return ret;
1517         }
1518         while (last_bkt->next) {
1519                 prev_bkt = last_bkt;
1520                 last_bkt = last_bkt->next;
1521         }
1522
1523         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1524                 if (last_bkt->key_idx[i] != EMPTY_SLOT)
1525                         break;
1526         }
1527         /* Found an empty last bucket: unlink it and recycle it */
1528         if (i == RTE_HASH_BUCKET_ENTRIES) {
1529                 prev_bkt->next = NULL;
1530                 uint32_t index = last_bkt - h->buckets_ext + 1;
1531                 /* Recycle the empty ext bkt immediately if
1532                  * no_free_on_del is disabled; otherwise defer it.
1533                  */
1534                 if (h->no_free_on_del)
1535                         /* Store the index of the empty ext bkt so it can be
1536                          * recycled later by rte_hash_free_key_with_position().
1537                          * When lock-free read-write concurrency is enabled,
1538                          * an empty ext bkt cannot be put into the free list
1539                          * immediately (readers might still be using it).
1540                          * Hence freeing of the ext bkt is piggy-backed onto
1541                          * freeing of the key index.
1542                          */
1543                         h->ext_bkt_to_free[ret] = index;
1544                 else
1545                         rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
1546         }
1547         __hash_rw_writer_unlock(h);
1548         return ret;
1549 }
1550
1551 int32_t
1552 rte_hash_del_key_with_hash(const struct rte_hash *h,
1553                         const void *key, hash_sig_t sig)
1554 {
1555         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1556         return __rte_hash_del_key_with_hash(h, key, sig);
1557 }
1558
1559 int32_t
1560 rte_hash_del_key(const struct rte_hash *h, const void *key)
1561 {
1562         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1563         return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
1564 }
1565
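/* Map a position previously returned by the add/delete APIs back to the
 * stored key. The position is validated by looking the key up again, so a
 * stale position (e.g. one whose slot has already been reused) is reported
 * as -ENOENT.
 */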
1566 int
1567 rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
1568                                void **key)
1569 {
1570         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1571
1572         struct rte_hash_key *k, *keys = h->key_store;
1573         k = (struct rte_hash_key *) ((char *) keys + (position + 1) *
1574                                      h->key_entry_size);
1575         *key = k->key;
1576
1577         if (position !=
1578             __rte_hash_lookup_with_hash(h, *key, rte_hash_hash(h, *key),
1579                                         NULL)) {
1580                 return -ENOENT;
1581         }
1582
1583         return 0;
1584 }
1585
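/* Return a key-store slot, and any ext bucket whose recycling was deferred
 * for it, to the free lists. Intended to be called after a delete on a table
 * created with no_free_on_del, typically once the application knows that no
 * reader still holds a reference to the deleted entry (e.g. after an
 * RCU-style grace period managed by the application).
 */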
1586 int __rte_experimental
1587 rte_hash_free_key_with_position(const struct rte_hash *h,
1588                                 const int32_t position)
1589 {
1590         /* Key index where key is stored, adding the first dummy index */
1591         uint32_t key_idx = position + 1;
1592
1593         RETURN_IF_TRUE(((h == NULL) || (key_idx == EMPTY_SLOT)), -EINVAL);
1594
1595         unsigned int lcore_id, n_slots;
1596         struct lcore_cache *cached_free_slots;
1597         const uint32_t total_entries = h->use_local_cache ?
1598                 h->entries + (RTE_MAX_LCORE - 1) * (LCORE_CACHE_SIZE - 1) + 1
1599                                                         : h->entries + 1;
1600
1601         /* Out of bounds */
1602         if (key_idx >= total_entries)
1603                 return -EINVAL;
1604         if (h->ext_table_support && h->readwrite_concur_lf_support) {
1605                 uint32_t index = h->ext_bkt_to_free[position];
1606                 if (index) {
1607                         /* Recycle empty ext bkt to free list. */
1608                         rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
1609                         h->ext_bkt_to_free[position] = 0;
1610                 }
1611         }
1612
1613         if (h->use_local_cache) {
1614                 lcore_id = rte_lcore_id();
1615                 cached_free_slots = &h->local_free_slots[lcore_id];
1616                 /* Cache full, need to flush slots back to the global ring. */
1617                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1618                         /* Need to enqueue the free slots in global ring. */
1619                         n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1620                                                 cached_free_slots->objs,
1621                                                 LCORE_CACHE_SIZE, NULL);
1622                         RETURN_IF_TRUE((n_slots == 0), -EFAULT);
1623                         cached_free_slots->len -= n_slots;
1624                 }
1625                 /* Put index of new free slot in cache. */
1626                 cached_free_slots->objs[cached_free_slots->len] =
1627                                         (void *)((uintptr_t)key_idx);
1628                 cached_free_slots->len++;
1629         } else {
1630                 rte_ring_sp_enqueue(h->free_slots,
1631                                 (void *)((uintptr_t)key_idx));
1632         }
1633
1634         return 0;
1635 }
1636
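/* Build the primary and secondary match masks for a 16-bit signature. Each
 * of the RTE_HASH_BUCKET_ENTRIES entries occupies two bits in a mask and a
 * match on entry i sets bit (2 * i) (the SSE path may also set bit
 * (2 * i + 1), which callers ignore). Callers recover the first matching
 * entry with __builtin_ctzl(mask) >> 1 and clear it with
 * mask &= ~(3ULL << (i << 1)).
 */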
1637 static inline void
1638 compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
1639                         const struct rte_hash_bucket *prim_bkt,
1640                         const struct rte_hash_bucket *sec_bkt,
1641                         uint16_t sig,
1642                         enum rte_hash_sig_compare_function sig_cmp_fn)
1643 {
1644         unsigned int i;
1645
1646         /* In the match masks, the lower bit of each two-bit pair indicates a match */
1647         switch (sig_cmp_fn) {
1648 #if defined(RTE_MACHINE_CPUFLAG_SSE2)
1649         case RTE_HASH_COMPARE_SSE:
1650                 /* Compare all signatures in the bucket */
1651                 *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1652                                 _mm_load_si128(
1653                                         (__m128i const *)prim_bkt->sig_current),
1654                                 _mm_set1_epi16(sig)));
1655                 /* Compare all signatures in the bucket */
1656                 *sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1657                                 _mm_load_si128(
1658                                         (__m128i const *)sec_bkt->sig_current),
1659                                 _mm_set1_epi16(sig)));
1660                 break;
1661 #elif defined(RTE_MACHINE_CPUFLAG_NEON)
1662         case RTE_HASH_COMPARE_NEON: {
1663                 uint16x8_t vmat, vsig, x;
1664                 int16x8_t shift = {-15, -13, -11, -9, -7, -5, -3, -1};
1665
1666                 vsig = vld1q_dup_u16((uint16_t const *)&sig);
1667                 /* Compare all signatures in the primary bucket */
1668                 vmat = vceqq_u16(vsig,
1669                         vld1q_u16((uint16_t const *)prim_bkt->sig_current));
1670                 x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), shift);
1671                 *prim_hash_matches = (uint32_t)(vaddvq_u16(x));
1672                 /* Compare all signatures in the secondary bucket */
1673                 vmat = vceqq_u16(vsig,
1674                         vld1q_u16((uint16_t const *)sec_bkt->sig_current));
1675                 x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), shift);
1676                 *sec_hash_matches = (uint32_t)(vaddvq_u16(x));
1677                 }
1678                 break;
1679 #endif
1680         default:
1681                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1682                         *prim_hash_matches |=
1683                                 ((sig == prim_bkt->sig_current[i]) << (i << 1));
1684                         *sec_hash_matches |=
1685                                 ((sig == sec_bkt->sig_current[i]) << (i << 1));
1686                 }
1687         }
1688 }
1689
1690 #define PREFETCH_OFFSET 4
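/* Bulk lookup, lock-protected variant. The loop is software-pipelined: keys
 * are prefetched PREFETCH_OFFSET iterations ahead while signatures and
 * primary/secondary bucket indexes are computed and the buckets themselves
 * are prefetched; signatures are then compared for all keys and the key slot
 * of the first hit is prefetched before the full key comparison. Keys that
 * are still unresolved afterwards are searched in the extendable buckets.
 */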
1691 static inline void
1692 __rte_hash_lookup_bulk_l(const struct rte_hash *h, const void **keys,
1693                         int32_t num_keys, int32_t *positions,
1694                         uint64_t *hit_mask, void *data[])
1695 {
1696         uint64_t hits = 0;
1697         int32_t i;
1698         int32_t ret;
1699         uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
1700         uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
1701         uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
1702         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
1703         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1704         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1705         uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1706         uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1707         struct rte_hash_bucket *cur_bkt, *next_bkt;
1708
1709         /* Prefetch first keys */
1710         for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
1711                 rte_prefetch0(keys[i]);
1712
1713         /*
1714          * Prefetch the rest of the keys, calculate the primary and
1715          * secondary buckets and prefetch them
1716          */
1717         for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
1718                 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
1719
1720                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1721
1722                 sig[i] = get_short_sig(prim_hash[i]);
1723                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1724                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1725
1726                 primary_bkt[i] = &h->buckets[prim_index[i]];
1727                 secondary_bkt[i] = &h->buckets[sec_index[i]];
1728
1729                 rte_prefetch0(primary_bkt[i]);
1730                 rte_prefetch0(secondary_bkt[i]);
1731         }
1732
1733         /* Calculate and prefetch rest of the buckets */
1734         for (; i < num_keys; i++) {
1735                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1736
1737                 sig[i] = get_short_sig(prim_hash[i]);
1738                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1739                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1740
1741                 primary_bkt[i] = &h->buckets[prim_index[i]];
1742                 secondary_bkt[i] = &h->buckets[sec_index[i]];
1743
1744                 rte_prefetch0(primary_bkt[i]);
1745                 rte_prefetch0(secondary_bkt[i]);
1746         }
1747
1748         __hash_rw_reader_lock(h);
1749
1750         /* Compare signatures and prefetch key slot of first hit */
1751         for (i = 0; i < num_keys; i++) {
1752                 compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1753                         primary_bkt[i], secondary_bkt[i],
1754                         sig[i], h->sig_cmp_fn);
1755
1756                 if (prim_hitmask[i]) {
1757                         uint32_t first_hit =
1758                                         __builtin_ctzl(prim_hitmask[i])
1759                                         >> 1;
1760                         uint32_t key_idx =
1761                                 primary_bkt[i]->key_idx[first_hit];
1762                         const struct rte_hash_key *key_slot =
1763                                 (const struct rte_hash_key *)(
1764                                 (const char *)h->key_store +
1765                                 key_idx * h->key_entry_size);
1766                         rte_prefetch0(key_slot);
1767                         continue;
1768                 }
1769
1770                 if (sec_hitmask[i]) {
1771                         uint32_t first_hit =
1772                                         __builtin_ctzl(sec_hitmask[i])
1773                                         >> 1;
1774                         uint32_t key_idx =
1775                                 secondary_bkt[i]->key_idx[first_hit];
1776                         const struct rte_hash_key *key_slot =
1777                                 (const struct rte_hash_key *)(
1778                                 (const char *)h->key_store +
1779                                 key_idx * h->key_entry_size);
1780                         rte_prefetch0(key_slot);
1781                 }
1782         }
1783
1784         /* Compare keys, hits in the primary bucket first */
1785         for (i = 0; i < num_keys; i++) {
1786                 positions[i] = -ENOENT;
1787                 while (prim_hitmask[i]) {
1788                         uint32_t hit_index =
1789                                         __builtin_ctzl(prim_hitmask[i])
1790                                         >> 1;
1791                         uint32_t key_idx =
1792                                 primary_bkt[i]->key_idx[hit_index];
1793                         const struct rte_hash_key *key_slot =
1794                                 (const struct rte_hash_key *)(
1795                                 (const char *)h->key_store +
1796                                 key_idx * h->key_entry_size);
1797
1798                         /*
1799                          * If key index is 0, do not compare key,
1800                          * as it is checking the dummy slot
1801                          */
1802                         if (!!key_idx &
1803                                 !rte_hash_cmp_eq(
1804                                         key_slot->key, keys[i], h)) {
1805                                 if (data != NULL)
1806                                         data[i] = key_slot->pdata;
1807
1808                                 hits |= 1ULL << i;
1809                                 positions[i] = key_idx - 1;
1810                                 goto next_key;
1811                         }
1812                         prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
1813                 }
1814
1815                 while (sec_hitmask[i]) {
1816                         uint32_t hit_index =
1817                                         __builtin_ctzl(sec_hitmask[i])
1818                                         >> 1;
1819                         uint32_t key_idx =
1820                                 secondary_bkt[i]->key_idx[hit_index];
1821                         const struct rte_hash_key *key_slot =
1822                                 (const struct rte_hash_key *)(
1823                                 (const char *)h->key_store +
1824                                 key_idx * h->key_entry_size);
1825
1826                         /*
1827                          * If key index is 0, do not compare key,
1828                          * as it is checking the dummy slot
1829                          */
1830
1831                         if (!!key_idx &
1832                                 !rte_hash_cmp_eq(
1833                                         key_slot->key, keys[i], h)) {
1834                                 if (data != NULL)
1835                                         data[i] = key_slot->pdata;
1836
1837                                 hits |= 1ULL << i;
1838                                 positions[i] = key_idx - 1;
1839                                 goto next_key;
1840                         }
1841                         sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
1842                 }
1843 next_key:
1844                 continue;
1845         }
1846
1847         /* all found, do not need to go through ext bkt */
1848         if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
1849                 if (hit_mask != NULL)
1850                         *hit_mask = hits;
1851                 __hash_rw_reader_unlock(h);
1852                 return;
1853         }
1854
1855         /* need to check ext buckets for match */
1856         for (i = 0; i < num_keys; i++) {
1857                 if ((hits & (1ULL << i)) != 0)
1858                         continue;
1859                 next_bkt = secondary_bkt[i]->next;
1860                 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
1861                         if (data != NULL)
1862                                 ret = search_one_bucket_l(h, keys[i],
1863                                                 sig[i], &data[i], cur_bkt);
1864                         else
1865                                 ret = search_one_bucket_l(h, keys[i],
1866                                                 sig[i], NULL, cur_bkt);
1867                         if (ret != -1) {
1868                                 positions[i] = ret;
1869                                 hits |= 1ULL << i;
1870                                 break;
1871                         }
1872                 }
1873         }
1874
1875         __hash_rw_reader_unlock(h);
1876
1877         if (hit_mask != NULL)
1878                 *hit_mask = hits;
1879 }
1880
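/* Bulk lookup, lock-free variant. Same pipelined structure as the
 * lock-protected version, but key_idx and pdata are read with acquire loads
 * and the whole signature/key comparison pass runs inside a retry loop:
 * tbl_chng_cnt is sampled before and after the pass, and if a concurrent
 * writer displaced entries in between, the pass is executed again.
 */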
1881 static inline void
1882 __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
1883                         int32_t num_keys, int32_t *positions,
1884                         uint64_t *hit_mask, void *data[])
1885 {
1886         uint64_t hits = 0;
1887         int32_t i;
1888         int32_t ret;
1889         uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
1890         uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
1891         uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
1892         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
1893         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1894         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1895         uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1896         uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1897         struct rte_hash_bucket *cur_bkt, *next_bkt;
1898         void *pdata[RTE_HASH_LOOKUP_BULK_MAX];
1899         uint32_t cnt_b, cnt_a;
1900
1901         /* Prefetch first keys */
1902         for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
1903                 rte_prefetch0(keys[i]);
1904
1905         /*
1906          * Prefetch the rest of the keys, calculate the primary and
1907          * secondary buckets and prefetch them
1908          */
1909         for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
1910                 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
1911
1912                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1913
1914                 sig[i] = get_short_sig(prim_hash[i]);
1915                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1916                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1917
1918                 primary_bkt[i] = &h->buckets[prim_index[i]];
1919                 secondary_bkt[i] = &h->buckets[sec_index[i]];
1920
1921                 rte_prefetch0(primary_bkt[i]);
1922                 rte_prefetch0(secondary_bkt[i]);
1923         }
1924
1925         /* Calculate and prefetch rest of the buckets */
1926         for (; i < num_keys; i++) {
1927                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1928
1929                 sig[i] = get_short_sig(prim_hash[i]);
1930                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1931                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1932
1933                 primary_bkt[i] = &h->buckets[prim_index[i]];
1934                 secondary_bkt[i] = &h->buckets[sec_index[i]];
1935
1936                 rte_prefetch0(primary_bkt[i]);
1937                 rte_prefetch0(secondary_bkt[i]);
1938         }
1939
1940         for (i = 0; i < num_keys; i++)
1941                 positions[i] = -ENOENT;
1942
1943         do {
1944                 /* Load the table change counter before the lookup
1945                  * starts. Acquire semantics will make sure that
1946                  * loads in compare_signatures are not hoisted.
1947                  */
1948                 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1949                                         __ATOMIC_ACQUIRE);
1950
1951                 /* Compare signatures and prefetch key slot of first hit */
1952                 for (i = 0; i < num_keys; i++) {
1953                         compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1954                                 primary_bkt[i], secondary_bkt[i],
1955                                 sig[i], h->sig_cmp_fn);
1956
1957                         if (prim_hitmask[i]) {
1958                                 uint32_t first_hit =
1959                                                 __builtin_ctzl(prim_hitmask[i])
1960                                                 >> 1;
1961                                 uint32_t key_idx =
1962                                         primary_bkt[i]->key_idx[first_hit];
1963                                 const struct rte_hash_key *key_slot =
1964                                         (const struct rte_hash_key *)(
1965                                         (const char *)h->key_store +
1966                                         key_idx * h->key_entry_size);
1967                                 rte_prefetch0(key_slot);
1968                                 continue;
1969                         }
1970
1971                         if (sec_hitmask[i]) {
1972                                 uint32_t first_hit =
1973                                                 __builtin_ctzl(sec_hitmask[i])
1974                                                 >> 1;
1975                                 uint32_t key_idx =
1976                                         secondary_bkt[i]->key_idx[first_hit];
1977                                 const struct rte_hash_key *key_slot =
1978                                         (const struct rte_hash_key *)(
1979                                         (const char *)h->key_store +
1980                                         key_idx * h->key_entry_size);
1981                                 rte_prefetch0(key_slot);
1982                         }
1983                 }
1984
1985                 /* Compare keys, hits in the primary bucket first */
1986                 for (i = 0; i < num_keys; i++) {
1987                         while (prim_hitmask[i]) {
1988                                 uint32_t hit_index =
1989                                                 __builtin_ctzl(prim_hitmask[i])
1990                                                 >> 1;
1991                                 uint32_t key_idx =
1992                                 __atomic_load_n(
1993                                         &primary_bkt[i]->key_idx[hit_index],
1994                                         __ATOMIC_ACQUIRE);
1995                                 const struct rte_hash_key *key_slot =
1996                                         (const struct rte_hash_key *)(
1997                                         (const char *)h->key_store +
1998                                         key_idx * h->key_entry_size);
1999
2000                                 if (key_idx != EMPTY_SLOT)
2001                                         pdata[i] = __atomic_load_n(
2002                                                         &key_slot->pdata,
2003                                                         __ATOMIC_ACQUIRE);
2004                                 /*
2005                                  * If key index is 0, do not compare key,
2006                                  * as it is checking the dummy slot
2007                                  */
2008                                 if (!!key_idx &
2009                                         !rte_hash_cmp_eq(
2010                                                 key_slot->key, keys[i], h)) {
2011                                         if (data != NULL)
2012                                                 data[i] = pdata[i];
2013
2014                                         hits |= 1ULL << i;
2015                                         positions[i] = key_idx - 1;
2016                                         goto next_key;
2017                                 }
2018                                 prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
2019                         }
2020
2021                         while (sec_hitmask[i]) {
2022                                 uint32_t hit_index =
2023                                                 __builtin_ctzl(sec_hitmask[i])
2024                                                 >> 1;
2025                                 uint32_t key_idx =
2026                                 __atomic_load_n(
2027                                         &secondary_bkt[i]->key_idx[hit_index],
2028                                         __ATOMIC_ACQUIRE);
2029                                 const struct rte_hash_key *key_slot =
2030                                         (const struct rte_hash_key *)(
2031                                         (const char *)h->key_store +
2032                                         key_idx * h->key_entry_size);
2033
2034                                 if (key_idx != EMPTY_SLOT)
2035                                         pdata[i] = __atomic_load_n(
2036                                                         &key_slot->pdata,
2037                                                         __ATOMIC_ACQUIRE);
2038                                 /*
2039                                  * If key index is 0, do not compare key,
2040                                  * as it is checking the dummy slot
2041                                  */
2042
2043                                 if (!!key_idx &
2044                                         !rte_hash_cmp_eq(
2045                                                 key_slot->key, keys[i], h)) {
2046                                         if (data != NULL)
2047                                                 data[i] = pdata[i];
2048
2049                                         hits |= 1ULL << i;
2050                                         positions[i] = key_idx - 1;
2051                                         goto next_key;
2052                                 }
2053                                 sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
2054                         }
2055 next_key:
2056                         continue;
2057                 }
2058
2059                 /* all found, do not need to go through ext bkt */
2060                 if (hits == ((1ULL << num_keys) - 1)) {
2061                         if (hit_mask != NULL)
2062                                 *hit_mask = hits;
2063                         return;
2064                 }
2065                 /* need to check ext buckets for match */
2066                 if (h->ext_table_support) {
2067                         for (i = 0; i < num_keys; i++) {
2068                                 if ((hits & (1ULL << i)) != 0)
2069                                         continue;
2070                                 next_bkt = secondary_bkt[i]->next;
2071                                 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
2072                                         if (data != NULL)
2073                                                 ret = search_one_bucket_lf(h,
2074                                                         keys[i], sig[i],
2075                                                         &data[i], cur_bkt);
2076                                         else
2077                                                 ret = search_one_bucket_lf(h,
2078                                                                 keys[i], sig[i],
2079                                                                 NULL, cur_bkt);
2080                                         if (ret != -1) {
2081                                                 positions[i] = ret;
2082                                                 hits |= 1ULL << i;
2083                                                 break;
2084                                         }
2085                                 }
2086                         }
2087                 }
2088                 /* The loads of sig_current in compare_signatures
2089                  * should not move below the load from tbl_chng_cnt.
2090                  */
2091                 __atomic_thread_fence(__ATOMIC_ACQUIRE);
2092                 /* Re-read the table change counter to check if the
2093                  * table has changed during search. If yes, re-do
2094                  * the search.
2095                  * This load should not get hoisted. The load
2096                  * acquires on cnt_b, primary key index and secondary
2097                  * key index will make sure that it does not get
2098                  * hoisted.
2099                  */
2100                 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
2101                                         __ATOMIC_ACQUIRE);
2102         } while (cnt_b != cnt_a);
2103
2104         if (hit_mask != NULL)
2105                 *hit_mask = hits;
2106 }
2107
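/* Dispatch a bulk lookup to the lock-free or the lock-protected
 * implementation, mirroring the single-key lookup path.
 */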
2108 static inline void
2109 __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
2110                         int32_t num_keys, int32_t *positions,
2111                         uint64_t *hit_mask, void *data[])
2112 {
2113         if (h->readwrite_concur_lf_support)
2114                 __rte_hash_lookup_bulk_lf(h, keys, num_keys, positions,
2115                                           hit_mask, data);
2116         else
2117                 __rte_hash_lookup_bulk_l(h, keys, num_keys, positions,
2118                                          hit_mask, data);
2119 }
2120
2121 int
2122 rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
2123                       uint32_t num_keys, int32_t *positions)
2124 {
2125         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
2126                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2127                         (positions == NULL)), -EINVAL);
2128
2129         __rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL);
2130         return 0;
2131 }
2132
2133 int
2134 rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
2135                       uint32_t num_keys, uint64_t *hit_mask, void *data[])
2136 {
2137         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
2138                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2139                         (hit_mask == NULL)), -EINVAL);
2140
2141         int32_t positions[num_keys];
2142
2143         __rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);
2144
2145         /* Return number of hits */
2146         return __builtin_popcountl(*hit_mask);
2147 }
2148
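/* Iterator: *next is a flat slot index (bucket index * RTE_HASH_BUCKET_ENTRIES
 * + entry index) that walks the main table first and then, when extendable
 * buckets are enabled, the ext table. Empty slots are skipped, the key and
 * data of the next occupied slot are returned through the output pointers,
 * the iterator is advanced and the key-store position is returned; -ENOENT
 * marks the end of the table.
 */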
2149 int32_t
2150 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
2151 {
2152         uint32_t bucket_idx, idx, position;
2153         struct rte_hash_key *next_key;
2154
2155         RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
2156
2157         const uint32_t total_entries_main = h->num_buckets *
2158                                                         RTE_HASH_BUCKET_ENTRIES;
2159         const uint32_t total_entries = total_entries_main << 1;
2160
2161         /* Past the main table buckets, continue with the ext table */
2162         if (*next >= total_entries_main)
2163                 goto extend_table;
2164
2165         /* Calculate bucket and index of current iterator */
2166         bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
2167         idx = *next % RTE_HASH_BUCKET_ENTRIES;
2168
2169         /* If current position is empty, go to the next one */
2170         while ((position = __atomic_load_n(&h->buckets[bucket_idx].key_idx[idx],
2171                                         __ATOMIC_ACQUIRE)) == EMPTY_SLOT) {
2172                 (*next)++;
2173                 /* End of table */
2174                 if (*next == total_entries_main)
2175                         goto extend_table;
2176                 bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
2177                 idx = *next % RTE_HASH_BUCKET_ENTRIES;
2178         }
2179
2180         __hash_rw_reader_lock(h);
2181         next_key = (struct rte_hash_key *) ((char *)h->key_store +
2182                                 position * h->key_entry_size);
2183         /* Return key and data */
2184         *key = next_key->key;
2185         *data = next_key->pdata;
2186
2187         __hash_rw_reader_unlock(h);
2188
2189         /* Increment iterator */
2190         (*next)++;
2191
2192         return position - 1;
2193
2194 /* Begin to iterate extendable buckets */
2195 extend_table:
2196         /* Out of bounds of the whole table, or ext buckets are not enabled */
2197         if (*next >= total_entries || !h->ext_table_support)
2198                 return -ENOENT;
2199
2200         bucket_idx = (*next - total_entries_main) / RTE_HASH_BUCKET_ENTRIES;
2201         idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
2202
2203         while ((position = h->buckets_ext[bucket_idx].key_idx[idx]) == EMPTY_SLOT) {
2204                 (*next)++;
2205                 if (*next == total_entries)
2206                         return -ENOENT;
2207                 bucket_idx = (*next - total_entries_main) /
2208                                                 RTE_HASH_BUCKET_ENTRIES;
2209                 idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
2210         }
2211         __hash_rw_reader_lock(h);
2212         next_key = (struct rte_hash_key *) ((char *)h->key_store +
2213                                 position * h->key_entry_size);
2214         /* Return key and data */
2215         *key = next_key->key;
2216         *data = next_key->pdata;
2217
2218         __hash_rw_reader_unlock(h);
2219
2220         /* Increment iterator */
2221         (*next)++;
2222         return position - 1;
2223 }