hash: fix out-of-memory handling in hash creation
lib/librte_hash/rte_cuckoo_hash.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  * Copyright(c) 2018 Arm Limited
4  */
5
6 #include <string.h>
7 #include <stdint.h>
8 #include <errno.h>
9 #include <stdio.h>
10 #include <stdarg.h>
11 #include <sys/queue.h>
12
13 #include <rte_common.h>
14 #include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
15 #include <rte_log.h>
16 #include <rte_prefetch.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_malloc.h>
19 #include <rte_eal.h>
20 #include <rte_eal_memconfig.h>
21 #include <rte_per_lcore.h>
22 #include <rte_errno.h>
23 #include <rte_string_fns.h>
24 #include <rte_cpuflags.h>
25 #include <rte_rwlock.h>
26 #include <rte_spinlock.h>
27 #include <rte_ring_elem.h>
28 #include <rte_compat.h>
29 #include <rte_vect.h>
30 #include <rte_tailq.h>
31
32 #include "rte_hash.h"
33 #include "rte_cuckoo_hash.h"
34
35 /* Mask of all flags supported by this version */
36 #define RTE_HASH_EXTRA_FLAGS_MASK (RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT | \
37                                    RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD | \
38                                    RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY | \
39                                    RTE_HASH_EXTRA_FLAGS_EXT_TABLE |     \
40                                    RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL | \
41                                    RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)
42
43 #define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET)                            \
44         for (CURRENT_BKT = START_BUCKET;                                      \
45                 CURRENT_BKT != NULL;                                          \
46                 CURRENT_BKT = CURRENT_BKT->next)
47
48 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
49
50 static struct rte_tailq_elem rte_hash_tailq = {
51         .name = "RTE_HASH",
52 };
53 EAL_REGISTER_TAILQ(rte_hash_tailq)
54
55 struct rte_hash *
56 rte_hash_find_existing(const char *name)
57 {
58         struct rte_hash *h = NULL;
59         struct rte_tailq_entry *te;
60         struct rte_hash_list *hash_list;
61
62         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
63
64         rte_mcfg_tailq_read_lock();
65         TAILQ_FOREACH(te, hash_list, next) {
66                 h = (struct rte_hash *) te->data;
67                 if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
68                         break;
69         }
70         rte_mcfg_tailq_read_unlock();
71
72         if (te == NULL) {
73                 rte_errno = ENOENT;
74                 return NULL;
75         }
76         return h;
77 }
78
79 static inline struct rte_hash_bucket *
80 rte_hash_get_last_bkt(struct rte_hash_bucket *lst_bkt)
81 {
82         while (lst_bkt->next != NULL)
83                 lst_bkt = lst_bkt->next;
84         return lst_bkt;
85 }
86
87 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
88 {
89         h->cmp_jump_table_idx = KEY_CUSTOM;
90         h->rte_hash_custom_cmp_eq = func;
91 }
92
93 static inline int
94 rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
95 {
96         if (h->cmp_jump_table_idx == KEY_CUSTOM)
97                 return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len);
98         else
99                 return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
100 }
101
102 /*
103  * We use the higher 16 bits of the hash as the signature value stored
104  * in the table. We use the lower bits for the primary bucket
105  * location. Then we XOR the primary bucket location and the signature
106  * to get the secondary bucket location. This is the same scheme as
107  * proposed in Bin Fan, et al's paper
108  * "MemC3: Compact and Concurrent MemCache with Dumber Caching and
109  * Smarter Hashing". The benefit of using
110  * XOR is that one can derive the alternative bucket location
111  * using only the current bucket location and the signature.
112  */
113 static inline uint16_t
114 get_short_sig(const hash_sig_t hash)
115 {
116         return hash >> 16;
117 }
118
119 static inline uint32_t
120 get_prim_bucket_index(const struct rte_hash *h, const hash_sig_t hash)
121 {
122         return hash & h->bucket_bitmask;
123 }
124
125 static inline uint32_t
126 get_alt_bucket_index(const struct rte_hash *h,
127                         uint32_t cur_bkt_idx, uint16_t sig)
128 {
129         return (cur_bkt_idx ^ sig) & h->bucket_bitmask;
130 }
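/*
 * Worked example (hypothetical values, not part of the library): with
 * 256 buckets the bucket_bitmask is 0xff. For hash = 0xabcd1234,
 * get_short_sig() returns 0xabcd and get_prim_bucket_index() returns
 * 0x34. The alternative bucket is (0x34 ^ 0xabcd) & 0xff = 0xf9, and
 * XORing again gives (0xf9 ^ 0xabcd) & 0xff = 0x34, so either bucket
 * can be derived from the other using only the short signature.
 */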
131
132 struct rte_hash *
133 rte_hash_create(const struct rte_hash_parameters *params)
134 {
135         struct rte_hash *h = NULL;
136         struct rte_tailq_entry *te = NULL;
137         struct rte_hash_list *hash_list;
138         struct rte_ring *r = NULL;
139         struct rte_ring *r_ext = NULL;
140         char hash_name[RTE_HASH_NAMESIZE];
141         void *k = NULL;
142         void *buckets = NULL;
143         void *buckets_ext = NULL;
144         char ring_name[RTE_RING_NAMESIZE];
145         char ext_ring_name[RTE_RING_NAMESIZE];
146         unsigned num_key_slots;
147         unsigned int hw_trans_mem_support = 0, use_local_cache = 0;
148         unsigned int ext_table_support = 0;
149         unsigned int readwrite_concur_support = 0;
150         unsigned int writer_takes_lock = 0;
151         unsigned int no_free_on_del = 0;
152         uint32_t *ext_bkt_to_free = NULL;
153         uint32_t *tbl_chng_cnt = NULL;
154         struct lcore_cache *local_free_slots = NULL;
155         unsigned int readwrite_concur_lf_support = 0;
156         uint32_t i;
157
158         rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
159
160         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
161
162         if (params == NULL) {
163                 RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
164                 return NULL;
165         }
166
167         /* Check for valid parameters */
168         if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
169                         (params->entries < RTE_HASH_BUCKET_ENTRIES) ||
170                         (params->key_len == 0)) {
171                 rte_errno = EINVAL;
172                 RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
173                 return NULL;
174         }
175
176         if (params->extra_flag & ~RTE_HASH_EXTRA_FLAGS_MASK) {
177                 rte_errno = EINVAL;
178                 RTE_LOG(ERR, HASH, "rte_hash_create: unsupported extra flags\n");
179                 return NULL;
180         }
181
182         /* Validate correct usage of extra options */
183         if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) &&
184             (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)) {
185                 rte_errno = EINVAL;
186                 RTE_LOG(ERR, HASH, "rte_hash_create: choose rw concurrency or "
187                         "rw concurrency lock free\n");
188                 return NULL;
189         }
190
191         /* Parse the extra flags field to determine which extra options are enabled. */
192         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
193                 hw_trans_mem_support = 1;
194
195         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
196                 use_local_cache = 1;
197                 writer_takes_lock = 1;
198         }
199
200         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
201                 readwrite_concur_support = 1;
202                 writer_takes_lock = 1;
203         }
204
205         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
206                 ext_table_support = 1;
207
208         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)
209                 no_free_on_del = 1;
210
211         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {
212                 readwrite_concur_lf_support = 1;
213                 /* Do not free internal memory/index on delete */
214                 no_free_on_del = 1;
215         }
216
217         /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
218         if (use_local_cache)
219                 /*
220                  * Increase number of slots by total number of indices
221                  * that can be stored in the lcore caches
222                  * except for the first cache
223                  */
224                 num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
225                                         (LCORE_CACHE_SIZE - 1) + 1;
226         else
227                 num_key_slots = params->entries + 1;
228
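        /*
         * Worked example for the slot accounting above (hypothetical
         * build-time values): with params->entries = 1024,
         * RTE_MAX_LCORE = 128 and LCORE_CACHE_SIZE = 64, the cached case
         * reserves 1024 + 127 * 63 + 1 = 9026 key slots, so the requested
         * number of entries can still be added even when free slot
         * indices are stranded in other lcores' caches.
         */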
229         snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
230         /* Create ring (Dummy slot index is not enqueued) */
231         r = rte_ring_create_elem(ring_name, sizeof(uint32_t),
232                         rte_align32pow2(num_key_slots), params->socket_id, 0);
233         if (r == NULL) {
234                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
235                 goto err;
236         }
237
238         const uint32_t num_buckets = rte_align32pow2(params->entries) /
239                                                 RTE_HASH_BUCKET_ENTRIES;
240
241         /* Create ring for extendable buckets. */
242         if (ext_table_support) {
243                 snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
244                                                                 params->name);
245                 r_ext = rte_ring_create_elem(ext_ring_name, sizeof(uint32_t),
246                                 rte_align32pow2(num_buckets + 1),
247                                 params->socket_id, 0);
248
249                 if (r_ext == NULL) {
250                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
251                                                                 "failed\n");
252                         goto err;
253                 }
254         }
255
256         snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
257
258         rte_mcfg_tailq_write_lock();
259
260         /* guarantee there is no existing entry with this name; this is
261          * normally already caught by the ring creation above */
262         TAILQ_FOREACH(te, hash_list, next) {
263                 h = (struct rte_hash *) te->data;
264                 if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
265                         break;
266         }
267         h = NULL;
268         if (te != NULL) {
269                 rte_errno = EEXIST;
270                 te = NULL;
271                 goto err_unlock;
272         }
273
274         te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
275         if (te == NULL) {
276                 RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
277                 goto err_unlock;
278         }
279
280         h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
281                                         RTE_CACHE_LINE_SIZE, params->socket_id);
282
283         if (h == NULL) {
284                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
285                 goto err_unlock;
286         }
287
288         buckets = rte_zmalloc_socket(NULL,
289                                 num_buckets * sizeof(struct rte_hash_bucket),
290                                 RTE_CACHE_LINE_SIZE, params->socket_id);
291
292         if (buckets == NULL) {
293                 RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
294                 goto err_unlock;
295         }
296
297         /* Allocate the same number of extendable buckets */
298         if (ext_table_support) {
299                 buckets_ext = rte_zmalloc_socket(NULL,
300                                 num_buckets * sizeof(struct rte_hash_bucket),
301                                 RTE_CACHE_LINE_SIZE, params->socket_id);
302                 if (buckets_ext == NULL) {
303                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
304                                                         "failed\n");
305                         goto err_unlock;
306                 }
307                 /* Populate the ext bkt ring. We reserve index 0, similar
308                  * to the key-data slot, in case we later want to use the
309                  * bucket index for the linked list, with 0 meaning NULL
310                  * for the next bucket
311                  */
312                 for (i = 1; i <= num_buckets; i++)
313                         rte_ring_sp_enqueue_elem(r_ext, &i, sizeof(uint32_t));
314
315                 if (readwrite_concur_lf_support) {
316                         ext_bkt_to_free = rte_zmalloc(NULL, sizeof(uint32_t) *
317                                                                 num_key_slots, 0);
318                         if (ext_bkt_to_free == NULL) {
319                                 RTE_LOG(ERR, HASH, "ext bkt to free memory allocation "
320                                                                 "failed\n");
321                                 goto err_unlock;
322                         }
323                 }
324         }
325
326         const uint32_t key_entry_size =
327                 RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,
328                           KEY_ALIGNMENT);
329         const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
330
331         k = rte_zmalloc_socket(NULL, key_tbl_size,
332                         RTE_CACHE_LINE_SIZE, params->socket_id);
333
334         if (k == NULL) {
335                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
336                 goto err_unlock;
337         }
338
339         tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t),
340                         RTE_CACHE_LINE_SIZE, params->socket_id);
341
342         if (tbl_chng_cnt == NULL) {
343                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
344                 goto err_unlock;
345         }
346
347 /*
348  * On x86 or ARM64, select an appropriate compare function, which may
349  * use vector intrinsics; otherwise fall back to generic memcmp
350  */
351 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
352         /* Select function to compare keys */
353         switch (params->key_len) {
354         case 16:
355                 h->cmp_jump_table_idx = KEY_16_BYTES;
356                 break;
357         case 32:
358                 h->cmp_jump_table_idx = KEY_32_BYTES;
359                 break;
360         case 48:
361                 h->cmp_jump_table_idx = KEY_48_BYTES;
362                 break;
363         case 64:
364                 h->cmp_jump_table_idx = KEY_64_BYTES;
365                 break;
366         case 80:
367                 h->cmp_jump_table_idx = KEY_80_BYTES;
368                 break;
369         case 96:
370                 h->cmp_jump_table_idx = KEY_96_BYTES;
371                 break;
372         case 112:
373                 h->cmp_jump_table_idx = KEY_112_BYTES;
374                 break;
375         case 128:
376                 h->cmp_jump_table_idx = KEY_128_BYTES;
377                 break;
378         default:
379                 /* If key is not multiple of 16, use generic memcmp */
380                 h->cmp_jump_table_idx = KEY_OTHER_BYTES;
381         }
382 #else
383         h->cmp_jump_table_idx = KEY_OTHER_BYTES;
384 #endif
385
386         if (use_local_cache) {
387                 local_free_slots = rte_zmalloc_socket(NULL,
388                                 sizeof(struct lcore_cache) * RTE_MAX_LCORE,
389                                 RTE_CACHE_LINE_SIZE, params->socket_id);
390                 if (local_free_slots == NULL) {
391                         RTE_LOG(ERR, HASH, "local free slots memory allocation failed\n");
392                         goto err_unlock;
393                 }
394         }
395
396         /* Default hash function */
397 #if defined(RTE_ARCH_X86)
398         default_hash_func = (rte_hash_function)rte_hash_crc;
399 #elif defined(RTE_ARCH_ARM64)
400         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
401                 default_hash_func = (rte_hash_function)rte_hash_crc;
402 #endif
403         /* Setup hash context */
404         strlcpy(h->name, params->name, sizeof(h->name));
405         h->entries = params->entries;
406         h->key_len = params->key_len;
407         h->key_entry_size = key_entry_size;
408         h->hash_func_init_val = params->hash_func_init_val;
409
410         h->num_buckets = num_buckets;
411         h->bucket_bitmask = h->num_buckets - 1;
412         h->buckets = buckets;
413         h->buckets_ext = buckets_ext;
414         h->free_ext_bkts = r_ext;
415         h->hash_func = (params->hash_func == NULL) ?
416                 default_hash_func : params->hash_func;
417         h->key_store = k;
418         h->free_slots = r;
419         h->ext_bkt_to_free = ext_bkt_to_free;
420         h->tbl_chng_cnt = tbl_chng_cnt;
421         *h->tbl_chng_cnt = 0;
422         h->hw_trans_mem_support = hw_trans_mem_support;
423         h->use_local_cache = use_local_cache;
424         h->local_free_slots = local_free_slots;
425         h->readwrite_concur_support = readwrite_concur_support;
426         h->ext_table_support = ext_table_support;
427         h->writer_takes_lock = writer_takes_lock;
428         h->no_free_on_del = no_free_on_del;
429         h->readwrite_concur_lf_support = readwrite_concur_lf_support;
430
431 #if defined(RTE_ARCH_X86)
432         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
433                 h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
434         else
435 #elif defined(RTE_ARCH_ARM64)
436         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
437                 h->sig_cmp_fn = RTE_HASH_COMPARE_NEON;
438         else
439 #endif
440                 h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
441
442         /* Writer threads need to take the lock when:
443          * 1) RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is enabled OR
444          * 2) RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD is enabled
445          */
446         if (h->writer_takes_lock) {
447                 h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
448                                                 RTE_CACHE_LINE_SIZE);
449                 if (h->readwrite_lock == NULL)
450                         goto err_unlock;
451
452                 rte_rwlock_init(h->readwrite_lock);
453         }
454
455         /* Populate free slots ring. Entry zero is reserved for key misses. */
456         for (i = 1; i < num_key_slots; i++)
457                 rte_ring_sp_enqueue_elem(r, &i, sizeof(uint32_t));
458
459         te->data = (void *) h;
460         TAILQ_INSERT_TAIL(hash_list, te, next);
461         rte_mcfg_tailq_write_unlock();
462
463         return h;
464 err_unlock:
465         rte_mcfg_tailq_write_unlock();
466 err:
467         rte_ring_free(r);
468         rte_ring_free(r_ext);
469         rte_free(te);
470         rte_free(local_free_slots);
471         rte_free(h);
472         rte_free(buckets);
473         rte_free(buckets_ext);
474         rte_free(k);
475         rte_free(tbl_chng_cnt);
476         rte_free(ext_bkt_to_free);
477         return NULL;
478 }
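/*
 * Illustrative usage sketch of the create/add/lookup/free cycle. This is
 * not part of the library; identifiers such as "example", some_object,
 * handle_full_table() and use() are hypothetical application code, and
 * the default hash function with no extra flags is assumed.
 *
 *     struct rte_hash_parameters params = {
 *             .name = "example",
 *             .entries = 1024,
 *             .key_len = sizeof(uint32_t),
 *             .socket_id = rte_socket_id(),
 *     };
 *     struct rte_hash *ht = rte_hash_create(&params);
 *     if (ht == NULL)
 *             rte_panic("hash creation failed: %d\n", rte_errno);
 *
 *     uint32_t key = 42;
 *     if (rte_hash_add_key_data(ht, &key, some_object) < 0)
 *             handle_full_table();
 *
 *     void *found;
 *     if (rte_hash_lookup_data(ht, &key, &found) >= 0)
 *             use(found);
 *
 *     rte_hash_free(ht);
 */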
479
480 void
481 rte_hash_free(struct rte_hash *h)
482 {
483         struct rte_tailq_entry *te;
484         struct rte_hash_list *hash_list;
485
486         if (h == NULL)
487                 return;
488
489         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
490
491         rte_mcfg_tailq_write_lock();
492
493         /* find out tailq entry */
494         TAILQ_FOREACH(te, hash_list, next) {
495                 if (te->data == (void *) h)
496                         break;
497         }
498
499         if (te == NULL) {
500                 rte_mcfg_tailq_write_unlock();
501                 return;
502         }
503
504         TAILQ_REMOVE(hash_list, te, next);
505
506         rte_mcfg_tailq_write_unlock();
507
508         if (h->use_local_cache)
509                 rte_free(h->local_free_slots);
510         if (h->writer_takes_lock)
511                 rte_free(h->readwrite_lock);
512         rte_ring_free(h->free_slots);
513         rte_ring_free(h->free_ext_bkts);
514         rte_free(h->key_store);
515         rte_free(h->buckets);
516         rte_free(h->buckets_ext);
517         rte_free(h->tbl_chng_cnt);
518         rte_free(h->ext_bkt_to_free);
519         rte_free(h);
520         rte_free(te);
521 }
522
523 hash_sig_t
524 rte_hash_hash(const struct rte_hash *h, const void *key)
525 {
526         /* calc hash result by key */
527         return h->hash_func(key, h->key_len, h->hash_func_init_val);
528 }
529
530 int32_t
531 rte_hash_max_key_id(const struct rte_hash *h)
532 {
533         RETURN_IF_TRUE((h == NULL), -EINVAL);
534         if (h->use_local_cache)
535                 /*
536                  * Increase number of slots by total number of indices
537                  * that can be stored in the lcore caches
538                  */
539                 return (h->entries + ((RTE_MAX_LCORE - 1) *
540                                         (LCORE_CACHE_SIZE - 1)));
541         else
542                 return h->entries;
543 }
544
545 int32_t
546 rte_hash_count(const struct rte_hash *h)
547 {
548         uint32_t tot_ring_cnt, cached_cnt = 0;
549         uint32_t i, ret;
550
551         if (h == NULL)
552                 return -EINVAL;
553
554         if (h->use_local_cache) {
555                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
556                                         (LCORE_CACHE_SIZE - 1);
557                 for (i = 0; i < RTE_MAX_LCORE; i++)
558                         cached_cnt += h->local_free_slots[i].len;
559
560                 ret = tot_ring_cnt - rte_ring_count(h->free_slots) -
561                                                                 cached_cnt;
562         } else {
563                 tot_ring_cnt = h->entries;
564                 ret = tot_ring_cnt - rte_ring_count(h->free_slots);
565         }
566         return ret;
567 }
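/*
 * Example (hypothetical numbers): without local caches, if h->entries is
 * 1024 and rte_ring_count(h->free_slots) returns 1000, rte_hash_count()
 * reports 1024 - 1000 = 24 keys in use. With local caches enabled, slot
 * indices parked in the per-lcore caches are subtracted as well, since
 * they are free even though they are no longer in the ring.
 */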
568
569 /* Read write locks implemented using rte_rwlock */
570 static inline void
571 __hash_rw_writer_lock(const struct rte_hash *h)
572 {
573         if (h->writer_takes_lock && h->hw_trans_mem_support)
574                 rte_rwlock_write_lock_tm(h->readwrite_lock);
575         else if (h->writer_takes_lock)
576                 rte_rwlock_write_lock(h->readwrite_lock);
577 }
578
579 static inline void
580 __hash_rw_reader_lock(const struct rte_hash *h)
581 {
582         if (h->readwrite_concur_support && h->hw_trans_mem_support)
583                 rte_rwlock_read_lock_tm(h->readwrite_lock);
584         else if (h->readwrite_concur_support)
585                 rte_rwlock_read_lock(h->readwrite_lock);
586 }
587
588 static inline void
589 __hash_rw_writer_unlock(const struct rte_hash *h)
590 {
591         if (h->writer_takes_lock && h->hw_trans_mem_support)
592                 rte_rwlock_write_unlock_tm(h->readwrite_lock);
593         else if (h->writer_takes_lock)
594                 rte_rwlock_write_unlock(h->readwrite_lock);
595 }
596
597 static inline void
598 __hash_rw_reader_unlock(const struct rte_hash *h)
599 {
600         if (h->readwrite_concur_support && h->hw_trans_mem_support)
601                 rte_rwlock_read_unlock_tm(h->readwrite_lock);
602         else if (h->readwrite_concur_support)
603                 rte_rwlock_read_unlock(h->readwrite_lock);
604 }
605
606 void
607 rte_hash_reset(struct rte_hash *h)
608 {
609         uint32_t tot_ring_cnt, i;
610
611         if (h == NULL)
612                 return;
613
614         __hash_rw_writer_lock(h);
615         memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
616         memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
617         *h->tbl_chng_cnt = 0;
618
619         /* reset the free ring */
620         rte_ring_reset(h->free_slots);
621
622         /* flush free extendable bucket ring and memory */
623         if (h->ext_table_support) {
624                 memset(h->buckets_ext, 0, h->num_buckets *
625                                                 sizeof(struct rte_hash_bucket));
626                 rte_ring_reset(h->free_ext_bkts);
627         }
628
629         /* Repopulate the free slots ring. Entry zero is reserved for key misses */
630         if (h->use_local_cache)
631                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
632                                         (LCORE_CACHE_SIZE - 1);
633         else
634                 tot_ring_cnt = h->entries;
635
636         for (i = 1; i < tot_ring_cnt + 1; i++)
637                 rte_ring_sp_enqueue_elem(h->free_slots, &i, sizeof(uint32_t));
638
639         /* Repopulate the free ext bkt ring. */
640         if (h->ext_table_support) {
641                 for (i = 1; i <= h->num_buckets; i++)
642                         rte_ring_sp_enqueue_elem(h->free_ext_bkts, &i,
643                                                         sizeof(uint32_t));
644         }
645
646         if (h->use_local_cache) {
647                 /* Reset local caches per lcore */
648                 for (i = 0; i < RTE_MAX_LCORE; i++)
649                         h->local_free_slots[i].len = 0;
650         }
651         __hash_rw_writer_unlock(h);
652 }
653
654 /*
655  * Function called to enqueue an index back into the cache/ring,
656  * as the slot has not been used and can be reused in the
657  * next addition attempt.
658  */
659 static inline void
660 enqueue_slot_back(const struct rte_hash *h,
661                 struct lcore_cache *cached_free_slots,
662                 uint32_t slot_id)
663 {
664         if (h->use_local_cache) {
665                 cached_free_slots->objs[cached_free_slots->len] = slot_id;
666                 cached_free_slots->len++;
667         } else
668                 rte_ring_sp_enqueue_elem(h->free_slots, &slot_id,
669                                                 sizeof(uint32_t));
670 }
671
672 /* Search for a key in the bucket and update its data.
673  * The writer holds the lock before calling this.
674  */
675 static inline int32_t
676 search_and_update(const struct rte_hash *h, void *data, const void *key,
677         struct rte_hash_bucket *bkt, uint16_t sig)
678 {
679         int i;
680         struct rte_hash_key *k, *keys = h->key_store;
681
682         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
683                 if (bkt->sig_current[i] == sig) {
684                         k = (struct rte_hash_key *) ((char *)keys +
685                                         bkt->key_idx[i] * h->key_entry_size);
686                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
687                                 /* The store to application data at *data
688                                  * should not leak after the store to pdata
689                                  * in the key store. i.e. pdata is the guard
690                                  * variable. Release the application data
691                                  * to the readers.
692                                  */
693                                 __atomic_store_n(&k->pdata,
694                                         data,
695                                         __ATOMIC_RELEASE);
696                                 /*
697                                  * Return index where key is stored,
698                                  * subtracting the first dummy index
699                                  */
700                                 return bkt->key_idx[i] - 1;
701                         }
702                 }
703         }
704         return -1;
705 }
706
707 /* Only tries to insert at one bucket (@prim_bkt) without trying to push
708  * buckets around.
709  * Returns 1 if the key matches an existing key, 0 on success, and -1 if
710  * no empty entry is available.
711  */
712 static inline int32_t
713 rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
714                 struct rte_hash_bucket *prim_bkt,
715                 struct rte_hash_bucket *sec_bkt,
716                 const struct rte_hash_key *key, void *data,
717                 uint16_t sig, uint32_t new_idx,
718                 int32_t *ret_val)
719 {
720         unsigned int i;
721         struct rte_hash_bucket *cur_bkt;
722         int32_t ret;
723
724         __hash_rw_writer_lock(h);
725         /* Check if key was inserted after last check but before this
726          * protected region in case of inserting duplicated keys.
727          */
728         ret = search_and_update(h, data, key, prim_bkt, sig);
729         if (ret != -1) {
730                 __hash_rw_writer_unlock(h);
731                 *ret_val = ret;
732                 return 1;
733         }
734
735         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
736                 ret = search_and_update(h, data, key, cur_bkt, sig);
737                 if (ret != -1) {
738                         __hash_rw_writer_unlock(h);
739                         *ret_val = ret;
740                         return 1;
741                 }
742         }
743
744         /* Insert new entry if there is room in the primary
745          * bucket.
746          */
747         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
748                 /* Check if slot is available */
749                 if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
750                         prim_bkt->sig_current[i] = sig;
751                         /* Store to signature and key should not
752                          * leak after the store to key_idx. i.e.
753                          * key_idx is the guard variable for signature
754                          * and key.
755                          */
756                         __atomic_store_n(&prim_bkt->key_idx[i],
757                                          new_idx,
758                                          __ATOMIC_RELEASE);
759                         break;
760                 }
761         }
762         __hash_rw_writer_unlock(h);
763
764         if (i != RTE_HASH_BUCKET_ENTRIES)
765                 return 0;
766
767         /* no empty entry */
768         return -1;
769 }
770
771 /* Shift buckets along the provided cuckoo_path (@leaf and @leaf_slot) and
772  * fill the path head with the new entry (sig, alt_hash, new_idx).
773  * Returns 1 if a matching key is found, -1 if the cuckoo path has been
774  * invalidated and the move fails, and 0 on success.
775  */
776 static inline int
777 rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
778                         struct rte_hash_bucket *bkt,
779                         struct rte_hash_bucket *alt_bkt,
780                         const struct rte_hash_key *key, void *data,
781                         struct queue_node *leaf, uint32_t leaf_slot,
782                         uint16_t sig, uint32_t new_idx,
783                         int32_t *ret_val)
784 {
785         uint32_t prev_alt_bkt_idx;
786         struct rte_hash_bucket *cur_bkt;
787         struct queue_node *prev_node, *curr_node = leaf;
788         struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
789         uint32_t prev_slot, curr_slot = leaf_slot;
790         int32_t ret;
791
792         __hash_rw_writer_lock(h);
793
794         /* In case the empty slot was taken before entering the protected region */
795         if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) {
796                 __hash_rw_writer_unlock(h);
797                 return -1;
798         }
799
800         /* Check if key was inserted after last check but before this
801          * protected region.
802          */
803         ret = search_and_update(h, data, key, bkt, sig);
804         if (ret != -1) {
805                 __hash_rw_writer_unlock(h);
806                 *ret_val = ret;
807                 return 1;
808         }
809
810         FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
811                 ret = search_and_update(h, data, key, cur_bkt, sig);
812                 if (ret != -1) {
813                         __hash_rw_writer_unlock(h);
814                         *ret_val = ret;
815                         return 1;
816                 }
817         }
818
819         while (likely(curr_node->prev != NULL)) {
820                 prev_node = curr_node->prev;
821                 prev_bkt = prev_node->bkt;
822                 prev_slot = curr_node->prev_slot;
823
824                 prev_alt_bkt_idx = get_alt_bucket_index(h,
825                                         prev_node->cur_bkt_idx,
826                                         prev_bkt->sig_current[prev_slot]);
827
828                 if (unlikely(&h->buckets[prev_alt_bkt_idx]
829                                 != curr_bkt)) {
830                         /* revert it to empty, otherwise we would leave duplicated keys */
831                         __atomic_store_n(&curr_bkt->key_idx[curr_slot],
832                                 EMPTY_SLOT,
833                                 __ATOMIC_RELEASE);
834                         __hash_rw_writer_unlock(h);
835                         return -1;
836                 }
837
838                 if (h->readwrite_concur_lf_support) {
839                         /* Inform readers of the previous move. The current
840                          * move need not be announced yet, as the current
841                          * bucket entry is present in both the primary and
842                          * secondary buckets. Since there is one writer,
843                          * load acquires on tbl_chng_cnt are not required.
844                          */
845                         __atomic_store_n(h->tbl_chng_cnt,
846                                          *h->tbl_chng_cnt + 1,
847                                          __ATOMIC_RELEASE);
848                         /* The store to sig_current should not
849                          * move above the store to tbl_chng_cnt.
850                          */
851                         __atomic_thread_fence(__ATOMIC_RELEASE);
852                 }
853
854                 /* Need to swap the current/alt sig to allow a later
855                  * cuckoo insert to move elements back to their
856                  * primary bucket if available
857                  */
858                 curr_bkt->sig_current[curr_slot] =
859                         prev_bkt->sig_current[prev_slot];
860                 /* Release the updated bucket entry */
861                 __atomic_store_n(&curr_bkt->key_idx[curr_slot],
862                         prev_bkt->key_idx[prev_slot],
863                         __ATOMIC_RELEASE);
864
865                 curr_slot = prev_slot;
866                 curr_node = prev_node;
867                 curr_bkt = curr_node->bkt;
868         }
869
870         if (h->readwrite_concur_lf_support) {
871                 /* Inform readers of the previous move. The current
872                  * move need not be announced yet, as the current
873                  * bucket entry is present in both the primary and
874                  * secondary buckets. Since there is one writer,
875                  * load acquires on tbl_chng_cnt are not required.
876                  */
877                 __atomic_store_n(h->tbl_chng_cnt,
878                                  *h->tbl_chng_cnt + 1,
879                                  __ATOMIC_RELEASE);
880                 /* The store to sig_current should not
881                  * move above the store to tbl_chng_cnt.
882                  */
883                 __atomic_thread_fence(__ATOMIC_RELEASE);
884         }
885
886         curr_bkt->sig_current[curr_slot] = sig;
887         /* Release the new bucket entry */
888         __atomic_store_n(&curr_bkt->key_idx[curr_slot],
889                          new_idx,
890                          __ATOMIC_RELEASE);
891
892         __hash_rw_writer_unlock(h);
893
894         return 0;
895
896 }
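/*
 * Note on the lock-free protocol used above: each displacement along the
 * cuckoo path is announced by incrementing *h->tbl_chng_cnt with release
 * semantics. Lock-free readers (see __rte_hash_lookup_with_hash_lf) load
 * the counter before and after scanning both buckets and retry the lookup
 * if it changed, so a key that is moved concurrently with a lookup is not
 * falsely reported as missing.
 */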
897
898 /*
899  * Make space for a new key, using BFS cuckoo search and multi-writer
900  * safe cuckoo displacement
901  */
902 static inline int
903 rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
904                         struct rte_hash_bucket *bkt,
905                         struct rte_hash_bucket *sec_bkt,
906                         const struct rte_hash_key *key, void *data,
907                         uint16_t sig, uint32_t bucket_idx,
908                         uint32_t new_idx, int32_t *ret_val)
909 {
910         unsigned int i;
911         struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
912         struct queue_node *tail, *head;
913         struct rte_hash_bucket *curr_bkt, *alt_bkt;
914         uint32_t cur_idx, alt_idx;
915
916         tail = queue;
917         head = queue + 1;
918         tail->bkt = bkt;
919         tail->prev = NULL;
920         tail->prev_slot = -1;
921         tail->cur_bkt_idx = bucket_idx;
922
923         /* Cuckoo bfs Search */
924         while (likely(tail != head && head <
925                                         queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
926                                         RTE_HASH_BUCKET_ENTRIES)) {
927                 curr_bkt = tail->bkt;
928                 cur_idx = tail->cur_bkt_idx;
929                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
930                         if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
931                                 int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
932                                                 bkt, sec_bkt, key, data,
933                                                 tail, i, sig,
934                                                 new_idx, ret_val);
935                                 if (likely(ret != -1))
936                                         return ret;
937                         }
938
939                         /* Enqueue new node and keep prev node info */
940                         alt_idx = get_alt_bucket_index(h, cur_idx,
941                                                 curr_bkt->sig_current[i]);
942                         alt_bkt = &(h->buckets[alt_idx]);
943                         head->bkt = alt_bkt;
944                         head->cur_bkt_idx = alt_idx;
945                         head->prev = tail;
946                         head->prev_slot = i;
947                         head++;
948                 }
949                 tail++;
950         }
951
952         return -ENOSPC;
953 }
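/*
 * Note: the BFS loop above stops expanding once fewer than
 * RTE_HASH_BUCKET_ENTRIES free queue slots remain, which guarantees that
 * each dequeued node can enqueue one child per bucket entry without
 * overflowing the fixed-size queue. If no empty slot is reachable within
 * that bound, -ENOSPC is returned and the caller may fall back to the
 * extendable buckets when they are enabled.
 */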
954
955 static inline int32_t
956 __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
957                                                 hash_sig_t sig, void *data)
958 {
959         uint16_t short_sig;
960         uint32_t prim_bucket_idx, sec_bucket_idx;
961         struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
962         struct rte_hash_key *new_k, *keys = h->key_store;
963         uint32_t ext_bkt_id = 0;
964         uint32_t slot_id;
965         int ret;
966         unsigned n_slots;
967         unsigned lcore_id;
968         unsigned int i;
969         struct lcore_cache *cached_free_slots = NULL;
970         int32_t ret_val;
971         struct rte_hash_bucket *last;
972
973         short_sig = get_short_sig(sig);
974         prim_bucket_idx = get_prim_bucket_index(h, sig);
975         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
976         prim_bkt = &h->buckets[prim_bucket_idx];
977         sec_bkt = &h->buckets[sec_bucket_idx];
978         rte_prefetch0(prim_bkt);
979         rte_prefetch0(sec_bkt);
980
981         /* Check if key is already inserted in primary location */
982         __hash_rw_writer_lock(h);
983         ret = search_and_update(h, data, key, prim_bkt, short_sig);
984         if (ret != -1) {
985                 __hash_rw_writer_unlock(h);
986                 return ret;
987         }
988
989         /* Check if key is already inserted in secondary location */
990         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
991                 ret = search_and_update(h, data, key, cur_bkt, short_sig);
992                 if (ret != -1) {
993                         __hash_rw_writer_unlock(h);
994                         return ret;
995                 }
996         }
997
998         __hash_rw_writer_unlock(h);
999
1000         /* Did not find a match, so get a new slot for storing the new key */
1001         if (h->use_local_cache) {
1002                 lcore_id = rte_lcore_id();
1003                 cached_free_slots = &h->local_free_slots[lcore_id];
1004                 /* Try to get a free slot from the local cache */
1005                 if (cached_free_slots->len == 0) {
1006                         /* Need to get another burst of free slots from global ring */
1007                         n_slots = rte_ring_mc_dequeue_burst_elem(h->free_slots,
1008                                         cached_free_slots->objs,
1009                                         sizeof(uint32_t),
1010                                         LCORE_CACHE_SIZE, NULL);
1011                         if (n_slots == 0) {
1012                                 return -ENOSPC;
1013                         }
1014
1015                         cached_free_slots->len += n_slots;
1016                 }
1017
1018                 /* Get a free slot from the local cache */
1019                 cached_free_slots->len--;
1020                 slot_id = cached_free_slots->objs[cached_free_slots->len];
1021         } else {
1022                 if (rte_ring_sc_dequeue_elem(h->free_slots, &slot_id,
1023                                                 sizeof(uint32_t)) != 0) {
1024                         return -ENOSPC;
1025                 }
1026         }
1027
1028         new_k = RTE_PTR_ADD(keys, slot_id * h->key_entry_size);
1029         /* The store to application data (by the application) at *data should
1030          * not leak after the store of pdata in the key store. i.e. pdata is
1031          * the guard variable. Release the application data to the readers.
1032          */
1033         __atomic_store_n(&new_k->pdata,
1034                 data,
1035                 __ATOMIC_RELEASE);
1036         /* Copy key */
1037         memcpy(new_k->key, key, h->key_len);
1038
1039         /* Find an empty slot and insert */
1040         ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
1041                                         short_sig, slot_id, &ret_val);
1042         if (ret == 0)
1043                 return slot_id - 1;
1044         else if (ret == 1) {
1045                 enqueue_slot_back(h, cached_free_slots, slot_id);
1046                 return ret_val;
1047         }
1048
1049         /* Primary bucket full, need to make space for new entry */
1050         ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
1051                                 short_sig, prim_bucket_idx, slot_id, &ret_val);
1052         if (ret == 0)
1053                 return slot_id - 1;
1054         else if (ret == 1) {
1055                 enqueue_slot_back(h, cached_free_slots, slot_id);
1056                 return ret_val;
1057         }
1058
1059         /* Also search secondary bucket to get better occupancy */
1060         ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
1061                                 short_sig, sec_bucket_idx, slot_id, &ret_val);
1062
1063         if (ret == 0)
1064                 return slot_id - 1;
1065         else if (ret == 1) {
1066                 enqueue_slot_back(h, cached_free_slots, slot_id);
1067                 return ret_val;
1068         }
1069
1070         /* if the ext table is not enabled, the insertion has failed */
1071         if (!h->ext_table_support) {
1072                 enqueue_slot_back(h, cached_free_slots, slot_id);
1073                 return ret;
1074         }
1075
1076         /* Now we need to go through the extendable buckets. The lock is
1077          * needed to protect all extendable bucket operations.
1078          */
1079         __hash_rw_writer_lock(h);
1080         /* Check for duplicates again, since the key could have been inserted before taking the lock */
1081         ret = search_and_update(h, data, key, prim_bkt, short_sig);
1082         if (ret != -1) {
1083                 enqueue_slot_back(h, cached_free_slots, slot_id);
1084                 goto failure;
1085         }
1086
1087         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1088                 ret = search_and_update(h, data, key, cur_bkt, short_sig);
1089                 if (ret != -1) {
1090                         enqueue_slot_back(h, cached_free_slots, slot_id);
1091                         goto failure;
1092                 }
1093         }
1094
1095         /* Search sec and ext buckets to find an empty entry to insert. */
1096         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1097                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1098                         /* Check if slot is available */
1099                         if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
1100                                 cur_bkt->sig_current[i] = short_sig;
1101                                 /* Store to signature and key should not
1102                                  * leak after the store to key_idx. i.e.
1103                                  * key_idx is the guard variable for signature
1104                                  * and key.
1105                                  */
1106                                 __atomic_store_n(&cur_bkt->key_idx[i],
1107                                                  slot_id,
1108                                                  __ATOMIC_RELEASE);
1109                                 __hash_rw_writer_unlock(h);
1110                                 return slot_id - 1;
1111                         }
1112                 }
1113         }
1114
1115         /* Failed to get an empty entry from the extendable buckets. Link a
1116          * new extendable bucket; first get a free bucket from the ring.
1117          */
1118         if (rte_ring_sc_dequeue_elem(h->free_ext_bkts, &ext_bkt_id,
1119                                                 sizeof(uint32_t)) != 0 ||
1120                                         ext_bkt_id == 0) {
1121                 ret = -ENOSPC;
1122                 goto failure;
1123         }
1124
1125         /* Use the first location of the new bucket */
1126         (h->buckets_ext[ext_bkt_id - 1]).sig_current[0] = short_sig;
1127         /* Store to signature and key should not leak after
1128          * the store to key_idx. i.e. key_idx is the guard variable
1129          * for signature and key.
1130          */
1131         __atomic_store_n(&(h->buckets_ext[ext_bkt_id - 1]).key_idx[0],
1132                          slot_id,
1133                          __ATOMIC_RELEASE);
1134         /* Link the new bucket into the sec bucket's linked list */
1135         last = rte_hash_get_last_bkt(sec_bkt);
1136         last->next = &h->buckets_ext[ext_bkt_id - 1];
1137         __hash_rw_writer_unlock(h);
1138         return slot_id - 1;
1139
1140 failure:
1141         __hash_rw_writer_unlock(h);
1142         return ret;
1143
1144 }
1145
1146 int32_t
1147 rte_hash_add_key_with_hash(const struct rte_hash *h,
1148                         const void *key, hash_sig_t sig)
1149 {
1150         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1151         return __rte_hash_add_key_with_hash(h, key, sig, 0);
1152 }
1153
1154 int32_t
1155 rte_hash_add_key(const struct rte_hash *h, const void *key)
1156 {
1157         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1158         return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0);
1159 }
1160
1161 int
1162 rte_hash_add_key_with_hash_data(const struct rte_hash *h,
1163                         const void *key, hash_sig_t sig, void *data)
1164 {
1165         int ret;
1166
1167         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1168         ret = __rte_hash_add_key_with_hash(h, key, sig, data);
1169         if (ret >= 0)
1170                 return 0;
1171         else
1172                 return ret;
1173 }
1174
1175 int
1176 rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
1177 {
1178         int ret;
1179
1180         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1181
1182         ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data);
1183         if (ret >= 0)
1184                 return 0;
1185         else
1186                 return ret;
1187 }
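/*
 * Illustrative sketch of the return-value conventions above; user_data,
 * obj and key are hypothetical application identifiers:
 *
 *     int32_t idx = rte_hash_add_key(h, &key);
 *     if (idx >= 0)
 *             user_data[idx] = obj;       (idx stays stable for this key)
 *     else if (idx == -ENOSPC)
 *             (table is full)
 *
 *     With the _data variants, a return of 0 means success and negative
 *     values are -EINVAL or -ENOSPC:
 *
 *     if (rte_hash_add_key_data(h, &key, obj) < 0)
 *             (handle error)
 */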
1188
1189 /* Search one bucket for a matching key - uses the rw lock */
1190 static inline int32_t
1191 search_one_bucket_l(const struct rte_hash *h, const void *key,
1192                 uint16_t sig, void **data,
1193                 const struct rte_hash_bucket *bkt)
1194 {
1195         int i;
1196         struct rte_hash_key *k, *keys = h->key_store;
1197
1198         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1199                 if (bkt->sig_current[i] == sig &&
1200                                 bkt->key_idx[i] != EMPTY_SLOT) {
1201                         k = (struct rte_hash_key *) ((char *)keys +
1202                                         bkt->key_idx[i] * h->key_entry_size);
1203
1204                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1205                                 if (data != NULL)
1206                                         *data = k->pdata;
1207                                 /*
1208                                  * Return index where key is stored,
1209                                  * subtracting the first dummy index
1210                                  */
1211                                 return bkt->key_idx[i] - 1;
1212                         }
1213                 }
1214         }
1215         return -1;
1216 }
1217
1218 /* Search one bucket for a matching key - lock-free version */
1219 static inline int32_t
1220 search_one_bucket_lf(const struct rte_hash *h, const void *key, uint16_t sig,
1221                         void **data, const struct rte_hash_bucket *bkt)
1222 {
1223         int i;
1224         uint32_t key_idx;
1225         struct rte_hash_key *k, *keys = h->key_store;
1226
1227         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1228                 /* Signature comparison is done before the acquire-load
1229                  * of the key index to achieve better performance.
1230                  * This can result in the reader loading old signature
1231                  * (which matches), while the key_idx is updated to a
1232                  * value that belongs to a new key. However, the full
1233                  * key comparison will ensure that the lookup fails.
1234                  */
1235                 if (bkt->sig_current[i] == sig) {
1236                         key_idx = __atomic_load_n(&bkt->key_idx[i],
1237                                           __ATOMIC_ACQUIRE);
1238                         if (key_idx != EMPTY_SLOT) {
1239                                 k = (struct rte_hash_key *) ((char *)keys +
1240                                                 key_idx * h->key_entry_size);
1241
1242                                 if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1243                                         if (data != NULL) {
1244                                                 *data = __atomic_load_n(
1245                                                         &k->pdata,
1246                                                         __ATOMIC_ACQUIRE);
1247                                         }
1248                                         /*
1249                                          * Return index where key is stored,
1250                                          * subtracting the first dummy index
1251                                          */
1252                                         return key_idx - 1;
1253                                 }
1254                         }
1255                 }
1256         }
1257         return -1;
1258 }
1259
1260 static inline int32_t
1261 __rte_hash_lookup_with_hash_l(const struct rte_hash *h, const void *key,
1262                                 hash_sig_t sig, void **data)
1263 {
1264         uint32_t prim_bucket_idx, sec_bucket_idx;
1265         struct rte_hash_bucket *bkt, *cur_bkt;
1266         int ret;
1267         uint16_t short_sig;
1268
1269         short_sig = get_short_sig(sig);
1270         prim_bucket_idx = get_prim_bucket_index(h, sig);
1271         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1272
1273         bkt = &h->buckets[prim_bucket_idx];
1274
1275         __hash_rw_reader_lock(h);
1276
1277         /* Check if key is in primary location */
1278         ret = search_one_bucket_l(h, key, short_sig, data, bkt);
1279         if (ret != -1) {
1280                 __hash_rw_reader_unlock(h);
1281                 return ret;
1282         }
1283         /* Calculate secondary hash */
1284         bkt = &h->buckets[sec_bucket_idx];
1285
1286         /* Check if key is in secondary location */
1287         FOR_EACH_BUCKET(cur_bkt, bkt) {
1288                 ret = search_one_bucket_l(h, key, short_sig,
1289                                         data, cur_bkt);
1290                 if (ret != -1) {
1291                         __hash_rw_reader_unlock(h);
1292                         return ret;
1293                 }
1294         }
1295
1296         __hash_rw_reader_unlock(h);
1297
1298         return -ENOENT;
1299 }
1300
1301 static inline int32_t
1302 __rte_hash_lookup_with_hash_lf(const struct rte_hash *h, const void *key,
1303                                         hash_sig_t sig, void **data)
1304 {
1305         uint32_t prim_bucket_idx, sec_bucket_idx;
1306         struct rte_hash_bucket *bkt, *cur_bkt;
1307         uint32_t cnt_b, cnt_a;
1308         int ret;
1309         uint16_t short_sig;
1310
1311         short_sig = get_short_sig(sig);
1312         prim_bucket_idx = get_prim_bucket_index(h, sig);
1313         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1314
1315         do {
1316                 /* Load the table change counter before the lookup
1317                  * starts. Acquire semantics will make sure that
1318                  * loads in search_one_bucket are not hoisted.
1319                  */
1320                 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1321                                 __ATOMIC_ACQUIRE);
1322
1323                 /* Check if key is in primary location */
1324                 bkt = &h->buckets[prim_bucket_idx];
1325                 ret = search_one_bucket_lf(h, key, short_sig, data, bkt);
1326                 if (ret != -1)
1327                         return ret;
1328                 /* Calculate secondary hash */
1329                 bkt = &h->buckets[sec_bucket_idx];
1330
1331                 /* Check if key is in secondary location */
1332                 FOR_EACH_BUCKET(cur_bkt, bkt) {
1333                         ret = search_one_bucket_lf(h, key, short_sig,
1334                                                 data, cur_bkt);
1335                         if (ret != -1)
1336                                 return ret;
1337                 }
1338
1339                 /* The loads of sig_current in search_one_bucket
1340                  * should not move below the load from tbl_chng_cnt.
1341                  */
1342                 __atomic_thread_fence(__ATOMIC_ACQUIRE);
1343                 /* Re-read the table change counter to check if the
1344                  * table has changed during search. If yes, re-do
1345                  * the search.
1346                  * This load should not get hoisted. The load
1347                  * acquires on cnt_b, key index in primary bucket
1348                  * and key index in secondary bucket will make sure
1349                  * that it does not get hoisted.
1350                  */
1351                 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
1352                                         __ATOMIC_ACQUIRE);
1353         } while (cnt_b != cnt_a);
1354
1355         return -ENOENT;
1356 }
1357
1358 static inline int32_t
1359 __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
1360                                         hash_sig_t sig, void **data)
1361 {
1362         if (h->readwrite_concur_lf_support)
1363                 return __rte_hash_lookup_with_hash_lf(h, key, sig, data);
1364         else
1365                 return __rte_hash_lookup_with_hash_l(h, key, sig, data);
1366 }
1367
1368 int32_t
1369 rte_hash_lookup_with_hash(const struct rte_hash *h,
1370                         const void *key, hash_sig_t sig)
1371 {
1372         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1373         return __rte_hash_lookup_with_hash(h, key, sig, NULL);
1374 }
1375
1376 int32_t
1377 rte_hash_lookup(const struct rte_hash *h, const void *key)
1378 {
1379         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1380         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL);
1381 }
1382
1383 int
1384 rte_hash_lookup_with_hash_data(const struct rte_hash *h,
1385                         const void *key, hash_sig_t sig, void **data)
1386 {
1387         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1388         return __rte_hash_lookup_with_hash(h, key, sig, data);
1389 }
1390
1391 int
1392 rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
1393 {
1394         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1395         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
1396 }
1397
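/* Return the key-store slot of a deleted entry to the free-slot pool.
 * With the per-lcore cache enabled the slot index is pushed onto the
 * local cache and only spilled to the global ring in bursts of
 * LCORE_CACHE_SIZE; otherwise it is enqueued on the global ring directly.
 */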
1398 static inline void
1399 remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
1400 {
1401         unsigned lcore_id, n_slots;
1402         struct lcore_cache *cached_free_slots;
1403
1404         if (h->use_local_cache) {
1405                 lcore_id = rte_lcore_id();
1406                 cached_free_slots = &h->local_free_slots[lcore_id];
1407                 /* Cache full: flush a batch of slots to the global ring. */
1408                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1409                         /* Need to enqueue the free slots in global ring. */
1410                         n_slots = rte_ring_mp_enqueue_burst_elem(h->free_slots,
1411                                                 cached_free_slots->objs,
1412                                                 sizeof(uint32_t),
1413                                                 LCORE_CACHE_SIZE, NULL);
1414                         ERR_IF_TRUE((n_slots == 0),
1415                                 "%s: could not enqueue free slots in global ring\n",
1416                                 __func__);
1417                         cached_free_slots->len -= n_slots;
1418                 }
1419                 /* Put index of new free slot in cache. */
1420                 cached_free_slots->objs[cached_free_slots->len] =
1421                                                         bkt->key_idx[i];
1422                 cached_free_slots->len++;
1423         } else {
1424                 rte_ring_sp_enqueue_elem(h->free_slots,
1425                                 &bkt->key_idx[i], sizeof(uint32_t));
1426         }
1427 }
1428
1429 /* Compact the bucket chain by moving the last occupied entry of the linked
1430  * list into the slot that was just emptied.
1431  */
1432 static inline void
1433 __rte_hash_compact_ll(const struct rte_hash *h,
1434                         struct rte_hash_bucket *cur_bkt, int pos) {
1435         int i;
1436         struct rte_hash_bucket *last_bkt;
1437
1438         if (!cur_bkt->next)
1439                 return;
1440
1441         last_bkt = rte_hash_get_last_bkt(cur_bkt);
1442
1443         for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) {
1444                 if (last_bkt->key_idx[i] != EMPTY_SLOT) {
1445                         cur_bkt->sig_current[pos] = last_bkt->sig_current[i];
1446                         __atomic_store_n(&cur_bkt->key_idx[pos],
1447                                          last_bkt->key_idx[i],
1448                                          __ATOMIC_RELEASE);
1449                         if (h->readwrite_concur_lf_support) {
1450                                 /* Inform readers that the table changed.
1451                                  * With a single writer, a load-acquire on
1452                                  * tbl_chng_cnt is not required.
1453                                  */
1454                                 __atomic_store_n(h->tbl_chng_cnt,
1455                                          *h->tbl_chng_cnt + 1,
1456                                          __ATOMIC_RELEASE);
1457                                 /* The store to sig_current should
1458                                  * not move above the store to tbl_chng_cnt.
1459                                  */
1460                                 __atomic_thread_fence(__ATOMIC_RELEASE);
1461                         }
1462                         last_bkt->sig_current[i] = NULL_SIGNATURE;
1463                         __atomic_store_n(&last_bkt->key_idx[i],
1464                                          EMPTY_SLOT,
1465                                          __ATOMIC_RELEASE);
1466                         return;
1467                 }
1468         }
1469 }
1470
1471 /* Search one bucket and remove the matched key.
1472  * Writer is expected to hold the lock while calling this
1473  * function.
1474  */
1475 static inline int32_t
1476 search_and_remove(const struct rte_hash *h, const void *key,
1477                         struct rte_hash_bucket *bkt, uint16_t sig, int *pos)
1478 {
1479         struct rte_hash_key *k, *keys = h->key_store;
1480         unsigned int i;
1481         uint32_t key_idx;
1482
1483         /* Check if key is in bucket */
1484         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1485                 key_idx = __atomic_load_n(&bkt->key_idx[i],
1486                                           __ATOMIC_ACQUIRE);
1487                 if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
1488                         k = (struct rte_hash_key *) ((char *)keys +
1489                                         key_idx * h->key_entry_size);
1490                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1491                                 bkt->sig_current[i] = NULL_SIGNATURE;
1492                                 /* Free the key store index if
1493                                  * no_free_on_del is disabled.
1494                                  */
1495                                 if (!h->no_free_on_del)
1496                                         remove_entry(h, bkt, i);
1497
1498                                 __atomic_store_n(&bkt->key_idx[i],
1499                                                  EMPTY_SLOT,
1500                                                  __ATOMIC_RELEASE);
1501
1502                                 *pos = i;
1503                                 /*
1504                                  * Return index where key is stored,
1505                                  * subtracting the first dummy index
1506                                  */
1507                                 return key_idx - 1;
1508                         }
1509                 }
1510         }
1511         return -1;
1512 }
1513
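/* Delete path: search the primary bucket first, then the secondary bucket
 * and its chain of extendable buckets. On a hit the bucket chain is
 * compacted so holes stay at the tail, and if the last ext bucket becomes
 * empty it is either recycled to the free list right away or, when
 * no_free_on_del is set, recorded in ext_bkt_to_free so it can be recycled
 * later together with the key index.
 */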
1514 static inline int32_t
1515 __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
1516                                                 hash_sig_t sig)
1517 {
1518         uint32_t prim_bucket_idx, sec_bucket_idx;
1519         struct rte_hash_bucket *prim_bkt, *sec_bkt, *prev_bkt, *last_bkt;
1520         struct rte_hash_bucket *cur_bkt;
1521         int pos;
1522         int32_t ret, i;
1523         uint16_t short_sig;
1524
1525         short_sig = get_short_sig(sig);
1526         prim_bucket_idx = get_prim_bucket_index(h, sig);
1527         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1528         prim_bkt = &h->buckets[prim_bucket_idx];
1529
1530         __hash_rw_writer_lock(h);
1531         /* look for key in primary bucket */
1532         ret = search_and_remove(h, key, prim_bkt, short_sig, &pos);
1533         if (ret != -1) {
1534                 __rte_hash_compact_ll(h, prim_bkt, pos);
1535                 last_bkt = prim_bkt->next;
1536                 prev_bkt = prim_bkt;
1537                 goto return_bkt;
1538         }
1539
1540         /* Key not in the primary bucket; check the secondary bucket chain */
1541         sec_bkt = &h->buckets[sec_bucket_idx];
1542
1543         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1544                 ret = search_and_remove(h, key, cur_bkt, short_sig, &pos);
1545                 if (ret != -1) {
1546                         __rte_hash_compact_ll(h, cur_bkt, pos);
1547                         last_bkt = sec_bkt->next;
1548                         prev_bkt = sec_bkt;
1549                         goto return_bkt;
1550                 }
1551         }
1552
1553         __hash_rw_writer_unlock(h);
1554         return -ENOENT;
1555
1556 /* Check if the last ext bucket in the chain is empty and can be recycled */
1557 return_bkt:
1558         if (!last_bkt) {
1559                 __hash_rw_writer_unlock(h);
1560                 return ret;
1561         }
1562         while (last_bkt->next) {
1563                 prev_bkt = last_bkt;
1564                 last_bkt = last_bkt->next;
1565         }
1566
1567         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1568                 if (last_bkt->key_idx[i] != EMPTY_SLOT)
1569                         break;
1570         }
1571         /* Last ext bucket is empty: unlink it and recycle it */
1572         if (i == RTE_HASH_BUCKET_ENTRIES) {
1573                 prev_bkt->next = NULL;
1574                 uint32_t index = last_bkt - h->buckets_ext + 1;
1575                 /* Recycle the empty ext bkt right away if
1576                  * no_free_on_del is disabled; otherwise defer it.
1577                  */
1578                 if (h->no_free_on_del)
1579                         /* Store index of an empty ext bkt to be recycled
1580                          * on calling rte_hash_del_xxx APIs.
1581                          * When lock free read-write concurrency is enabled,
1582                          * an empty ext bkt cannot be put into free list
1583                          * immediately (as readers might be using it still).
1584                          * Hence freeing of the ext bkt is piggy-backed to
1585                          * freeing of the key index.
1586                          */
1587                         h->ext_bkt_to_free[ret] = index;
1588                 else
1589                         rte_ring_sp_enqueue_elem(h->free_ext_bkts, &index,
1590                                                         sizeof(uint32_t));
1591         }
1592         __hash_rw_writer_unlock(h);
1593         return ret;
1594 }
1595
1596 int32_t
1597 rte_hash_del_key_with_hash(const struct rte_hash *h,
1598                         const void *key, hash_sig_t sig)
1599 {
1600         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1601         return __rte_hash_del_key_with_hash(h, key, sig);
1602 }
1603
1604 int32_t
1605 rte_hash_del_key(const struct rte_hash *h, const void *key)
1606 {
1607         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1608         return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
1609 }
1610
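/* Positions returned by the add/delete/lookup APIs are zero based: key-store
 * slot 0 is a reserved dummy entry, so a stored key index of N is reported to
 * the application as position N - 1. The helpers below convert back by adding
 * the dummy slot offset.
 */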
1611 int
1612 rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
1613                                void **key)
1614 {
1615         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1616
1617         struct rte_hash_key *k, *keys = h->key_store;
1618         k = (struct rte_hash_key *) ((char *) keys + (position + 1) *
1619                                      h->key_entry_size);
1620         *key = k->key;
1621
1622         if (position !=
1623             __rte_hash_lookup_with_hash(h, *key, rte_hash_hash(h, *key),
1624                                         NULL)) {
1625                 return -ENOENT;
1626         }
1627
1628         return 0;
1629 }
1630
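/* When RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL (or lock-free read/write
 * concurrency, which implies it) is used, rte_hash_del_key*() only clears
 * the bucket entry; the key-store slot and any deferred ext bucket are
 * reclaimed here, once the application knows no reader still references
 * them. A minimal usage sketch, assuming the application provides its own
 * quiescence mechanism (e.g. RCU):
 *
 *   int32_t pos = rte_hash_del_key(h, key);
 *   if (pos >= 0) {
 *           ... wait until all readers have quiesced ...
 *           rte_hash_free_key_with_position(h, pos);
 *   }
 */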
1631 int
1632 rte_hash_free_key_with_position(const struct rte_hash *h,
1633                                 const int32_t position)
1634 {
1635         /* Key index where key is stored, adding the first dummy index */
1636         uint32_t key_idx = position + 1;
1637
1638         RETURN_IF_TRUE(((h == NULL) || (key_idx == EMPTY_SLOT)), -EINVAL);
1639
1640         unsigned int lcore_id, n_slots;
1641         struct lcore_cache *cached_free_slots;
1642         const uint32_t total_entries = h->use_local_cache ?
1643                 h->entries + (RTE_MAX_LCORE - 1) * (LCORE_CACHE_SIZE - 1) + 1
1644                                                         : h->entries + 1;
1645
1646         /* Out of bounds */
1647         if (key_idx >= total_entries)
1648                 return -EINVAL;
1649         if (h->ext_table_support && h->readwrite_concur_lf_support) {
1650                 uint32_t index = h->ext_bkt_to_free[position];
1651                 if (index) {
1652                         /* Recycle empty ext bkt to free list. */
1653                         rte_ring_sp_enqueue_elem(h->free_ext_bkts, &index,
1654                                                         sizeof(uint32_t));
1655                         h->ext_bkt_to_free[position] = 0;
1656                 }
1657         }
1658
1659         if (h->use_local_cache) {
1660                 lcore_id = rte_lcore_id();
1661                 cached_free_slots = &h->local_free_slots[lcore_id];
1662                 /* Cache full: flush a batch of slots to the global ring. */
1663                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1664                         /* Need to enqueue the free slots in global ring. */
1665                         n_slots = rte_ring_mp_enqueue_burst_elem(h->free_slots,
1666                                                 cached_free_slots->objs,
1667                                                 sizeof(uint32_t),
1668                                                 LCORE_CACHE_SIZE, NULL);
1669                         RETURN_IF_TRUE((n_slots == 0), -EFAULT);
1670                         cached_free_slots->len -= n_slots;
1671                 }
1672                 /* Put index of new free slot in cache. */
1673                 cached_free_slots->objs[cached_free_slots->len] = key_idx;
1674                 cached_free_slots->len++;
1675         } else {
1676                 rte_ring_sp_enqueue_elem(h->free_slots, &key_idx,
1677                                                 sizeof(uint32_t));
1678         }
1679
1680         return 0;
1681 }
1682
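/* Build per-bucket signature match masks. Each of the RTE_HASH_BUCKET_ENTRIES
 * entries maps to two bits of the mask; the lower bit of a pair is set when
 * sig matches sig_current[] (the SSE path sets both bits of a matching pair,
 * which is harmless). Callers recover the entry index with
 * __builtin_ctzl(mask) >> 1 and clear a hit with ~(3ULL << (index << 1)).
 * For example, matches in entries 1 and 5 yield a scalar mask of 0x404.
 */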
1683 static inline void
1684 compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
1685                         const struct rte_hash_bucket *prim_bkt,
1686                         const struct rte_hash_bucket *sec_bkt,
1687                         uint16_t sig,
1688                         enum rte_hash_sig_compare_function sig_cmp_fn)
1689 {
1690         unsigned int i;
1691
1692         /* In the match mask the low bit of each 2-bit pair flags a match */
1693         switch (sig_cmp_fn) {
1694 #if defined(RTE_MACHINE_CPUFLAG_SSE2)
1695         case RTE_HASH_COMPARE_SSE:
1696                 /* Compare all signatures in the bucket */
1697                 *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1698                                 _mm_load_si128(
1699                                         (__m128i const *)prim_bkt->sig_current),
1700                                 _mm_set1_epi16(sig)));
1701                 /* Compare all signatures in the bucket */
1702                 *sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1703                                 _mm_load_si128(
1704                                         (__m128i const *)sec_bkt->sig_current),
1705                                 _mm_set1_epi16(sig)));
1706                 break;
1707 #elif defined(RTE_MACHINE_CPUFLAG_NEON)
1708         case RTE_HASH_COMPARE_NEON: {
1709                 uint16x8_t vmat, vsig, x;
1710                 int16x8_t shift = {-15, -13, -11, -9, -7, -5, -3, -1};
1711
1712                 vsig = vld1q_dup_u16((uint16_t const *)&sig);
1713                 /* Compare all signatures in the primary bucket */
1714                 vmat = vceqq_u16(vsig,
1715                         vld1q_u16((uint16_t const *)prim_bkt->sig_current));
1716                 x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), shift);
1717                 *prim_hash_matches = (uint32_t)(vaddvq_u16(x));
1718                 /* Compare all signatures in the secondary bucket */
1719                 vmat = vceqq_u16(vsig,
1720                         vld1q_u16((uint16_t const *)sec_bkt->sig_current));
1721                 x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), shift);
1722                 *sec_hash_matches = (uint32_t)(vaddvq_u16(x));
1723                 }
1724                 break;
1725 #endif
1726         default:
1727                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1728                         *prim_hash_matches |=
1729                                 ((sig == prim_bkt->sig_current[i]) << (i << 1));
1730                         *sec_hash_matches |=
1731                                 ((sig == sec_bkt->sig_current[i]) << (i << 1));
1732                 }
1733         }
1734 }
1735
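/* Bulk lookup with the bucket lock held for reading. The first pass compares
 * signatures for every key and prefetches the first candidate key slot; the
 * second pass does the full key comparisons, primary bucket before secondary;
 * a final pass walks the extendable bucket chains only for keys that are
 * still unresolved.
 */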
1736 static inline void
1737 __bulk_lookup_l(const struct rte_hash *h, const void **keys,
1738                 const struct rte_hash_bucket **primary_bkt,
1739                 const struct rte_hash_bucket **secondary_bkt,
1740                 uint16_t *sig, int32_t num_keys, int32_t *positions,
1741                 uint64_t *hit_mask, void *data[])
1742 {
1743         uint64_t hits = 0;
1744         int32_t i;
1745         int32_t ret;
1746         uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1747         uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1748         struct rte_hash_bucket *cur_bkt, *next_bkt;
1749
1750         __hash_rw_reader_lock(h);
1751
1752         /* Compare signatures and prefetch key slot of first hit */
1753         for (i = 0; i < num_keys; i++) {
1754                 compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1755                         primary_bkt[i], secondary_bkt[i],
1756                         sig[i], h->sig_cmp_fn);
1757
1758                 if (prim_hitmask[i]) {
1759                         uint32_t first_hit =
1760                                         __builtin_ctzl(prim_hitmask[i])
1761                                         >> 1;
1762                         uint32_t key_idx =
1763                                 primary_bkt[i]->key_idx[first_hit];
1764                         const struct rte_hash_key *key_slot =
1765                                 (const struct rte_hash_key *)(
1766                                 (const char *)h->key_store +
1767                                 key_idx * h->key_entry_size);
1768                         rte_prefetch0(key_slot);
1769                         continue;
1770                 }
1771
1772                 if (sec_hitmask[i]) {
1773                         uint32_t first_hit =
1774                                         __builtin_ctzl(sec_hitmask[i])
1775                                         >> 1;
1776                         uint32_t key_idx =
1777                                 secondary_bkt[i]->key_idx[first_hit];
1778                         const struct rte_hash_key *key_slot =
1779                                 (const struct rte_hash_key *)(
1780                                 (const char *)h->key_store +
1781                                 key_idx * h->key_entry_size);
1782                         rte_prefetch0(key_slot);
1783                 }
1784         }
1785
1786         /* Compare keys, first hits in primary first */
1787         for (i = 0; i < num_keys; i++) {
1788                 positions[i] = -ENOENT;
1789                 while (prim_hitmask[i]) {
1790                         uint32_t hit_index =
1791                                         __builtin_ctzl(prim_hitmask[i])
1792                                         >> 1;
1793                         uint32_t key_idx =
1794                                 primary_bkt[i]->key_idx[hit_index];
1795                         const struct rte_hash_key *key_slot =
1796                                 (const struct rte_hash_key *)(
1797                                 (const char *)h->key_store +
1798                                 key_idx * h->key_entry_size);
1799
1800                         /*
1801                          * If key index is 0, do not compare key,
1802                          * as it is checking the dummy slot
1803                          */
1804                         if (!!key_idx &
1805                                 !rte_hash_cmp_eq(
1806                                         key_slot->key, keys[i], h)) {
1807                                 if (data != NULL)
1808                                         data[i] = key_slot->pdata;
1809
1810                                 hits |= 1ULL << i;
1811                                 positions[i] = key_idx - 1;
1812                                 goto next_key;
1813                         }
1814                         prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
1815                 }
1816
1817                 while (sec_hitmask[i]) {
1818                         uint32_t hit_index =
1819                                         __builtin_ctzl(sec_hitmask[i])
1820                                         >> 1;
1821                         uint32_t key_idx =
1822                                 secondary_bkt[i]->key_idx[hit_index];
1823                         const struct rte_hash_key *key_slot =
1824                                 (const struct rte_hash_key *)(
1825                                 (const char *)h->key_store +
1826                                 key_idx * h->key_entry_size);
1827
1828                         /*
1829                          * If key index is 0, do not compare key,
1830                          * as it is checking the dummy slot
1831                          */
1832
1833                         if (!!key_idx &
1834                                 !rte_hash_cmp_eq(
1835                                         key_slot->key, keys[i], h)) {
1836                                 if (data != NULL)
1837                                         data[i] = key_slot->pdata;
1838
1839                                 hits |= 1ULL << i;
1840                                 positions[i] = key_idx - 1;
1841                                 goto next_key;
1842                         }
1843                         sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
1844                 }
1845 next_key:
1846                 continue;
1847         }
1848
1849         /* all found, do not need to go through ext bkt */
1850         if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
1851                 if (hit_mask != NULL)
1852                         *hit_mask = hits;
1853                 __hash_rw_reader_unlock(h);
1854                 return;
1855         }
1856
1857         /* need to check ext buckets for match */
1858         for (i = 0; i < num_keys; i++) {
1859                 if ((hits & (1ULL << i)) != 0)
1860                         continue;
1861                 next_bkt = secondary_bkt[i]->next;
1862                 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
1863                         if (data != NULL)
1864                                 ret = search_one_bucket_l(h, keys[i],
1865                                                 sig[i], &data[i], cur_bkt);
1866                         else
1867                                 ret = search_one_bucket_l(h, keys[i],
1868                                                 sig[i], NULL, cur_bkt);
1869                         if (ret != -1) {
1870                                 positions[i] = ret;
1871                                 hits |= 1ULL << i;
1872                                 break;
1873                         }
1874                 }
1875         }
1876
1877         __hash_rw_reader_unlock(h);
1878
1879         if (hit_mask != NULL)
1880                 *hit_mask = hits;
1881 }
1882
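/* Lock-free variant of __bulk_lookup_l. Key indexes and pdata are read with
 * load-acquire, and the whole burst is retried if the table change counter
 * differs before and after the search, mirroring the single-key lock-free
 * lookup above.
 */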
1883 static inline void
1884 __bulk_lookup_lf(const struct rte_hash *h, const void **keys,
1885                 const struct rte_hash_bucket **primary_bkt,
1886                 const struct rte_hash_bucket **secondary_bkt,
1887                 uint16_t *sig, int32_t num_keys, int32_t *positions,
1888                 uint64_t *hit_mask, void *data[])
1889 {
1890         uint64_t hits = 0;
1891         int32_t i;
1892         int32_t ret;
1893         uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1894         uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1895         struct rte_hash_bucket *cur_bkt, *next_bkt;
1896         uint32_t cnt_b, cnt_a;
1897
1898         for (i = 0; i < num_keys; i++)
1899                 positions[i] = -ENOENT;
1900
1901         do {
1902                 /* Load the table change counter before the lookup
1903                  * starts. Acquire semantics will make sure that
1904                  * loads in compare_signatures are not hoisted.
1905                  */
1906                 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1907                                         __ATOMIC_ACQUIRE);
1908
1909                 /* Compare signatures and prefetch key slot of first hit */
1910                 for (i = 0; i < num_keys; i++) {
1911                         compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1912                                 primary_bkt[i], secondary_bkt[i],
1913                                 sig[i], h->sig_cmp_fn);
1914
1915                         if (prim_hitmask[i]) {
1916                                 uint32_t first_hit =
1917                                                 __builtin_ctzl(prim_hitmask[i])
1918                                                 >> 1;
1919                                 uint32_t key_idx =
1920                                         primary_bkt[i]->key_idx[first_hit];
1921                                 const struct rte_hash_key *key_slot =
1922                                         (const struct rte_hash_key *)(
1923                                         (const char *)h->key_store +
1924                                         key_idx * h->key_entry_size);
1925                                 rte_prefetch0(key_slot);
1926                                 continue;
1927                         }
1928
1929                         if (sec_hitmask[i]) {
1930                                 uint32_t first_hit =
1931                                                 __builtin_ctzl(sec_hitmask[i])
1932                                                 >> 1;
1933                                 uint32_t key_idx =
1934                                         secondary_bkt[i]->key_idx[first_hit];
1935                                 const struct rte_hash_key *key_slot =
1936                                         (const struct rte_hash_key *)(
1937                                         (const char *)h->key_store +
1938                                         key_idx * h->key_entry_size);
1939                                 rte_prefetch0(key_slot);
1940                         }
1941                 }
1942
1943                 /* Compare keys, first hits in primary first */
1944                 for (i = 0; i < num_keys; i++) {
1945                         while (prim_hitmask[i]) {
1946                                 uint32_t hit_index =
1947                                                 __builtin_ctzl(prim_hitmask[i])
1948                                                 >> 1;
1949                                 uint32_t key_idx =
1950                                 __atomic_load_n(
1951                                         &primary_bkt[i]->key_idx[hit_index],
1952                                         __ATOMIC_ACQUIRE);
1953                                 const struct rte_hash_key *key_slot =
1954                                         (const struct rte_hash_key *)(
1955                                         (const char *)h->key_store +
1956                                         key_idx * h->key_entry_size);
1957
1958                                 /*
1959                                  * If key index is 0, do not compare key,
1960                                  * as it is checking the dummy slot
1961                                  */
1962                                 if (!!key_idx &
1963                                         !rte_hash_cmp_eq(
1964                                                 key_slot->key, keys[i], h)) {
1965                                         if (data != NULL)
1966                                                 data[i] = __atomic_load_n(
1967                                                         &key_slot->pdata,
1968                                                         __ATOMIC_ACQUIRE);
1969
1970                                         hits |= 1ULL << i;
1971                                         positions[i] = key_idx - 1;
1972                                         goto next_key;
1973                                 }
1974                                 prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
1975                         }
1976
1977                         while (sec_hitmask[i]) {
1978                                 uint32_t hit_index =
1979                                                 __builtin_ctzl(sec_hitmask[i])
1980                                                 >> 1;
1981                                 uint32_t key_idx =
1982                                 __atomic_load_n(
1983                                         &secondary_bkt[i]->key_idx[hit_index],
1984                                         __ATOMIC_ACQUIRE);
1985                                 const struct rte_hash_key *key_slot =
1986                                         (const struct rte_hash_key *)(
1987                                         (const char *)h->key_store +
1988                                         key_idx * h->key_entry_size);
1989
1990                                 /*
1991                                  * If key index is 0, do not compare key,
1992                                  * as it is checking the dummy slot
1993                                  */
1994
1995                                 if (!!key_idx &
1996                                         !rte_hash_cmp_eq(
1997                                                 key_slot->key, keys[i], h)) {
1998                                         if (data != NULL)
1999                                                 data[i] = __atomic_load_n(
2000                                                         &key_slot->pdata,
2001                                                         __ATOMIC_ACQUIRE);
2002
2003                                         hits |= 1ULL << i;
2004                                         positions[i] = key_idx - 1;
2005                                         goto next_key;
2006                                 }
2007                                 sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
2008                         }
2009 next_key:
2010                         continue;
2011                 }
2012
2013                 /* all found, do not need to go through ext bkt */
2014                 if (hits == ((1ULL << num_keys) - 1)) {
2015                         if (hit_mask != NULL)
2016                                 *hit_mask = hits;
2017                         return;
2018                 }
2019                 /* need to check ext buckets for match */
2020                 if (h->ext_table_support) {
2021                         for (i = 0; i < num_keys; i++) {
2022                                 if ((hits & (1ULL << i)) != 0)
2023                                         continue;
2024                                 next_bkt = secondary_bkt[i]->next;
2025                                 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
2026                                         if (data != NULL)
2027                                                 ret = search_one_bucket_lf(h,
2028                                                         keys[i], sig[i],
2029                                                         &data[i], cur_bkt);
2030                                         else
2031                                                 ret = search_one_bucket_lf(h,
2032                                                                 keys[i], sig[i],
2033                                                                 NULL, cur_bkt);
2034                                         if (ret != -1) {
2035                                                 positions[i] = ret;
2036                                                 hits |= 1ULL << i;
2037                                                 break;
2038                                         }
2039                                 }
2040                         }
2041                 }
2042                 /* The loads of sig_current in compare_signatures
2043                  * should not move below the load from tbl_chng_cnt.
2044                  */
2045                 __atomic_thread_fence(__ATOMIC_ACQUIRE);
2046                 /* Re-read the table change counter to check if the
2047                  * table has changed during search. If yes, re-do
2048                  * the search.
2049                  * This load must not be hoisted above the search.
2050                  * The load-acquires of cnt_b and of the primary and
2051                  * secondary key indexes guarantee that it is not
2052                  * hoisted.
2053                  */
2054                 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
2055                                         __ATOMIC_ACQUIRE);
2056         } while (cnt_b != cnt_a);
2057
2058         if (hit_mask != NULL)
2059                 *hit_mask = hits;
2060 }
2061
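/* Software pipelining for the bulk hash computation: keys are prefetched
 * PREFETCH_OFFSET iterations ahead so the hash of key i overlaps the cache
 * fill of key i + PREFETCH_OFFSET, and the corresponding primary/secondary
 * buckets are prefetched as soon as their indexes are known.
 */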
2062 #define PREFETCH_OFFSET 4
2063 static inline void
2064 __bulk_lookup_prefetching_loop(const struct rte_hash *h,
2065         const void **keys, int32_t num_keys,
2066         uint16_t *sig,
2067         const struct rte_hash_bucket **primary_bkt,
2068         const struct rte_hash_bucket **secondary_bkt)
2069 {
2070         int32_t i;
2071         uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
2072         uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
2073         uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
2074
2075         /* Prefetch first keys */
2076         for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
2077                 rte_prefetch0(keys[i]);
2078
2079         /*
2080          * Prefetch rest of the keys, calculate primary and
2081          * secondary bucket and prefetch them
2082          */
2083         for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
2084                 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
2085
2086                 prim_hash[i] = rte_hash_hash(h, keys[i]);
2087
2088                 sig[i] = get_short_sig(prim_hash[i]);
2089                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
2090                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
2091
2092                 primary_bkt[i] = &h->buckets[prim_index[i]];
2093                 secondary_bkt[i] = &h->buckets[sec_index[i]];
2094
2095                 rte_prefetch0(primary_bkt[i]);
2096                 rte_prefetch0(secondary_bkt[i]);
2097         }
2098
2099         /* Calculate and prefetch rest of the buckets */
2100         for (; i < num_keys; i++) {
2101                 prim_hash[i] = rte_hash_hash(h, keys[i]);
2102
2103                 sig[i] = get_short_sig(prim_hash[i]);
2104                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
2105                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
2106
2107                 primary_bkt[i] = &h->buckets[prim_index[i]];
2108                 secondary_bkt[i] = &h->buckets[sec_index[i]];
2109
2110                 rte_prefetch0(primary_bkt[i]);
2111                 rte_prefetch0(secondary_bkt[i]);
2112         }
2113 }
2114
2115
2116 static inline void
2117 __rte_hash_lookup_bulk_l(const struct rte_hash *h, const void **keys,
2118                         int32_t num_keys, int32_t *positions,
2119                         uint64_t *hit_mask, void *data[])
2120 {
2121         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
2122         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2123         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2124
2125         __bulk_lookup_prefetching_loop(h, keys, num_keys, sig,
2126                 primary_bkt, secondary_bkt);
2127
2128         __bulk_lookup_l(h, keys, primary_bkt, secondary_bkt, sig, num_keys,
2129                 positions, hit_mask, data);
2130 }
2131
2132 static inline void
2133 __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
2134                         int32_t num_keys, int32_t *positions,
2135                         uint64_t *hit_mask, void *data[])
2136 {
2137         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
2138         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2139         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2140
2141         __bulk_lookup_prefetching_loop(h, keys, num_keys, sig,
2142                 primary_bkt, secondary_bkt);
2143
2144         __bulk_lookup_lf(h, keys, primary_bkt, secondary_bkt, sig, num_keys,
2145                 positions, hit_mask, data);
2146 }
2147
2148 static inline void
2149 __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
2150                         int32_t num_keys, int32_t *positions,
2151                         uint64_t *hit_mask, void *data[])
2152 {
2153         if (h->readwrite_concur_lf_support)
2154                 __rte_hash_lookup_bulk_lf(h, keys, num_keys, positions,
2155                                           hit_mask, data);
2156         else
2157                 __rte_hash_lookup_bulk_l(h, keys, num_keys, positions,
2158                                          hit_mask, data);
2159 }
2160
2161 int
2162 rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
2163                       uint32_t num_keys, int32_t *positions)
2164 {
2165         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
2166                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2167                         (positions == NULL)), -EINVAL);
2168
2169         __rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL);
2170         return 0;
2171 }
2172
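/* A minimal usage sketch for the bulk data lookup (illustrative; handle() is
 * a placeholder for application code, and keys[] holds num_keys pointers):
 *
 *   void *data[RTE_HASH_LOOKUP_BULK_MAX];
 *   uint64_t hit_mask = 0;
 *   unsigned int i;
 *
 *   if (rte_hash_lookup_bulk_data(h, keys, num_keys, &hit_mask, data) > 0)
 *           for (i = 0; i < num_keys; i++)
 *                   if (hit_mask & (1ULL << i))
 *                           handle(keys[i], data[i]);
 */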
2173 int
2174 rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
2175                       uint32_t num_keys, uint64_t *hit_mask, void *data[])
2176 {
2177         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
2178                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2179                         (hit_mask == NULL)), -EINVAL);
2180
2181         int32_t positions[num_keys];
2182
2183         __rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);
2184
2185         /* Return number of hits */
2186         return __builtin_popcountl(*hit_mask);
2187 }
2188
2189
2190 static inline void
2191 __rte_hash_lookup_with_hash_bulk_l(const struct rte_hash *h,
2192                         const void **keys, hash_sig_t *prim_hash,
2193                         int32_t num_keys, int32_t *positions,
2194                         uint64_t *hit_mask, void *data[])
2195 {
2196         int32_t i;
2197         uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
2198         uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
2199         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
2200         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2201         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2202
2203         /*
2204          * Prefetch keys, calculate primary and
2205          * secondary bucket and prefetch them
2206          */
2207         for (i = 0; i < num_keys; i++) {
2208                 rte_prefetch0(keys[i]);
2209
2210                 sig[i] = get_short_sig(prim_hash[i]);
2211                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
2212                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
2213
2214                 primary_bkt[i] = &h->buckets[prim_index[i]];
2215                 secondary_bkt[i] = &h->buckets[sec_index[i]];
2216
2217                 rte_prefetch0(primary_bkt[i]);
2218                 rte_prefetch0(secondary_bkt[i]);
2219         }
2220
2221         __bulk_lookup_l(h, keys, primary_bkt, secondary_bkt, sig, num_keys,
2222                 positions, hit_mask, data);
2223 }
2224
2225 static inline void
2226 __rte_hash_lookup_with_hash_bulk_lf(const struct rte_hash *h,
2227                         const void **keys, hash_sig_t *prim_hash,
2228                         int32_t num_keys, int32_t *positions,
2229                         uint64_t *hit_mask, void *data[])
2230 {
2231         int32_t i;
2232         uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
2233         uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
2234         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
2235         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2236         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2237
2238         /*
2239          * Prefetch keys, calculate primary and
2240          * secondary bucket and prefetch them
2241          */
2242         for (i = 0; i < num_keys; i++) {
2243                 rte_prefetch0(keys[i]);
2244
2245                 sig[i] = get_short_sig(prim_hash[i]);
2246                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
2247                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
2248
2249                 primary_bkt[i] = &h->buckets[prim_index[i]];
2250                 secondary_bkt[i] = &h->buckets[sec_index[i]];
2251
2252                 rte_prefetch0(primary_bkt[i]);
2253                 rte_prefetch0(secondary_bkt[i]);
2254         }
2255
2256         __bulk_lookup_lf(h, keys, primary_bkt, secondary_bkt, sig, num_keys,
2257                 positions, hit_mask, data);
2258 }
2259
2260 static inline void
2261 __rte_hash_lookup_with_hash_bulk(const struct rte_hash *h, const void **keys,
2262                         hash_sig_t *prim_hash, int32_t num_keys,
2263                         int32_t *positions, uint64_t *hit_mask, void *data[])
2264 {
2265         if (h->readwrite_concur_lf_support)
2266                 __rte_hash_lookup_with_hash_bulk_lf(h, keys, prim_hash,
2267                                 num_keys, positions, hit_mask, data);
2268         else
2269                 __rte_hash_lookup_with_hash_bulk_l(h, keys, prim_hash,
2270                                 num_keys, positions, hit_mask, data);
2271 }
2272
2273 int
2274 rte_hash_lookup_with_hash_bulk(const struct rte_hash *h, const void **keys,
2275                 hash_sig_t *sig, uint32_t num_keys, int32_t *positions)
2276 {
2277         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) ||
2278                         (sig == NULL) || (num_keys == 0) ||
2279                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2280                         (positions == NULL)), -EINVAL);
2281
2282         __rte_hash_lookup_with_hash_bulk(h, keys, sig, num_keys,
2283                 positions, NULL, NULL);
2284         return 0;
2285 }
2286
2287 int
2288 rte_hash_lookup_with_hash_bulk_data(const struct rte_hash *h,
2289                 const void **keys, hash_sig_t *sig,
2290                 uint32_t num_keys, uint64_t *hit_mask, void *data[])
2291 {
2292         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) ||
2293                         (sig == NULL) || (num_keys == 0) ||
2294                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2295                         (hit_mask == NULL)), -EINVAL);
2296
2297         int32_t positions[num_keys];
2298
2299         __rte_hash_lookup_with_hash_bulk(h, keys, sig, num_keys,
2300                         positions, hit_mask, data);
2301
2302         /* Return number of hits */
2303         return __builtin_popcountl(*hit_mask);
2304 }
2305
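/* A minimal iteration sketch (illustrative; handle() is a placeholder for
 * application code). The iterator must start at 0 and is advanced by the
 * call itself:
 *
 *   const void *key;
 *   void *data;
 *   uint32_t iter = 0;
 *
 *   while (rte_hash_iterate(h, &key, &data, &iter) >= 0)
 *           handle(key, data);
 */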
2306 int32_t
2307 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
2308 {
2309         uint32_t bucket_idx, idx, position;
2310         struct rte_hash_key *next_key;
2311
2312         RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
2313
2314         const uint32_t total_entries_main = h->num_buckets *
2315                                                         RTE_HASH_BUCKET_ENTRIES;
2316         const uint32_t total_entries = total_entries_main << 1;
2317
2318         /* Past the main-table buckets: continue in the ext table */
2319         if (*next >= total_entries_main)
2320                 goto extend_table;
2321
2322         /* Calculate bucket and index of current iterator */
2323         bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
2324         idx = *next % RTE_HASH_BUCKET_ENTRIES;
2325
2326         /* If current position is empty, go to the next one */
2327         while ((position = __atomic_load_n(&h->buckets[bucket_idx].key_idx[idx],
2328                                         __ATOMIC_ACQUIRE)) == EMPTY_SLOT) {
2329                 (*next)++;
2330                 /* End of table */
2331                 if (*next == total_entries_main)
2332                         goto extend_table;
2333                 bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
2334                 idx = *next % RTE_HASH_BUCKET_ENTRIES;
2335         }
2336
2337         __hash_rw_reader_lock(h);
2338         next_key = (struct rte_hash_key *) ((char *)h->key_store +
2339                                 position * h->key_entry_size);
2340         /* Return key and data */
2341         *key = next_key->key;
2342         *data = next_key->pdata;
2343
2344         __hash_rw_reader_unlock(h);
2345
2346         /* Increment iterator */
2347         (*next)++;
2348
2349         return position - 1;
2350
2351 /* Begin to iterate extendable buckets */
2352 extend_table:
2353         /* Past all buckets, or the ext table feature is not enabled */
2354         if (*next >= total_entries || !h->ext_table_support)
2355                 return -ENOENT;
2356
2357         bucket_idx = (*next - total_entries_main) / RTE_HASH_BUCKET_ENTRIES;
2358         idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
2359
2360         while ((position = h->buckets_ext[bucket_idx].key_idx[idx]) == EMPTY_SLOT) {
2361                 (*next)++;
2362                 if (*next == total_entries)
2363                         return -ENOENT;
2364                 bucket_idx = (*next - total_entries_main) /
2365                                                 RTE_HASH_BUCKET_ENTRIES;
2366                 idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
2367         }
2368         __hash_rw_reader_lock(h);
2369         next_key = (struct rte_hash_key *) ((char *)h->key_store +
2370                                 position * h->key_entry_size);
2371         /* Return key and data */
2372         *key = next_key->key;
2373         *data = next_key->pdata;
2374
2375         __hash_rw_reader_unlock(h);
2376
2377         /* Increment iterator */
2378         (*next)++;
2379         return position - 1;
2380 }