hash: check flags on creation for future proofing
[dpdk.git] / lib / librte_hash / rte_cuckoo_hash.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  * Copyright(c) 2018 Arm Limited
4  */
5
6 #include <string.h>
7 #include <stdint.h>
8 #include <errno.h>
9 #include <stdio.h>
10 #include <stdarg.h>
11 #include <sys/queue.h>
12
13 #include <rte_common.h>
14 #include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
15 #include <rte_log.h>
16 #include <rte_prefetch.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_malloc.h>
19 #include <rte_eal.h>
20 #include <rte_eal_memconfig.h>
21 #include <rte_per_lcore.h>
22 #include <rte_errno.h>
23 #include <rte_string_fns.h>
24 #include <rte_cpuflags.h>
25 #include <rte_rwlock.h>
26 #include <rte_spinlock.h>
27 #include <rte_ring_elem.h>
28 #include <rte_compat.h>
29 #include <rte_vect.h>
30 #include <rte_tailq.h>
31
32 #include "rte_hash.h"
33 #include "rte_cuckoo_hash.h"
34
/*
 * Every extra_flag bit this version understands. rte_hash_create() rejects
 * any bit that is set outside of this mask.
 */
#define RTE_HASH_EXTRA_FLAGS_MASK \
        (RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT \
         | RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD \
         | RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY \
         | RTE_HASH_EXTRA_FLAGS_EXT_TABLE \
         | RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL \
         | RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)
42
/*
 * Walk a chain of buckets: CURRENT_BKT starts at START_BUCKET and follows
 * the ->next links until it becomes NULL.
 *
 * Both arguments are parenthesised so that expression arguments (for
 * example a dereferenced pointer-to-pointer cursor) expand with the
 * intended precedence. CURRENT_BKT must be a modifiable lvalue and is
 * evaluated several times per iteration.
 */
#define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET)                            \
        for ((CURRENT_BKT) = (START_BUCKET);                                  \
                (CURRENT_BKT) != NULL;                                        \
                (CURRENT_BKT) = (CURRENT_BKT)->next)
47
/* List type used in this file to walk the tailq entries holding hash tables. */
TAILQ_HEAD(rte_hash_list, rte_tailq_entry);

/*
 * Tailq element named "RTE_HASH". Its head is cast to rte_hash_list by the
 * find/create/free functions below to look up or register hash tables.
 */
static struct rte_tailq_elem rte_hash_tailq = {
        .name = "RTE_HASH",
};
EAL_REGISTER_TAILQ(rte_hash_tailq)
54
55 struct rte_hash *
56 rte_hash_find_existing(const char *name)
57 {
58         struct rte_hash *h = NULL;
59         struct rte_tailq_entry *te;
60         struct rte_hash_list *hash_list;
61
62         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
63
64         rte_mcfg_tailq_read_lock();
65         TAILQ_FOREACH(te, hash_list, next) {
66                 h = (struct rte_hash *) te->data;
67                 if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
68                         break;
69         }
70         rte_mcfg_tailq_read_unlock();
71
72         if (te == NULL) {
73                 rte_errno = ENOENT;
74                 return NULL;
75         }
76         return h;
77 }
78
79 static inline struct rte_hash_bucket *
80 rte_hash_get_last_bkt(struct rte_hash_bucket *lst_bkt)
81 {
82         while (lst_bkt->next != NULL)
83                 lst_bkt = lst_bkt->next;
84         return lst_bkt;
85 }
86
87 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
88 {
89         h->cmp_jump_table_idx = KEY_CUSTOM;
90         h->rte_hash_custom_cmp_eq = func;
91 }
92
93 static inline int
94 rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
95 {
96         if (h->cmp_jump_table_idx == KEY_CUSTOM)
97                 return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len);
98         else
99                 return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
100 }
101
/*
 * The upper 16 bits of the hash are used as the signature value stored in
 * the table, and the lower bits select the primary bucket location.
 * XORing the primary bucket location with the signature gives the
 * secondary bucket location. This is the same scheme as proposed in
 * Bin Fan, et al's paper
 * "MemC3: Compact and Concurrent MemCache with Dumber Caching and
 * Smarter Hashing". The benefit of using XOR is that the alternative
 * bucket location can be derived from only the current bucket location
 * and the signature.
 */
113 static inline uint16_t
114 get_short_sig(const hash_sig_t hash)
115 {
116         return hash >> 16;
117 }
118
119 static inline uint32_t
120 get_prim_bucket_index(const struct rte_hash *h, const hash_sig_t hash)
121 {
122         return hash & h->bucket_bitmask;
123 }
124
125 static inline uint32_t
126 get_alt_bucket_index(const struct rte_hash *h,
127                         uint32_t cur_bkt_idx, uint16_t sig)
128 {
129         return (cur_bkt_idx ^ sig) & h->bucket_bitmask;
130 }
131
132 struct rte_hash *
133 rte_hash_create(const struct rte_hash_parameters *params)
134 {
135         struct rte_hash *h = NULL;
136         struct rte_tailq_entry *te = NULL;
137         struct rte_hash_list *hash_list;
138         struct rte_ring *r = NULL;
139         struct rte_ring *r_ext = NULL;
140         char hash_name[RTE_HASH_NAMESIZE];
141         void *k = NULL;
142         void *buckets = NULL;
143         void *buckets_ext = NULL;
144         char ring_name[RTE_RING_NAMESIZE];
145         char ext_ring_name[RTE_RING_NAMESIZE];
146         unsigned num_key_slots;
147         unsigned int hw_trans_mem_support = 0, use_local_cache = 0;
148         unsigned int ext_table_support = 0;
149         unsigned int readwrite_concur_support = 0;
150         unsigned int writer_takes_lock = 0;
151         unsigned int no_free_on_del = 0;
152         uint32_t *ext_bkt_to_free = NULL;
153         uint32_t *tbl_chng_cnt = NULL;
154         unsigned int readwrite_concur_lf_support = 0;
155         uint32_t i;
156
157         rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
158
159         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
160
161         if (params == NULL) {
162                 RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
163                 return NULL;
164         }
165
166         /* Check for valid parameters */
167         if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
168                         (params->entries < RTE_HASH_BUCKET_ENTRIES) ||
169                         (params->key_len == 0)) {
170                 rte_errno = EINVAL;
171                 RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
172                 return NULL;
173         }
174
175         if (params->extra_flag & ~RTE_HASH_EXTRA_FLAGS_MASK) {
176                 rte_errno = EINVAL;
177                 RTE_LOG(ERR, HASH, "rte_hash_create: unsupported extra flags\n");
178                 return NULL;
179         }
180
181         /* Validate correct usage of extra options */
182         if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) &&
183             (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)) {
184                 rte_errno = EINVAL;
185                 RTE_LOG(ERR, HASH, "rte_hash_create: choose rw concurrency or "
186                         "rw concurrency lock free\n");
187                 return NULL;
188         }
189
190         /* Check extra flags field to check extra options. */
191         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
192                 hw_trans_mem_support = 1;
193
194         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
195                 use_local_cache = 1;
196                 writer_takes_lock = 1;
197         }
198
199         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
200                 readwrite_concur_support = 1;
201                 writer_takes_lock = 1;
202         }
203
204         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
205                 ext_table_support = 1;
206
207         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)
208                 no_free_on_del = 1;
209
210         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {
211                 readwrite_concur_lf_support = 1;
212                 /* Enable not freeing internal memory/index on delete */
213                 no_free_on_del = 1;
214         }
215
216         /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
217         if (use_local_cache)
218                 /*
219                  * Increase number of slots by total number of indices
220                  * that can be stored in the lcore caches
221                  * except for the first cache
222                  */
223                 num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
224                                         (LCORE_CACHE_SIZE - 1) + 1;
225         else
226                 num_key_slots = params->entries + 1;
227
228         snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
229         /* Create ring (Dummy slot index is not enqueued) */
230         r = rte_ring_create_elem(ring_name, sizeof(uint32_t),
231                         rte_align32pow2(num_key_slots), params->socket_id, 0);
232         if (r == NULL) {
233                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
234                 goto err;
235         }
236
237         const uint32_t num_buckets = rte_align32pow2(params->entries) /
238                                                 RTE_HASH_BUCKET_ENTRIES;
239
240         /* Create ring for extendable buckets. */
241         if (ext_table_support) {
242                 snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
243                                                                 params->name);
244                 r_ext = rte_ring_create_elem(ext_ring_name, sizeof(uint32_t),
245                                 rte_align32pow2(num_buckets + 1),
246                                 params->socket_id, 0);
247
248                 if (r_ext == NULL) {
249                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
250                                                                 "failed\n");
251                         goto err;
252                 }
253         }
254
255         snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
256
257         rte_mcfg_tailq_write_lock();
258
259         /* guarantee there's no existing: this is normally already checked
260          * by ring creation above */
261         TAILQ_FOREACH(te, hash_list, next) {
262                 h = (struct rte_hash *) te->data;
263                 if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
264                         break;
265         }
266         h = NULL;
267         if (te != NULL) {
268                 rte_errno = EEXIST;
269                 te = NULL;
270                 goto err_unlock;
271         }
272
273         te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
274         if (te == NULL) {
275                 RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
276                 goto err_unlock;
277         }
278
279         h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
280                                         RTE_CACHE_LINE_SIZE, params->socket_id);
281
282         if (h == NULL) {
283                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
284                 goto err_unlock;
285         }
286
287         buckets = rte_zmalloc_socket(NULL,
288                                 num_buckets * sizeof(struct rte_hash_bucket),
289                                 RTE_CACHE_LINE_SIZE, params->socket_id);
290
291         if (buckets == NULL) {
292                 RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
293                 goto err_unlock;
294         }
295
296         /* Allocate same number of extendable buckets */
297         if (ext_table_support) {
298                 buckets_ext = rte_zmalloc_socket(NULL,
299                                 num_buckets * sizeof(struct rte_hash_bucket),
300                                 RTE_CACHE_LINE_SIZE, params->socket_id);
301                 if (buckets_ext == NULL) {
302                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
303                                                         "failed\n");
304                         goto err_unlock;
305                 }
306                 /* Populate ext bkt ring. We reserve 0 similar to the
307                  * key-data slot, just in case in future we want to
308                  * use bucket index for the linked list and 0 means NULL
309                  * for next bucket
310                  */
311                 for (i = 1; i <= num_buckets; i++)
312                         rte_ring_sp_enqueue_elem(r_ext, &i, sizeof(uint32_t));
313
314                 if (readwrite_concur_lf_support) {
315                         ext_bkt_to_free = rte_zmalloc(NULL, sizeof(uint32_t) *
316                                                                 num_key_slots, 0);
317                         if (ext_bkt_to_free == NULL) {
318                                 RTE_LOG(ERR, HASH, "ext bkt to free memory allocation "
319                                                                 "failed\n");
320                                 goto err_unlock;
321                         }
322                 }
323         }
324
325         const uint32_t key_entry_size =
326                 RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,
327                           KEY_ALIGNMENT);
328         const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
329
330         k = rte_zmalloc_socket(NULL, key_tbl_size,
331                         RTE_CACHE_LINE_SIZE, params->socket_id);
332
333         if (k == NULL) {
334                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
335                 goto err_unlock;
336         }
337
338         tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t),
339                         RTE_CACHE_LINE_SIZE, params->socket_id);
340
341         if (tbl_chng_cnt == NULL) {
342                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
343                 goto err_unlock;
344         }
345
346 /*
347  * If x86 architecture is used, select appropriate compare function,
348  * which may use x86 intrinsics, otherwise use memcmp
349  */
350 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
351         /* Select function to compare keys */
352         switch (params->key_len) {
353         case 16:
354                 h->cmp_jump_table_idx = KEY_16_BYTES;
355                 break;
356         case 32:
357                 h->cmp_jump_table_idx = KEY_32_BYTES;
358                 break;
359         case 48:
360                 h->cmp_jump_table_idx = KEY_48_BYTES;
361                 break;
362         case 64:
363                 h->cmp_jump_table_idx = KEY_64_BYTES;
364                 break;
365         case 80:
366                 h->cmp_jump_table_idx = KEY_80_BYTES;
367                 break;
368         case 96:
369                 h->cmp_jump_table_idx = KEY_96_BYTES;
370                 break;
371         case 112:
372                 h->cmp_jump_table_idx = KEY_112_BYTES;
373                 break;
374         case 128:
375                 h->cmp_jump_table_idx = KEY_128_BYTES;
376                 break;
377         default:
378                 /* If key is not multiple of 16, use generic memcmp */
379                 h->cmp_jump_table_idx = KEY_OTHER_BYTES;
380         }
381 #else
382         h->cmp_jump_table_idx = KEY_OTHER_BYTES;
383 #endif
384
385         if (use_local_cache) {
386                 h->local_free_slots = rte_zmalloc_socket(NULL,
387                                 sizeof(struct lcore_cache) * RTE_MAX_LCORE,
388                                 RTE_CACHE_LINE_SIZE, params->socket_id);
389         }
390
391         /* Default hash function */
392 #if defined(RTE_ARCH_X86)
393         default_hash_func = (rte_hash_function)rte_hash_crc;
394 #elif defined(RTE_ARCH_ARM64)
395         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
396                 default_hash_func = (rte_hash_function)rte_hash_crc;
397 #endif
398         /* Setup hash context */
399         strlcpy(h->name, params->name, sizeof(h->name));
400         h->entries = params->entries;
401         h->key_len = params->key_len;
402         h->key_entry_size = key_entry_size;
403         h->hash_func_init_val = params->hash_func_init_val;
404
405         h->num_buckets = num_buckets;
406         h->bucket_bitmask = h->num_buckets - 1;
407         h->buckets = buckets;
408         h->buckets_ext = buckets_ext;
409         h->free_ext_bkts = r_ext;
410         h->hash_func = (params->hash_func == NULL) ?
411                 default_hash_func : params->hash_func;
412         h->key_store = k;
413         h->free_slots = r;
414         h->ext_bkt_to_free = ext_bkt_to_free;
415         h->tbl_chng_cnt = tbl_chng_cnt;
416         *h->tbl_chng_cnt = 0;
417         h->hw_trans_mem_support = hw_trans_mem_support;
418         h->use_local_cache = use_local_cache;
419         h->readwrite_concur_support = readwrite_concur_support;
420         h->ext_table_support = ext_table_support;
421         h->writer_takes_lock = writer_takes_lock;
422         h->no_free_on_del = no_free_on_del;
423         h->readwrite_concur_lf_support = readwrite_concur_lf_support;
424
425 #if defined(RTE_ARCH_X86)
426         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
427                 h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
428         else
429 #elif defined(RTE_ARCH_ARM64)
430         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
431                 h->sig_cmp_fn = RTE_HASH_COMPARE_NEON;
432         else
433 #endif
434                 h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
435
436         /* Writer threads need to take the lock when:
437          * 1) RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is enabled OR
438          * 2) RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD is enabled
439          */
440         if (h->writer_takes_lock) {
441                 h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
442                                                 RTE_CACHE_LINE_SIZE);
443                 if (h->readwrite_lock == NULL)
444                         goto err_unlock;
445
446                 rte_rwlock_init(h->readwrite_lock);
447         }
448
449         /* Populate free slots ring. Entry zero is reserved for key misses. */
450         for (i = 1; i < num_key_slots; i++)
451                 rte_ring_sp_enqueue_elem(r, &i, sizeof(uint32_t));
452
453         te->data = (void *) h;
454         TAILQ_INSERT_TAIL(hash_list, te, next);
455         rte_mcfg_tailq_write_unlock();
456
457         return h;
458 err_unlock:
459         rte_mcfg_tailq_write_unlock();
460 err:
461         rte_ring_free(r);
462         rte_ring_free(r_ext);
463         rte_free(te);
464         rte_free(h);
465         rte_free(buckets);
466         rte_free(buckets_ext);
467         rte_free(k);
468         rte_free(tbl_chng_cnt);
469         rte_free(ext_bkt_to_free);
470         return NULL;
471 }
472
473 void
474 rte_hash_free(struct rte_hash *h)
475 {
476         struct rte_tailq_entry *te;
477         struct rte_hash_list *hash_list;
478
479         if (h == NULL)
480                 return;
481
482         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
483
484         rte_mcfg_tailq_write_lock();
485
486         /* find out tailq entry */
487         TAILQ_FOREACH(te, hash_list, next) {
488                 if (te->data == (void *) h)
489                         break;
490         }
491
492         if (te == NULL) {
493                 rte_mcfg_tailq_write_unlock();
494                 return;
495         }
496
497         TAILQ_REMOVE(hash_list, te, next);
498
499         rte_mcfg_tailq_write_unlock();
500
501         if (h->use_local_cache)
502                 rte_free(h->local_free_slots);
503         if (h->writer_takes_lock)
504                 rte_free(h->readwrite_lock);
505         rte_ring_free(h->free_slots);
506         rte_ring_free(h->free_ext_bkts);
507         rte_free(h->key_store);
508         rte_free(h->buckets);
509         rte_free(h->buckets_ext);
510         rte_free(h->tbl_chng_cnt);
511         rte_free(h->ext_bkt_to_free);
512         rte_free(h);
513         rte_free(te);
514 }
515
516 hash_sig_t
517 rte_hash_hash(const struct rte_hash *h, const void *key)
518 {
519         /* calc hash result by key */
520         return h->hash_func(key, h->key_len, h->hash_func_init_val);
521 }
522
523 int32_t
524 rte_hash_max_key_id(const struct rte_hash *h)
525 {
526         RETURN_IF_TRUE((h == NULL), -EINVAL);
527         if (h->use_local_cache)
528                 /*
529                  * Increase number of slots by total number of indices
530                  * that can be stored in the lcore caches
531                  */
532                 return (h->entries + ((RTE_MAX_LCORE - 1) *
533                                         (LCORE_CACHE_SIZE - 1)));
534         else
535                 return h->entries;
536 }
537
538 int32_t
539 rte_hash_count(const struct rte_hash *h)
540 {
541         uint32_t tot_ring_cnt, cached_cnt = 0;
542         uint32_t i, ret;
543
544         if (h == NULL)
545                 return -EINVAL;
546
547         if (h->use_local_cache) {
548                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
549                                         (LCORE_CACHE_SIZE - 1);
550                 for (i = 0; i < RTE_MAX_LCORE; i++)
551                         cached_cnt += h->local_free_slots[i].len;
552
553                 ret = tot_ring_cnt - rte_ring_count(h->free_slots) -
554                                                                 cached_cnt;
555         } else {
556                 tot_ring_cnt = h->entries;
557                 ret = tot_ring_cnt - rte_ring_count(h->free_slots);
558         }
559         return ret;
560 }
561
562 /* Read write locks implemented using rte_rwlock */
563 static inline void
564 __hash_rw_writer_lock(const struct rte_hash *h)
565 {
566         if (h->writer_takes_lock && h->hw_trans_mem_support)
567                 rte_rwlock_write_lock_tm(h->readwrite_lock);
568         else if (h->writer_takes_lock)
569                 rte_rwlock_write_lock(h->readwrite_lock);
570 }
571
572 static inline void
573 __hash_rw_reader_lock(const struct rte_hash *h)
574 {
575         if (h->readwrite_concur_support && h->hw_trans_mem_support)
576                 rte_rwlock_read_lock_tm(h->readwrite_lock);
577         else if (h->readwrite_concur_support)
578                 rte_rwlock_read_lock(h->readwrite_lock);
579 }
580
581 static inline void
582 __hash_rw_writer_unlock(const struct rte_hash *h)
583 {
584         if (h->writer_takes_lock && h->hw_trans_mem_support)
585                 rte_rwlock_write_unlock_tm(h->readwrite_lock);
586         else if (h->writer_takes_lock)
587                 rte_rwlock_write_unlock(h->readwrite_lock);
588 }
589
590 static inline void
591 __hash_rw_reader_unlock(const struct rte_hash *h)
592 {
593         if (h->readwrite_concur_support && h->hw_trans_mem_support)
594                 rte_rwlock_read_unlock_tm(h->readwrite_lock);
595         else if (h->readwrite_concur_support)
596                 rte_rwlock_read_unlock(h->readwrite_lock);
597 }
598
599 void
600 rte_hash_reset(struct rte_hash *h)
601 {
602         uint32_t tot_ring_cnt, i;
603
604         if (h == NULL)
605                 return;
606
607         __hash_rw_writer_lock(h);
608         memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
609         memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
610         *h->tbl_chng_cnt = 0;
611
612         /* reset the free ring */
613         rte_ring_reset(h->free_slots);
614
615         /* flush free extendable bucket ring and memory */
616         if (h->ext_table_support) {
617                 memset(h->buckets_ext, 0, h->num_buckets *
618                                                 sizeof(struct rte_hash_bucket));
619                 rte_ring_reset(h->free_ext_bkts);
620         }
621
622         /* Repopulate the free slots ring. Entry zero is reserved for key misses */
623         if (h->use_local_cache)
624                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
625                                         (LCORE_CACHE_SIZE - 1);
626         else
627                 tot_ring_cnt = h->entries;
628
629         for (i = 1; i < tot_ring_cnt + 1; i++)
630                 rte_ring_sp_enqueue_elem(h->free_slots, &i, sizeof(uint32_t));
631
632         /* Repopulate the free ext bkt ring. */
633         if (h->ext_table_support) {
634                 for (i = 1; i <= h->num_buckets; i++)
635                         rte_ring_sp_enqueue_elem(h->free_ext_bkts, &i,
636                                                         sizeof(uint32_t));
637         }
638
639         if (h->use_local_cache) {
640                 /* Reset local caches per lcore */
641                 for (i = 0; i < RTE_MAX_LCORE; i++)
642                         h->local_free_slots[i].len = 0;
643         }
644         __hash_rw_writer_unlock(h);
645 }
646
647 /*
648  * Function called to enqueue back an index in the cache/ring,
649  * as slot has not being used and it can be used in the
650  * next addition attempt.
651  */
652 static inline void
653 enqueue_slot_back(const struct rte_hash *h,
654                 struct lcore_cache *cached_free_slots,
655                 uint32_t slot_id)
656 {
657         if (h->use_local_cache) {
658                 cached_free_slots->objs[cached_free_slots->len] = slot_id;
659                 cached_free_slots->len++;
660         } else
661                 rte_ring_sp_enqueue_elem(h->free_slots, &slot_id,
662                                                 sizeof(uint32_t));
663 }
664
665 /* Search a key from bucket and update its data.
666  * Writer holds the lock before calling this.
667  */
668 static inline int32_t
669 search_and_update(const struct rte_hash *h, void *data, const void *key,
670         struct rte_hash_bucket *bkt, uint16_t sig)
671 {
672         int i;
673         struct rte_hash_key *k, *keys = h->key_store;
674
675         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
676                 if (bkt->sig_current[i] == sig) {
677                         k = (struct rte_hash_key *) ((char *)keys +
678                                         bkt->key_idx[i] * h->key_entry_size);
679                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
680                                 /* The store to application data at *data
681                                  * should not leak after the store to pdata
682                                  * in the key store. i.e. pdata is the guard
683                                  * variable. Release the application data
684                                  * to the readers.
685                                  */
686                                 __atomic_store_n(&k->pdata,
687                                         data,
688                                         __ATOMIC_RELEASE);
689                                 /*
690                                  * Return index where key is stored,
691                                  * subtracting the first dummy index
692                                  */
693                                 return bkt->key_idx[i] - 1;
694                         }
695                 }
696         }
697         return -1;
698 }
699
700 /* Only tries to insert at one bucket (@prim_bkt) without trying to push
701  * buckets around.
702  * return 1 if matching existing key, return 0 if succeeds, return -1 for no
703  * empty entry.
704  */
705 static inline int32_t
706 rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
707                 struct rte_hash_bucket *prim_bkt,
708                 struct rte_hash_bucket *sec_bkt,
709                 const struct rte_hash_key *key, void *data,
710                 uint16_t sig, uint32_t new_idx,
711                 int32_t *ret_val)
712 {
713         unsigned int i;
714         struct rte_hash_bucket *cur_bkt;
715         int32_t ret;
716
717         __hash_rw_writer_lock(h);
718         /* Check if key was inserted after last check but before this
719          * protected region in case of inserting duplicated keys.
720          */
721         ret = search_and_update(h, data, key, prim_bkt, sig);
722         if (ret != -1) {
723                 __hash_rw_writer_unlock(h);
724                 *ret_val = ret;
725                 return 1;
726         }
727
728         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
729                 ret = search_and_update(h, data, key, cur_bkt, sig);
730                 if (ret != -1) {
731                         __hash_rw_writer_unlock(h);
732                         *ret_val = ret;
733                         return 1;
734                 }
735         }
736
737         /* Insert new entry if there is room in the primary
738          * bucket.
739          */
740         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
741                 /* Check if slot is available */
742                 if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
743                         prim_bkt->sig_current[i] = sig;
744                         /* Store to signature and key should not
745                          * leak after the store to key_idx. i.e.
746                          * key_idx is the guard variable for signature
747                          * and key.
748                          */
749                         __atomic_store_n(&prim_bkt->key_idx[i],
750                                          new_idx,
751                                          __ATOMIC_RELEASE);
752                         break;
753                 }
754         }
755         __hash_rw_writer_unlock(h);
756
757         if (i != RTE_HASH_BUCKET_ENTRIES)
758                 return 0;
759
760         /* no empty entry */
761         return -1;
762 }
763
764 /* Shift buckets along provided cuckoo_path (@leaf and @leaf_slot) and fill
765  * the path head with new entry (sig, alt_hash, new_idx)
766  * return 1 if matched key found, return -1 if cuckoo path invalided and fail,
767  * return 0 if succeeds.
768  */
static inline int
rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
                        struct rte_hash_bucket *bkt,
                        struct rte_hash_bucket *alt_bkt,
                        const struct rte_hash_key *key, void *data,
                        struct queue_node *leaf, uint32_t leaf_slot,
                        uint16_t sig, uint32_t new_idx,
                        int32_t *ret_val)
{
        /*
         * Parameters as used below:
         *   bkt, alt_bkt    - buckets re-checked for an existing copy of
         *                     key once the writer lock is held (alt_bkt
         *                     together with every bucket chained to it).
         *   leaf, leaf_slot - end of the path: leaf->bkt is the bucket and
         *                     leaf_slot the slot expected to be empty.
         *   sig, new_idx    - signature and key-store index written into
         *                     the slot freed at the head of the path.
         *   ret_val         - written only when 1 is returned; receives the
         *                     value returned by search_and_update().
         */
        uint32_t prev_alt_bkt_idx;
        struct rte_hash_bucket *cur_bkt;
        /* The walk starts at the leaf and follows ->prev towards the head. */
        struct queue_node *prev_node, *curr_node = leaf;
        struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
        uint32_t prev_slot, curr_slot = leaf_slot;
        int32_t ret;

        /* Everything below, up to each return, runs under the writer lock. */
        __hash_rw_writer_lock(h);

        /* In case empty slot was gone before entering protected region */
        if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) {
                __hash_rw_writer_unlock(h);
                return -1;
        }

        /* Check if key was inserted after last check but before this
         * protected region.
         */
        ret = search_and_update(h, data, key, bkt, sig);
        if (ret != -1) {
                __hash_rw_writer_unlock(h);
                *ret_val = ret;
                return 1;
        }

        /* Same duplicate check over alt_bkt and every bucket linked to it. */
        FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
                ret = search_and_update(h, data, key, cur_bkt, sig);
                if (ret != -1) {
                        __hash_rw_writer_unlock(h);
                        *ret_val = ret;
                        return 1;
                }
        }

        /* Walk the path backwards. At each step the entry in the parent
         * node (prev_bkt/prev_slot) is copied into the currently free
         * slot (curr_bkt/curr_slot); the parent's slot then becomes the
         * free slot for the next step. The loop ends at the node that has
         * no parent.
         */
        while (likely(curr_node->prev != NULL)) {
                prev_node = curr_node->prev;
                prev_bkt = prev_node->bkt;
                prev_slot = curr_node->prev_slot;

                /* Recompute where the parent entry may be moved to, based
                 * on the signature it holds right now.
                 */
                prev_alt_bkt_idx = get_alt_bucket_index(h,
                                        prev_node->cur_bkt_idx,
                                        prev_bkt->sig_current[prev_slot]);

                /* If that is no longer curr_bkt, the recorded path is stale
                 * and the move is abandoned.
                 */
                if (unlikely(&h->buckets[prev_alt_bkt_idx]
                                != curr_bkt)) {
                        /* revert it to empty, otherwise duplicated keys */
                        __atomic_store_n(&curr_bkt->key_idx[curr_slot],
                                EMPTY_SLOT,
                                __ATOMIC_RELEASE);
                        __hash_rw_writer_unlock(h);
                        return -1;
                }

                if (h->readwrite_concur_lf_support) {
                        /* Inform the previous move. The current move need
                         * not be informed now as the current bucket entry
                         * is present in both primary and secondary.
                         * Since there is one writer, load acquires on
                         * tbl_chng_cnt are not required.
                         */
                        __atomic_store_n(h->tbl_chng_cnt,
                                         *h->tbl_chng_cnt + 1,
                                         __ATOMIC_RELEASE);
                        /* The store to sig_current should not
                         * move above the store to tbl_chng_cnt.
                         */
                        __atomic_thread_fence(__ATOMIC_RELEASE);
                }

                /* Need to swap current/alt sig to allow later
                 * Cuckoo insert to move elements back to its
                 * primary bucket if available
                 */
                curr_bkt->sig_current[curr_slot] =
                        prev_bkt->sig_current[prev_slot];
                /* Release the updated bucket entry */
                __atomic_store_n(&curr_bkt->key_idx[curr_slot],
                        prev_bkt->key_idx[prev_slot],
                        __ATOMIC_RELEASE);

                /* Step one node towards the head of the path. */
                curr_slot = prev_slot;
                curr_node = prev_node;
                curr_bkt = curr_node->bkt;
        }

        if (h->readwrite_concur_lf_support) {
                /* Inform the previous move. The current move need
                 * not be informed now as the current bucket entry
                 * is present in both primary and secondary.
                 * Since there is one writer, load acquires on
                 * tbl_chng_cnt are not required.
                 */
                __atomic_store_n(h->tbl_chng_cnt,
                                 *h->tbl_chng_cnt + 1,
                                 __ATOMIC_RELEASE);
                /* The store to sig_current should not
                 * move above the store to tbl_chng_cnt.
                 */
                __atomic_thread_fence(__ATOMIC_RELEASE);
        }

        /* curr_bkt/curr_slot now designate the slot at the head of the
         * path; the new entry is written there.
         */
        curr_bkt->sig_current[curr_slot] = sig;
        /* Release the new bucket entry */
        __atomic_store_n(&curr_bkt->key_idx[curr_slot],
                         new_idx,
                         __ATOMIC_RELEASE);

        __hash_rw_writer_unlock(h);

        return 0;

}
890
891 /*
892  * Make space for new key, using bfs Cuckoo Search and Multi-Writer safe
893  * Cuckoo
894  */
895 static inline int
896 rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
897                         struct rte_hash_bucket *bkt,
898                         struct rte_hash_bucket *sec_bkt,
899                         const struct rte_hash_key *key, void *data,
900                         uint16_t sig, uint32_t bucket_idx,
901                         uint32_t new_idx, int32_t *ret_val)
902 {
903         unsigned int i;
904         struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
905         struct queue_node *tail, *head;
906         struct rte_hash_bucket *curr_bkt, *alt_bkt;
907         uint32_t cur_idx, alt_idx;
908
909         tail = queue;
910         head = queue + 1;
911         tail->bkt = bkt;
912         tail->prev = NULL;
913         tail->prev_slot = -1;
914         tail->cur_bkt_idx = bucket_idx;
915
916         /* Cuckoo bfs Search */
917         while (likely(tail != head && head <
918                                         queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
919                                         RTE_HASH_BUCKET_ENTRIES)) {
920                 curr_bkt = tail->bkt;
921                 cur_idx = tail->cur_bkt_idx;
922                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
923                         if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
924                                 int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
925                                                 bkt, sec_bkt, key, data,
926                                                 tail, i, sig,
927                                                 new_idx, ret_val);
928                                 if (likely(ret != -1))
929                                         return ret;
930                         }
931
932                         /* Enqueue new node and keep prev node info */
933                         alt_idx = get_alt_bucket_index(h, cur_idx,
934                                                 curr_bkt->sig_current[i]);
935                         alt_bkt = &(h->buckets[alt_idx]);
936                         head->bkt = alt_bkt;
937                         head->cur_bkt_idx = alt_idx;
938                         head->prev = tail;
939                         head->prev_slot = i;
940                         head++;
941                 }
942                 tail++;
943         }
944
945         return -ENOSPC;
946 }
947
948 static inline int32_t
949 __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
950                                                 hash_sig_t sig, void *data)
951 {
952         uint16_t short_sig;
953         uint32_t prim_bucket_idx, sec_bucket_idx;
954         struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
955         struct rte_hash_key *new_k, *keys = h->key_store;
956         uint32_t ext_bkt_id = 0;
957         uint32_t slot_id;
958         int ret;
959         unsigned n_slots;
960         unsigned lcore_id;
961         unsigned int i;
962         struct lcore_cache *cached_free_slots = NULL;
963         int32_t ret_val;
964         struct rte_hash_bucket *last;
965
966         short_sig = get_short_sig(sig);
967         prim_bucket_idx = get_prim_bucket_index(h, sig);
968         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
969         prim_bkt = &h->buckets[prim_bucket_idx];
970         sec_bkt = &h->buckets[sec_bucket_idx];
971         rte_prefetch0(prim_bkt);
972         rte_prefetch0(sec_bkt);
973
974         /* Check if key is already inserted in primary location */
975         __hash_rw_writer_lock(h);
976         ret = search_and_update(h, data, key, prim_bkt, short_sig);
977         if (ret != -1) {
978                 __hash_rw_writer_unlock(h);
979                 return ret;
980         }
981
982         /* Check if key is already inserted in secondary location */
983         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
984                 ret = search_and_update(h, data, key, cur_bkt, short_sig);
985                 if (ret != -1) {
986                         __hash_rw_writer_unlock(h);
987                         return ret;
988                 }
989         }
990
991         __hash_rw_writer_unlock(h);
992
993         /* Did not find a match, so get a new slot for storing the new key */
994         if (h->use_local_cache) {
995                 lcore_id = rte_lcore_id();
996                 cached_free_slots = &h->local_free_slots[lcore_id];
997                 /* Try to get a free slot from the local cache */
998                 if (cached_free_slots->len == 0) {
999                         /* Need to get another burst of free slots from global ring */
1000                         n_slots = rte_ring_mc_dequeue_burst_elem(h->free_slots,
1001                                         cached_free_slots->objs,
1002                                         sizeof(uint32_t),
1003                                         LCORE_CACHE_SIZE, NULL);
1004                         if (n_slots == 0) {
1005                                 return -ENOSPC;
1006                         }
1007
1008                         cached_free_slots->len += n_slots;
1009                 }
1010
1011                 /* Get a free slot from the local cache */
1012                 cached_free_slots->len--;
1013                 slot_id = cached_free_slots->objs[cached_free_slots->len];
1014         } else {
1015                 if (rte_ring_sc_dequeue_elem(h->free_slots, &slot_id,
1016                                                 sizeof(uint32_t)) != 0) {
1017                         return -ENOSPC;
1018                 }
1019         }
1020
1021         new_k = RTE_PTR_ADD(keys, slot_id * h->key_entry_size);
1022         /* The store to application data (by the application) at *data should
1023          * not leak after the store of pdata in the key store. i.e. pdata is
1024          * the guard variable. Release the application data to the readers.
1025          */
1026         __atomic_store_n(&new_k->pdata,
1027                 data,
1028                 __ATOMIC_RELEASE);
1029         /* Copy key */
1030         memcpy(new_k->key, key, h->key_len);
1031
1032         /* Find an empty slot and insert */
1033         ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
1034                                         short_sig, slot_id, &ret_val);
1035         if (ret == 0)
1036                 return slot_id - 1;
1037         else if (ret == 1) {
1038                 enqueue_slot_back(h, cached_free_slots, slot_id);
1039                 return ret_val;
1040         }
1041
1042         /* Primary bucket full, need to make space for new entry */
1043         ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
1044                                 short_sig, prim_bucket_idx, slot_id, &ret_val);
1045         if (ret == 0)
1046                 return slot_id - 1;
1047         else if (ret == 1) {
1048                 enqueue_slot_back(h, cached_free_slots, slot_id);
1049                 return ret_val;
1050         }
1051
1052         /* Also search secondary bucket to get better occupancy */
1053         ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
1054                                 short_sig, sec_bucket_idx, slot_id, &ret_val);
1055
1056         if (ret == 0)
1057                 return slot_id - 1;
1058         else if (ret == 1) {
1059                 enqueue_slot_back(h, cached_free_slots, slot_id);
1060                 return ret_val;
1061         }
1062
1063         /* if ext table not enabled, we failed the insertion */
1064         if (!h->ext_table_support) {
1065                 enqueue_slot_back(h, cached_free_slots, slot_id);
1066                 return ret;
1067         }
1068
1069         /* Now we need to go through the extendable bucket. Protection is needed
1070          * to protect all extendable bucket processes.
1071          */
1072         __hash_rw_writer_lock(h);
1073         /* We check for duplicates again since could be inserted before the lock */
1074         ret = search_and_update(h, data, key, prim_bkt, short_sig);
1075         if (ret != -1) {
1076                 enqueue_slot_back(h, cached_free_slots, slot_id);
1077                 goto failure;
1078         }
1079
1080         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1081                 ret = search_and_update(h, data, key, cur_bkt, short_sig);
1082                 if (ret != -1) {
1083                         enqueue_slot_back(h, cached_free_slots, slot_id);
1084                         goto failure;
1085                 }
1086         }
1087
1088         /* Search sec and ext buckets to find an empty entry to insert. */
1089         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1090                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1091                         /* Check if slot is available */
1092                         if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
1093                                 cur_bkt->sig_current[i] = short_sig;
1094                                 /* Store to signature and key should not
1095                                  * leak after the store to key_idx. i.e.
1096                                  * key_idx is the guard variable for signature
1097                                  * and key.
1098                                  */
1099                                 __atomic_store_n(&cur_bkt->key_idx[i],
1100                                                  slot_id,
1101                                                  __ATOMIC_RELEASE);
1102                                 __hash_rw_writer_unlock(h);
1103                                 return slot_id - 1;
1104                         }
1105                 }
1106         }
1107
1108         /* Failed to get an empty entry from extendable buckets. Link a new
1109          * extendable bucket. We first get a free bucket from ring.
1110          */
1111         if (rte_ring_sc_dequeue_elem(h->free_ext_bkts, &ext_bkt_id,
1112                                                 sizeof(uint32_t)) != 0 ||
1113                                         ext_bkt_id == 0) {
1114                 ret = -ENOSPC;
1115                 goto failure;
1116         }
1117
1118         /* Use the first location of the new bucket */
1119         (h->buckets_ext[ext_bkt_id - 1]).sig_current[0] = short_sig;
1120         /* Store to signature and key should not leak after
1121          * the store to key_idx. i.e. key_idx is the guard variable
1122          * for signature and key.
1123          */
1124         __atomic_store_n(&(h->buckets_ext[ext_bkt_id - 1]).key_idx[0],
1125                          slot_id,
1126                          __ATOMIC_RELEASE);
1127         /* Link the new bucket to sec bucket linked list */
1128         last = rte_hash_get_last_bkt(sec_bkt);
1129         last->next = &h->buckets_ext[ext_bkt_id - 1];
1130         __hash_rw_writer_unlock(h);
1131         return slot_id - 1;
1132
1133 failure:
1134         __hash_rw_writer_unlock(h);
1135         return ret;
1136
1137 }
1138
1139 int32_t
1140 rte_hash_add_key_with_hash(const struct rte_hash *h,
1141                         const void *key, hash_sig_t sig)
1142 {
1143         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1144         return __rte_hash_add_key_with_hash(h, key, sig, 0);
1145 }
1146
1147 int32_t
1148 rte_hash_add_key(const struct rte_hash *h, const void *key)
1149 {
1150         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1151         return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0);
1152 }
1153
1154 int
1155 rte_hash_add_key_with_hash_data(const struct rte_hash *h,
1156                         const void *key, hash_sig_t sig, void *data)
1157 {
1158         int ret;
1159
1160         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1161         ret = __rte_hash_add_key_with_hash(h, key, sig, data);
1162         if (ret >= 0)
1163                 return 0;
1164         else
1165                 return ret;
1166 }
1167
1168 int
1169 rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
1170 {
1171         int ret;
1172
1173         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1174
1175         ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data);
1176         if (ret >= 0)
1177                 return 0;
1178         else
1179                 return ret;
1180 }
1181
1182 /* Search one bucket to find the match key - uses rw lock */
1183 static inline int32_t
1184 search_one_bucket_l(const struct rte_hash *h, const void *key,
1185                 uint16_t sig, void **data,
1186                 const struct rte_hash_bucket *bkt)
1187 {
1188         int i;
1189         struct rte_hash_key *k, *keys = h->key_store;
1190
1191         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1192                 if (bkt->sig_current[i] == sig &&
1193                                 bkt->key_idx[i] != EMPTY_SLOT) {
1194                         k = (struct rte_hash_key *) ((char *)keys +
1195                                         bkt->key_idx[i] * h->key_entry_size);
1196
1197                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1198                                 if (data != NULL)
1199                                         *data = k->pdata;
1200                                 /*
1201                                  * Return index where key is stored,
1202                                  * subtracting the first dummy index
1203                                  */
1204                                 return bkt->key_idx[i] - 1;
1205                         }
1206                 }
1207         }
1208         return -1;
1209 }
1210
1211 /* Search one bucket to find the match key */
1212 static inline int32_t
1213 search_one_bucket_lf(const struct rte_hash *h, const void *key, uint16_t sig,
1214                         void **data, const struct rte_hash_bucket *bkt)
1215 {
1216         int i;
1217         uint32_t key_idx;
1218         struct rte_hash_key *k, *keys = h->key_store;
1219
1220         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1221                 /* Signature comparison is done before the acquire-load
1222                  * of the key index to achieve better performance.
1223                  * This can result in the reader loading old signature
1224                  * (which matches), while the key_idx is updated to a
1225                  * value that belongs to a new key. However, the full
1226                  * key comparison will ensure that the lookup fails.
1227                  */
1228                 if (bkt->sig_current[i] == sig) {
1229                         key_idx = __atomic_load_n(&bkt->key_idx[i],
1230                                           __ATOMIC_ACQUIRE);
1231                         if (key_idx != EMPTY_SLOT) {
1232                                 k = (struct rte_hash_key *) ((char *)keys +
1233                                                 key_idx * h->key_entry_size);
1234
1235                                 if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1236                                         if (data != NULL) {
1237                                                 *data = __atomic_load_n(
1238                                                         &k->pdata,
1239                                                         __ATOMIC_ACQUIRE);
1240                                         }
1241                                         /*
1242                                          * Return index where key is stored,
1243                                          * subtracting the first dummy index
1244                                          */
1245                                         return key_idx - 1;
1246                                 }
1247                         }
1248                 }
1249         }
1250         return -1;
1251 }
1252
1253 static inline int32_t
1254 __rte_hash_lookup_with_hash_l(const struct rte_hash *h, const void *key,
1255                                 hash_sig_t sig, void **data)
1256 {
1257         uint32_t prim_bucket_idx, sec_bucket_idx;
1258         struct rte_hash_bucket *bkt, *cur_bkt;
1259         int ret;
1260         uint16_t short_sig;
1261
1262         short_sig = get_short_sig(sig);
1263         prim_bucket_idx = get_prim_bucket_index(h, sig);
1264         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1265
1266         bkt = &h->buckets[prim_bucket_idx];
1267
1268         __hash_rw_reader_lock(h);
1269
1270         /* Check if key is in primary location */
1271         ret = search_one_bucket_l(h, key, short_sig, data, bkt);
1272         if (ret != -1) {
1273                 __hash_rw_reader_unlock(h);
1274                 return ret;
1275         }
1276         /* Calculate secondary hash */
1277         bkt = &h->buckets[sec_bucket_idx];
1278
1279         /* Check if key is in secondary location */
1280         FOR_EACH_BUCKET(cur_bkt, bkt) {
1281                 ret = search_one_bucket_l(h, key, short_sig,
1282                                         data, cur_bkt);
1283                 if (ret != -1) {
1284                         __hash_rw_reader_unlock(h);
1285                         return ret;
1286                 }
1287         }
1288
1289         __hash_rw_reader_unlock(h);
1290
1291         return -ENOENT;
1292 }
1293
/*
 * Lookup that takes no lock. The primary bucket is searched first, then
 * the secondary bucket and every bucket chained to it. The table change
 * counter (tbl_chng_cnt) is sampled before and after each unsuccessful
 * pass; if it differs, the pass is repeated.
 *
 * Returns the value from search_one_bucket_lf() on a hit, -ENOENT when a
 * full pass found nothing and the counter did not change during it.
 */
static inline int32_t
__rte_hash_lookup_with_hash_lf(const struct rte_hash *h, const void *key,
                                        hash_sig_t sig, void **data)
{
        uint32_t prim_bucket_idx, sec_bucket_idx;
        struct rte_hash_bucket *bkt, *cur_bkt;
        /* Counter value before (cnt_b) and after (cnt_a) one search pass */
        uint32_t cnt_b, cnt_a;
        int ret;
        uint16_t short_sig;

        short_sig = get_short_sig(sig);
        prim_bucket_idx = get_prim_bucket_index(h, sig);
        sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);

        do {
                /* Load the table change counter before the lookup
                 * starts. Acquire semantics will make sure that
                 * loads in search_one_bucket are not hoisted.
                 */
                cnt_b = __atomic_load_n(h->tbl_chng_cnt,
                                __ATOMIC_ACQUIRE);

                /* Check if key is in primary location */
                bkt = &h->buckets[prim_bucket_idx];
                ret = search_one_bucket_lf(h, key, short_sig, data, bkt);
                /* A hit is returned at once; no counter re-check is done */
                if (ret != -1)
                        return ret;
                /* Switch to the secondary bucket (index computed above) */
                bkt = &h->buckets[sec_bucket_idx];

                /* Check if key is in secondary location */
                FOR_EACH_BUCKET(cur_bkt, bkt) {
                        ret = search_one_bucket_lf(h, key, short_sig,
                                                data, cur_bkt);
                        if (ret != -1)
                                return ret;
                }

                /* The loads of sig_current in search_one_bucket
                 * should not move below the load from tbl_chng_cnt.
                 */
                __atomic_thread_fence(__ATOMIC_ACQUIRE);
                /* Re-read the table change counter to check if the
                 * table has changed during search. If yes, re-do
                 * the search.
                 * This load should not get hoisted. The load
                 * acquires on cnt_b, key index in primary bucket
                 * and key index in secondary bucket will make sure
                 * that it does not get hoisted.
                 */
                cnt_a = __atomic_load_n(h->tbl_chng_cnt,
                                        __ATOMIC_ACQUIRE);
        } while (cnt_b != cnt_a);

        return -ENOENT;
}
1350
1351 static inline int32_t
1352 __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
1353                                         hash_sig_t sig, void **data)
1354 {
1355         if (h->readwrite_concur_lf_support)
1356                 return __rte_hash_lookup_with_hash_lf(h, key, sig, data);
1357         else
1358                 return __rte_hash_lookup_with_hash_l(h, key, sig, data);
1359 }
1360
1361 int32_t
1362 rte_hash_lookup_with_hash(const struct rte_hash *h,
1363                         const void *key, hash_sig_t sig)
1364 {
1365         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1366         return __rte_hash_lookup_with_hash(h, key, sig, NULL);
1367 }
1368
1369 int32_t
1370 rte_hash_lookup(const struct rte_hash *h, const void *key)
1371 {
1372         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1373         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL);
1374 }
1375
1376 int
1377 rte_hash_lookup_with_hash_data(const struct rte_hash *h,
1378                         const void *key, hash_sig_t sig, void **data)
1379 {
1380         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1381         return __rte_hash_lookup_with_hash(h, key, sig, data);
1382 }
1383
1384 int
1385 rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
1386 {
1387         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1388         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
1389 }
1390
1391 static inline void
1392 remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
1393 {
1394         unsigned lcore_id, n_slots;
1395         struct lcore_cache *cached_free_slots;
1396
1397         if (h->use_local_cache) {
1398                 lcore_id = rte_lcore_id();
1399                 cached_free_slots = &h->local_free_slots[lcore_id];
1400                 /* Cache full, need to free it. */
1401                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1402                         /* Need to enqueue the free slots in global ring. */
1403                         n_slots = rte_ring_mp_enqueue_burst_elem(h->free_slots,
1404                                                 cached_free_slots->objs,
1405                                                 sizeof(uint32_t),
1406                                                 LCORE_CACHE_SIZE, NULL);
1407                         ERR_IF_TRUE((n_slots == 0),
1408                                 "%s: could not enqueue free slots in global ring\n",
1409                                 __func__);
1410                         cached_free_slots->len -= n_slots;
1411                 }
1412                 /* Put index of new free slot in cache. */
1413                 cached_free_slots->objs[cached_free_slots->len] =
1414                                                         bkt->key_idx[i];
1415                 cached_free_slots->len++;
1416         } else {
1417                 rte_ring_sp_enqueue_elem(h->free_slots,
1418                                 &bkt->key_idx[i], sizeof(uint32_t));
1419         }
1420 }
1421
/* Compact the linked list by moving a key from the last bucket in the
 * linked list into the empty slot.
 *
 * cur_bkt/pos identify the slot to refill. Nothing is done when cur_bkt
 * has no successor. Otherwise the highest-numbered occupied slot of the
 * bucket returned by rte_hash_get_last_bkt(cur_bkt) is copied into
 * cur_bkt[pos] and then cleared in its old location.
 */
static inline void
__rte_hash_compact_ll(const struct rte_hash *h,
                        struct rte_hash_bucket *cur_bkt, int pos) {
        int i;
        struct rte_hash_bucket *last_bkt;

        /* No chained bucket after cur_bkt: nothing to pull an entry from */
        if (!cur_bkt->next)
                return;

        last_bkt = rte_hash_get_last_bkt(cur_bkt);

        /* Scan the last bucket from its highest slot downwards and move
         * the first occupied entry found.
         */
        for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) {
                if (last_bkt->key_idx[i] != EMPTY_SLOT) {
                        /* Signature first, then publish the key index */
                        cur_bkt->sig_current[pos] = last_bkt->sig_current[i];
                        __atomic_store_n(&cur_bkt->key_idx[pos],
                                         last_bkt->key_idx[i],
                                         __ATOMIC_RELEASE);
                        if (h->readwrite_concur_lf_support) {
                                /* Inform the readers that the table has changed
                                 * Since there is one writer, load acquire on
                                 * tbl_chng_cnt is not required.
                                 */
                                __atomic_store_n(h->tbl_chng_cnt,
                                         *h->tbl_chng_cnt + 1,
                                         __ATOMIC_RELEASE);
                                /* The store to sig_current should
                                 * not move above the store to tbl_chng_cnt.
                                 */
                                __atomic_thread_fence(__ATOMIC_RELEASE);
                        }
                        /* The entry now lives in cur_bkt; clear the old copy */
                        last_bkt->sig_current[i] = NULL_SIGNATURE;
                        __atomic_store_n(&last_bkt->key_idx[i],
                                         EMPTY_SLOT,
                                         __ATOMIC_RELEASE);
                        return;
                }
        }
}
1463
1464 /* Search one bucket and remove the matched key.
1465  * Writer is expected to hold the lock while calling this
1466  * function.
1467  */
1468 static inline int32_t
1469 search_and_remove(const struct rte_hash *h, const void *key,
1470                         struct rte_hash_bucket *bkt, uint16_t sig, int *pos)
1471 {
1472         struct rte_hash_key *k, *keys = h->key_store;
1473         unsigned int i;
1474         uint32_t key_idx;
1475
1476         /* Check if key is in bucket */
1477         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1478                 key_idx = __atomic_load_n(&bkt->key_idx[i],
1479                                           __ATOMIC_ACQUIRE);
1480                 if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
1481                         k = (struct rte_hash_key *) ((char *)keys +
1482                                         key_idx * h->key_entry_size);
1483                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1484                                 bkt->sig_current[i] = NULL_SIGNATURE;
1485                                 /* Free the key store index if
1486                                  * no_free_on_del is disabled.
1487                                  */
1488                                 if (!h->no_free_on_del)
1489                                         remove_entry(h, bkt, i);
1490
1491                                 __atomic_store_n(&bkt->key_idx[i],
1492                                                  EMPTY_SLOT,
1493                                                  __ATOMIC_RELEASE);
1494
1495                                 *pos = i;
1496                                 /*
1497                                  * Return index where key is stored,
1498                                  * subtracting the first dummy index
1499                                  */
1500                                 return key_idx - 1;
1501                         }
1502                 }
1503         }
1504         return -1;
1505 }
1506
1507 static inline int32_t
1508 __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
1509                                                 hash_sig_t sig)
1510 {
1511         uint32_t prim_bucket_idx, sec_bucket_idx;
1512         struct rte_hash_bucket *prim_bkt, *sec_bkt, *prev_bkt, *last_bkt;
1513         struct rte_hash_bucket *cur_bkt;
1514         int pos;
1515         int32_t ret, i;
1516         uint16_t short_sig;
1517
1518         short_sig = get_short_sig(sig);
1519         prim_bucket_idx = get_prim_bucket_index(h, sig);
1520         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1521         prim_bkt = &h->buckets[prim_bucket_idx];
1522
1523         __hash_rw_writer_lock(h);
1524         /* look for key in primary bucket */
1525         ret = search_and_remove(h, key, prim_bkt, short_sig, &pos);
1526         if (ret != -1) {
1527                 __rte_hash_compact_ll(h, prim_bkt, pos);
1528                 last_bkt = prim_bkt->next;
1529                 prev_bkt = prim_bkt;
1530                 goto return_bkt;
1531         }
1532
1533         /* Calculate secondary hash */
1534         sec_bkt = &h->buckets[sec_bucket_idx];
1535
1536         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1537                 ret = search_and_remove(h, key, cur_bkt, short_sig, &pos);
1538                 if (ret != -1) {
1539                         __rte_hash_compact_ll(h, cur_bkt, pos);
1540                         last_bkt = sec_bkt->next;
1541                         prev_bkt = sec_bkt;
1542                         goto return_bkt;
1543                 }
1544         }
1545
1546         __hash_rw_writer_unlock(h);
1547         return -ENOENT;
1548
1549 /* Search last bucket to see if empty to be recycled */
1550 return_bkt:
1551         if (!last_bkt) {
1552                 __hash_rw_writer_unlock(h);
1553                 return ret;
1554         }
1555         while (last_bkt->next) {
1556                 prev_bkt = last_bkt;
1557                 last_bkt = last_bkt->next;
1558         }
1559
1560         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1561                 if (last_bkt->key_idx[i] != EMPTY_SLOT)
1562                         break;
1563         }
1564         /* found empty bucket and recycle */
1565         if (i == RTE_HASH_BUCKET_ENTRIES) {
1566                 prev_bkt->next = NULL;
1567                 uint32_t index = last_bkt - h->buckets_ext + 1;
1568                 /* Recycle the empty bkt if
1569                  * no_free_on_del is disabled.
1570                  */
1571                 if (h->no_free_on_del)
1572                         /* Store index of an empty ext bkt to be recycled
1573                          * on calling rte_hash_del_xxx APIs.
1574                          * When lock free read-write concurrency is enabled,
1575                          * an empty ext bkt cannot be put into free list
1576                          * immediately (as readers might be using it still).
1577                          * Hence freeing of the ext bkt is piggy-backed to
1578                          * freeing of the key index.
1579                          */
1580                         h->ext_bkt_to_free[ret] = index;
1581                 else
1582                         rte_ring_sp_enqueue_elem(h->free_ext_bkts, &index,
1583                                                         sizeof(uint32_t));
1584         }
1585         __hash_rw_writer_unlock(h);
1586         return ret;
1587 }
1588
1589 int32_t
1590 rte_hash_del_key_with_hash(const struct rte_hash *h,
1591                         const void *key, hash_sig_t sig)
1592 {
1593         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1594         return __rte_hash_del_key_with_hash(h, key, sig);
1595 }
1596
1597 int32_t
1598 rte_hash_del_key(const struct rte_hash *h, const void *key)
1599 {
1600         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1601         return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
1602 }
1603
1604 int
1605 rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
1606                                void **key)
1607 {
1608         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1609
1610         struct rte_hash_key *k, *keys = h->key_store;
1611         k = (struct rte_hash_key *) ((char *) keys + (position + 1) *
1612                                      h->key_entry_size);
1613         *key = k->key;
1614
1615         if (position !=
1616             __rte_hash_lookup_with_hash(h, *key, rte_hash_hash(h, *key),
1617                                         NULL)) {
1618                 return -ENOENT;
1619         }
1620
1621         return 0;
1622 }
1623
1624 int
1625 rte_hash_free_key_with_position(const struct rte_hash *h,
1626                                 const int32_t position)
1627 {
1628         /* Key index where key is stored, adding the first dummy index */
1629         uint32_t key_idx = position + 1;
1630
1631         RETURN_IF_TRUE(((h == NULL) || (key_idx == EMPTY_SLOT)), -EINVAL);
1632
1633         unsigned int lcore_id, n_slots;
1634         struct lcore_cache *cached_free_slots;
1635         const uint32_t total_entries = h->use_local_cache ?
1636                 h->entries + (RTE_MAX_LCORE - 1) * (LCORE_CACHE_SIZE - 1) + 1
1637                                                         : h->entries + 1;
1638
1639         /* Out of bounds */
1640         if (key_idx >= total_entries)
1641                 return -EINVAL;
1642         if (h->ext_table_support && h->readwrite_concur_lf_support) {
1643                 uint32_t index = h->ext_bkt_to_free[position];
1644                 if (index) {
1645                         /* Recycle empty ext bkt to free list. */
1646                         rte_ring_sp_enqueue_elem(h->free_ext_bkts, &index,
1647                                                         sizeof(uint32_t));
1648                         h->ext_bkt_to_free[position] = 0;
1649                 }
1650         }
1651
1652         if (h->use_local_cache) {
1653                 lcore_id = rte_lcore_id();
1654                 cached_free_slots = &h->local_free_slots[lcore_id];
1655                 /* Cache full, need to free it. */
1656                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1657                         /* Need to enqueue the free slots in global ring. */
1658                         n_slots = rte_ring_mp_enqueue_burst_elem(h->free_slots,
1659                                                 cached_free_slots->objs,
1660                                                 sizeof(uint32_t),
1661                                                 LCORE_CACHE_SIZE, NULL);
1662                         RETURN_IF_TRUE((n_slots == 0), -EFAULT);
1663                         cached_free_slots->len -= n_slots;
1664                 }
1665                 /* Put index of new free slot in cache. */
1666                 cached_free_slots->objs[cached_free_slots->len] = key_idx;
1667                 cached_free_slots->len++;
1668         } else {
1669                 rte_ring_sp_enqueue_elem(h->free_slots, &key_idx,
1670                                                 sizeof(uint32_t));
1671         }
1672
1673         return 0;
1674 }
1675
1676 static inline void
1677 compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
1678                         const struct rte_hash_bucket *prim_bkt,
1679                         const struct rte_hash_bucket *sec_bkt,
1680                         uint16_t sig,
1681                         enum rte_hash_sig_compare_function sig_cmp_fn)
1682 {
1683         unsigned int i;
1684
1685         /* For match mask the first bit of every two bits indicates the match */
1686         switch (sig_cmp_fn) {
1687 #if defined(RTE_MACHINE_CPUFLAG_SSE2)
1688         case RTE_HASH_COMPARE_SSE:
1689                 /* Compare all signatures in the bucket */
1690                 *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1691                                 _mm_load_si128(
1692                                         (__m128i const *)prim_bkt->sig_current),
1693                                 _mm_set1_epi16(sig)));
1694                 /* Compare all signatures in the bucket */
1695                 *sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1696                                 _mm_load_si128(
1697                                         (__m128i const *)sec_bkt->sig_current),
1698                                 _mm_set1_epi16(sig)));
1699                 break;
1700 #elif defined(RTE_MACHINE_CPUFLAG_NEON)
1701         case RTE_HASH_COMPARE_NEON: {
1702                 uint16x8_t vmat, vsig, x;
1703                 int16x8_t shift = {-15, -13, -11, -9, -7, -5, -3, -1};
1704
1705                 vsig = vld1q_dup_u16((uint16_t const *)&sig);
1706                 /* Compare all signatures in the primary bucket */
1707                 vmat = vceqq_u16(vsig,
1708                         vld1q_u16((uint16_t const *)prim_bkt->sig_current));
1709                 x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), shift);
1710                 *prim_hash_matches = (uint32_t)(vaddvq_u16(x));
1711                 /* Compare all signatures in the secondary bucket */
1712                 vmat = vceqq_u16(vsig,
1713                         vld1q_u16((uint16_t const *)sec_bkt->sig_current));
1714                 x = vshlq_u16(vandq_u16(vmat, vdupq_n_u16(0x8000)), shift);
1715                 *sec_hash_matches = (uint32_t)(vaddvq_u16(x));
1716                 }
1717                 break;
1718 #endif
1719         default:
1720                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1721                         *prim_hash_matches |=
1722                                 ((sig == prim_bkt->sig_current[i]) << (i << 1));
1723                         *sec_hash_matches |=
1724                                 ((sig == sec_bkt->sig_current[i]) << (i << 1));
1725                 }
1726         }
1727 }
1728
1729 static inline void
1730 __bulk_lookup_l(const struct rte_hash *h, const void **keys,
1731                 const struct rte_hash_bucket **primary_bkt,
1732                 const struct rte_hash_bucket **secondary_bkt,
1733                 uint16_t *sig, int32_t num_keys, int32_t *positions,
1734                 uint64_t *hit_mask, void *data[])
1735 {
1736         uint64_t hits = 0;
1737         int32_t i;
1738         int32_t ret;
1739         uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1740         uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1741         struct rte_hash_bucket *cur_bkt, *next_bkt;
1742
1743         __hash_rw_reader_lock(h);
1744
1745         /* Compare signatures and prefetch key slot of first hit */
1746         for (i = 0; i < num_keys; i++) {
1747                 compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1748                         primary_bkt[i], secondary_bkt[i],
1749                         sig[i], h->sig_cmp_fn);
1750
1751                 if (prim_hitmask[i]) {
1752                         uint32_t first_hit =
1753                                         __builtin_ctzl(prim_hitmask[i])
1754                                         >> 1;
1755                         uint32_t key_idx =
1756                                 primary_bkt[i]->key_idx[first_hit];
1757                         const struct rte_hash_key *key_slot =
1758                                 (const struct rte_hash_key *)(
1759                                 (const char *)h->key_store +
1760                                 key_idx * h->key_entry_size);
1761                         rte_prefetch0(key_slot);
1762                         continue;
1763                 }
1764
1765                 if (sec_hitmask[i]) {
1766                         uint32_t first_hit =
1767                                         __builtin_ctzl(sec_hitmask[i])
1768                                         >> 1;
1769                         uint32_t key_idx =
1770                                 secondary_bkt[i]->key_idx[first_hit];
1771                         const struct rte_hash_key *key_slot =
1772                                 (const struct rte_hash_key *)(
1773                                 (const char *)h->key_store +
1774                                 key_idx * h->key_entry_size);
1775                         rte_prefetch0(key_slot);
1776                 }
1777         }
1778
1779         /* Compare keys, first hits in primary first */
1780         for (i = 0; i < num_keys; i++) {
1781                 positions[i] = -ENOENT;
1782                 while (prim_hitmask[i]) {
1783                         uint32_t hit_index =
1784                                         __builtin_ctzl(prim_hitmask[i])
1785                                         >> 1;
1786                         uint32_t key_idx =
1787                                 primary_bkt[i]->key_idx[hit_index];
1788                         const struct rte_hash_key *key_slot =
1789                                 (const struct rte_hash_key *)(
1790                                 (const char *)h->key_store +
1791                                 key_idx * h->key_entry_size);
1792
1793                         /*
1794                          * If key index is 0, do not compare key,
1795                          * as it is checking the dummy slot
1796                          */
1797                         if (!!key_idx &
1798                                 !rte_hash_cmp_eq(
1799                                         key_slot->key, keys[i], h)) {
1800                                 if (data != NULL)
1801                                         data[i] = key_slot->pdata;
1802
1803                                 hits |= 1ULL << i;
1804                                 positions[i] = key_idx - 1;
1805                                 goto next_key;
1806                         }
1807                         prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
1808                 }
1809
1810                 while (sec_hitmask[i]) {
1811                         uint32_t hit_index =
1812                                         __builtin_ctzl(sec_hitmask[i])
1813                                         >> 1;
1814                         uint32_t key_idx =
1815                                 secondary_bkt[i]->key_idx[hit_index];
1816                         const struct rte_hash_key *key_slot =
1817                                 (const struct rte_hash_key *)(
1818                                 (const char *)h->key_store +
1819                                 key_idx * h->key_entry_size);
1820
1821                         /*
1822                          * If key index is 0, do not compare key,
1823                          * as it is checking the dummy slot
1824                          */
1825
1826                         if (!!key_idx &
1827                                 !rte_hash_cmp_eq(
1828                                         key_slot->key, keys[i], h)) {
1829                                 if (data != NULL)
1830                                         data[i] = key_slot->pdata;
1831
1832                                 hits |= 1ULL << i;
1833                                 positions[i] = key_idx - 1;
1834                                 goto next_key;
1835                         }
1836                         sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
1837                 }
1838 next_key:
1839                 continue;
1840         }
1841
1842         /* all found, do not need to go through ext bkt */
1843         if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
1844                 if (hit_mask != NULL)
1845                         *hit_mask = hits;
1846                 __hash_rw_reader_unlock(h);
1847                 return;
1848         }
1849
1850         /* need to check ext buckets for match */
1851         for (i = 0; i < num_keys; i++) {
1852                 if ((hits & (1ULL << i)) != 0)
1853                         continue;
1854                 next_bkt = secondary_bkt[i]->next;
1855                 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
1856                         if (data != NULL)
1857                                 ret = search_one_bucket_l(h, keys[i],
1858                                                 sig[i], &data[i], cur_bkt);
1859                         else
1860                                 ret = search_one_bucket_l(h, keys[i],
1861                                                 sig[i], NULL, cur_bkt);
1862                         if (ret != -1) {
1863                                 positions[i] = ret;
1864                                 hits |= 1ULL << i;
1865                                 break;
1866                         }
1867                 }
1868         }
1869
1870         __hash_rw_reader_unlock(h);
1871
1872         if (hit_mask != NULL)
1873                 *hit_mask = hits;
1874 }
1875
1876 static inline void
1877 __bulk_lookup_lf(const struct rte_hash *h, const void **keys,
1878                 const struct rte_hash_bucket **primary_bkt,
1879                 const struct rte_hash_bucket **secondary_bkt,
1880                 uint16_t *sig, int32_t num_keys, int32_t *positions,
1881                 uint64_t *hit_mask, void *data[])
1882 {
1883         uint64_t hits = 0;
1884         int32_t i;
1885         int32_t ret;
1886         uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1887         uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1888         struct rte_hash_bucket *cur_bkt, *next_bkt;
1889         uint32_t cnt_b, cnt_a;
1890
1891         for (i = 0; i < num_keys; i++)
1892                 positions[i] = -ENOENT;
1893
1894         do {
1895                 /* Load the table change counter before the lookup
1896                  * starts. Acquire semantics will make sure that
1897                  * loads in compare_signatures are not hoisted.
1898                  */
1899                 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1900                                         __ATOMIC_ACQUIRE);
1901
1902                 /* Compare signatures and prefetch key slot of first hit */
1903                 for (i = 0; i < num_keys; i++) {
1904                         compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1905                                 primary_bkt[i], secondary_bkt[i],
1906                                 sig[i], h->sig_cmp_fn);
1907
1908                         if (prim_hitmask[i]) {
1909                                 uint32_t first_hit =
1910                                                 __builtin_ctzl(prim_hitmask[i])
1911                                                 >> 1;
1912                                 uint32_t key_idx =
1913                                         primary_bkt[i]->key_idx[first_hit];
1914                                 const struct rte_hash_key *key_slot =
1915                                         (const struct rte_hash_key *)(
1916                                         (const char *)h->key_store +
1917                                         key_idx * h->key_entry_size);
1918                                 rte_prefetch0(key_slot);
1919                                 continue;
1920                         }
1921
1922                         if (sec_hitmask[i]) {
1923                                 uint32_t first_hit =
1924                                                 __builtin_ctzl(sec_hitmask[i])
1925                                                 >> 1;
1926                                 uint32_t key_idx =
1927                                         secondary_bkt[i]->key_idx[first_hit];
1928                                 const struct rte_hash_key *key_slot =
1929                                         (const struct rte_hash_key *)(
1930                                         (const char *)h->key_store +
1931                                         key_idx * h->key_entry_size);
1932                                 rte_prefetch0(key_slot);
1933                         }
1934                 }
1935
1936                 /* Compare keys, first hits in primary first */
1937                 for (i = 0; i < num_keys; i++) {
1938                         while (prim_hitmask[i]) {
1939                                 uint32_t hit_index =
1940                                                 __builtin_ctzl(prim_hitmask[i])
1941                                                 >> 1;
1942                                 uint32_t key_idx =
1943                                 __atomic_load_n(
1944                                         &primary_bkt[i]->key_idx[hit_index],
1945                                         __ATOMIC_ACQUIRE);
1946                                 const struct rte_hash_key *key_slot =
1947                                         (const struct rte_hash_key *)(
1948                                         (const char *)h->key_store +
1949                                         key_idx * h->key_entry_size);
1950
1951                                 /*
1952                                  * If key index is 0, do not compare key,
1953                                  * as it is checking the dummy slot
1954                                  */
1955                                 if (!!key_idx &
1956                                         !rte_hash_cmp_eq(
1957                                                 key_slot->key, keys[i], h)) {
1958                                         if (data != NULL)
1959                                                 data[i] = __atomic_load_n(
1960                                                         &key_slot->pdata,
1961                                                         __ATOMIC_ACQUIRE);
1962
1963                                         hits |= 1ULL << i;
1964                                         positions[i] = key_idx - 1;
1965                                         goto next_key;
1966                                 }
1967                                 prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
1968                         }
1969
1970                         while (sec_hitmask[i]) {
1971                                 uint32_t hit_index =
1972                                                 __builtin_ctzl(sec_hitmask[i])
1973                                                 >> 1;
1974                                 uint32_t key_idx =
1975                                 __atomic_load_n(
1976                                         &secondary_bkt[i]->key_idx[hit_index],
1977                                         __ATOMIC_ACQUIRE);
1978                                 const struct rte_hash_key *key_slot =
1979                                         (const struct rte_hash_key *)(
1980                                         (const char *)h->key_store +
1981                                         key_idx * h->key_entry_size);
1982
1983                                 /*
1984                                  * If key index is 0, do not compare key,
1985                                  * as it is checking the dummy slot
1986                                  */
1987
1988                                 if (!!key_idx &
1989                                         !rte_hash_cmp_eq(
1990                                                 key_slot->key, keys[i], h)) {
1991                                         if (data != NULL)
1992                                                 data[i] = __atomic_load_n(
1993                                                         &key_slot->pdata,
1994                                                         __ATOMIC_ACQUIRE);
1995
1996                                         hits |= 1ULL << i;
1997                                         positions[i] = key_idx - 1;
1998                                         goto next_key;
1999                                 }
2000                                 sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
2001                         }
2002 next_key:
2003                         continue;
2004                 }
2005
2006                 /* all found, do not need to go through ext bkt */
2007                 if (hits == ((1ULL << num_keys) - 1)) {
2008                         if (hit_mask != NULL)
2009                                 *hit_mask = hits;
2010                         return;
2011                 }
2012                 /* need to check ext buckets for match */
2013                 if (h->ext_table_support) {
2014                         for (i = 0; i < num_keys; i++) {
2015                                 if ((hits & (1ULL << i)) != 0)
2016                                         continue;
2017                                 next_bkt = secondary_bkt[i]->next;
2018                                 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
2019                                         if (data != NULL)
2020                                                 ret = search_one_bucket_lf(h,
2021                                                         keys[i], sig[i],
2022                                                         &data[i], cur_bkt);
2023                                         else
2024                                                 ret = search_one_bucket_lf(h,
2025                                                                 keys[i], sig[i],
2026                                                                 NULL, cur_bkt);
2027                                         if (ret != -1) {
2028                                                 positions[i] = ret;
2029                                                 hits |= 1ULL << i;
2030                                                 break;
2031                                         }
2032                                 }
2033                         }
2034                 }
2035                 /* The loads of sig_current in compare_signatures
2036                  * should not move below the load from tbl_chng_cnt.
2037                  */
2038                 __atomic_thread_fence(__ATOMIC_ACQUIRE);
2039                 /* Re-read the table change counter to check if the
2040                  * table has changed during search. If yes, re-do
2041                  * the search.
2042                  * This load should not get hoisted. The load
2043                  * acquires on cnt_b, primary key index and secondary
2044                  * key index will make sure that it does not get
2045                  * hoisted.
2046                  */
2047                 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
2048                                         __ATOMIC_ACQUIRE);
2049         } while (cnt_b != cnt_a);
2050
2051         if (hit_mask != NULL)
2052                 *hit_mask = hits;
2053 }
2054
2055 #define PREFETCH_OFFSET 4
2056 static inline void
2057 __bulk_lookup_prefetching_loop(const struct rte_hash *h,
2058         const void **keys, int32_t num_keys,
2059         uint16_t *sig,
2060         const struct rte_hash_bucket **primary_bkt,
2061         const struct rte_hash_bucket **secondary_bkt)
2062 {
2063         int32_t i;
2064         uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
2065         uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
2066         uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
2067
2068         /* Prefetch first keys */
2069         for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
2070                 rte_prefetch0(keys[i]);
2071
2072         /*
2073          * Prefetch rest of the keys, calculate primary and
2074          * secondary bucket and prefetch them
2075          */
2076         for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
2077                 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
2078
2079                 prim_hash[i] = rte_hash_hash(h, keys[i]);
2080
2081                 sig[i] = get_short_sig(prim_hash[i]);
2082                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
2083                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
2084
2085                 primary_bkt[i] = &h->buckets[prim_index[i]];
2086                 secondary_bkt[i] = &h->buckets[sec_index[i]];
2087
2088                 rte_prefetch0(primary_bkt[i]);
2089                 rte_prefetch0(secondary_bkt[i]);
2090         }
2091
2092         /* Calculate and prefetch rest of the buckets */
2093         for (; i < num_keys; i++) {
2094                 prim_hash[i] = rte_hash_hash(h, keys[i]);
2095
2096                 sig[i] = get_short_sig(prim_hash[i]);
2097                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
2098                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
2099
2100                 primary_bkt[i] = &h->buckets[prim_index[i]];
2101                 secondary_bkt[i] = &h->buckets[sec_index[i]];
2102
2103                 rte_prefetch0(primary_bkt[i]);
2104                 rte_prefetch0(secondary_bkt[i]);
2105         }
2106 }
2107
2108
2109 static inline void
2110 __rte_hash_lookup_bulk_l(const struct rte_hash *h, const void **keys,
2111                         int32_t num_keys, int32_t *positions,
2112                         uint64_t *hit_mask, void *data[])
2113 {
2114         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
2115         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2116         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2117
2118         __bulk_lookup_prefetching_loop(h, keys, num_keys, sig,
2119                 primary_bkt, secondary_bkt);
2120
2121         __bulk_lookup_l(h, keys, primary_bkt, secondary_bkt, sig, num_keys,
2122                 positions, hit_mask, data);
2123 }
2124
2125 static inline void
2126 __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
2127                         int32_t num_keys, int32_t *positions,
2128                         uint64_t *hit_mask, void *data[])
2129 {
2130         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
2131         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2132         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2133
2134         __bulk_lookup_prefetching_loop(h, keys, num_keys, sig,
2135                 primary_bkt, secondary_bkt);
2136
2137         __bulk_lookup_lf(h, keys, primary_bkt, secondary_bkt, sig, num_keys,
2138                 positions, hit_mask, data);
2139 }
2140
2141 static inline void
2142 __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
2143                         int32_t num_keys, int32_t *positions,
2144                         uint64_t *hit_mask, void *data[])
2145 {
2146         if (h->readwrite_concur_lf_support)
2147                 __rte_hash_lookup_bulk_lf(h, keys, num_keys, positions,
2148                                           hit_mask, data);
2149         else
2150                 __rte_hash_lookup_bulk_l(h, keys, num_keys, positions,
2151                                          hit_mask, data);
2152 }
2153
2154 int
2155 rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
2156                       uint32_t num_keys, int32_t *positions)
2157 {
2158         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
2159                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2160                         (positions == NULL)), -EINVAL);
2161
2162         __rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL);
2163         return 0;
2164 }
2165
2166 int
2167 rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
2168                       uint32_t num_keys, uint64_t *hit_mask, void *data[])
2169 {
2170         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
2171                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2172                         (hit_mask == NULL)), -EINVAL);
2173
2174         int32_t positions[num_keys];
2175
2176         __rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);
2177
2178         /* Return number of hits */
2179         return __builtin_popcountl(*hit_mask);
2180 }
2181
2182
2183 static inline void
2184 __rte_hash_lookup_with_hash_bulk_l(const struct rte_hash *h,
2185                         const void **keys, hash_sig_t *prim_hash,
2186                         int32_t num_keys, int32_t *positions,
2187                         uint64_t *hit_mask, void *data[])
2188 {
2189         int32_t i;
2190         uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
2191         uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
2192         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
2193         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2194         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2195
2196         /*
2197          * Prefetch keys, calculate primary and
2198          * secondary bucket and prefetch them
2199          */
2200         for (i = 0; i < num_keys; i++) {
2201                 rte_prefetch0(keys[i]);
2202
2203                 sig[i] = get_short_sig(prim_hash[i]);
2204                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
2205                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
2206
2207                 primary_bkt[i] = &h->buckets[prim_index[i]];
2208                 secondary_bkt[i] = &h->buckets[sec_index[i]];
2209
2210                 rte_prefetch0(primary_bkt[i]);
2211                 rte_prefetch0(secondary_bkt[i]);
2212         }
2213
2214         __bulk_lookup_l(h, keys, primary_bkt, secondary_bkt, sig, num_keys,
2215                 positions, hit_mask, data);
2216 }
2217
2218 static inline void
2219 __rte_hash_lookup_with_hash_bulk_lf(const struct rte_hash *h,
2220                         const void **keys, hash_sig_t *prim_hash,
2221                         int32_t num_keys, int32_t *positions,
2222                         uint64_t *hit_mask, void *data[])
2223 {
2224         int32_t i;
2225         uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
2226         uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
2227         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
2228         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2229         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
2230
2231         /*
2232          * Prefetch keys, calculate primary and
2233          * secondary bucket and prefetch them
2234          */
2235         for (i = 0; i < num_keys; i++) {
2236                 rte_prefetch0(keys[i]);
2237
2238                 sig[i] = get_short_sig(prim_hash[i]);
2239                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
2240                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
2241
2242                 primary_bkt[i] = &h->buckets[prim_index[i]];
2243                 secondary_bkt[i] = &h->buckets[sec_index[i]];
2244
2245                 rte_prefetch0(primary_bkt[i]);
2246                 rte_prefetch0(secondary_bkt[i]);
2247         }
2248
2249         __bulk_lookup_lf(h, keys, primary_bkt, secondary_bkt, sig, num_keys,
2250                 positions, hit_mask, data);
2251 }
2252
2253 static inline void
2254 __rte_hash_lookup_with_hash_bulk(const struct rte_hash *h, const void **keys,
2255                         hash_sig_t *prim_hash, int32_t num_keys,
2256                         int32_t *positions, uint64_t *hit_mask, void *data[])
2257 {
2258         if (h->readwrite_concur_lf_support)
2259                 __rte_hash_lookup_with_hash_bulk_lf(h, keys, prim_hash,
2260                                 num_keys, positions, hit_mask, data);
2261         else
2262                 __rte_hash_lookup_with_hash_bulk_l(h, keys, prim_hash,
2263                                 num_keys, positions, hit_mask, data);
2264 }
2265
2266 int
2267 rte_hash_lookup_with_hash_bulk(const struct rte_hash *h, const void **keys,
2268                 hash_sig_t *sig, uint32_t num_keys, int32_t *positions)
2269 {
2270         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) ||
2271                         (sig == NULL) || (num_keys == 0) ||
2272                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2273                         (positions == NULL)), -EINVAL);
2274
2275         __rte_hash_lookup_with_hash_bulk(h, keys, sig, num_keys,
2276                 positions, NULL, NULL);
2277         return 0;
2278 }
2279
2280 int
2281 rte_hash_lookup_with_hash_bulk_data(const struct rte_hash *h,
2282                 const void **keys, hash_sig_t *sig,
2283                 uint32_t num_keys, uint64_t *hit_mask, void *data[])
2284 {
2285         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) ||
2286                         (sig == NULL) || (num_keys == 0) ||
2287                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
2288                         (hit_mask == NULL)), -EINVAL);
2289
2290         int32_t positions[num_keys];
2291
2292         __rte_hash_lookup_with_hash_bulk(h, keys, sig, num_keys,
2293                         positions, hit_mask, data);
2294
2295         /* Return number of hits */
2296         return __builtin_popcountl(*hit_mask);
2297 }
2298
2299 int32_t
2300 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
2301 {
2302         uint32_t bucket_idx, idx, position;
2303         struct rte_hash_key *next_key;
2304
2305         RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
2306
2307         const uint32_t total_entries_main = h->num_buckets *
2308                                                         RTE_HASH_BUCKET_ENTRIES;
2309         const uint32_t total_entries = total_entries_main << 1;
2310
2311         /* Out of bounds of all buckets (both main table and ext table) */
2312         if (*next >= total_entries_main)
2313                 goto extend_table;
2314
2315         /* Calculate bucket and index of current iterator */
2316         bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
2317         idx = *next % RTE_HASH_BUCKET_ENTRIES;
2318
2319         /* If current position is empty, go to the next one */
2320         while ((position = __atomic_load_n(&h->buckets[bucket_idx].key_idx[idx],
2321                                         __ATOMIC_ACQUIRE)) == EMPTY_SLOT) {
2322                 (*next)++;
2323                 /* End of table */
2324                 if (*next == total_entries_main)
2325                         goto extend_table;
2326                 bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
2327                 idx = *next % RTE_HASH_BUCKET_ENTRIES;
2328         }
2329
2330         __hash_rw_reader_lock(h);
2331         next_key = (struct rte_hash_key *) ((char *)h->key_store +
2332                                 position * h->key_entry_size);
2333         /* Return key and data */
2334         *key = next_key->key;
2335         *data = next_key->pdata;
2336
2337         __hash_rw_reader_unlock(h);
2338
2339         /* Increment iterator */
2340         (*next)++;
2341
2342         return position - 1;
2343
2344 /* Begin to iterate extendable buckets */
2345 extend_table:
2346         /* Out of total bound or if ext bucket feature is not enabled */
2347         if (*next >= total_entries || !h->ext_table_support)
2348                 return -ENOENT;
2349
2350         bucket_idx = (*next - total_entries_main) / RTE_HASH_BUCKET_ENTRIES;
2351         idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
2352
2353         while ((position = h->buckets_ext[bucket_idx].key_idx[idx]) == EMPTY_SLOT) {
2354                 (*next)++;
2355                 if (*next == total_entries)
2356                         return -ENOENT;
2357                 bucket_idx = (*next - total_entries_main) /
2358                                                 RTE_HASH_BUCKET_ENTRIES;
2359                 idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
2360         }
2361         __hash_rw_reader_lock(h);
2362         next_key = (struct rte_hash_key *) ((char *)h->key_store +
2363                                 position * h->key_entry_size);
2364         /* Return key and data */
2365         *key = next_key->key;
2366         *data = next_key->pdata;
2367
2368         __hash_rw_reader_unlock(h);
2369
2370         /* Increment iterator */
2371         (*next)++;
2372         return position - 1;
2373 }