dpdk.git: lib/librte_hash/rte_cuckoo_hash.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <string.h>
#include <stdint.h>
#include <errno.h>
#include <stdio.h>
#include <stdarg.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
#include <rte_log.h>
#include <rte_memcpy.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_cpuflags.h>
#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include <rte_ring.h>
#include <rte_compat.h>
#include <rte_pause.h>

#include "rte_hash.h"
#include "rte_cuckoo_hash.h"

#define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET)                            \
        for (CURRENT_BKT = START_BUCKET;                                      \
                CURRENT_BKT != NULL;                                          \
                CURRENT_BKT = CURRENT_BKT->next)

TAILQ_HEAD(rte_hash_list, rte_tailq_entry);

static struct rte_tailq_elem rte_hash_tailq = {
        .name = "RTE_HASH",
};
EAL_REGISTER_TAILQ(rte_hash_tailq)

struct rte_hash *
rte_hash_find_existing(const char *name)
{
        struct rte_hash *h = NULL;
        struct rte_tailq_entry *te;
        struct rte_hash_list *hash_list;

        hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);

        rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
        TAILQ_FOREACH(te, hash_list, next) {
                h = (struct rte_hash *) te->data;
                if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
                        break;
        }
        rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);

        if (te == NULL) {
                rte_errno = ENOENT;
                return NULL;
        }
        return h;
}

static inline struct rte_hash_bucket *
rte_hash_get_last_bkt(struct rte_hash_bucket *lst_bkt)
{
        while (lst_bkt->next != NULL)
                lst_bkt = lst_bkt->next;
        return lst_bkt;
}

void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
{
        h->cmp_jump_table_idx = KEY_CUSTOM;
        h->rte_hash_custom_cmp_eq = func;
}

static inline int
rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
{
        if (h->cmp_jump_table_idx == KEY_CUSTOM)
                return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len);
        else
                return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
}

/*
 * We use the higher 16 bits of the hash as the signature value stored in
 * the table, and the lower bits for the primary bucket location. The
 * secondary bucket location is the XOR of the primary bucket location and
 * the signature, the same scheme proposed in Bin Fan et al.'s paper
 * "MemC3: Compact and Concurrent MemCache with Dumber Caching and
 * Smarter Hashing". The benefit of using XOR is that the alternative
 * bucket location can be derived from only the current bucket location
 * and the signature.
 */
static inline uint16_t
get_short_sig(const hash_sig_t hash)
{
        return hash >> 16;
}

static inline uint32_t
get_prim_bucket_index(const struct rte_hash *h, const hash_sig_t hash)
{
        return hash & h->bucket_bitmask;
}

static inline uint32_t
get_alt_bucket_index(const struct rte_hash *h,
                        uint32_t cur_bkt_idx, uint16_t sig)
{
        return (cur_bkt_idx ^ sig) & h->bucket_bitmask;
}
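
/*
 * Worked example (illustrative values): with bucket_bitmask = 0xFF, a key
 * whose primary bucket is 0x2A and whose short signature is 0x9C1 gets the
 * alternative bucket (0x2A ^ 0x9C1) & 0xFF = 0xEB, and applying the same
 * XOR to 0xEB yields 0x2A again, so either bucket can recover the other
 * from the signature alone, without re-reading the key.
 */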

struct rte_hash *
rte_hash_create(const struct rte_hash_parameters *params)
{
        struct rte_hash *h = NULL;
        struct rte_tailq_entry *te = NULL;
        struct rte_hash_list *hash_list;
        struct rte_ring *r = NULL;
        struct rte_ring *r_ext = NULL;
        char hash_name[RTE_HASH_NAMESIZE];
        void *k = NULL;
        void *buckets = NULL;
        void *buckets_ext = NULL;
        char ring_name[RTE_RING_NAMESIZE];
        char ext_ring_name[RTE_RING_NAMESIZE];
        unsigned num_key_slots;
        unsigned i;
        unsigned int hw_trans_mem_support = 0, use_local_cache = 0;
        unsigned int ext_table_support = 0;
        unsigned int readwrite_concur_support = 0;
        unsigned int writer_takes_lock = 0;

        rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;

        hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);

        if (params == NULL) {
                RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
                return NULL;
        }

        /* Check for valid parameters */
        if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
                        (params->entries < RTE_HASH_BUCKET_ENTRIES) ||
                        (params->key_len == 0)) {
                rte_errno = EINVAL;
                RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
                return NULL;
        }

        /* Check extra flags field to check extra options. */
        if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
                hw_trans_mem_support = 1;

        if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
                use_local_cache = 1;
                writer_takes_lock = 1;
        }

        if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
                readwrite_concur_support = 1;
                writer_takes_lock = 1;
        }

        if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
                ext_table_support = 1;

        /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
        if (use_local_cache)
                /*
                 * Increase number of slots by total number of indices
                 * that can be stored in the lcore caches
                 * except for the first cache
                 */
                num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
                                        (LCORE_CACHE_SIZE - 1) + 1;
        else
                num_key_slots = params->entries + 1;

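/*
 * Sizing example (assuming a typical build with RTE_MAX_LCORE = 128 and
 * LCORE_CACHE_SIZE = 64): for entries = 1024, the cached case reserves
 * 1024 + 127 * 63 + 1 = 9026 key slots, so that all other lcores can hold
 * nearly full local caches without starving the table itself.
 */
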
        snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
        /* Create ring (Dummy slot index is not enqueued) */
        r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
                        params->socket_id, 0);
        if (r == NULL) {
                RTE_LOG(ERR, HASH, "memory allocation failed\n");
                goto err;
        }

        const uint32_t num_buckets = rte_align32pow2(params->entries) /
                                                RTE_HASH_BUCKET_ENTRIES;

        /* Create ring for extendable buckets. */
        if (ext_table_support) {
                snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
                                                                params->name);
                r_ext = rte_ring_create(ext_ring_name,
                                rte_align32pow2(num_buckets + 1),
                                params->socket_id, 0);

                if (r_ext == NULL) {
                        RTE_LOG(ERR, HASH, "ext buckets memory allocation "
                                                                "failed\n");
                        goto err;
                }
        }

        snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);

        rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);

        /* Guarantee there's no existing entry; this is normally already
         * ensured by the ring creation above */
        TAILQ_FOREACH(te, hash_list, next) {
                h = (struct rte_hash *) te->data;
                if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
                        break;
        }
        h = NULL;
        if (te != NULL) {
                rte_errno = EEXIST;
                te = NULL;
                goto err_unlock;
        }

        te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
        if (te == NULL) {
                RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
                goto err_unlock;
        }

        h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
                                        RTE_CACHE_LINE_SIZE, params->socket_id);

        if (h == NULL) {
                RTE_LOG(ERR, HASH, "memory allocation failed\n");
                goto err_unlock;
        }

        buckets = rte_zmalloc_socket(NULL,
                                num_buckets * sizeof(struct rte_hash_bucket),
                                RTE_CACHE_LINE_SIZE, params->socket_id);

        if (buckets == NULL) {
                RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
                goto err_unlock;
        }

        /* Allocate same number of extendable buckets */
        if (ext_table_support) {
                buckets_ext = rte_zmalloc_socket(NULL,
                                num_buckets * sizeof(struct rte_hash_bucket),
                                RTE_CACHE_LINE_SIZE, params->socket_id);
                if (buckets_ext == NULL) {
                        RTE_LOG(ERR, HASH, "ext buckets memory allocation "
                                                        "failed\n");
                        goto err_unlock;
                }
                /* Populate ext bkt ring. We reserve 0 similar to the
                 * key-data slot, just in case in future we want to
                 * use bucket index for the linked list and 0 means NULL
                 * for next bucket
                 */
                for (i = 1; i <= num_buckets; i++)
                        rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
        }

        const uint32_t key_entry_size = sizeof(struct rte_hash_key) + params->key_len;
        const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;

        k = rte_zmalloc_socket(NULL, key_tbl_size,
                        RTE_CACHE_LINE_SIZE, params->socket_id);

        if (k == NULL) {
                RTE_LOG(ERR, HASH, "memory allocation failed\n");
                goto err_unlock;
        }

/*
 * On x86 and ARM64, select an appropriate key-compare function, which may
 * use architecture-specific intrinsics; otherwise fall back to memcmp.
 */
#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
        /* Select function to compare keys */
        switch (params->key_len) {
        case 16:
                h->cmp_jump_table_idx = KEY_16_BYTES;
                break;
        case 32:
                h->cmp_jump_table_idx = KEY_32_BYTES;
                break;
        case 48:
                h->cmp_jump_table_idx = KEY_48_BYTES;
                break;
        case 64:
                h->cmp_jump_table_idx = KEY_64_BYTES;
                break;
        case 80:
                h->cmp_jump_table_idx = KEY_80_BYTES;
                break;
        case 96:
                h->cmp_jump_table_idx = KEY_96_BYTES;
                break;
        case 112:
                h->cmp_jump_table_idx = KEY_112_BYTES;
                break;
        case 128:
                h->cmp_jump_table_idx = KEY_128_BYTES;
                break;
        default:
                /* If the key length is not a multiple of 16, use the generic memcmp */
                h->cmp_jump_table_idx = KEY_OTHER_BYTES;
        }
#else
        h->cmp_jump_table_idx = KEY_OTHER_BYTES;
#endif

        if (use_local_cache) {
                h->local_free_slots = rte_zmalloc_socket(NULL,
                                sizeof(struct lcore_cache) * RTE_MAX_LCORE,
                                RTE_CACHE_LINE_SIZE, params->socket_id);
        }

        /* Default hash function */
#if defined(RTE_ARCH_X86)
        default_hash_func = (rte_hash_function)rte_hash_crc;
#elif defined(RTE_ARCH_ARM64)
        if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
                default_hash_func = (rte_hash_function)rte_hash_crc;
#endif
        /* Setup hash context */
        snprintf(h->name, sizeof(h->name), "%s", params->name);
        h->entries = params->entries;
        h->key_len = params->key_len;
        h->key_entry_size = key_entry_size;
        h->hash_func_init_val = params->hash_func_init_val;

        h->num_buckets = num_buckets;
        h->bucket_bitmask = h->num_buckets - 1;
        h->buckets = buckets;
        h->buckets_ext = buckets_ext;
        h->free_ext_bkts = r_ext;
        h->hash_func = (params->hash_func == NULL) ?
                default_hash_func : params->hash_func;
        h->key_store = k;
        h->free_slots = r;
        h->hw_trans_mem_support = hw_trans_mem_support;
        h->use_local_cache = use_local_cache;
        h->readwrite_concur_support = readwrite_concur_support;
        h->ext_table_support = ext_table_support;
        h->writer_takes_lock = writer_takes_lock;

#if defined(RTE_ARCH_X86)
        if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
                h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
        else
#endif
                h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;

        /* Writer threads need to take the lock when:
         * 1) RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is enabled OR
         * 2) RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD is enabled
         */
        if (h->writer_takes_lock) {
                h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
                                                RTE_CACHE_LINE_SIZE);
                if (h->readwrite_lock == NULL)
                        goto err_unlock;

                rte_rwlock_init(h->readwrite_lock);
        }

        /* Populate free slots ring. Entry zero is reserved for key misses. */
        for (i = 1; i < num_key_slots; i++)
                rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));

        te->data = (void *) h;
        TAILQ_INSERT_TAIL(hash_list, te, next);
        rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);

        return h;
err_unlock:
        rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
err:
        rte_ring_free(r);
        rte_ring_free(r_ext);
        rte_free(te);
        rte_free(h);
        rte_free(buckets);
        rte_free(buckets_ext);
        rte_free(k);
        return NULL;
}

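/*
 * Minimal usage sketch (illustrative only; error handling and the
 * rte_jhash.h include are omitted, and my_data is a hypothetical pointer):
 *
 *	struct rte_hash_parameters params = {
 *		.name = "example",
 *		.entries = 1024,
 *		.key_len = sizeof(uint32_t),
 *		.hash_func = rte_jhash,
 *		.socket_id = (int)rte_socket_id(),
 *	};
 *	struct rte_hash *tbl = rte_hash_create(&params);
 *	uint32_t key = 42;
 *	void *data;
 *
 *	rte_hash_add_key_data(tbl, &key, my_data);
 *	if (rte_hash_lookup_data(tbl, &key, &data) >= 0)
 *		assert(data == my_data);
 *	rte_hash_free(tbl);
 */
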
void
rte_hash_free(struct rte_hash *h)
{
        struct rte_tailq_entry *te;
        struct rte_hash_list *hash_list;

        if (h == NULL)
                return;

        hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);

        rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);

        /* find out tailq entry */
        TAILQ_FOREACH(te, hash_list, next) {
                if (te->data == (void *) h)
                        break;
        }

        if (te == NULL) {
                rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
                return;
        }

        TAILQ_REMOVE(hash_list, te, next);

        rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);

        if (h->use_local_cache)
                rte_free(h->local_free_slots);
        if (h->writer_takes_lock)
                rte_free(h->readwrite_lock);
        rte_ring_free(h->free_slots);
        rte_ring_free(h->free_ext_bkts);
        rte_free(h->key_store);
        rte_free(h->buckets);
        rte_free(h->buckets_ext);
        rte_free(h);
        rte_free(te);
}

hash_sig_t
rte_hash_hash(const struct rte_hash *h, const void *key)
{
        /* Compute the hash of the key */
        return h->hash_func(key, h->key_len, h->hash_func_init_val);
}

int32_t
rte_hash_count(const struct rte_hash *h)
{
        uint32_t tot_ring_cnt, cached_cnt = 0;
        uint32_t i, ret;

        if (h == NULL)
                return -EINVAL;

        if (h->use_local_cache) {
                tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
                                        (LCORE_CACHE_SIZE - 1);
                for (i = 0; i < RTE_MAX_LCORE; i++)
                        cached_cnt += h->local_free_slots[i].len;

                ret = tot_ring_cnt - rte_ring_count(h->free_slots) -
                                                                cached_cnt;
        } else {
                tot_ring_cnt = h->entries;
                ret = tot_ring_cnt - rte_ring_count(h->free_slots);
        }
        return ret;
}

/* Read write locks implemented using rte_rwlock */
static inline void
__hash_rw_writer_lock(const struct rte_hash *h)
{
        if (h->writer_takes_lock && h->hw_trans_mem_support)
                rte_rwlock_write_lock_tm(h->readwrite_lock);
        else if (h->writer_takes_lock)
                rte_rwlock_write_lock(h->readwrite_lock);
}

static inline void
__hash_rw_reader_lock(const struct rte_hash *h)
{
        if (h->readwrite_concur_support && h->hw_trans_mem_support)
                rte_rwlock_read_lock_tm(h->readwrite_lock);
        else if (h->readwrite_concur_support)
                rte_rwlock_read_lock(h->readwrite_lock);
}

static inline void
__hash_rw_writer_unlock(const struct rte_hash *h)
{
        if (h->writer_takes_lock && h->hw_trans_mem_support)
                rte_rwlock_write_unlock_tm(h->readwrite_lock);
        else if (h->writer_takes_lock)
                rte_rwlock_write_unlock(h->readwrite_lock);
}

static inline void
__hash_rw_reader_unlock(const struct rte_hash *h)
{
        if (h->readwrite_concur_support && h->hw_trans_mem_support)
                rte_rwlock_read_unlock_tm(h->readwrite_lock);
        else if (h->readwrite_concur_support)
                rte_rwlock_read_unlock(h->readwrite_lock);
}
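
/*
 * Summary of the locking scheme above: writers take the lock only when
 * multi-writer add or read-write concurrency was requested at create time,
 * and readers only for read-write concurrency. With hardware transactional
 * memory support, the _tm rwlock variants use lock elision and fall back
 * to the plain lock if a transaction aborts.
 */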

void
rte_hash_reset(struct rte_hash *h)
{
        void *ptr;
        uint32_t tot_ring_cnt, i;

        if (h == NULL)
                return;

        __hash_rw_writer_lock(h);
        memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
        memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));

        /* clear the free ring */
        while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
                rte_pause();

        /* clear free extendable bucket ring and memory */
        if (h->ext_table_support) {
                memset(h->buckets_ext, 0, h->num_buckets *
                                                sizeof(struct rte_hash_bucket));
                while (rte_ring_dequeue(h->free_ext_bkts, &ptr) == 0)
                        rte_pause();
        }

        /* Repopulate the free slots ring. Entry zero is reserved for key misses */
        if (h->use_local_cache)
                tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
                                        (LCORE_CACHE_SIZE - 1);
        else
                tot_ring_cnt = h->entries;

        for (i = 1; i < tot_ring_cnt + 1; i++)
                rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));

        /* Repopulate the free ext bkt ring. */
        if (h->ext_table_support) {
                for (i = 1; i <= h->num_buckets; i++)
                        rte_ring_sp_enqueue(h->free_ext_bkts,
                                                (void *)((uintptr_t) i));
        }

        if (h->use_local_cache) {
                /* Reset local caches per lcore */
                for (i = 0; i < RTE_MAX_LCORE; i++)
                        h->local_free_slots[i].len = 0;
        }
        __hash_rw_writer_unlock(h);
}

/*
 * Function called to enqueue back an index into the cache/ring, as the
 * slot has not been used and can be reused by the next addition attempt.
 */
static inline void
enqueue_slot_back(const struct rte_hash *h,
                struct lcore_cache *cached_free_slots,
                void *slot_id)
{
        if (h->use_local_cache) {
                cached_free_slots->objs[cached_free_slots->len] = slot_id;
                cached_free_slots->len++;
        } else
                rte_ring_sp_enqueue(h->free_slots, slot_id);
}

/* Search for a key in a bucket and update its data */
static inline int32_t
search_and_update(const struct rte_hash *h, void *data, const void *key,
        struct rte_hash_bucket *bkt, uint16_t sig)
{
        int i;
        struct rte_hash_key *k, *keys = h->key_store;

        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
                if (bkt->sig_current[i] == sig) {
                        k = (struct rte_hash_key *) ((char *)keys +
                                        bkt->key_idx[i] * h->key_entry_size);
                        if (rte_hash_cmp_eq(key, k->key, h) == 0) {
                                /* Update data */
                                k->pdata = data;
                                /*
                                 * Return index where key is stored,
                                 * subtracting the first dummy index
                                 */
                                return bkt->key_idx[i] - 1;
                        }
                }
        }
        return -1;
}

/* Only tries to insert at one bucket (@prim_bkt) without trying to push
 * buckets around.
 * Returns 1 if a matching existing key was found, 0 on success, and -1 if
 * no empty entry was found.
 */
static inline int32_t
rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
                struct rte_hash_bucket *prim_bkt,
                struct rte_hash_bucket *sec_bkt,
                const struct rte_hash_key *key, void *data,
                uint16_t sig, uint32_t new_idx,
                int32_t *ret_val)
{
        unsigned int i;
        struct rte_hash_bucket *cur_bkt;
        int32_t ret;

        __hash_rw_writer_lock(h);
        /* Check if key was inserted after last check but before this
         * protected region in case of inserting duplicated keys.
         */
        ret = search_and_update(h, data, key, prim_bkt, sig);
        if (ret != -1) {
                __hash_rw_writer_unlock(h);
                *ret_val = ret;
                return 1;
        }

        FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
                ret = search_and_update(h, data, key, cur_bkt, sig);
                if (ret != -1) {
                        __hash_rw_writer_unlock(h);
                        *ret_val = ret;
                        return 1;
                }
        }

        /* Insert new entry if there is room in the primary
         * bucket.
         */
        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
                /* Check if slot is available */
                if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
                        prim_bkt->sig_current[i] = sig;
                        prim_bkt->key_idx[i] = new_idx;
                        break;
                }
        }
        __hash_rw_writer_unlock(h);

        if (i != RTE_HASH_BUCKET_ENTRIES)
                return 0;

        /* no empty entry */
        return -1;
}

/* Shift buckets along the provided cuckoo_path (@leaf and @leaf_slot) and
 * fill the path head with the new entry (sig, alt_hash, new_idx).
 * Returns 1 if a matching key was found, -1 if the cuckoo path was
 * invalidated and the insert failed, and 0 on success.
 */
static inline int
rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
                        struct rte_hash_bucket *bkt,
                        struct rte_hash_bucket *alt_bkt,
                        const struct rte_hash_key *key, void *data,
                        struct queue_node *leaf, uint32_t leaf_slot,
                        uint16_t sig, uint32_t new_idx,
                        int32_t *ret_val)
{
        uint32_t prev_alt_bkt_idx;
        struct rte_hash_bucket *cur_bkt;
        struct queue_node *prev_node, *curr_node = leaf;
        struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
        uint32_t prev_slot, curr_slot = leaf_slot;
        int32_t ret;

        __hash_rw_writer_lock(h);

        /* In case the empty slot was taken before entering the protected region */
        if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) {
                __hash_rw_writer_unlock(h);
                return -1;
        }

        /* Check if key was inserted after last check but before this
         * protected region.
         */
        ret = search_and_update(h, data, key, bkt, sig);
        if (ret != -1) {
                __hash_rw_writer_unlock(h);
                *ret_val = ret;
                return 1;
        }

        FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
                ret = search_and_update(h, data, key, cur_bkt, sig);
                if (ret != -1) {
                        __hash_rw_writer_unlock(h);
                        *ret_val = ret;
                        return 1;
                }
        }

        while (likely(curr_node->prev != NULL)) {
                prev_node = curr_node->prev;
                prev_bkt = prev_node->bkt;
                prev_slot = curr_node->prev_slot;

                prev_alt_bkt_idx = get_alt_bucket_index(h,
                                        prev_node->cur_bkt_idx,
                                        prev_bkt->sig_current[prev_slot]);

                if (unlikely(&h->buckets[prev_alt_bkt_idx]
                                != curr_bkt)) {
                        /* Revert to empty; otherwise we would leave duplicated keys */
                        curr_bkt->key_idx[curr_slot] = EMPTY_SLOT;
                        __hash_rw_writer_unlock(h);
                        return -1;
                }

                /* Need to swap the current/alt signatures so that a later
                 * cuckoo insert can move elements back to their primary
                 * buckets if they become available
                 */
                curr_bkt->sig_current[curr_slot] =
                        prev_bkt->sig_current[prev_slot];
                curr_bkt->key_idx[curr_slot] =
                        prev_bkt->key_idx[prev_slot];

                curr_slot = prev_slot;
                curr_node = prev_node;
                curr_bkt = curr_node->bkt;
        }

        curr_bkt->sig_current[curr_slot] = sig;
        curr_bkt->key_idx[curr_slot] = new_idx;

        __hash_rw_writer_unlock(h);

        return 0;
}

/*
 * Make space for a new key, using a BFS cuckoo-path search and
 * multi-writer-safe cuckoo displacement.
 */
static inline int
rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
                        struct rte_hash_bucket *bkt,
                        struct rte_hash_bucket *sec_bkt,
                        const struct rte_hash_key *key, void *data,
                        uint16_t sig, uint32_t bucket_idx,
                        uint32_t new_idx, int32_t *ret_val)
{
        unsigned int i;
        struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
        struct queue_node *tail, *head;
        struct rte_hash_bucket *curr_bkt, *alt_bkt;
        uint32_t cur_idx, alt_idx;

        tail = queue;
        head = queue + 1;
        tail->bkt = bkt;
        tail->prev = NULL;
        tail->prev_slot = -1;
        tail->cur_bkt_idx = bucket_idx;

        /* Cuckoo BFS search */
        while (likely(tail != head && head <
                                        queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
                                        RTE_HASH_BUCKET_ENTRIES)) {
                curr_bkt = tail->bkt;
                cur_idx = tail->cur_bkt_idx;
                for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
                        if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
                                int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
                                                bkt, sec_bkt, key, data,
                                                tail, i, sig,
                                                new_idx, ret_val);
                                if (likely(ret != -1))
                                        return ret;
                        }

                        /* Enqueue new node and keep prev node info */
                        alt_idx = get_alt_bucket_index(h, cur_idx,
                                                curr_bkt->sig_current[i]);
                        alt_bkt = &(h->buckets[alt_idx]);
                        head->bkt = alt_bkt;
                        head->cur_bkt_idx = alt_idx;
                        head->prev = tail;
                        head->prev_slot = i;
                        head++;
                }
                tail++;
        }

        return -ENOSPC;
}
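
/*
 * Note on the search above: the BFS queue doubles as the cuckoo path.
 * Every node records the bucket it was reached from (prev) and the slot
 * within it (prev_slot), so when an empty slot is found, the chain of
 * prev pointers is exactly the sequence of moves that
 * rte_hash_cuckoo_move_insert_mw() replays backwards, from the empty slot
 * toward the original primary bucket.
 */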

static inline int32_t
__rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
                                                hash_sig_t sig, void *data)
{
        uint16_t short_sig;
        uint32_t prim_bucket_idx, sec_bucket_idx;
        struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
        struct rte_hash_key *new_k, *keys = h->key_store;
        void *slot_id = NULL;
        void *ext_bkt_id = NULL;
        uint32_t new_idx, bkt_id;
        int ret;
        unsigned n_slots;
        unsigned lcore_id;
        unsigned int i;
        struct lcore_cache *cached_free_slots = NULL;
        int32_t ret_val;
        struct rte_hash_bucket *last;

        short_sig = get_short_sig(sig);
        prim_bucket_idx = get_prim_bucket_index(h, sig);
        sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
        prim_bkt = &h->buckets[prim_bucket_idx];
        sec_bkt = &h->buckets[sec_bucket_idx];
        rte_prefetch0(prim_bkt);
        rte_prefetch0(sec_bkt);

        /* Check if key is already inserted in primary location */
        __hash_rw_writer_lock(h);
        ret = search_and_update(h, data, key, prim_bkt, short_sig);
        if (ret != -1) {
                __hash_rw_writer_unlock(h);
                return ret;
        }

        /* Check if key is already inserted in secondary location */
        FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
                ret = search_and_update(h, data, key, cur_bkt, short_sig);
                if (ret != -1) {
                        __hash_rw_writer_unlock(h);
                        return ret;
                }
        }

        __hash_rw_writer_unlock(h);

        /* Did not find a match, so get a new slot for storing the new key */
        if (h->use_local_cache) {
                lcore_id = rte_lcore_id();
                cached_free_slots = &h->local_free_slots[lcore_id];
                /* Try to get a free slot from the local cache */
                if (cached_free_slots->len == 0) {
                        /* Need to get another burst of free slots from global ring */
                        n_slots = rte_ring_mc_dequeue_burst(h->free_slots,
                                        cached_free_slots->objs,
                                        LCORE_CACHE_SIZE, NULL);
                        if (n_slots == 0) {
                                return -ENOSPC;
                        }

                        cached_free_slots->len += n_slots;
                }

                /* Get a free slot from the local cache */
                cached_free_slots->len--;
                slot_id = cached_free_slots->objs[cached_free_slots->len];
        } else {
                if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
                        return -ENOSPC;
                }
        }

        new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
        new_idx = (uint32_t)((uintptr_t) slot_id);
        /* Copy key */
        rte_memcpy(new_k->key, key, h->key_len);
        new_k->pdata = data;

        /* Find an empty slot and insert */
        ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
                                        short_sig, new_idx, &ret_val);
        if (ret == 0)
                return new_idx - 1;
        else if (ret == 1) {
                enqueue_slot_back(h, cached_free_slots, slot_id);
                return ret_val;
        }

        /* Primary bucket full, need to make space for new entry */
        ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
                                short_sig, prim_bucket_idx, new_idx, &ret_val);
        if (ret == 0)
                return new_idx - 1;
        else if (ret == 1) {
                enqueue_slot_back(h, cached_free_slots, slot_id);
                return ret_val;
        }

        /* Also search secondary bucket to get better occupancy */
        ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
                                short_sig, sec_bucket_idx, new_idx, &ret_val);

        if (ret == 0)
                return new_idx - 1;
        else if (ret == 1) {
                enqueue_slot_back(h, cached_free_slots, slot_id);
                return ret_val;
        }

        /* If the ext table is not enabled, the insertion has failed */
        if (!h->ext_table_support) {
                enqueue_slot_back(h, cached_free_slots, slot_id);
                return ret;
        }

        /* Now we need to go through the extendable buckets. The writer lock
         * protects all operations on the extendable buckets.
         */
        __hash_rw_writer_lock(h);
        /* Check for duplicates again; the key could have been inserted before the lock */
        ret = search_and_update(h, data, key, prim_bkt, short_sig);
        if (ret != -1) {
                enqueue_slot_back(h, cached_free_slots, slot_id);
                goto failure;
        }

        FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
                ret = search_and_update(h, data, key, cur_bkt, short_sig);
                if (ret != -1) {
                        enqueue_slot_back(h, cached_free_slots, slot_id);
                        goto failure;
                }
        }

        /* Search sec and ext buckets to find an empty entry to insert. */
        FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
                for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
                        /* Check if slot is available */
                        if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
                                cur_bkt->sig_current[i] = short_sig;
                                cur_bkt->key_idx[i] = new_idx;
                                __hash_rw_writer_unlock(h);
                                return new_idx - 1;
                        }
                }
        }

        /* Failed to get an empty entry from extendable buckets. Link a new
         * extendable bucket. We first get a free bucket from ring.
         */
        if (rte_ring_sc_dequeue(h->free_ext_bkts, &ext_bkt_id) != 0) {
                ret = -ENOSPC;
                goto failure;
        }

        bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
        /* Use the first location of the new bucket */
        (h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
        (h->buckets_ext[bkt_id]).key_idx[0] = new_idx;
        /* Link the new bucket to sec bucket linked list */
        last = rte_hash_get_last_bkt(sec_bkt);
        last->next = &h->buckets_ext[bkt_id];
        __hash_rw_writer_unlock(h);
        return new_idx - 1;

failure:
        __hash_rw_writer_unlock(h);
        return ret;
}

int32_t
rte_hash_add_key_with_hash(const struct rte_hash *h,
                        const void *key, hash_sig_t sig)
{
        RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
        return __rte_hash_add_key_with_hash(h, key, sig, 0);
}

int32_t
rte_hash_add_key(const struct rte_hash *h, const void *key)
{
        RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
        return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0);
}

int
rte_hash_add_key_with_hash_data(const struct rte_hash *h,
                        const void *key, hash_sig_t sig, void *data)
{
        int ret;

        RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
        ret = __rte_hash_add_key_with_hash(h, key, sig, data);
        if (ret >= 0)
                return 0;
        else
                return ret;
}

int
rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
{
        int ret;

        RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);

        ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data);
        if (ret >= 0)
                return 0;
        else
                return ret;
}

/* Search one bucket to find the matching key */
static inline int32_t
search_one_bucket(const struct rte_hash *h, const void *key, uint16_t sig,
                        void **data, const struct rte_hash_bucket *bkt)
{
        int i;
        struct rte_hash_key *k, *keys = h->key_store;

        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
                if (bkt->sig_current[i] == sig &&
                                bkt->key_idx[i] != EMPTY_SLOT) {
                        k = (struct rte_hash_key *) ((char *)keys +
                                        bkt->key_idx[i] * h->key_entry_size);
                        if (rte_hash_cmp_eq(key, k->key, h) == 0) {
                                if (data != NULL)
                                        *data = k->pdata;
                                /*
                                 * Return index where key is stored,
                                 * subtracting the first dummy index
                                 */
                                return bkt->key_idx[i] - 1;
                        }
                }
        }
        return -1;
}

static inline int32_t
__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
                                        hash_sig_t sig, void **data)
{
        uint32_t prim_bucket_idx, sec_bucket_idx;
        struct rte_hash_bucket *bkt, *cur_bkt;
        int ret;
        uint16_t short_sig;

        short_sig = get_short_sig(sig);
        prim_bucket_idx = get_prim_bucket_index(h, sig);
        sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
        bkt = &h->buckets[prim_bucket_idx];

        __hash_rw_reader_lock(h);

        /* Check if key is in primary location */
        ret = search_one_bucket(h, key, short_sig, data, bkt);
        if (ret != -1) {
                __hash_rw_reader_unlock(h);
                return ret;
        }
        /* Move on to the secondary bucket */
        bkt = &h->buckets[sec_bucket_idx];

        /* Check if key is in secondary location */
        FOR_EACH_BUCKET(cur_bkt, bkt) {
                ret = search_one_bucket(h, key, short_sig, data, cur_bkt);
                if (ret != -1) {
                        __hash_rw_reader_unlock(h);
                        return ret;
                }
        }
        __hash_rw_reader_unlock(h);
        return -ENOENT;
}

int32_t
rte_hash_lookup_with_hash(const struct rte_hash *h,
                        const void *key, hash_sig_t sig)
{
        RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
        return __rte_hash_lookup_with_hash(h, key, sig, NULL);
}

int32_t
rte_hash_lookup(const struct rte_hash *h, const void *key)
{
        RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
        return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL);
}

int
rte_hash_lookup_with_hash_data(const struct rte_hash *h,
                        const void *key, hash_sig_t sig, void **data)
{
        RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
        return __rte_hash_lookup_with_hash(h, key, sig, data);
}

int
rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
{
        RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
        return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
}

static inline void
remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
{
        unsigned lcore_id, n_slots;
        struct lcore_cache *cached_free_slots;

        bkt->sig_current[i] = NULL_SIGNATURE;
        if (h->use_local_cache) {
                lcore_id = rte_lcore_id();
                cached_free_slots = &h->local_free_slots[lcore_id];
                /* Cache full, need to free it. */
                if (cached_free_slots->len == LCORE_CACHE_SIZE) {
                        /* Need to enqueue the free slots in global ring. */
                        n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
                                                cached_free_slots->objs,
                                                LCORE_CACHE_SIZE, NULL);
                        cached_free_slots->len -= n_slots;
                }
                /* Put index of new free slot in cache. */
                cached_free_slots->objs[cached_free_slots->len] =
                                (void *)((uintptr_t)bkt->key_idx[i]);
                cached_free_slots->len++;
        } else {
                rte_ring_sp_enqueue(h->free_slots,
                                (void *)((uintptr_t)bkt->key_idx[i]));
        }
}
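
/*
 * Note: when the local cache is full, the burst enqueue above attempts to
 * return the whole LCORE_CACHE_SIZE batch to the global ring at once, and
 * cached_free_slots->len is reduced by however many slots were actually
 * enqueued before the newly freed index is stored back into the cache.
 */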

/* Compact the linked list by moving the key from the last entry in the
 * linked list to the empty slot.
 */
static inline void
__rte_hash_compact_ll(struct rte_hash_bucket *cur_bkt, int pos)
{
        int i;
        struct rte_hash_bucket *last_bkt;

        if (!cur_bkt->next)
                return;

        last_bkt = rte_hash_get_last_bkt(cur_bkt);

        for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) {
                if (last_bkt->key_idx[i] != EMPTY_SLOT) {
                        cur_bkt->key_idx[pos] = last_bkt->key_idx[i];
                        cur_bkt->sig_current[pos] = last_bkt->sig_current[i];
                        last_bkt->sig_current[i] = NULL_SIGNATURE;
                        last_bkt->key_idx[i] = EMPTY_SLOT;
                        return;
                }
        }
}
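
/*
 * Illustration: if a chain is bkt -> ext1 -> ext2 and a key is deleted
 * from slot 2 of bkt, the last occupied entry of ext2 is moved into slot 2
 * of bkt. Keeping all holes at the tail is what lets the delete path below
 * detect a fully empty last bucket and return it to the free ring.
 */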

/* Search one bucket and remove the matched key */
static inline int32_t
search_and_remove(const struct rte_hash *h, const void *key,
                        struct rte_hash_bucket *bkt, uint16_t sig, int *pos)
{
        struct rte_hash_key *k, *keys = h->key_store;
        unsigned int i;
        int32_t ret;

        /* Check if key is in bucket */
        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
                if (bkt->sig_current[i] == sig &&
                                bkt->key_idx[i] != EMPTY_SLOT) {
                        k = (struct rte_hash_key *) ((char *)keys +
                                        bkt->key_idx[i] * h->key_entry_size);
                        if (rte_hash_cmp_eq(key, k->key, h) == 0) {
                                remove_entry(h, bkt, i);

                                /* Return index where key is stored,
                                 * subtracting the first dummy index
                                 */
                                ret = bkt->key_idx[i] - 1;
                                bkt->key_idx[i] = EMPTY_SLOT;
                                *pos = i;
                                return ret;
                        }
                }
        }
        return -1;
}

static inline int32_t
__rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
                                                hash_sig_t sig)
{
        uint32_t prim_bucket_idx, sec_bucket_idx;
        struct rte_hash_bucket *prim_bkt, *sec_bkt, *prev_bkt, *last_bkt;
        struct rte_hash_bucket *cur_bkt;
        int pos;
        int32_t ret, i;
        uint16_t short_sig;

        short_sig = get_short_sig(sig);
        prim_bucket_idx = get_prim_bucket_index(h, sig);
        sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
        prim_bkt = &h->buckets[prim_bucket_idx];

        __hash_rw_writer_lock(h);
        /* look for key in primary bucket */
        ret = search_and_remove(h, key, prim_bkt, short_sig, &pos);
        if (ret != -1) {
                __rte_hash_compact_ll(prim_bkt, pos);
                last_bkt = prim_bkt->next;
                prev_bkt = prim_bkt;
                goto return_bkt;
        }

        /* Look for the key in the secondary bucket */
        sec_bkt = &h->buckets[sec_bucket_idx];

        FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
                ret = search_and_remove(h, key, cur_bkt, short_sig, &pos);
                if (ret != -1) {
                        __rte_hash_compact_ll(cur_bkt, pos);
                        last_bkt = sec_bkt->next;
                        prev_bkt = sec_bkt;
                        goto return_bkt;
                }
        }

        __hash_rw_writer_unlock(h);
        return -ENOENT;

/* Search the last bucket to see if it is empty and can be recycled */
return_bkt:
        if (!last_bkt) {
                __hash_rw_writer_unlock(h);
                return ret;
        }
        while (last_bkt->next) {
                prev_bkt = last_bkt;
                last_bkt = last_bkt->next;
        }

        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
                if (last_bkt->key_idx[i] != EMPTY_SLOT)
                        break;
        }
        /* Found an empty last bucket; recycle it */
        if (i == RTE_HASH_BUCKET_ENTRIES) {
                prev_bkt->next = last_bkt->next = NULL;
                uint32_t index = last_bkt - h->buckets_ext + 1;
                rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
        }

        __hash_rw_writer_unlock(h);
        return ret;
}

int32_t
rte_hash_del_key_with_hash(const struct rte_hash *h,
                        const void *key, hash_sig_t sig)
{
        RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
        return __rte_hash_del_key_with_hash(h, key, sig);
}

int32_t
rte_hash_del_key(const struct rte_hash *h, const void *key)
{
        RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
        return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
}

int
rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
                               void **key)
{
        RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);

        struct rte_hash_key *k, *keys = h->key_store;
        k = (struct rte_hash_key *) ((char *) keys + (position + 1) *
                                     h->key_entry_size);
        *key = k->key;

        if (position !=
            __rte_hash_lookup_with_hash(h, *key, rte_hash_hash(h, *key),
                                        NULL)) {
                return -ENOENT;
        }

        return 0;
}

static inline void
compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
                        const struct rte_hash_bucket *prim_bkt,
                        const struct rte_hash_bucket *sec_bkt,
                        uint16_t sig,
                        enum rte_hash_sig_compare_function sig_cmp_fn)
{
        unsigned int i;

        /* In the match masks, each entry occupies two bits; the lower bit of
         * each pair indicates a match */
        switch (sig_cmp_fn) {
#ifdef RTE_MACHINE_CPUFLAG_SSE2
        case RTE_HASH_COMPARE_SSE:
                /* Compare all signatures in the primary bucket */
                *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
                                _mm_load_si128(
                                        (__m128i const *)prim_bkt->sig_current),
                                _mm_set1_epi16(sig)));
                /* Compare all signatures in the secondary bucket */
                *sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
                                _mm_load_si128(
                                        (__m128i const *)sec_bkt->sig_current),
                                _mm_set1_epi16(sig)));
                break;
#endif
        default:
                for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
                        *prim_hash_matches |=
                                ((sig == prim_bkt->sig_current[i]) << (i << 1));
                        *sec_hash_matches |=
                                ((sig == sec_bkt->sig_current[i]) << (i << 1));
                }
        }
}
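
/*
 * Match-mask layout example: sig_current[] holds RTE_HASH_BUCKET_ENTRIES
 * (8) 16-bit signatures, and _mm_movemask_epi8() yields one bit per byte,
 * i.e. two bits per 16-bit lane, so a match in entry i sets bits 2i and
 * 2i + 1 (the scalar fallback sets only bit 2i). This is why the bulk
 * lookup below recovers an entry index as __builtin_ctzl(mask) >> 1 and
 * clears a consumed hit with ~(3ULL << (2 * index)).
 */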

#define PREFETCH_OFFSET 4
static inline void
__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
                        int32_t num_keys, int32_t *positions,
                        uint64_t *hit_mask, void *data[])
{
        uint64_t hits = 0;
        int32_t i;
        int32_t ret;
        uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
        uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
        uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
        uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
        const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
        const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
        uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
        uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
        struct rte_hash_bucket *cur_bkt, *next_bkt;

        /* Prefetch first keys */
        for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
                rte_prefetch0(keys[i]);

        /*
         * Prefetch the rest of the keys, calculate their primary and
         * secondary buckets and prefetch those buckets
         */
        for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
                rte_prefetch0(keys[i + PREFETCH_OFFSET]);

                prim_hash[i] = rte_hash_hash(h, keys[i]);

                sig[i] = get_short_sig(prim_hash[i]);
                prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
                sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);

                primary_bkt[i] = &h->buckets[prim_index[i]];
                secondary_bkt[i] = &h->buckets[sec_index[i]];

                rte_prefetch0(primary_bkt[i]);
                rte_prefetch0(secondary_bkt[i]);
        }

        /* Calculate and prefetch rest of the buckets */
        for (; i < num_keys; i++) {
                prim_hash[i] = rte_hash_hash(h, keys[i]);

                sig[i] = get_short_sig(prim_hash[i]);
                prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
                sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);

                primary_bkt[i] = &h->buckets[prim_index[i]];
                secondary_bkt[i] = &h->buckets[sec_index[i]];

                rte_prefetch0(primary_bkt[i]);
                rte_prefetch0(secondary_bkt[i]);
        }

        __hash_rw_reader_lock(h);
        /* Compare signatures and prefetch key slot of first hit */
        for (i = 0; i < num_keys; i++) {
                compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
                                primary_bkt[i], secondary_bkt[i],
                                sig[i], h->sig_cmp_fn);

                if (prim_hitmask[i]) {
                        uint32_t first_hit =
                                        __builtin_ctzl(prim_hitmask[i]) >> 1;
                        uint32_t key_idx = primary_bkt[i]->key_idx[first_hit];
                        const struct rte_hash_key *key_slot =
                                (const struct rte_hash_key *)(
                                (const char *)h->key_store +
                                key_idx * h->key_entry_size);
                        rte_prefetch0(key_slot);
                        continue;
                }

                if (sec_hitmask[i]) {
                        uint32_t first_hit =
                                        __builtin_ctzl(sec_hitmask[i]) >> 1;
                        uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit];
                        const struct rte_hash_key *key_slot =
                                (const struct rte_hash_key *)(
                                (const char *)h->key_store +
                                key_idx * h->key_entry_size);
                        rte_prefetch0(key_slot);
                }
        }

	/* Compare keys, checking hits in the primary bucket first */
1429         for (i = 0; i < num_keys; i++) {
1430                 positions[i] = -ENOENT;
1431                 while (prim_hitmask[i]) {
1432                         uint32_t hit_index =
1433                                         __builtin_ctzl(prim_hitmask[i]) >> 1;
1434
1435                         uint32_t key_idx = primary_bkt[i]->key_idx[hit_index];
1436                         const struct rte_hash_key *key_slot =
1437                                 (const struct rte_hash_key *)(
1438                                 (const char *)h->key_store +
1439                                 key_idx * h->key_entry_size);
			/*
			 * If key index is 0 this is the dummy slot, which
			 * never holds a valid key; the bitwise & (rather
			 * than &&) keeps the check branchless by discarding
			 * the comparison result instead of skipping it.
			 * Stored indices are 1-based (0 marks an empty
			 * slot), hence the key_idx - 1 below.
			 */
			if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
1445                                 if (data != NULL)
1446                                         data[i] = key_slot->pdata;
1447
1448                                 hits |= 1ULL << i;
1449                                 positions[i] = key_idx - 1;
1450                                 goto next_key;
1451                         }
1452                         prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
1453                 }
1454
1455                 while (sec_hitmask[i]) {
1456                         uint32_t hit_index =
1457                                         __builtin_ctzl(sec_hitmask[i]) >> 1;
1458
1459                         uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index];
1460                         const struct rte_hash_key *key_slot =
1461                                 (const struct rte_hash_key *)(
1462                                 (const char *)h->key_store +
1463                                 key_idx * h->key_entry_size);
			/*
			 * As above: index 0 is the dummy slot, and the
			 * bitwise & forces a miss for it without a branch
			 */
			if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
1470                                 if (data != NULL)
1471                                         data[i] = key_slot->pdata;
1472
1473                                 hits |= 1ULL << i;
1474                                 positions[i] = key_idx - 1;
1475                                 goto next_key;
1476                         }
1477                         sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
1478                 }
1479
1480 next_key:
1481                 continue;
1482         }
1483
	/*
	 * All found, no need to go through ext bkt.
	 * (~0ULL >> (64 - num_keys) is the num_keys-wide all-hits mask;
	 * unlike (1ULL << num_keys) - 1 it stays defined when num_keys
	 * is 64, i.e. RTE_HASH_LOOKUP_BULK_MAX.)
	 */
	if ((hits == (~0ULL >> (64 - num_keys))) || !h->ext_table_support) {
1486                 if (hit_mask != NULL)
1487                         *hit_mask = hits;
1488                 __hash_rw_reader_unlock(h);
1489                 return;
1490         }
1491
1492         /* need to check ext buckets for match */
1493         for (i = 0; i < num_keys; i++) {
1494                 if ((hits & (1ULL << i)) != 0)
1495                         continue;
1496                 next_bkt = secondary_bkt[i]->next;
1497                 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
1498                         if (data != NULL)
1499                                 ret = search_one_bucket(h, keys[i],
1500                                                 sig[i], &data[i], cur_bkt);
1501                         else
1502                                 ret = search_one_bucket(h, keys[i],
1503                                                 sig[i], NULL, cur_bkt);
1504                         if (ret != -1) {
1505                                 positions[i] = ret;
1506                                 hits |= 1ULL << i;
1507                                 break;
1508                         }
1509                 }
1510         }
1511
1512         __hash_rw_reader_unlock(h);
1513
1514         if (hit_mask != NULL)
1515                 *hit_mask = hits;
1516 }
1517
1518 int
1519 rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
1520                       uint32_t num_keys, int32_t *positions)
1521 {
1522         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
1523                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
1524                         (positions == NULL)), -EINVAL);
1525
1526         __rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL);
1527         return 0;
1528 }
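
/*
 * Usage sketch for rte_hash_lookup_bulk() (illustrative only; "h" is an
 * existing table and "flow_keys" a hypothetical caller-owned key array):
 *
 *	const void *key_ptrs[4] = { &flow_keys[0], &flow_keys[1],
 *				    &flow_keys[2], &flow_keys[3] };
 *	int32_t pos[4];
 *
 *	if (rte_hash_lookup_bulk(h, key_ptrs, 4, pos) == 0) {
 *		for (int i = 0; i < 4; i++) {
 *			if (pos[i] >= 0)
 *				printf("key %d at position %d\n", i, pos[i]);
 *			else
 *				printf("key %d not found\n", i);
 *		}
 *	}
 */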
1529
1530 int
1531 rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
1532                       uint32_t num_keys, uint64_t *hit_mask, void *data[])
1533 {
1534         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
1535                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
1536                         (hit_mask == NULL)), -EINVAL);
1537
1538         int32_t positions[num_keys];
1539
1540         __rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);
1541
1542         /* Return number of hits */
1543         return __builtin_popcountl(*hit_mask);
1544 }
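
/*
 * Usage sketch for rte_hash_lookup_bulk_data() (illustrative only; names
 * other than the API itself are hypothetical). The return value is the
 * number of hits, so hit_mask can be walked bit by bit, clearing the
 * lowest set bit after each value is consumed:
 *
 *	uint64_t hit_mask;
 *	void *values[RTE_HASH_LOOKUP_BULK_MAX];
 *	int hits = rte_hash_lookup_bulk_data(h, key_ptrs, n, &hit_mask,
 *					     values);
 *
 *	while (hits-- > 0) {
 *		int i = __builtin_ctzll(hit_mask);
 *
 *		use_value(values[i]);
 *		hit_mask &= hit_mask - 1;
 *	}
 */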
1545
1546 int32_t
rte_hash_iterate(const struct rte_hash *h, const void **key, void **data,
			uint32_t *next)
1548 {
1549         uint32_t bucket_idx, idx, position;
1550         struct rte_hash_key *next_key;
1551
1552         RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
1553
1554         const uint32_t total_entries_main = h->num_buckets *
1555                                                         RTE_HASH_BUCKET_ENTRIES;
1556         const uint32_t total_entries = total_entries_main << 1;
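	/*
	 * The ext table is created with as many buckets as the main table,
	 * hence the << 1 above to cover both entry spaces.
	 */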
1557
	/* Past the end of the main table: continue in the ext table */
1559         if (*next >= total_entries_main)
1560                 goto extend_table;
1561
1562         /* Calculate bucket and index of current iterator */
1563         bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
1564         idx = *next % RTE_HASH_BUCKET_ENTRIES;
1565
	/*
	 * Take the reader lock before scanning, so a concurrent delete
	 * cannot empty the slot between reading key_idx and dereferencing
	 * the key store entry
	 */
	__hash_rw_reader_lock(h);

	/* If current position is empty, go to the next one */
	while ((position = h->buckets[bucket_idx].key_idx[idx]) == EMPTY_SLOT) {
		(*next)++;
		/* End of table */
		if (*next == total_entries_main) {
			__hash_rw_reader_unlock(h);
			goto extend_table;
		}
		bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
		idx = *next % RTE_HASH_BUCKET_ENTRIES;
	}

	next_key = (struct rte_hash_key *) ((char *)h->key_store +
				position * h->key_entry_size);
	/* Return key and data */
	*key = next_key->key;
	*data = next_key->pdata;

	__hash_rw_reader_unlock(h);
1584
1585         /* Increment iterator */
1586         (*next)++;
1587
1588         return position - 1;
1589
1590 /* Begin to iterate extendable buckets */
1591 extend_table:
	/* Out of bounds of all entries, or the ext table is not enabled */
1593         if (*next >= total_entries || !h->ext_table_support)
1594                 return -ENOENT;
1595
1596         bucket_idx = (*next - total_entries_main) / RTE_HASH_BUCKET_ENTRIES;
1597         idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
1598
	/* Same locking discipline as the main-table scan above */
	__hash_rw_reader_lock(h);
	while ((position = h->buckets_ext[bucket_idx].key_idx[idx]) ==
			EMPTY_SLOT) {
		(*next)++;
		if (*next == total_entries) {
			__hash_rw_reader_unlock(h);
			return -ENOENT;
		}
		bucket_idx = (*next - total_entries_main) /
						RTE_HASH_BUCKET_ENTRIES;
		idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
	}
	next_key = (struct rte_hash_key *) ((char *)h->key_store +
				position * h->key_entry_size);
	/* Return key and data */
	*key = next_key->key;
	*data = next_key->pdata;

	__hash_rw_reader_unlock(h);
1615
1616         /* Increment iterator */
1617         (*next)++;
1618         return position - 1;
1619 }
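
/*
 * Usage sketch for rte_hash_iterate() (illustrative only; "handle_entry"
 * is a hypothetical consumer): start with *next == 0 and call until
 * -ENOENT; each successful call returns the entry's position and
 * advances the iterator.
 *
 *	uint32_t iter = 0;
 *	const void *key;
 *	void *data;
 *
 *	while (rte_hash_iterate(h, &key, &data, &iter) >= 0)
 *		handle_entry(key, data);
 */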