hash: fix key store element alignment
[dpdk.git] / lib / librte_hash / rte_cuckoo_hash.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <string.h>
6 #include <stdint.h>
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdarg.h>
10 #include <sys/queue.h>
11
12 #include <rte_common.h>
13 #include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
14 #include <rte_log.h>
15 #include <rte_memcpy.h>
16 #include <rte_prefetch.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_malloc.h>
19 #include <rte_eal.h>
20 #include <rte_eal_memconfig.h>
21 #include <rte_per_lcore.h>
22 #include <rte_errno.h>
23 #include <rte_string_fns.h>
24 #include <rte_cpuflags.h>
25 #include <rte_rwlock.h>
26 #include <rte_spinlock.h>
27 #include <rte_ring.h>
28 #include <rte_compat.h>
29 #include <rte_pause.h>
30
31 #include "rte_hash.h"
32 #include "rte_cuckoo_hash.h"
33
34 #define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET)                            \
35         for (CURRENT_BKT = START_BUCKET;                                      \
36                 CURRENT_BKT != NULL;                                          \
37                 CURRENT_BKT = CURRENT_BKT->next)
38
39 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
40
41 static struct rte_tailq_elem rte_hash_tailq = {
42         .name = "RTE_HASH",
43 };
44 EAL_REGISTER_TAILQ(rte_hash_tailq)
45
46 struct rte_hash *
47 rte_hash_find_existing(const char *name)
48 {
49         struct rte_hash *h = NULL;
50         struct rte_tailq_entry *te;
51         struct rte_hash_list *hash_list;
52
53         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
54
55         rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
56         TAILQ_FOREACH(te, hash_list, next) {
57                 h = (struct rte_hash *) te->data;
58                 if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
59                         break;
60         }
61         rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
62
63         if (te == NULL) {
64                 rte_errno = ENOENT;
65                 return NULL;
66         }
67         return h;
68 }
69
70 static inline struct rte_hash_bucket *
71 rte_hash_get_last_bkt(struct rte_hash_bucket *lst_bkt)
72 {
73         while (lst_bkt->next != NULL)
74                 lst_bkt = lst_bkt->next;
75         return lst_bkt;
76 }
77
78 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
79 {
80         h->cmp_jump_table_idx = KEY_CUSTOM;
81         h->rte_hash_custom_cmp_eq = func;
82 }
83
84 static inline int
85 rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
86 {
87         if (h->cmp_jump_table_idx == KEY_CUSTOM)
88                 return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len);
89         else
90                 return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
91 }
92
93 /*
94  * We use the higher 16 bits of the hash as the signature value stored in
95  * the table, and the lower bits to compute the primary bucket
96  * location. The primary bucket location is then XORed with the signature
97  * to get the secondary bucket location. This is the same scheme as
98  * proposed in Bin Fan, et al's paper
99  * "MemC3: Compact and Concurrent MemCache with Dumber Caching and
100  * Smarter Hashing". The benefit of using
101  * XOR is that the alternative bucket location can be derived
102  * from only the current bucket location and the signature.
103  */
104 static inline uint16_t
105 get_short_sig(const hash_sig_t hash)
106 {
107         return hash >> 16;
108 }
109
110 static inline uint32_t
111 get_prim_bucket_index(const struct rte_hash *h, const hash_sig_t hash)
112 {
113         return hash & h->bucket_bitmask;
114 }
115
116 static inline uint32_t
117 get_alt_bucket_index(const struct rte_hash *h,
118                         uint32_t cur_bkt_idx, uint16_t sig)
119 {
120         return (cur_bkt_idx ^ sig) & h->bucket_bitmask;
121 }
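
/*
 * Worked example of the scheme above (illustrative values only): assume
 * num_buckets = 256, so bucket_bitmask = 0xff, and hash = 0xabcd1234.
 * Then sig = hash >> 16 = 0xabcd, the primary bucket index is
 * hash & 0xff = 0x34, and the secondary bucket index is
 * (0x34 ^ 0xabcd) & 0xff = 0xf9. From the secondary bucket the primary
 * one is recovered the same way: (0xf9 ^ 0xabcd) & 0xff = 0x34.
 */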
122
123 struct rte_hash *
124 rte_hash_create(const struct rte_hash_parameters *params)
125 {
126         struct rte_hash *h = NULL;
127         struct rte_tailq_entry *te = NULL;
128         struct rte_hash_list *hash_list;
129         struct rte_ring *r = NULL;
130         struct rte_ring *r_ext = NULL;
131         char hash_name[RTE_HASH_NAMESIZE];
132         void *k = NULL;
133         void *buckets = NULL;
134         void *buckets_ext = NULL;
135         char ring_name[RTE_RING_NAMESIZE];
136         char ext_ring_name[RTE_RING_NAMESIZE];
137         unsigned num_key_slots;
138         unsigned i;
139         unsigned int hw_trans_mem_support = 0, use_local_cache = 0;
140         unsigned int ext_table_support = 0;
141         unsigned int readwrite_concur_support = 0;
142         unsigned int writer_takes_lock = 0;
143         unsigned int no_free_on_del = 0;
144
145         rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
146
147         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
148
149         if (params == NULL) {
150                 RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
151                 return NULL;
152         }
153
154         /* Check for valid parameters */
155         if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
156                         (params->entries < RTE_HASH_BUCKET_ENTRIES) ||
157                         (params->key_len == 0)) {
158                 rte_errno = EINVAL;
159                 RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
160                 return NULL;
161         }
162
163         /* Check the extra flags field for extra options. */
164         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
165                 hw_trans_mem_support = 1;
166
167         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
168                 use_local_cache = 1;
169                 writer_takes_lock = 1;
170         }
171
172         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
173                 readwrite_concur_support = 1;
174                 writer_takes_lock = 1;
175         }
176
177         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
178                 ext_table_support = 1;
179
180         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)
181                 no_free_on_del = 1;
182
183         /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
184         if (use_local_cache)
185                 /*
186                  * Increase number of slots by total number of indices
187                  * that can be stored in the lcore caches
188                  * except for the first cache
189                  */
190                 num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
191                                         (LCORE_CACHE_SIZE - 1) + 1;
192         else
193                 num_key_slots = params->entries + 1;
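        /*
         * Worked example (build-dependent values, for illustration only):
         * with entries = 1024, RTE_MAX_LCORE = 128 and LCORE_CACHE_SIZE = 64,
         * num_key_slots = 1024 + 127 * 63 + 1 = 9026, and the free-slot ring
         * created below is sized to rte_align32pow2(9026) = 16384.
         */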
194
195         snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
196         /* Create ring (Dummy slot index is not enqueued) */
197         r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
198                         params->socket_id, 0);
199         if (r == NULL) {
200                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
201                 goto err;
202         }
203
204         const uint32_t num_buckets = rte_align32pow2(params->entries) /
205                                                 RTE_HASH_BUCKET_ENTRIES;
206
207         /* Create ring for extendable buckets. */
208         if (ext_table_support) {
209                 snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
210                                                                 params->name);
211                 r_ext = rte_ring_create(ext_ring_name,
212                                 rte_align32pow2(num_buckets + 1),
213                                 params->socket_id, 0);
214
215                 if (r_ext == NULL) {
216                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
217                                                                 "failed\n");
218                         goto err;
219                 }
220         }
221
222         snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
223
224         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
225
226         /* Guarantee there's no existing entry with this name; this is
227          * normally already ensured by the ring creation above */
228         TAILQ_FOREACH(te, hash_list, next) {
229                 h = (struct rte_hash *) te->data;
230                 if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
231                         break;
232         }
233         h = NULL;
234         if (te != NULL) {
235                 rte_errno = EEXIST;
236                 te = NULL;
237                 goto err_unlock;
238         }
239
240         te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
241         if (te == NULL) {
242                 RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
243                 goto err_unlock;
244         }
245
246         h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
247                                         RTE_CACHE_LINE_SIZE, params->socket_id);
248
249         if (h == NULL) {
250                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
251                 goto err_unlock;
252         }
253
254         buckets = rte_zmalloc_socket(NULL,
255                                 num_buckets * sizeof(struct rte_hash_bucket),
256                                 RTE_CACHE_LINE_SIZE, params->socket_id);
257
258         if (buckets == NULL) {
259                 RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
260                 goto err_unlock;
261         }
262
263         /* Allocate same number of extendable buckets */
264         if (ext_table_support) {
265                 buckets_ext = rte_zmalloc_socket(NULL,
266                                 num_buckets * sizeof(struct rte_hash_bucket),
267                                 RTE_CACHE_LINE_SIZE, params->socket_id);
268                 if (buckets_ext == NULL) {
269                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
270                                                         "failed\n");
271                         goto err_unlock;
272                 }
273                 /* Populate ext bkt ring. Index 0 is reserved, similar to
274                  * the key-data slot, in case we later want to use the
275                  * bucket index for the linked list, with 0 meaning NULL
276                  * for the next bucket
277                  */
278                 for (i = 1; i <= num_buckets; i++)
279                         rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
280         }
281
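        /*
         * Each key store element holds a struct rte_hash_key header (the
         * pdata pointer) followed by the key bytes. Rounding the element size
         * up to KEY_ALIGNMENT keeps every element, and therefore the pdata
         * pointer at its start, on an aligned boundary (this is the key store
         * element alignment referred to in the commit subject).
         */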
282         const uint32_t key_entry_size =
283                 RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,
284                           KEY_ALIGNMENT);
285         const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
286
287         k = rte_zmalloc_socket(NULL, key_tbl_size,
288                         RTE_CACHE_LINE_SIZE, params->socket_id);
289
290         if (k == NULL) {
291                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
292                 goto err_unlock;
293         }
294
295 /*
296  * On x86 or ARM64, select an appropriate compare function, which may use
297  * architecture-specific intrinsics; otherwise fall back to memcmp
298  */
299 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
300         /* Select function to compare keys */
301         switch (params->key_len) {
302         case 16:
303                 h->cmp_jump_table_idx = KEY_16_BYTES;
304                 break;
305         case 32:
306                 h->cmp_jump_table_idx = KEY_32_BYTES;
307                 break;
308         case 48:
309                 h->cmp_jump_table_idx = KEY_48_BYTES;
310                 break;
311         case 64:
312                 h->cmp_jump_table_idx = KEY_64_BYTES;
313                 break;
314         case 80:
315                 h->cmp_jump_table_idx = KEY_80_BYTES;
316                 break;
317         case 96:
318                 h->cmp_jump_table_idx = KEY_96_BYTES;
319                 break;
320         case 112:
321                 h->cmp_jump_table_idx = KEY_112_BYTES;
322                 break;
323         case 128:
324                 h->cmp_jump_table_idx = KEY_128_BYTES;
325                 break;
326         default:
327                 /* If key length is not a multiple of 16, use generic memcmp */
328                 h->cmp_jump_table_idx = KEY_OTHER_BYTES;
329         }
330 #else
331         h->cmp_jump_table_idx = KEY_OTHER_BYTES;
332 #endif
333
334         if (use_local_cache) {
335                 h->local_free_slots = rte_zmalloc_socket(NULL,
336                                 sizeof(struct lcore_cache) * RTE_MAX_LCORE,
337                                 RTE_CACHE_LINE_SIZE, params->socket_id);
338         }
339
340         /* Default hash function */
341 #if defined(RTE_ARCH_X86)
342         default_hash_func = (rte_hash_function)rte_hash_crc;
343 #elif defined(RTE_ARCH_ARM64)
344         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
345                 default_hash_func = (rte_hash_function)rte_hash_crc;
346 #endif
347         /* Setup hash context */
348         snprintf(h->name, sizeof(h->name), "%s", params->name);
349         h->entries = params->entries;
350         h->key_len = params->key_len;
351         h->key_entry_size = key_entry_size;
352         h->hash_func_init_val = params->hash_func_init_val;
353
354         h->num_buckets = num_buckets;
355         h->bucket_bitmask = h->num_buckets - 1;
356         h->buckets = buckets;
357         h->buckets_ext = buckets_ext;
358         h->free_ext_bkts = r_ext;
359         h->hash_func = (params->hash_func == NULL) ?
360                 default_hash_func : params->hash_func;
361         h->key_store = k;
362         h->free_slots = r;
363         h->hw_trans_mem_support = hw_trans_mem_support;
364         h->use_local_cache = use_local_cache;
365         h->readwrite_concur_support = readwrite_concur_support;
366         h->ext_table_support = ext_table_support;
367         h->writer_takes_lock = writer_takes_lock;
368         h->no_free_on_del = no_free_on_del;
369
370 #if defined(RTE_ARCH_X86)
371         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
372                 h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
373         else
374 #endif
375                 h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
376
377         /* Writer threads need to take the lock when:
378          * 1) RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is enabled OR
379          * 2) RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD is enabled
380          */
381         if (h->writer_takes_lock) {
382                 h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
383                                                 RTE_CACHE_LINE_SIZE);
384                 if (h->readwrite_lock == NULL)
385                         goto err_unlock;
386
387                 rte_rwlock_init(h->readwrite_lock);
388         }
389
390         /* Populate free slots ring. Entry zero is reserved for key misses. */
391         for (i = 1; i < num_key_slots; i++)
392                 rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
393
394         te->data = (void *) h;
395         TAILQ_INSERT_TAIL(hash_list, te, next);
396         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
397
398         return h;
399 err_unlock:
400         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
401 err:
402         rte_ring_free(r);
403         rte_ring_free(r_ext);
404         rte_free(te);
405         rte_free(h);
406         rte_free(buckets);
407         rte_free(buckets_ext);
408         rte_free(k);
409         return NULL;
410 }
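
/*
 * Illustrative usage of rte_hash_create() (example application code, not part
 * of this library; the field values are arbitrary):
 *
 *	struct rte_hash_parameters params = {
 *		.name = "flow_table",
 *		.entries = 1024,
 *		.key_len = sizeof(uint32_t),
 *		.hash_func = rte_jhash,
 *		.hash_func_init_val = 0,
 *		.socket_id = (int)rte_socket_id(),
 *	};
 *	struct rte_hash *handle = rte_hash_create(&params);
 *	if (handle == NULL)
 *		printf("hash creation failed\n");
 */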
411
412 void
413 rte_hash_free(struct rte_hash *h)
414 {
415         struct rte_tailq_entry *te;
416         struct rte_hash_list *hash_list;
417
418         if (h == NULL)
419                 return;
420
421         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
422
423         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
424
425         /* find out tailq entry */
426         TAILQ_FOREACH(te, hash_list, next) {
427                 if (te->data == (void *) h)
428                         break;
429         }
430
431         if (te == NULL) {
432                 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
433                 return;
434         }
435
436         TAILQ_REMOVE(hash_list, te, next);
437
438         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
439
440         if (h->use_local_cache)
441                 rte_free(h->local_free_slots);
442         if (h->writer_takes_lock)
443                 rte_free(h->readwrite_lock);
444         rte_ring_free(h->free_slots);
445         rte_ring_free(h->free_ext_bkts);
446         rte_free(h->key_store);
447         rte_free(h->buckets);
448         rte_free(h->buckets_ext);
449         rte_free(h);
450         rte_free(te);
451 }
452
453 hash_sig_t
454 rte_hash_hash(const struct rte_hash *h, const void *key)
455 {
456         /* Calculate hash value of the key */
457         return h->hash_func(key, h->key_len, h->hash_func_init_val);
458 }
459
460 int32_t
461 rte_hash_count(const struct rte_hash *h)
462 {
463         uint32_t tot_ring_cnt, cached_cnt = 0;
464         uint32_t i, ret;
465
466         if (h == NULL)
467                 return -EINVAL;
468
469         if (h->use_local_cache) {
470                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
471                                         (LCORE_CACHE_SIZE - 1);
472                 for (i = 0; i < RTE_MAX_LCORE; i++)
473                         cached_cnt += h->local_free_slots[i].len;
474
475                 ret = tot_ring_cnt - rte_ring_count(h->free_slots) -
476                                                                 cached_cnt;
477         } else {
478                 tot_ring_cnt = h->entries;
479                 ret = tot_ring_cnt - rte_ring_count(h->free_slots);
480         }
481         return ret;
482 }
483
484 /* Read write locks implemented using rte_rwlock */
485 static inline void
486 __hash_rw_writer_lock(const struct rte_hash *h)
487 {
488         if (h->writer_takes_lock && h->hw_trans_mem_support)
489                 rte_rwlock_write_lock_tm(h->readwrite_lock);
490         else if (h->writer_takes_lock)
491                 rte_rwlock_write_lock(h->readwrite_lock);
492 }
493
494 static inline void
495 __hash_rw_reader_lock(const struct rte_hash *h)
496 {
497         if (h->readwrite_concur_support && h->hw_trans_mem_support)
498                 rte_rwlock_read_lock_tm(h->readwrite_lock);
499         else if (h->readwrite_concur_support)
500                 rte_rwlock_read_lock(h->readwrite_lock);
501 }
502
503 static inline void
504 __hash_rw_writer_unlock(const struct rte_hash *h)
505 {
506         if (h->writer_takes_lock && h->hw_trans_mem_support)
507                 rte_rwlock_write_unlock_tm(h->readwrite_lock);
508         else if (h->writer_takes_lock)
509                 rte_rwlock_write_unlock(h->readwrite_lock);
510 }
511
512 static inline void
513 __hash_rw_reader_unlock(const struct rte_hash *h)
514 {
515         if (h->readwrite_concur_support && h->hw_trans_mem_support)
516                 rte_rwlock_read_unlock_tm(h->readwrite_lock);
517         else if (h->readwrite_concur_support)
518                 rte_rwlock_read_unlock(h->readwrite_lock);
519 }
520
521 void
522 rte_hash_reset(struct rte_hash *h)
523 {
524         void *ptr;
525         uint32_t tot_ring_cnt, i;
526
527         if (h == NULL)
528                 return;
529
530         __hash_rw_writer_lock(h);
531         memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
532         memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
533
534         /* clear the free ring */
535         while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
536                 rte_pause();
537
538         /* clear free extendable bucket ring and memory */
539         if (h->ext_table_support) {
540                 memset(h->buckets_ext, 0, h->num_buckets *
541                                                 sizeof(struct rte_hash_bucket));
542                 while (rte_ring_dequeue(h->free_ext_bkts, &ptr) == 0)
543                         rte_pause();
544         }
545
546         /* Repopulate the free slots ring. Entry zero is reserved for key misses */
547         if (h->use_local_cache)
548                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
549                                         (LCORE_CACHE_SIZE - 1);
550         else
551                 tot_ring_cnt = h->entries;
552
553         for (i = 1; i < tot_ring_cnt + 1; i++)
554                 rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
555
556         /* Repopulate the free ext bkt ring. */
557         if (h->ext_table_support) {
558                 for (i = 1; i <= h->num_buckets; i++)
559                         rte_ring_sp_enqueue(h->free_ext_bkts,
560                                                 (void *)((uintptr_t) i));
561         }
562
563         if (h->use_local_cache) {
564                 /* Reset local caches per lcore */
565                 for (i = 0; i < RTE_MAX_LCORE; i++)
566                         h->local_free_slots[i].len = 0;
567         }
568         __hash_rw_writer_unlock(h);
569 }
570
571 /*
572  * Function called to enqueue back an index into the cache/ring,
573  * as the slot has not been used and can be reused in the
574  * next addition attempt.
575  */
576 static inline void
577 enqueue_slot_back(const struct rte_hash *h,
578                 struct lcore_cache *cached_free_slots,
579                 void *slot_id)
580 {
581         if (h->use_local_cache) {
582                 cached_free_slots->objs[cached_free_slots->len] = slot_id;
583                 cached_free_slots->len++;
584         } else
585                 rte_ring_sp_enqueue(h->free_slots, slot_id);
586 }
587
588 /* Search a key from bucket and update its data */
589 static inline int32_t
590 search_and_update(const struct rte_hash *h, void *data, const void *key,
591         struct rte_hash_bucket *bkt, uint16_t sig)
592 {
593         int i;
594         struct rte_hash_key *k, *keys = h->key_store;
595
596         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
597                 if (bkt->sig_current[i] == sig) {
598                         k = (struct rte_hash_key *) ((char *)keys +
599                                         bkt->key_idx[i] * h->key_entry_size);
600                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
601                                 /* Update data */
602                                 k->pdata = data;
603                                 /*
604                                  * Return index where key is stored,
605                                  * subtracting the first dummy index
606                                  */
607                                 return bkt->key_idx[i] - 1;
608                         }
609                 }
610         }
611         return -1;
612 }
613
614 /* Only tries to insert into one bucket (@prim_bkt) without trying to push
615  * entries around.
616  * Return 1 if a matching existing key is found, 0 on success, and -1 if
617  * there is no empty entry.
618  */
619 static inline int32_t
620 rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
621                 struct rte_hash_bucket *prim_bkt,
622                 struct rte_hash_bucket *sec_bkt,
623                 const struct rte_hash_key *key, void *data,
624                 uint16_t sig, uint32_t new_idx,
625                 int32_t *ret_val)
626 {
627         unsigned int i;
628         struct rte_hash_bucket *cur_bkt;
629         int32_t ret;
630
631         __hash_rw_writer_lock(h);
632         /* Check if key was inserted after last check but before this
633          * protected region in case of inserting duplicated keys.
634          */
635         ret = search_and_update(h, data, key, prim_bkt, sig);
636         if (ret != -1) {
637                 __hash_rw_writer_unlock(h);
638                 *ret_val = ret;
639                 return 1;
640         }
641
642         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
643                 ret = search_and_update(h, data, key, cur_bkt, sig);
644                 if (ret != -1) {
645                         __hash_rw_writer_unlock(h);
646                         *ret_val = ret;
647                         return 1;
648                 }
649         }
650
651         /* Insert new entry if there is room in the primary
652          * bucket.
653          */
654         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
655                 /* Check if slot is available */
656                 if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
657                         prim_bkt->sig_current[i] = sig;
658                         prim_bkt->key_idx[i] = new_idx;
659                         break;
660                 }
661         }
662         __hash_rw_writer_unlock(h);
663
664         if (i != RTE_HASH_BUCKET_ENTRIES)
665                 return 0;
666
667         /* no empty entry */
668         return -1;
669 }
670
671 /* Shift entries along the provided cuckoo path (@leaf and @leaf_slot) and
672  * fill the path head with the new entry (sig, new_idx).
673  * Return 1 if a matching key is found, -1 if the cuckoo path was
674  * invalidated and the insert fails, and 0 on success.
675  */
676 static inline int
677 rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
678                         struct rte_hash_bucket *bkt,
679                         struct rte_hash_bucket *alt_bkt,
680                         const struct rte_hash_key *key, void *data,
681                         struct queue_node *leaf, uint32_t leaf_slot,
682                         uint16_t sig, uint32_t new_idx,
683                         int32_t *ret_val)
684 {
685         uint32_t prev_alt_bkt_idx;
686         struct rte_hash_bucket *cur_bkt;
687         struct queue_node *prev_node, *curr_node = leaf;
688         struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
689         uint32_t prev_slot, curr_slot = leaf_slot;
690         int32_t ret;
691
692         __hash_rw_writer_lock(h);
693
694         /* In case the empty slot was taken before entering the protected region */
695         if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) {
696                 __hash_rw_writer_unlock(h);
697                 return -1;
698         }
699
700         /* Check if key was inserted after last check but before this
701          * protected region.
702          */
703         ret = search_and_update(h, data, key, bkt, sig);
704         if (ret != -1) {
705                 __hash_rw_writer_unlock(h);
706                 *ret_val = ret;
707                 return 1;
708         }
709
710         FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
711                 ret = search_and_update(h, data, key, cur_bkt, sig);
712                 if (ret != -1) {
713                         __hash_rw_writer_unlock(h);
714                         *ret_val = ret;
715                         return 1;
716                 }
717         }
718
719         while (likely(curr_node->prev != NULL)) {
720                 prev_node = curr_node->prev;
721                 prev_bkt = prev_node->bkt;
722                 prev_slot = curr_node->prev_slot;
723
724                 prev_alt_bkt_idx = get_alt_bucket_index(h,
725                                         prev_node->cur_bkt_idx,
726                                         prev_bkt->sig_current[prev_slot]);
727
728                 if (unlikely(&h->buckets[prev_alt_bkt_idx]
729                                 != curr_bkt)) {
730                         /* revert it to empty, otherwise duplicated keys */
731                         curr_bkt->key_idx[curr_slot] = EMPTY_SLOT;
732                         __hash_rw_writer_unlock(h);
733                         return -1;
734                 }
735
736                 /* Need to swap current/alt sig to allow a later
737                  * cuckoo insert to move elements back to their
738                  * primary bucket if available
739                  */
740                 curr_bkt->sig_current[curr_slot] =
741                         prev_bkt->sig_current[prev_slot];
742                 curr_bkt->key_idx[curr_slot] =
743                         prev_bkt->key_idx[prev_slot];
744
745                 curr_slot = prev_slot;
746                 curr_node = prev_node;
747                 curr_bkt = curr_node->bkt;
748         }
749
750         curr_bkt->sig_current[curr_slot] = sig;
751         curr_bkt->key_idx[curr_slot] = new_idx;
752
753         __hash_rw_writer_unlock(h);
754
755         return 0;
756
757 }
758
759 /*
760  * Make space for the new key, using a BFS cuckoo search and the
761  * multi-writer-safe cuckoo move above
762  */
763 static inline int
764 rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
765                         struct rte_hash_bucket *bkt,
766                         struct rte_hash_bucket *sec_bkt,
767                         const struct rte_hash_key *key, void *data,
768                         uint16_t sig, uint32_t bucket_idx,
769                         uint32_t new_idx, int32_t *ret_val)
770 {
771         unsigned int i;
772         struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
773         struct queue_node *tail, *head;
774         struct rte_hash_bucket *curr_bkt, *alt_bkt;
775         uint32_t cur_idx, alt_idx;
776
777         tail = queue;
778         head = queue + 1;
779         tail->bkt = bkt;
780         tail->prev = NULL;
781         tail->prev_slot = -1;
782         tail->cur_bkt_idx = bucket_idx;
783
784         /* Cuckoo bfs Search */
785         while (likely(tail != head && head <
786                                         queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
787                                         RTE_HASH_BUCKET_ENTRIES)) {
788                 curr_bkt = tail->bkt;
789                 cur_idx = tail->cur_bkt_idx;
790                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
791                         if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
792                                 int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
793                                                 bkt, sec_bkt, key, data,
794                                                 tail, i, sig,
795                                                 new_idx, ret_val);
796                                 if (likely(ret != -1))
797                                         return ret;
798                         }
799
800                         /* Enqueue new node and keep prev node info */
801                         alt_idx = get_alt_bucket_index(h, cur_idx,
802                                                 curr_bkt->sig_current[i]);
803                         alt_bkt = &(h->buckets[alt_idx]);
804                         head->bkt = alt_bkt;
805                         head->cur_bkt_idx = alt_idx;
806                         head->prev = tail;
807                         head->prev_slot = i;
808                         head++;
809                 }
810                 tail++;
811         }
812
813         return -ENOSPC;
814 }
815
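/* Insert a key into the table. The flow below is: (1) look for an existing
 * match in the primary and secondary buckets, (2) reserve a key store slot
 * (from the lcore cache or the free-slot ring), (3) try a direct insert into
 * the primary bucket, (4) try cuckoo displacement starting from the primary
 * and then from the secondary bucket, and (5) if extendable buckets are
 * enabled, fall back to the secondary bucket's linked list.
 */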
816 static inline int32_t
817 __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
818                                                 hash_sig_t sig, void *data)
819 {
820         uint16_t short_sig;
821         uint32_t prim_bucket_idx, sec_bucket_idx;
822         struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
823         struct rte_hash_key *new_k, *keys = h->key_store;
824         void *slot_id = NULL;
825         void *ext_bkt_id = NULL;
826         uint32_t new_idx, bkt_id;
827         int ret;
828         unsigned n_slots;
829         unsigned lcore_id;
830         unsigned int i;
831         struct lcore_cache *cached_free_slots = NULL;
832         int32_t ret_val;
833         struct rte_hash_bucket *last;
834
835         short_sig = get_short_sig(sig);
836         prim_bucket_idx = get_prim_bucket_index(h, sig);
837         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
838         prim_bkt = &h->buckets[prim_bucket_idx];
839         sec_bkt = &h->buckets[sec_bucket_idx];
840         rte_prefetch0(prim_bkt);
841         rte_prefetch0(sec_bkt);
842
843         /* Check if key is already inserted in primary location */
844         __hash_rw_writer_lock(h);
845         ret = search_and_update(h, data, key, prim_bkt, short_sig);
846         if (ret != -1) {
847                 __hash_rw_writer_unlock(h);
848                 return ret;
849         }
850
851         /* Check if key is already inserted in secondary location */
852         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
853                 ret = search_and_update(h, data, key, cur_bkt, short_sig);
854                 if (ret != -1) {
855                         __hash_rw_writer_unlock(h);
856                         return ret;
857                 }
858         }
859
860         __hash_rw_writer_unlock(h);
861
862         /* Did not find a match, so get a new slot for storing the new key */
863         if (h->use_local_cache) {
864                 lcore_id = rte_lcore_id();
865                 cached_free_slots = &h->local_free_slots[lcore_id];
866                 /* Try to get a free slot from the local cache */
867                 if (cached_free_slots->len == 0) {
868                         /* Need to get another burst of free slots from global ring */
869                         n_slots = rte_ring_mc_dequeue_burst(h->free_slots,
870                                         cached_free_slots->objs,
871                                         LCORE_CACHE_SIZE, NULL);
872                         if (n_slots == 0) {
873                                 return -ENOSPC;
874                         }
875
876                         cached_free_slots->len += n_slots;
877                 }
878
879                 /* Get a free slot from the local cache */
880                 cached_free_slots->len--;
881                 slot_id = cached_free_slots->objs[cached_free_slots->len];
882         } else {
883                 if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
884                         return -ENOSPC;
885                 }
886         }
887
888         new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
889         new_idx = (uint32_t)((uintptr_t) slot_id);
890         /* Copy key */
891         rte_memcpy(new_k->key, key, h->key_len);
892         new_k->pdata = data;
893
894
895         /* Find an empty slot and insert */
896         ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
897                                         short_sig, new_idx, &ret_val);
898         if (ret == 0)
899                 return new_idx - 1;
900         else if (ret == 1) {
901                 enqueue_slot_back(h, cached_free_slots, slot_id);
902                 return ret_val;
903         }
904
905         /* Primary bucket full, need to make space for new entry */
906         ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
907                                 short_sig, prim_bucket_idx, new_idx, &ret_val);
908         if (ret == 0)
909                 return new_idx - 1;
910         else if (ret == 1) {
911                 enqueue_slot_back(h, cached_free_slots, slot_id);
912                 return ret_val;
913         }
914
915         /* Also search secondary bucket to get better occupancy */
916         ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
917                                 short_sig, sec_bucket_idx, new_idx, &ret_val);
918
919         if (ret == 0)
920                 return new_idx - 1;
921         else if (ret == 1) {
922                 enqueue_slot_back(h, cached_free_slots, slot_id);
923                 return ret_val;
924         }
925
926         /* if ext table not enabled, we failed the insertion */
927         if (!h->ext_table_support) {
928                 enqueue_slot_back(h, cached_free_slots, slot_id);
929                 return ret;
930         }
931
932         /* Now we need to go through the extendable buckets. The write lock
933          * is needed to protect all extendable bucket operations.
934          */
935         __hash_rw_writer_lock(h);
936         /* Check for duplicates again, since the key could have been inserted before the lock was taken */
937         ret = search_and_update(h, data, key, prim_bkt, short_sig);
938         if (ret != -1) {
939                 enqueue_slot_back(h, cached_free_slots, slot_id);
940                 goto failure;
941         }
942
943         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
944                 ret = search_and_update(h, data, key, cur_bkt, short_sig);
945                 if (ret != -1) {
946                         enqueue_slot_back(h, cached_free_slots, slot_id);
947                         goto failure;
948                 }
949         }
950
951         /* Search sec and ext buckets to find an empty entry to insert. */
952         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
953                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
954                         /* Check if slot is available */
955                         if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
956                                 cur_bkt->sig_current[i] = short_sig;
957                                 cur_bkt->key_idx[i] = new_idx;
958                                 __hash_rw_writer_unlock(h);
959                                 return new_idx - 1;
960                         }
961                 }
962         }
963
964         /* Failed to get an empty entry from extendable buckets. Link a new
965          * extendable bucket. We first get a free bucket from ring.
966          */
967         if (rte_ring_sc_dequeue(h->free_ext_bkts, &ext_bkt_id) != 0) {
968                 ret = -ENOSPC;
969                 goto failure;
970         }
971
972         bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
973         /* Use the first location of the new bucket */
974         (h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
975         (h->buckets_ext[bkt_id]).key_idx[0] = new_idx;
976         /* Link the new bucket to sec bucket linked list */
977         last = rte_hash_get_last_bkt(sec_bkt);
978         last->next = &h->buckets_ext[bkt_id];
979         __hash_rw_writer_unlock(h);
980         return new_idx - 1;
981
982 failure:
983         __hash_rw_writer_unlock(h);
984         return ret;
985
986 }
987
988 int32_t
989 rte_hash_add_key_with_hash(const struct rte_hash *h,
990                         const void *key, hash_sig_t sig)
991 {
992         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
993         return __rte_hash_add_key_with_hash(h, key, sig, 0);
994 }
995
996 int32_t
997 rte_hash_add_key(const struct rte_hash *h, const void *key)
998 {
999         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1000         return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0);
1001 }
1002
1003 int
1004 rte_hash_add_key_with_hash_data(const struct rte_hash *h,
1005                         const void *key, hash_sig_t sig, void *data)
1006 {
1007         int ret;
1008
1009         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1010         ret = __rte_hash_add_key_with_hash(h, key, sig, data);
1011         if (ret >= 0)
1012                 return 0;
1013         else
1014                 return ret;
1015 }
1016
1017 int
1018 rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
1019 {
1020         int ret;
1021
1022         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1023
1024         ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data);
1025         if (ret >= 0)
1026                 return 0;
1027         else
1028                 return ret;
1029 }
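
/*
 * Illustrative add/lookup round trip (example application code, not part of
 * this library; "handle" and "my_data" are hypothetical):
 *
 *	uint32_t flow_id = 42;
 *	void *stored = NULL;
 *	if (rte_hash_add_key_data(handle, &flow_id, my_data) < 0)
 *		printf("add failed\n");
 *	if (rte_hash_lookup_data(handle, &flow_id, &stored) >= 0)
 *		assert(stored == my_data);	// pdata set at add time
 */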
1030
1031 /* Search one bucket to find the match key */
1032 static inline int32_t
1033 search_one_bucket(const struct rte_hash *h, const void *key, uint16_t sig,
1034                         void **data, const struct rte_hash_bucket *bkt)
1035 {
1036         int i;
1037         struct rte_hash_key *k, *keys = h->key_store;
1038
1039         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1040                 if (bkt->sig_current[i] == sig &&
1041                                 bkt->key_idx[i] != EMPTY_SLOT) {
1042                         k = (struct rte_hash_key *) ((char *)keys +
1043                                         bkt->key_idx[i] * h->key_entry_size);
1044                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1045                                 if (data != NULL)
1046                                         *data = k->pdata;
1047                                 /*
1048                                  * Return index where key is stored,
1049                                  * subtracting the first dummy index
1050                                  */
1051                                 return bkt->key_idx[i] - 1;
1052                         }
1053                 }
1054         }
1055         return -1;
1056 }
1057
1058 static inline int32_t
1059 __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
1060                                         hash_sig_t sig, void **data)
1061 {
1062         uint32_t prim_bucket_idx, sec_bucket_idx;
1063         struct rte_hash_bucket *bkt, *cur_bkt;
1064         int ret;
1065         uint16_t short_sig;
1066
1067         short_sig = get_short_sig(sig);
1068         prim_bucket_idx = get_prim_bucket_index(h, sig);
1069         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1070         bkt = &h->buckets[prim_bucket_idx];
1071
1072         __hash_rw_reader_lock(h);
1073
1074         /* Check if key is in primary location */
1075         ret = search_one_bucket(h, key, short_sig, data, bkt);
1076         if (ret != -1) {
1077                 __hash_rw_reader_unlock(h);
1078                 return ret;
1079         }
1080         /* Move on to the secondary bucket */
1081         bkt = &h->buckets[sec_bucket_idx];
1082
1083         /* Check if key is in secondary location */
1084         FOR_EACH_BUCKET(cur_bkt, bkt) {
1085                 ret = search_one_bucket(h, key, short_sig, data, cur_bkt);
1086                 if (ret != -1) {
1087                         __hash_rw_reader_unlock(h);
1088                         return ret;
1089                 }
1090         }
1091         __hash_rw_reader_unlock(h);
1092         return -ENOENT;
1093 }
1094
1095 int32_t
1096 rte_hash_lookup_with_hash(const struct rte_hash *h,
1097                         const void *key, hash_sig_t sig)
1098 {
1099         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1100         return __rte_hash_lookup_with_hash(h, key, sig, NULL);
1101 }
1102
1103 int32_t
1104 rte_hash_lookup(const struct rte_hash *h, const void *key)
1105 {
1106         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1107         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL);
1108 }
1109
1110 int
1111 rte_hash_lookup_with_hash_data(const struct rte_hash *h,
1112                         const void *key, hash_sig_t sig, void **data)
1113 {
1114         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1115         return __rte_hash_lookup_with_hash(h, key, sig, data);
1116 }
1117
1118 int
1119 rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
1120 {
1121         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1122         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
1123 }
1124
1125 static inline void
1126 remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
1127 {
1128         unsigned lcore_id, n_slots;
1129         struct lcore_cache *cached_free_slots;
1130
1131         if (h->use_local_cache) {
1132                 lcore_id = rte_lcore_id();
1133                 cached_free_slots = &h->local_free_slots[lcore_id];
1134                 /* Cache full, need to free it. */
1135                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1136                         /* Need to enqueue the free slots in global ring. */
1137                         n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1138                                                 cached_free_slots->objs,
1139                                                 LCORE_CACHE_SIZE, NULL);
1140                         cached_free_slots->len -= n_slots;
1141                 }
1142                 /* Put index of new free slot in cache. */
1143                 cached_free_slots->objs[cached_free_slots->len] =
1144                                 (void *)((uintptr_t)bkt->key_idx[i]);
1145                 cached_free_slots->len++;
1146         } else {
1147                 rte_ring_sp_enqueue(h->free_slots,
1148                                 (void *)((uintptr_t)bkt->key_idx[i]));
1149         }
1150 }
1151
1152 /* Compact the linked list by moving key from last entry in linked list to the
1153  * empty slot.
1154  */
1155 static inline void
1156 __rte_hash_compact_ll(struct rte_hash_bucket *cur_bkt, int pos) {
1157         int i;
1158         struct rte_hash_bucket *last_bkt;
1159
1160         if (!cur_bkt->next)
1161                 return;
1162
1163         last_bkt = rte_hash_get_last_bkt(cur_bkt);
1164
1165         for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) {
1166                 if (last_bkt->key_idx[i] != EMPTY_SLOT) {
1167                         cur_bkt->key_idx[pos] = last_bkt->key_idx[i];
1168                         cur_bkt->sig_current[pos] = last_bkt->sig_current[i];
1169                         last_bkt->sig_current[i] = NULL_SIGNATURE;
1170                         last_bkt->key_idx[i] = EMPTY_SLOT;
1171                         return;
1172                 }
1173         }
1174 }
1175
1176 /* Search one bucket and remove the matched key */
1177 static inline int32_t
1178 search_and_remove(const struct rte_hash *h, const void *key,
1179                         struct rte_hash_bucket *bkt, uint16_t sig, int *pos)
1180 {
1181         struct rte_hash_key *k, *keys = h->key_store;
1182         unsigned int i;
1183         int32_t ret;
1184
1185         /* Check if key is in bucket */
1186         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1187                 if (bkt->sig_current[i] == sig &&
1188                                 bkt->key_idx[i] != EMPTY_SLOT) {
1189                         k = (struct rte_hash_key *) ((char *)keys +
1190                                         bkt->key_idx[i] * h->key_entry_size);
1191                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1192                                 bkt->sig_current[i] = NULL_SIGNATURE;
1193                                 /* Free the key store index if
1194                                  * no_free_on_del is disabled.
1195                                  */
1196                                 if (!h->no_free_on_del)
1197                                         remove_entry(h, bkt, i);
1198
1199                                 /* Return index where key is stored,
1200                                  * subtracting the first dummy index
1201                                  */
1202                                 ret = bkt->key_idx[i] - 1;
1203                                 bkt->key_idx[i] = EMPTY_SLOT;
1204                                 *pos = i;
1205                                 return ret;
1206                         }
1207                 }
1208         }
1209         return -1;
1210 }
1211
1212 static inline int32_t
1213 __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
1214                                                 hash_sig_t sig)
1215 {
1216         uint32_t prim_bucket_idx, sec_bucket_idx;
1217         struct rte_hash_bucket *prim_bkt, *sec_bkt, *prev_bkt, *last_bkt;
1218         struct rte_hash_bucket *cur_bkt;
1219         int pos;
1220         int32_t ret, i;
1221         uint16_t short_sig;
1222
1223         short_sig = get_short_sig(sig);
1224         prim_bucket_idx = get_prim_bucket_index(h, sig);
1225         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1226         prim_bkt = &h->buckets[prim_bucket_idx];
1227
1228         __hash_rw_writer_lock(h);
1229         /* look for key in primary bucket */
1230         ret = search_and_remove(h, key, prim_bkt, short_sig, &pos);
1231         if (ret != -1) {
1232                 __rte_hash_compact_ll(prim_bkt, pos);
1233                 last_bkt = prim_bkt->next;
1234                 prev_bkt = prim_bkt;
1235                 goto return_bkt;
1236         }
1237
1238         /* Look for the key in the secondary bucket chain */
1239         sec_bkt = &h->buckets[sec_bucket_idx];
1240
1241         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1242                 ret = search_and_remove(h, key, cur_bkt, short_sig, &pos);
1243                 if (ret != -1) {
1244                         __rte_hash_compact_ll(cur_bkt, pos);
1245                         last_bkt = sec_bkt->next;
1246                         prev_bkt = sec_bkt;
1247                         goto return_bkt;
1248                 }
1249         }
1250
1251         __hash_rw_writer_unlock(h);
1252         return -ENOENT;
1253
1254 /* Check whether the last bucket in the chain is empty and can be recycled */
1255 return_bkt:
1256         if (!last_bkt) {
1257                 __hash_rw_writer_unlock(h);
1258                 return ret;
1259         }
1260         while (last_bkt->next) {
1261                 prev_bkt = last_bkt;
1262                 last_bkt = last_bkt->next;
1263         }
1264
1265         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1266                 if (last_bkt->key_idx[i] != EMPTY_SLOT)
1267                         break;
1268         }
1269         /* found empty bucket and recycle */
1270         if (i == RTE_HASH_BUCKET_ENTRIES) {
1271                 prev_bkt->next = last_bkt->next = NULL;
1272                 uint32_t index = last_bkt - h->buckets_ext + 1;
1273                 rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
1274         }
1275
1276         __hash_rw_writer_unlock(h);
1277         return ret;
1278 }
1279
1280 int32_t
1281 rte_hash_del_key_with_hash(const struct rte_hash *h,
1282                         const void *key, hash_sig_t sig)
1283 {
1284         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1285         return __rte_hash_del_key_with_hash(h, key, sig);
1286 }
1287
1288 int32_t
1289 rte_hash_del_key(const struct rte_hash *h, const void *key)
1290 {
1291         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1292         return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
1293 }
1294
1295 int
1296 rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
1297                                void **key)
1298 {
1299         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1300
1301         struct rte_hash_key *k, *keys = h->key_store;
1302         k = (struct rte_hash_key *) ((char *) keys + (position + 1) *
1303                                      h->key_entry_size);
1304         *key = k->key;
1305
1306         if (position !=
1307             __rte_hash_lookup_with_hash(h, *key, rte_hash_hash(h, *key),
1308                                         NULL)) {
1309                 return -ENOENT;
1310         }
1311
1312         return 0;
1313 }
1314
1315 int __rte_experimental
1316 rte_hash_free_key_with_position(const struct rte_hash *h,
1317                                 const int32_t position)
1318 {
1319         RETURN_IF_TRUE(((h == NULL) || (position == EMPTY_SLOT)), -EINVAL);
1320
1321         unsigned int lcore_id, n_slots;
1322         struct lcore_cache *cached_free_slots;
1323         const int32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
1324
1325         /* Out of bounds */
1326         if (position >= total_entries)
1327                 return -EINVAL;
1328
1329         if (h->use_local_cache) {
1330                 lcore_id = rte_lcore_id();
1331                 cached_free_slots = &h->local_free_slots[lcore_id];
1332                 /* Cache full, need to free it. */
1333                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1334                         /* Need to enqueue the free slots in global ring. */
1335                         n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1336                                                 cached_free_slots->objs,
1337                                                 LCORE_CACHE_SIZE, NULL);
1338                         cached_free_slots->len -= n_slots;
1339                 }
1340                 /* Put the 1-based key store index of the freed slot in cache. */
1341                 cached_free_slots->objs[cached_free_slots->len] =
1342                                         (void *)((uintptr_t)(position + 1));
1343                 cached_free_slots->len++;
1344         } else {
1345                 rte_ring_sp_enqueue(h->free_slots,
1346                                 (void *)((uintptr_t)(position + 1)));
1347         }
1348
1349         return 0;
1350 }
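
/*
 * Typical use of the function above (illustrative): when the hash was created
 * with RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL, rte_hash_del_key() only clears
 * the bucket entry and returns the key position; once the application knows
 * that no reader still references that key, it returns the slot with
 * rte_hash_free_key_with_position(h, position).
 */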
1351
1352 static inline void
1353 compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
1354                         const struct rte_hash_bucket *prim_bkt,
1355                         const struct rte_hash_bucket *sec_bkt,
1356                         uint16_t sig,
1357                         enum rte_hash_sig_compare_function sig_cmp_fn)
1358 {
1359         unsigned int i;
1360
1361         /* In the match mask, each bucket entry occupies two bits and the first bit of each pair indicates a match */
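        /* With SSE, _mm_cmpeq_epi16 + _mm_movemask_epi8 produce two mask bits
         * per 16-bit signature, so a hit in entry i sets bits 2*i and 2*i+1;
         * callers recover the entry index with __builtin_ctzl(mask) >> 1.
         */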
1362         switch (sig_cmp_fn) {
1363 #ifdef RTE_MACHINE_CPUFLAG_SSE2
1364         case RTE_HASH_COMPARE_SSE:
1365                 /* Compare all signatures in the bucket */
1366                 *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1367                                 _mm_load_si128(
1368                                         (__m128i const *)prim_bkt->sig_current),
1369                                 _mm_set1_epi16(sig)));
1370                 /* Compare all signatures in the bucket */
1371                 *sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1372                                 _mm_load_si128(
1373                                         (__m128i const *)sec_bkt->sig_current),
1374                                 _mm_set1_epi16(sig)));
1375                 break;
1376 #endif
1377         default:
1378                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1379                         *prim_hash_matches |=
1380                                 ((sig == prim_bkt->sig_current[i]) << (i << 1));
1381                         *sec_hash_matches |=
1382                                 ((sig == sec_bkt->sig_current[i]) << (i << 1));
1383                 }
1384         }
1385 }
1386
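/* In the bulk lookup below, keys are prefetched PREFETCH_OFFSET iterations
 * ahead of the hash computation, so the key data is likely to be in cache by
 * the time it is hashed.
 */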
1387 #define PREFETCH_OFFSET 4
1388 static inline void
1389 __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
1390                         int32_t num_keys, int32_t *positions,
1391                         uint64_t *hit_mask, void *data[])
1392 {
1393         uint64_t hits = 0;
1394         int32_t i;
1395         int32_t ret;
1396         uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
1397         uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
1398         uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
1399         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
1400         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1401         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1402         uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1403         uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1404         struct rte_hash_bucket *cur_bkt, *next_bkt;
1405
1406         /* Prefetch first keys */
1407         for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
1408                 rte_prefetch0(keys[i]);
1409
1410         /*
1411          * Prefetch rest of the keys, calculate primary and
1412          * secondary bucket and prefetch them
1413          */
1414         for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
1415                 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
1416
1417                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1418
1419                 sig[i] = get_short_sig(prim_hash[i]);
1420                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1421                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1422
1423                 primary_bkt[i] = &h->buckets[prim_index[i]];
1424                 secondary_bkt[i] = &h->buckets[sec_index[i]];
1425
1426                 rte_prefetch0(primary_bkt[i]);
1427                 rte_prefetch0(secondary_bkt[i]);
1428         }
1429
1430         /* Calculate and prefetch rest of the buckets */
1431         for (; i < num_keys; i++) {
1432                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1433
1434                 sig[i] = get_short_sig(prim_hash[i]);
1435                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1436                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1437
1438                 primary_bkt[i] = &h->buckets[prim_index[i]];
1439                 secondary_bkt[i] = &h->buckets[sec_index[i]];
1440
1441                 rte_prefetch0(primary_bkt[i]);
1442                 rte_prefetch0(secondary_bkt[i]);
1443         }
1444
1445         __hash_rw_reader_lock(h);
1446         /* Compare signatures and prefetch key slot of first hit */
1447         for (i = 0; i < num_keys; i++) {
1448                 compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1449                                 primary_bkt[i], secondary_bkt[i],
1450                                 sig[i], h->sig_cmp_fn);
1451
1452                 if (prim_hitmask[i]) {
1453                         uint32_t first_hit =
1454                                         __builtin_ctzl(prim_hitmask[i]) >> 1;
1455                         uint32_t key_idx = primary_bkt[i]->key_idx[first_hit];
1456                         const struct rte_hash_key *key_slot =
1457                                 (const struct rte_hash_key *)(
1458                                 (const char *)h->key_store +
1459                                 key_idx * h->key_entry_size);
1460                         rte_prefetch0(key_slot);
1461                         continue;
1462                 }
1463
1464                 if (sec_hitmask[i]) {
1465                         uint32_t first_hit =
1466                                         __builtin_ctzl(sec_hitmask[i]) >> 1;
1467                         uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit];
1468                         const struct rte_hash_key *key_slot =
1469                                 (const struct rte_hash_key *)(
1470                                 (const char *)h->key_store +
1471                                 key_idx * h->key_entry_size);
1472                         rte_prefetch0(key_slot);
1473                 }
1474         }
1475
1476         /* Compare keys, checking hits in the primary bucket first */
1477         for (i = 0; i < num_keys; i++) {
1478                 positions[i] = -ENOENT;
1479                 while (prim_hitmask[i]) {
1480                         uint32_t hit_index =
1481                                         __builtin_ctzl(prim_hitmask[i]) >> 1;
1482
1483                         uint32_t key_idx = primary_bkt[i]->key_idx[hit_index];
1484                         const struct rte_hash_key *key_slot =
1485                                 (const struct rte_hash_key *)(
1486                                 (const char *)h->key_store +
1487                                 key_idx * h->key_entry_size);
1488                         /*
1489                          * If the key index is 0 it refers to the dummy slot, so
1490                          * the comparison result below is masked out by !!key_idx
1491                          */
1492                         if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
1493                                 if (data != NULL)
1494                                         data[i] = key_slot->pdata;
1495
1496                                 hits |= 1ULL << i;
1497                                 positions[i] = key_idx - 1;
1498                                 goto next_key;
1499                         }
1500                         prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
1501                 }
1502
1503                 while (sec_hitmask[i]) {
1504                         uint32_t hit_index =
1505                                         __builtin_ctzl(sec_hitmask[i]) >> 1;
1506
1507                         uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index];
1508                         const struct rte_hash_key *key_slot =
1509                                 (const struct rte_hash_key *)(
1510                                 (const char *)h->key_store +
1511                                 key_idx * h->key_entry_size);
1512                         /*
1513                          * If the key index is 0 it refers to the dummy slot, so
1514                          * the comparison result below is masked out by !!key_idx
1515                          */
1516
1517                         if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
1518                                 if (data != NULL)
1519                                         data[i] = key_slot->pdata;
1520
1521                                 hits |= 1ULL << i;
1522                                 positions[i] = key_idx - 1;
1523                                 goto next_key;
1524                         }
1525                         sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
1526                 }
1527
1528 next_key:
1529                 continue;
1530         }
1531
1532         /* All found, no need to go through the ext buckets */
1533         if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
1534                 if (hit_mask != NULL)
1535                         *hit_mask = hits;
1536                 __hash_rw_reader_unlock(h);
1537                 return;
1538         }
1539
1540         /* Need to check the ext buckets for a match */
1541         for (i = 0; i < num_keys; i++) {
1542                 if ((hits & (1ULL << i)) != 0)
1543                         continue;
1544                 next_bkt = secondary_bkt[i]->next;
1545                 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
1546                         if (data != NULL)
1547                                 ret = search_one_bucket(h, keys[i],
1548                                                 sig[i], &data[i], cur_bkt);
1549                         else
1550                                 ret = search_one_bucket(h, keys[i],
1551                                                 sig[i], NULL, cur_bkt);
1552                         if (ret != -1) {
1553                                 positions[i] = ret;
1554                                 hits |= 1ULL << i;
1555                                 break;
1556                         }
1557                 }
1558         }
1559
1560         __hash_rw_reader_unlock(h);
1561
1562         if (hit_mask != NULL)
1563                 *hit_mask = hits;
1564 }
1565
1566 int
1567 rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
1568                       uint32_t num_keys, int32_t *positions)
1569 {
1570         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
1571                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
1572                         (positions == NULL)), -EINVAL);
1573
1574         __rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL);
1575         return 0;
1576 }
1577
1578 int
1579 rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
1580                       uint32_t num_keys, uint64_t *hit_mask, void *data[])
1581 {
1582         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
1583                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
1584                         (hit_mask == NULL)), -EINVAL);
1585
1586         int32_t positions[num_keys];
1587
1588         __rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);
1589
1590         /* Return number of hits */
1591         return __builtin_popcountl(*hit_mask);
1592 }
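/*
 * Illustrative usage sketch (not part of the library): a caller that holds a
 * valid hash handle "h" and an array of key pointers could fetch the stored
 * data pointers for a burst of keys as sketched below. BURST and process()
 * are hypothetical placeholders for the application's burst size and consumer.
 *
 *	const void *keys[BURST];	// BURST <= RTE_HASH_LOOKUP_BULK_MAX
 *	void *data[BURST];
 *	uint64_t hit_mask;
 *	int hits;
 *
 *	hits = rte_hash_lookup_bulk_data(h, keys, BURST, &hit_mask, data);
 *	for (unsigned int k = 0; k < BURST; k++)
 *		if (hit_mask & (1ULL << k))
 *			process(data[k]);
 *
 * rte_hash_lookup_bulk() is the variant that fills per-key positions instead;
 * keys that are not found are reported there as -ENOENT in positions[].
 */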
1593
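/*
 * The iterator value *next used below is a flat slot index: the first
 * num_buckets * RTE_HASH_BUCKET_ENTRIES values walk the main table and, when
 * ext_table_support is enabled, the same number of values after that walk the
 * extendable buckets. Each call returns the key position (key_idx - 1) of the
 * next occupied slot and advances *next, or -ENOENT once the whole table has
 * been traversed.
 */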
1594 int32_t
1595 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
1596 {
1597         uint32_t bucket_idx, idx, position;
1598         struct rte_hash_key *next_key;
1599
1600         RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
1601
1602         const uint32_t total_entries_main = h->num_buckets *
1603                                                         RTE_HASH_BUCKET_ENTRIES;
1604         const uint32_t total_entries = total_entries_main << 1;
1605
1606         /* Past the end of the main table buckets, continue into the ext table */
1607         if (*next >= total_entries_main)
1608                 goto extend_table;
1609
1610         /* Calculate bucket and index of current iterator */
1611         bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
1612         idx = *next % RTE_HASH_BUCKET_ENTRIES;
1613
1614         /* If current position is empty, go to the next one */
1615         while ((position = h->buckets[bucket_idx].key_idx[idx]) == EMPTY_SLOT) {
1616                 (*next)++;
1617                 /* End of table */
1618                 if (*next == total_entries_main)
1619                         goto extend_table;
1620                 bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
1621                 idx = *next % RTE_HASH_BUCKET_ENTRIES;
1622         }
1623
1624         __hash_rw_reader_lock(h);
1625         next_key = (struct rte_hash_key *) ((char *)h->key_store +
1626                                 position * h->key_entry_size);
1627         /* Return key and data */
1628         *key = next_key->key;
1629         *data = next_key->pdata;
1630
1631         __hash_rw_reader_unlock(h);
1632
1633         /* Increment iterator */
1634         (*next)++;
1635
1636         return position - 1;
1637
1638 /* Begin to iterate extendable buckets */
1639 extend_table:
1640         /* Out of bounds of all entries, or the ext table feature is not enabled */
1641         if (*next >= total_entries || !h->ext_table_support)
1642                 return -ENOENT;
1643
1644         bucket_idx = (*next - total_entries_main) / RTE_HASH_BUCKET_ENTRIES;
1645         idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
1646
1647         while ((position = h->buckets_ext[bucket_idx].key_idx[idx]) == EMPTY_SLOT) {
1648                 (*next)++;
1649                 if (*next == total_entries)
1650                         return -ENOENT;
1651                 bucket_idx = (*next - total_entries_main) /
1652                                                 RTE_HASH_BUCKET_ENTRIES;
1653                 idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
1654         }
1655         __hash_rw_reader_lock(h);
1656         next_key = (struct rte_hash_key *) ((char *)h->key_store +
1657                                 position * h->key_entry_size);
1658         /* Return key and data */
1659         *key = next_key->key;
1660         *data = next_key->pdata;
1661
1662         __hash_rw_reader_unlock(h);
1663
1664         /* Increment iterator */
1665         (*next)++;
1666         return position - 1;
1667 }