hash: add lock-free r/w concurrency
dpdk.git: lib/librte_hash/rte_cuckoo_hash.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  * Copyright(c) 2018 Arm Limited
4  */
5
6 #include <string.h>
7 #include <stdint.h>
8 #include <errno.h>
9 #include <stdio.h>
10 #include <stdarg.h>
11 #include <sys/queue.h>
12
13 #include <rte_common.h>
14 #include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
15 #include <rte_log.h>
16 #include <rte_memcpy.h>
17 #include <rte_prefetch.h>
18 #include <rte_branch_prediction.h>
19 #include <rte_malloc.h>
20 #include <rte_eal.h>
21 #include <rte_eal_memconfig.h>
22 #include <rte_per_lcore.h>
23 #include <rte_errno.h>
24 #include <rte_string_fns.h>
25 #include <rte_cpuflags.h>
26 #include <rte_rwlock.h>
27 #include <rte_spinlock.h>
28 #include <rte_ring.h>
29 #include <rte_compat.h>
30 #include <rte_pause.h>
31
32 #include "rte_hash.h"
33 #include "rte_cuckoo_hash.h"
34
35 #define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET)                            \
36         for (CURRENT_BKT = START_BUCKET;                                      \
37                 CURRENT_BKT != NULL;                                          \
38                 CURRENT_BKT = CURRENT_BKT->next)
39
40 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
41
42 static struct rte_tailq_elem rte_hash_tailq = {
43         .name = "RTE_HASH",
44 };
45 EAL_REGISTER_TAILQ(rte_hash_tailq)
46
47 struct rte_hash *
48 rte_hash_find_existing(const char *name)
49 {
50         struct rte_hash *h = NULL;
51         struct rte_tailq_entry *te;
52         struct rte_hash_list *hash_list;
53
54         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
55
56         rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
57         TAILQ_FOREACH(te, hash_list, next) {
58                 h = (struct rte_hash *) te->data;
59                 if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
60                         break;
61         }
62         rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
63
64         if (te == NULL) {
65                 rte_errno = ENOENT;
66                 return NULL;
67         }
68         return h;
69 }
70
71 static inline struct rte_hash_bucket *
72 rte_hash_get_last_bkt(struct rte_hash_bucket *lst_bkt)
73 {
74         while (lst_bkt->next != NULL)
75                 lst_bkt = lst_bkt->next;
76         return lst_bkt;
77 }
78
79 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
80 {
81         h->cmp_jump_table_idx = KEY_CUSTOM;
82         h->rte_hash_custom_cmp_eq = func;
83 }
84
85 static inline int
86 rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
87 {
88         if (h->cmp_jump_table_idx == KEY_CUSTOM)
89                 return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len);
90         else
91                 return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
92 }
93
94 /*
95  * We use the higher 16 bits of the hash as the signature value stored in the table.
96  * We use the lower bits for the primary bucket
97  * location. Then we XOR the primary bucket location and the signature
98  * to get the secondary bucket location. This is the same scheme as
99  * proposed in Bin Fan, et al's paper
100  * "MemC3: Compact and Concurrent MemCache with Dumber Caching and
101  * Smarter Hashing". The benefit of using
102  * XOR is that one can derive the alternative bucket location
103  * using only the current bucket location and the signature.
104  */
105 static inline uint16_t
106 get_short_sig(const hash_sig_t hash)
107 {
108         return hash >> 16;
109 }
110
111 static inline uint32_t
112 get_prim_bucket_index(const struct rte_hash *h, const hash_sig_t hash)
113 {
114         return hash & h->bucket_bitmask;
115 }
116
117 static inline uint32_t
118 get_alt_bucket_index(const struct rte_hash *h,
119                         uint32_t cur_bkt_idx, uint16_t sig)
120 {
121         return (cur_bkt_idx ^ sig) & h->bucket_bitmask;
122 }
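
/*
 * Illustrative example (not part of the library code): with the XOR scheme
 * above, applying get_alt_bucket_index() to a bucket index twice yields the
 * original index again. For example, assuming 1024 buckets
 * (bucket_bitmask = 0x3ff) and hash = 0xabcd1234:
 *
 *   sig  = get_short_sig(0xabcd1234)             = 0xabcd
 *   prim = get_prim_bucket_index(h, 0xabcd1234)  = 0x1234 & 0x3ff = 0x234
 *   sec  = get_alt_bucket_index(h, 0x234, sig)   = (0x234 ^ 0xabcd) & 0x3ff
 *                                                = 0x1f9
 *   get_alt_bucket_index(h, 0x1f9, sig)          = (0x1f9 ^ 0xabcd) & 0x3ff
 *                                                = 0x234  (back to prim)
 *
 * This is why cuckoo displacement only needs the current bucket index and
 * the stored signature to find an entry's other candidate bucket.
 */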
123
124 struct rte_hash *
125 rte_hash_create(const struct rte_hash_parameters *params)
126 {
127         struct rte_hash *h = NULL;
128         struct rte_tailq_entry *te = NULL;
129         struct rte_hash_list *hash_list;
130         struct rte_ring *r = NULL;
131         struct rte_ring *r_ext = NULL;
132         char hash_name[RTE_HASH_NAMESIZE];
133         void *k = NULL;
134         void *buckets = NULL;
135         void *buckets_ext = NULL;
136         char ring_name[RTE_RING_NAMESIZE];
137         char ext_ring_name[RTE_RING_NAMESIZE];
138         unsigned num_key_slots;
139         unsigned i;
140         unsigned int hw_trans_mem_support = 0, use_local_cache = 0;
141         unsigned int ext_table_support = 0;
142         unsigned int readwrite_concur_support = 0;
143         unsigned int writer_takes_lock = 0;
144         unsigned int no_free_on_del = 0;
145         uint32_t *tbl_chng_cnt = NULL;
146         unsigned int readwrite_concur_lf_support = 0;
147
148         rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
149
150         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
151
152         if (params == NULL) {
153                 RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
154                 return NULL;
155         }
156
157         /* Check for valid parameters */
158         if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
159                         (params->entries < RTE_HASH_BUCKET_ENTRIES) ||
160                         (params->key_len == 0)) {
161                 rte_errno = EINVAL;
162                 RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
163                 return NULL;
164         }
165
166         /* Validate correct usage of extra options */
167         if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) &&
168             (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)) {
169                 rte_errno = EINVAL;
170                 RTE_LOG(ERR, HASH, "rte_hash_create: choose rw concurrency or "
171                         "rw concurrency lock free\n");
172                 return NULL;
173         }
174
175         if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&
176             (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)) {
177                 rte_errno = EINVAL;
178                 RTE_LOG(ERR, HASH, "rte_hash_create: extendable bucket "
179                         "feature not supported with rw concurrency "
180                         "lock free\n");
181                 return NULL;
182         }
183
184         /* Check the extra flags field for additional options. */
185         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
186                 hw_trans_mem_support = 1;
187
188         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
189                 use_local_cache = 1;
190                 writer_takes_lock = 1;
191         }
192
193         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
194                 readwrite_concur_support = 1;
195                 writer_takes_lock = 1;
196         }
197
198         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
199                 ext_table_support = 1;
200
201         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)
202                 no_free_on_del = 1;
203
204         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {
205                 readwrite_concur_lf_support = 1;
206                 /* Enable not freeing internal memory/index on delete */
207                 no_free_on_del = 1;
208         }
209
210         /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
211         if (use_local_cache)
212                 /*
213                  * Increase number of slots by total number of indices
214                  * that can be stored in the lcore caches
215                  * except for the first cache
216                  */
217                 num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
218                                         (LCORE_CACHE_SIZE - 1) + 1;
219         else
220                 num_key_slots = params->entries + 1;
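	/*
	 * Illustrative example (assumed build-time values, not taken from this
	 * file): with entries = 1024, RTE_MAX_LCORE = 128 and
	 * LCORE_CACHE_SIZE = 64, the cached case reserves
	 *   num_key_slots = 1024 + 127 * 63 + 1 = 9026
	 * slots, so that indices parked in the per-lcore free-slot caches do
	 * not reduce the number of keys that can actually be stored, plus the
	 * dummy slot 0.
	 */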
221
222         snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
223         /* Create ring (Dummy slot index is not enqueued) */
224         r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
225                         params->socket_id, 0);
226         if (r == NULL) {
227                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
228                 goto err;
229         }
230
231         const uint32_t num_buckets = rte_align32pow2(params->entries) /
232                                                 RTE_HASH_BUCKET_ENTRIES;
233
234         /* Create ring for extendable buckets. */
235         if (ext_table_support) {
236                 snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
237                                                                 params->name);
238                 r_ext = rte_ring_create(ext_ring_name,
239                                 rte_align32pow2(num_buckets + 1),
240                                 params->socket_id, 0);
241
242                 if (r_ext == NULL) {
243                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
244                                                                 "failed\n");
245                         goto err;
246                 }
247         }
248
249         snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
250
251         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
252
253         /* Guarantee there is no existing entry with the same name: this is
254          * normally already checked by the ring creation above */
255         TAILQ_FOREACH(te, hash_list, next) {
256                 h = (struct rte_hash *) te->data;
257                 if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
258                         break;
259         }
260         h = NULL;
261         if (te != NULL) {
262                 rte_errno = EEXIST;
263                 te = NULL;
264                 goto err_unlock;
265         }
266
267         te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
268         if (te == NULL) {
269                 RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
270                 goto err_unlock;
271         }
272
273         h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
274                                         RTE_CACHE_LINE_SIZE, params->socket_id);
275
276         if (h == NULL) {
277                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
278                 goto err_unlock;
279         }
280
281         buckets = rte_zmalloc_socket(NULL,
282                                 num_buckets * sizeof(struct rte_hash_bucket),
283                                 RTE_CACHE_LINE_SIZE, params->socket_id);
284
285         if (buckets == NULL) {
286                 RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
287                 goto err_unlock;
288         }
289
290         /* Allocate same number of extendable buckets */
291         if (ext_table_support) {
292                 buckets_ext = rte_zmalloc_socket(NULL,
293                                 num_buckets * sizeof(struct rte_hash_bucket),
294                                 RTE_CACHE_LINE_SIZE, params->socket_id);
295                 if (buckets_ext == NULL) {
296                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
297                                                         "failed\n");
298                         goto err_unlock;
299                 }
300                 /* Populate ext bkt ring. We reserve 0, as with the
301                  * key-data slot, in case we later want to use the
302                  * bucket index for the linked list, where 0 would mean
303                  * NULL for the next bucket
304                  */
305                 for (i = 1; i <= num_buckets; i++)
306                         rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
307         }
308
309         const uint32_t key_entry_size =
310                 RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,
311                           KEY_ALIGNMENT);
312         const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
313
314         k = rte_zmalloc_socket(NULL, key_tbl_size,
315                         RTE_CACHE_LINE_SIZE, params->socket_id);
316
317         if (k == NULL) {
318                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
319                 goto err_unlock;
320         }
321
322         tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t),
323                         RTE_CACHE_LINE_SIZE, params->socket_id);
324
325         if (tbl_chng_cnt == NULL) {
326                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
327                 goto err_unlock;
328         }
329
330 /*
331  * If x86 or ARM64 architecture is used, select the appropriate compare
332  * function, which may use architecture-specific intrinsics, otherwise use memcmp
333  */
334 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
335         /* Select function to compare keys */
336         switch (params->key_len) {
337         case 16:
338                 h->cmp_jump_table_idx = KEY_16_BYTES;
339                 break;
340         case 32:
341                 h->cmp_jump_table_idx = KEY_32_BYTES;
342                 break;
343         case 48:
344                 h->cmp_jump_table_idx = KEY_48_BYTES;
345                 break;
346         case 64:
347                 h->cmp_jump_table_idx = KEY_64_BYTES;
348                 break;
349         case 80:
350                 h->cmp_jump_table_idx = KEY_80_BYTES;
351                 break;
352         case 96:
353                 h->cmp_jump_table_idx = KEY_96_BYTES;
354                 break;
355         case 112:
356                 h->cmp_jump_table_idx = KEY_112_BYTES;
357                 break;
358         case 128:
359                 h->cmp_jump_table_idx = KEY_128_BYTES;
360                 break;
361         default:
362                 /* If key is not multiple of 16, use generic memcmp */
363                 h->cmp_jump_table_idx = KEY_OTHER_BYTES;
364         }
365 #else
366         h->cmp_jump_table_idx = KEY_OTHER_BYTES;
367 #endif
368
369         if (use_local_cache) {
370                 h->local_free_slots = rte_zmalloc_socket(NULL,
371                                 sizeof(struct lcore_cache) * RTE_MAX_LCORE,
372                                 RTE_CACHE_LINE_SIZE, params->socket_id);
373         }
374
375         /* Default hash function */
376 #if defined(RTE_ARCH_X86)
377         default_hash_func = (rte_hash_function)rte_hash_crc;
378 #elif defined(RTE_ARCH_ARM64)
379         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
380                 default_hash_func = (rte_hash_function)rte_hash_crc;
381 #endif
382         /* Setup hash context */
383         snprintf(h->name, sizeof(h->name), "%s", params->name);
384         h->entries = params->entries;
385         h->key_len = params->key_len;
386         h->key_entry_size = key_entry_size;
387         h->hash_func_init_val = params->hash_func_init_val;
388
389         h->num_buckets = num_buckets;
390         h->bucket_bitmask = h->num_buckets - 1;
391         h->buckets = buckets;
392         h->buckets_ext = buckets_ext;
393         h->free_ext_bkts = r_ext;
394         h->hash_func = (params->hash_func == NULL) ?
395                 default_hash_func : params->hash_func;
396         h->key_store = k;
397         h->free_slots = r;
398         h->tbl_chng_cnt = tbl_chng_cnt;
399         *h->tbl_chng_cnt = 0;
400         h->hw_trans_mem_support = hw_trans_mem_support;
401         h->use_local_cache = use_local_cache;
402         h->readwrite_concur_support = readwrite_concur_support;
403         h->ext_table_support = ext_table_support;
404         h->writer_takes_lock = writer_takes_lock;
405         h->no_free_on_del = no_free_on_del;
406         h->readwrite_concur_lf_support = readwrite_concur_lf_support;
407
408 #if defined(RTE_ARCH_X86)
409         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
410                 h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
411         else
412 #endif
413                 h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
414
415         /* Writer threads need to take the lock when:
416          * 1) RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is enabled OR
417          * 2) RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD is enabled
418          */
419         if (h->writer_takes_lock) {
420                 h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
421                                                 RTE_CACHE_LINE_SIZE);
422                 if (h->readwrite_lock == NULL)
423                         goto err_unlock;
424
425                 rte_rwlock_init(h->readwrite_lock);
426         }
427
428         /* Populate free slots ring. Entry zero is reserved for key misses. */
429         for (i = 1; i < num_key_slots; i++)
430                 rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
431
432         te->data = (void *) h;
433         TAILQ_INSERT_TAIL(hash_list, te, next);
434         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
435
436         return h;
437 err_unlock:
438         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
439 err:
440         rte_ring_free(r);
441         rte_ring_free(r_ext);
442         rte_free(te);
443         rte_free(h);
444         rte_free(buckets);
445         rte_free(buckets_ext);
446         rte_free(k);
447         rte_free(tbl_chng_cnt);
448         return NULL;
449 }
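
/*
 * Illustrative usage sketch (not part of this file): creating a table with
 * the lock-free read/write concurrency introduced by this change. The table
 * name, sizes and error handling are hypothetical.
 *
 *   struct rte_hash_parameters params = {
 *           .name = "flow_table",
 *           .entries = 1024,
 *           .key_len = sizeof(uint32_t),
 *           .hash_func = NULL,           // fall back to default_hash_func
 *           .hash_func_init_val = 0,
 *           .socket_id = rte_socket_id(),
 *           .extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF |
 *                         RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD,
 *   };
 *   struct rte_hash *h = rte_hash_create(&params);
 *   if (h == NULL)
 *           rte_panic("cannot create hash: %d\n", rte_errno);
 *
 * Note that, as validated above, RW_CONCURRENCY_LF cannot be combined with
 * RW_CONCURRENCY or with the extendable bucket table.
 */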
450
451 void
452 rte_hash_free(struct rte_hash *h)
453 {
454         struct rte_tailq_entry *te;
455         struct rte_hash_list *hash_list;
456
457         if (h == NULL)
458                 return;
459
460         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
461
462         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
463
464         /* find the tailq entry */
465         TAILQ_FOREACH(te, hash_list, next) {
466                 if (te->data == (void *) h)
467                         break;
468         }
469
470         if (te == NULL) {
471                 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
472                 return;
473         }
474
475         TAILQ_REMOVE(hash_list, te, next);
476
477         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
478
479         if (h->use_local_cache)
480                 rte_free(h->local_free_slots);
481         if (h->writer_takes_lock)
482                 rte_free(h->readwrite_lock);
483         rte_ring_free(h->free_slots);
484         rte_ring_free(h->free_ext_bkts);
485         rte_free(h->key_store);
486         rte_free(h->buckets);
487         rte_free(h->buckets_ext);
488         rte_free(h->tbl_chng_cnt);
489         rte_free(h);
490         rte_free(te);
491 }
492
493 hash_sig_t
494 rte_hash_hash(const struct rte_hash *h, const void *key)
495 {
496         /* calc hash result by key */
497         return h->hash_func(key, h->key_len, h->hash_func_init_val);
498 }
499
500 int32_t
501 rte_hash_count(const struct rte_hash *h)
502 {
503         uint32_t tot_ring_cnt, cached_cnt = 0;
504         uint32_t i, ret;
505
506         if (h == NULL)
507                 return -EINVAL;
508
509         if (h->use_local_cache) {
510                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
511                                         (LCORE_CACHE_SIZE - 1);
512                 for (i = 0; i < RTE_MAX_LCORE; i++)
513                         cached_cnt += h->local_free_slots[i].len;
514
515                 ret = tot_ring_cnt - rte_ring_count(h->free_slots) -
516                                                                 cached_cnt;
517         } else {
518                 tot_ring_cnt = h->entries;
519                 ret = tot_ring_cnt - rte_ring_count(h->free_slots);
520         }
521         return ret;
522 }
523
524 /* Read write locks implemented using rte_rwlock */
525 static inline void
526 __hash_rw_writer_lock(const struct rte_hash *h)
527 {
528         if (h->writer_takes_lock && h->hw_trans_mem_support)
529                 rte_rwlock_write_lock_tm(h->readwrite_lock);
530         else if (h->writer_takes_lock)
531                 rte_rwlock_write_lock(h->readwrite_lock);
532 }
533
534 static inline void
535 __hash_rw_reader_lock(const struct rte_hash *h)
536 {
537         if (h->readwrite_concur_support && h->hw_trans_mem_support)
538                 rte_rwlock_read_lock_tm(h->readwrite_lock);
539         else if (h->readwrite_concur_support)
540                 rte_rwlock_read_lock(h->readwrite_lock);
541 }
542
543 static inline void
544 __hash_rw_writer_unlock(const struct rte_hash *h)
545 {
546         if (h->writer_takes_lock && h->hw_trans_mem_support)
547                 rte_rwlock_write_unlock_tm(h->readwrite_lock);
548         else if (h->writer_takes_lock)
549                 rte_rwlock_write_unlock(h->readwrite_lock);
550 }
551
552 static inline void
553 __hash_rw_reader_unlock(const struct rte_hash *h)
554 {
555         if (h->readwrite_concur_support && h->hw_trans_mem_support)
556                 rte_rwlock_read_unlock_tm(h->readwrite_lock);
557         else if (h->readwrite_concur_support)
558                 rte_rwlock_read_unlock(h->readwrite_lock);
559 }
560
561 void
562 rte_hash_reset(struct rte_hash *h)
563 {
564         void *ptr;
565         uint32_t tot_ring_cnt, i;
566
567         if (h == NULL)
568                 return;
569
570         __hash_rw_writer_lock(h);
571         memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
572         memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
573         *h->tbl_chng_cnt = 0;
574
575         /* clear the free ring */
576         while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
577                 rte_pause();
578
579         /* clear free extendable bucket ring and memory */
580         if (h->ext_table_support) {
581                 memset(h->buckets_ext, 0, h->num_buckets *
582                                                 sizeof(struct rte_hash_bucket));
583                 while (rte_ring_dequeue(h->free_ext_bkts, &ptr) == 0)
584                         rte_pause();
585         }
586
587         /* Repopulate the free slots ring. Entry zero is reserved for key misses */
588         if (h->use_local_cache)
589                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
590                                         (LCORE_CACHE_SIZE - 1);
591         else
592                 tot_ring_cnt = h->entries;
593
594         for (i = 1; i < tot_ring_cnt + 1; i++)
595                 rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
596
597         /* Repopulate the free ext bkt ring. */
598         if (h->ext_table_support) {
599                 for (i = 1; i <= h->num_buckets; i++)
600                         rte_ring_sp_enqueue(h->free_ext_bkts,
601                                                 (void *)((uintptr_t) i));
602         }
603
604         if (h->use_local_cache) {
605                 /* Reset local caches per lcore */
606                 for (i = 0; i < RTE_MAX_LCORE; i++)
607                         h->local_free_slots[i].len = 0;
608         }
609         __hash_rw_writer_unlock(h);
610 }
611
612 /*
613  * Function called to enqueue back an index into the cache/ring,
614  * as the slot has not been used and can be reused in the
615  * next addition attempt.
616  */
617 static inline void
618 enqueue_slot_back(const struct rte_hash *h,
619                 struct lcore_cache *cached_free_slots,
620                 void *slot_id)
621 {
622         if (h->use_local_cache) {
623                 cached_free_slots->objs[cached_free_slots->len] = slot_id;
624                 cached_free_slots->len++;
625         } else
626                 rte_ring_sp_enqueue(h->free_slots, slot_id);
627 }
628
629 /* Search for a key in the bucket and update its data.
630  * The writer must hold the lock when calling this.
631  */
632 static inline int32_t
633 search_and_update(const struct rte_hash *h, void *data, const void *key,
634         struct rte_hash_bucket *bkt, uint16_t sig)
635 {
636         int i;
637         struct rte_hash_key *k, *keys = h->key_store;
638
639         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
640                 if (bkt->sig_current[i] == sig) {
641                         k = (struct rte_hash_key *) ((char *)keys +
642                                         bkt->key_idx[i] * h->key_entry_size);
643                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
644                                 /* 'pdata' acts as the synchronization point
645                                  * when an existing hash entry is updated.
646                                  * Key is not updated in this case.
647                                  */
648                                 __atomic_store_n(&k->pdata,
649                                         data,
650                                         __ATOMIC_RELEASE);
651                                 /*
652                                  * Return index where key is stored,
653                                  * subtracting the first dummy index
654                                  */
655                                 return bkt->key_idx[i] - 1;
656                         }
657                 }
658         }
659         return -1;
660 }
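
/*
 * Illustrative note (not part of the library): the release store to 'pdata'
 * above pairs with the acquire load of 'pdata' in search_one_bucket(), so a
 * lock-free reader that observes the new data pointer also observes all of
 * the writer's earlier stores to the pointed-to object. A minimal sketch of
 * the same publication pattern, with hypothetical names:
 *
 *   // writer
 *   obj->field = 42;                                        // fill payload
 *   __atomic_store_n(&slot->pdata, obj, __ATOMIC_RELEASE);  // publish
 *
 *   // reader
 *   struct obj *o = __atomic_load_n(&slot->pdata, __ATOMIC_ACQUIRE);
 *   if (o != NULL)
 *           use(o->field);                                  // sees 42
 */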
661
662 /* Only tries to insert at one bucket (@prim_bkt) without trying to push
663  * buckets around.
664  * Return 1 if an existing key matches, 0 on success, or -1 if there is no
665  * empty entry.
666  */
667 static inline int32_t
668 rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
669                 struct rte_hash_bucket *prim_bkt,
670                 struct rte_hash_bucket *sec_bkt,
671                 const struct rte_hash_key *key, void *data,
672                 uint16_t sig, uint32_t new_idx,
673                 int32_t *ret_val)
674 {
675         unsigned int i;
676         struct rte_hash_bucket *cur_bkt;
677         int32_t ret;
678
679         __hash_rw_writer_lock(h);
680         /* Check if the key was inserted after the last check but before
681          * this protected region, to avoid inserting duplicate keys.
682          */
683         ret = search_and_update(h, data, key, prim_bkt, sig);
684         if (ret != -1) {
685                 __hash_rw_writer_unlock(h);
686                 *ret_val = ret;
687                 return 1;
688         }
689
690         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
691                 ret = search_and_update(h, data, key, cur_bkt, sig);
692                 if (ret != -1) {
693                         __hash_rw_writer_unlock(h);
694                         *ret_val = ret;
695                         return 1;
696                 }
697         }
698
699         /* Insert new entry if there is room in the primary
700          * bucket.
701          */
702         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
703                 /* Check if slot is available */
704                 if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
705                         prim_bkt->sig_current[i] = sig;
706                         /* Key can be of arbitrary length, so it is
707                          * not possible to store it atomically.
708                          * Hence the new key element's memory stores
709                          * (key as well as data) should be complete
710                          * before it is referenced.
711                          */
712                         __atomic_store_n(&prim_bkt->key_idx[i],
713                                          new_idx,
714                                          __ATOMIC_RELEASE);
715                         break;
716                 }
717         }
718         __hash_rw_writer_unlock(h);
719
720         if (i != RTE_HASH_BUCKET_ENTRIES)
721                 return 0;
722
723         /* no empty entry */
724         return -1;
725 }
726
727 /* Shift buckets along the provided cuckoo path (@leaf and @leaf_slot) and fill
728  * the path head with the new entry (sig, alt_hash, new_idx).
729  * Return 1 if a matching key is found, -1 if the cuckoo path is invalidated
730  * and the insert fails, or 0 on success.
731  */
732 static inline int
733 rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
734                         struct rte_hash_bucket *bkt,
735                         struct rte_hash_bucket *alt_bkt,
736                         const struct rte_hash_key *key, void *data,
737                         struct queue_node *leaf, uint32_t leaf_slot,
738                         uint16_t sig, uint32_t new_idx,
739                         int32_t *ret_val)
740 {
741         uint32_t prev_alt_bkt_idx;
742         struct rte_hash_bucket *cur_bkt;
743         struct queue_node *prev_node, *curr_node = leaf;
744         struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
745         uint32_t prev_slot, curr_slot = leaf_slot;
746         int32_t ret;
747
748         __hash_rw_writer_lock(h);
749
750         /* In case the empty slot was taken before entering the protected region */
751         if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) {
752                 __hash_rw_writer_unlock(h);
753                 return -1;
754         }
755
756         /* Check if key was inserted after last check but before this
757          * protected region.
758          */
759         ret = search_and_update(h, data, key, bkt, sig);
760         if (ret != -1) {
761                 __hash_rw_writer_unlock(h);
762                 *ret_val = ret;
763                 return 1;
764         }
765
766         FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
767                 ret = search_and_update(h, data, key, cur_bkt, sig);
768                 if (ret != -1) {
769                         __hash_rw_writer_unlock(h);
770                         *ret_val = ret;
771                         return 1;
772                 }
773         }
774
775         while (likely(curr_node->prev != NULL)) {
776                 prev_node = curr_node->prev;
777                 prev_bkt = prev_node->bkt;
778                 prev_slot = curr_node->prev_slot;
779
780                 prev_alt_bkt_idx = get_alt_bucket_index(h,
781                                         prev_node->cur_bkt_idx,
782                                         prev_bkt->sig_current[prev_slot]);
783
784                 if (unlikely(&h->buckets[prev_alt_bkt_idx]
785                                 != curr_bkt)) {
786                         /* revert it to empty, otherwise duplicated keys */
787                         __atomic_store_n(&curr_bkt->key_idx[curr_slot],
788                                 EMPTY_SLOT,
789                                 __ATOMIC_RELEASE);
790                         __hash_rw_writer_unlock(h);
791                         return -1;
792                 }
793
794                 if (h->readwrite_concur_lf_support) {
795                         /* Inform the previous move. The current move need
796                          * not be informed now as the current bucket entry
797                          * is present in both primary and secondary.
798                          * Since there is one writer, load acquires on
799                          * tbl_chng_cnt are not required.
800                          */
801                         __atomic_store_n(h->tbl_chng_cnt,
802                                          *h->tbl_chng_cnt + 1,
803                                          __ATOMIC_RELEASE);
804                         /* The stores to sig_current and key_idx should not
805                          * move above the store to tbl_chng_cnt.
806                          */
807                         __atomic_thread_fence(__ATOMIC_RELEASE);
808                 }
809
810                 /* Need to swap current/alt sig to allow later
811                  * Cuckoo insert to move elements back to its
812                  * primary bucket if available
813                  */
814                 curr_bkt->sig_current[curr_slot] =
815                         prev_bkt->sig_current[prev_slot];
816                 /* Release the updated bucket entry */
817                 __atomic_store_n(&curr_bkt->key_idx[curr_slot],
818                         prev_bkt->key_idx[prev_slot],
819                         __ATOMIC_RELEASE);
820
821                 curr_slot = prev_slot;
822                 curr_node = prev_node;
823                 curr_bkt = curr_node->bkt;
824         }
825
826         if (h->readwrite_concur_lf_support) {
827                 /* Inform the previous move. The current move need
828                  * not be informed now as the current bucket entry
829                  * is present in both primary and secondary.
830                  * Since there is one writer, load acquires on
831                  * tbl_chng_cnt are not required.
832                  */
833                 __atomic_store_n(h->tbl_chng_cnt,
834                                  *h->tbl_chng_cnt + 1,
835                                  __ATOMIC_RELEASE);
836                 /* The stores to sig_current and key_idx should not
837                  * move above the store to tbl_chng_cnt.
838                  */
839                 __atomic_thread_fence(__ATOMIC_RELEASE);
840         }
841
842         curr_bkt->sig_current[curr_slot] = sig;
843         /* Release the new bucket entry */
844         __atomic_store_n(&curr_bkt->key_idx[curr_slot],
845                          new_idx,
846                          __ATOMIC_RELEASE);
847
848         __hash_rw_writer_unlock(h);
849
850         return 0;
851
852 }
853
854 /*
855  * Make space for a new key, using a BFS cuckoo search and multi-writer
856  * safe cuckoo displacement.
857  */
858 static inline int
859 rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
860                         struct rte_hash_bucket *bkt,
861                         struct rte_hash_bucket *sec_bkt,
862                         const struct rte_hash_key *key, void *data,
863                         uint16_t sig, uint32_t bucket_idx,
864                         uint32_t new_idx, int32_t *ret_val)
865 {
866         unsigned int i;
867         struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
868         struct queue_node *tail, *head;
869         struct rte_hash_bucket *curr_bkt, *alt_bkt;
870         uint32_t cur_idx, alt_idx;
871
872         tail = queue;
873         head = queue + 1;
874         tail->bkt = bkt;
875         tail->prev = NULL;
876         tail->prev_slot = -1;
877         tail->cur_bkt_idx = bucket_idx;
878
879         /* Cuckoo bfs Search */
880         while (likely(tail != head && head <
881                                         queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
882                                         RTE_HASH_BUCKET_ENTRIES)) {
883                 curr_bkt = tail->bkt;
884                 cur_idx = tail->cur_bkt_idx;
885                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
886                         if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
887                                 int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
888                                                 bkt, sec_bkt, key, data,
889                                                 tail, i, sig,
890                                                 new_idx, ret_val);
891                                 if (likely(ret != -1))
892                                         return ret;
893                         }
894
895                         /* Enqueue new node and keep prev node info */
896                         alt_idx = get_alt_bucket_index(h, cur_idx,
897                                                 curr_bkt->sig_current[i]);
898                         alt_bkt = &(h->buckets[alt_idx]);
899                         head->bkt = alt_bkt;
900                         head->cur_bkt_idx = alt_idx;
901                         head->prev = tail;
902                         head->prev_slot = i;
903                         head++;
904                 }
905                 tail++;
906         }
907
908         return -ENOSPC;
909 }
910
911 static inline int32_t
912 __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
913                                                 hash_sig_t sig, void *data)
914 {
915         uint16_t short_sig;
916         uint32_t prim_bucket_idx, sec_bucket_idx;
917         struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
918         struct rte_hash_key *new_k, *keys = h->key_store;
919         void *slot_id = NULL;
920         void *ext_bkt_id = NULL;
921         uint32_t new_idx, bkt_id;
922         int ret;
923         unsigned n_slots;
924         unsigned lcore_id;
925         unsigned int i;
926         struct lcore_cache *cached_free_slots = NULL;
927         int32_t ret_val;
928         struct rte_hash_bucket *last;
929
930         short_sig = get_short_sig(sig);
931         prim_bucket_idx = get_prim_bucket_index(h, sig);
932         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
933         prim_bkt = &h->buckets[prim_bucket_idx];
934         sec_bkt = &h->buckets[sec_bucket_idx];
935         rte_prefetch0(prim_bkt);
936         rte_prefetch0(sec_bkt);
937
938         /* Check if key is already inserted in primary location */
939         __hash_rw_writer_lock(h);
940         ret = search_and_update(h, data, key, prim_bkt, short_sig);
941         if (ret != -1) {
942                 __hash_rw_writer_unlock(h);
943                 return ret;
944         }
945
946         /* Check if key is already inserted in secondary location */
947         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
948                 ret = search_and_update(h, data, key, cur_bkt, short_sig);
949                 if (ret != -1) {
950                         __hash_rw_writer_unlock(h);
951                         return ret;
952                 }
953         }
954
955         __hash_rw_writer_unlock(h);
956
957         /* Did not find a match, so get a new slot for storing the new key */
958         if (h->use_local_cache) {
959                 lcore_id = rte_lcore_id();
960                 cached_free_slots = &h->local_free_slots[lcore_id];
961                 /* Try to get a free slot from the local cache */
962                 if (cached_free_slots->len == 0) {
963                         /* Need to get another burst of free slots from global ring */
964                         n_slots = rte_ring_mc_dequeue_burst(h->free_slots,
965                                         cached_free_slots->objs,
966                                         LCORE_CACHE_SIZE, NULL);
967                         if (n_slots == 0) {
968                                 return -ENOSPC;
969                         }
970
971                         cached_free_slots->len += n_slots;
972                 }
973
974                 /* Get a free slot from the local cache */
975                 cached_free_slots->len--;
976                 slot_id = cached_free_slots->objs[cached_free_slots->len];
977         } else {
978                 if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
979                         return -ENOSPC;
980                 }
981         }
982
983         new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
984         new_idx = (uint32_t)((uintptr_t) slot_id);
985         /* Copy key */
986         rte_memcpy(new_k->key, key, h->key_len);
987         /* Key can be of arbitrary length, so it is not possible to store
988          * it atomically. Hence the new key element's memory stores
989          * (key as well as data) should be complete before it is referenced.
990          * 'pdata' acts as the synchronization point when an existing hash
991          * entry is updated.
992          */
993         __atomic_store_n(&new_k->pdata,
994                 data,
995                 __ATOMIC_RELEASE);
996
997         /* Find an empty slot and insert */
998         ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
999                                         short_sig, new_idx, &ret_val);
1000         if (ret == 0)
1001                 return new_idx - 1;
1002         else if (ret == 1) {
1003                 enqueue_slot_back(h, cached_free_slots, slot_id);
1004                 return ret_val;
1005         }
1006
1007         /* Primary bucket full, need to make space for new entry */
1008         ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
1009                                 short_sig, prim_bucket_idx, new_idx, &ret_val);
1010         if (ret == 0)
1011                 return new_idx - 1;
1012         else if (ret == 1) {
1013                 enqueue_slot_back(h, cached_free_slots, slot_id);
1014                 return ret_val;
1015         }
1016
1017         /* Also search secondary bucket to get better occupancy */
1018         ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
1019                                 short_sig, sec_bucket_idx, new_idx, &ret_val);
1020
1021         if (ret == 0)
1022                 return new_idx - 1;
1023         else if (ret == 1) {
1024                 enqueue_slot_back(h, cached_free_slots, slot_id);
1025                 return ret_val;
1026         }
1027
1028         /* if ext table not enabled, we failed the insertion */
1029         if (!h->ext_table_support) {
1030                 enqueue_slot_back(h, cached_free_slots, slot_id);
1031                 return ret;
1032         }
1033
1034         /* Now we need to go through the extendable buckets. The lock is
1035          * needed to protect all extendable bucket operations.
1036          */
1037         __hash_rw_writer_lock(h);
1038         /* Check for duplicates again, since the key could have been inserted before the lock was taken */
1039         ret = search_and_update(h, data, key, prim_bkt, short_sig);
1040         if (ret != -1) {
1041                 enqueue_slot_back(h, cached_free_slots, slot_id);
1042                 goto failure;
1043         }
1044
1045         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1046                 ret = search_and_update(h, data, key, cur_bkt, short_sig);
1047                 if (ret != -1) {
1048                         enqueue_slot_back(h, cached_free_slots, slot_id);
1049                         goto failure;
1050                 }
1051         }
1052
1053         /* Search sec and ext buckets to find an empty entry to insert. */
1054         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1055                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1056                         /* Check if slot is available */
1057                         if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
1058                                 cur_bkt->sig_current[i] = short_sig;
1059                                 cur_bkt->key_idx[i] = new_idx;
1060                                 __hash_rw_writer_unlock(h);
1061                                 return new_idx - 1;
1062                         }
1063                 }
1064         }
1065
1066         /* Failed to get an empty entry from the extendable buckets. Link a
1067          * new extendable bucket; first get a free bucket from the ring.
1068          */
1069         if (rte_ring_sc_dequeue(h->free_ext_bkts, &ext_bkt_id) != 0) {
1070                 ret = -ENOSPC;
1071                 goto failure;
1072         }
1073
1074         bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
1075         /* Use the first location of the new bucket */
1076         (h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
1077         (h->buckets_ext[bkt_id]).key_idx[0] = new_idx;
1078         /* Link the new bucket to sec bucket linked list */
1079         last = rte_hash_get_last_bkt(sec_bkt);
1080         last->next = &h->buckets_ext[bkt_id];
1081         __hash_rw_writer_unlock(h);
1082         return new_idx - 1;
1083
1084 failure:
1085         __hash_rw_writer_unlock(h);
1086         return ret;
1087
1088 }
1089
1090 int32_t
1091 rte_hash_add_key_with_hash(const struct rte_hash *h,
1092                         const void *key, hash_sig_t sig)
1093 {
1094         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1095         return __rte_hash_add_key_with_hash(h, key, sig, 0);
1096 }
1097
1098 int32_t
1099 rte_hash_add_key(const struct rte_hash *h, const void *key)
1100 {
1101         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1102         return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0);
1103 }
1104
1105 int
1106 rte_hash_add_key_with_hash_data(const struct rte_hash *h,
1107                         const void *key, hash_sig_t sig, void *data)
1108 {
1109         int ret;
1110
1111         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1112         ret = __rte_hash_add_key_with_hash(h, key, sig, data);
1113         if (ret >= 0)
1114                 return 0;
1115         else
1116                 return ret;
1117 }
1118
1119 int
1120 rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
1121 {
1122         int ret;
1123
1124         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1125
1126         ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data);
1127         if (ret >= 0)
1128                 return 0;
1129         else
1130                 return ret;
1131 }
1132
1133 /* Search one bucket to find the matching key */
1134 static inline int32_t
1135 search_one_bucket(const struct rte_hash *h, const void *key, uint16_t sig,
1136                         void **data, const struct rte_hash_bucket *bkt)
1137 {
1138         int i;
1139         uint32_t key_idx;
1140         void *pdata;
1141         struct rte_hash_key *k, *keys = h->key_store;
1142
1143         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1144                 key_idx = __atomic_load_n(&bkt->key_idx[i],
1145                                           __ATOMIC_ACQUIRE);
1146                 if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
1147                         k = (struct rte_hash_key *) ((char *)keys +
1148                                         key_idx * h->key_entry_size);
1149                         pdata = __atomic_load_n(&k->pdata,
1150                                         __ATOMIC_ACQUIRE);
1151
1152                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1153                                 if (data != NULL)
1154                                         *data = pdata;
1155                                 /*
1156                                  * Return index where key is stored,
1157                                  * subtracting the first dummy index
1158                                  */
1159                                 return key_idx - 1;
1160                         }
1161                 }
1162         }
1163         return -1;
1164 }
1165
1166 static inline int32_t
1167 __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
1168                                         hash_sig_t sig, void **data)
1169 {
1170         uint32_t prim_bucket_idx, sec_bucket_idx;
1171         struct rte_hash_bucket *bkt, *cur_bkt;
1172         uint32_t cnt_b, cnt_a;
1173         int ret;
1174         uint16_t short_sig;
1175
1176         short_sig = get_short_sig(sig);
1177         prim_bucket_idx = get_prim_bucket_index(h, sig);
1178         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1179
1180         __hash_rw_reader_lock(h);
1181
1182         do {
1183                 /* Load the table change counter before the lookup
1184                  * starts. Acquire semantics will make sure that
1185                  * loads in search_one_bucket are not hoisted.
1186                  */
1187                 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1188                                 __ATOMIC_ACQUIRE);
1189
1190                 /* Check if key is in primary location */
1191                 bkt = &h->buckets[prim_bucket_idx];
1192                 ret = search_one_bucket(h, key, short_sig, data, bkt);
1193                 if (ret != -1) {
1194                         __hash_rw_reader_unlock(h);
1195                         return ret;
1196                 }
1197                 /* Calculate secondary hash */
1198                 bkt = &h->buckets[sec_bucket_idx];
1199
1200                 /* Check if key is in secondary location */
1201                 FOR_EACH_BUCKET(cur_bkt, bkt) {
1202                         ret = search_one_bucket(h, key, short_sig,
1203                                                 data, cur_bkt);
1204                         if (ret != -1) {
1205                                 __hash_rw_reader_unlock(h);
1206                                 return ret;
1207                         }
1208                 }
1209
1210                 /* The loads of sig_current in search_one_bucket
1211                  * should not move below the load from tbl_chng_cnt.
1212                  */
1213                 __atomic_thread_fence(__ATOMIC_ACQUIRE);
1214                 /* Re-read the table change counter to check if the
1215                  * table has changed during search. If yes, re-do
1216                  * the search.
1217                  * This load should not get hoisted. The load
1218                  * acquires on cnt_b, key index in primary bucket
1219                  * and key index in secondary bucket will make sure
1220                  * that it does not get hoisted.
1221                  */
1222                 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
1223                                         __ATOMIC_ACQUIRE);
1224         } while (cnt_b != cnt_a);
1225
1226         __hash_rw_reader_unlock(h);
1227
1228         return -ENOENT;
1229 }
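
/*
 * Illustrative note (not part of the library): the retry loop above pairs
 * with the writer-side increments of tbl_chng_cnt in
 * rte_hash_cuckoo_move_insert_mw(). The structure is seqlock-like; a minimal
 * sketch, with 'cnt' and lookup_once() as hypothetical names:
 *
 *   uint32_t before, after;
 *   int ret;
 *   do {
 *           before = __atomic_load_n(&cnt, __ATOMIC_ACQUIRE);
 *           ret = lookup_once();            // may miss an entry in flight
 *           __atomic_thread_fence(__ATOMIC_ACQUIRE);
 *           after = __atomic_load_n(&cnt, __ATOMIC_ACQUIRE);
 *   } while (before != after);              // table changed, search again
 *   // 'ret' is only trusted once the counter is stable
 *
 * Unlike a classic seqlock there is no odd/even "write in progress" state:
 * as noted in the writer comments, an entry being moved remains reachable
 * from both its buckets while the move is in flight, so readers only need
 * to retry when a move completed during their search.
 */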
1230
1231 int32_t
1232 rte_hash_lookup_with_hash(const struct rte_hash *h,
1233                         const void *key, hash_sig_t sig)
1234 {
1235         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1236         return __rte_hash_lookup_with_hash(h, key, sig, NULL);
1237 }
1238
1239 int32_t
1240 rte_hash_lookup(const struct rte_hash *h, const void *key)
1241 {
1242         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1243         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL);
1244 }
1245
1246 int
1247 rte_hash_lookup_with_hash_data(const struct rte_hash *h,
1248                         const void *key, hash_sig_t sig, void **data)
1249 {
1250         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1251         return __rte_hash_lookup_with_hash(h, key, sig, data);
1252 }
1253
1254 int
1255 rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
1256 {
1257         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1258         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
1259 }
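
/*
 * Illustrative usage sketch (not part of this file): pairing a data-plane
 * lookup with an add done on a writer thread. The key type, values and
 * helper functions are hypothetical.
 *
 *   uint32_t key = 5;
 *   void *data;
 *
 *   // writer thread
 *   if (rte_hash_add_key_data(h, &key, (void *)(uintptr_t)0xbeef) < 0)
 *           handle_add_failure();
 *
 *   // reader thread (lock-free if RW_CONCURRENCY_LF was requested)
 *   if (rte_hash_lookup_data(h, &key, &data) >= 0)
 *           use_flow(data);        // data == (void *)(uintptr_t)0xbeef
 *   else
 *           handle_miss();         // -ENOENT when the key is absent
 */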
1260
1261 static inline void
1262 remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
1263 {
1264         unsigned lcore_id, n_slots;
1265         struct lcore_cache *cached_free_slots;
1266
1267         if (h->use_local_cache) {
1268                 lcore_id = rte_lcore_id();
1269                 cached_free_slots = &h->local_free_slots[lcore_id];
1270                 /* Cache is full; flush it back to the global ring. */
1271                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1272                         /* Need to enqueue the free slots in global ring. */
1273                         n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1274                                                 cached_free_slots->objs,
1275                                                 LCORE_CACHE_SIZE, NULL);
1276                         cached_free_slots->len -= n_slots;
1277                 }
1278                 /* Put index of new free slot in cache. */
1279                 cached_free_slots->objs[cached_free_slots->len] =
1280                                 (void *)((uintptr_t)bkt->key_idx[i]);
1281                 cached_free_slots->len++;
1282         } else {
1283                 rte_ring_sp_enqueue(h->free_slots,
1284                                 (void *)((uintptr_t)bkt->key_idx[i]));
1285         }
1286 }
1287
1288 /* Compact the linked list by moving the key from the last entry of the
1289  * linked list into the empty slot.
1290  */
1291 static inline void
1292 __rte_hash_compact_ll(struct rte_hash_bucket *cur_bkt, int pos) {
1293         int i;
1294         struct rte_hash_bucket *last_bkt;
1295
1296         if (!cur_bkt->next)
1297                 return;
1298
1299         last_bkt = rte_hash_get_last_bkt(cur_bkt);
1300
1301         for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) {
1302                 if (last_bkt->key_idx[i] != EMPTY_SLOT) {
1303                         cur_bkt->key_idx[pos] = last_bkt->key_idx[i];
1304                         cur_bkt->sig_current[pos] = last_bkt->sig_current[i];
1305                         last_bkt->sig_current[i] = NULL_SIGNATURE;
1306                         last_bkt->key_idx[i] = EMPTY_SLOT;
1307                         return;
1308                 }
1309         }
1310 }
1311
1312 /* Search one bucket and remove the matching key.
1313  * The writer is expected to hold the lock while calling this
1314  * function.
1315  */
1315  */
1316 static inline int32_t
1317 search_and_remove(const struct rte_hash *h, const void *key,
1318                         struct rte_hash_bucket *bkt, uint16_t sig, int *pos)
1319 {
1320         struct rte_hash_key *k, *keys = h->key_store;
1321         unsigned int i;
1322         uint32_t key_idx;
1323
1324         /* Check if key is in bucket */
1325         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1326                 key_idx = __atomic_load_n(&bkt->key_idx[i],
1327                                           __ATOMIC_ACQUIRE);
1328                 if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
1329                         k = (struct rte_hash_key *) ((char *)keys +
1330                                         key_idx * h->key_entry_size);
1331                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1332                                 bkt->sig_current[i] = NULL_SIGNATURE;
1333                                 /* Free the key store index if
1334                                  * no_free_on_del is disabled.
1335                                  */
1336                                 if (!h->no_free_on_del)
1337                                         remove_entry(h, bkt, i);
1338
1339                                 __atomic_store_n(&bkt->key_idx[i],
1340                                                  EMPTY_SLOT,
1341                                                  __ATOMIC_RELEASE);
1342
1343                                 *pos = i;
1344                                 /*
1345                                  * Return index where key is stored,
1346                                  * subtracting the first dummy index
1347                                  */
1348                                 return key_idx - 1;
1349                         }
1350                 }
1351         }
1352         return -1;
1353 }
1354
1355 static inline int32_t
1356 __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
1357                                                 hash_sig_t sig)
1358 {
1359         uint32_t prim_bucket_idx, sec_bucket_idx;
1360         struct rte_hash_bucket *prim_bkt, *sec_bkt, *prev_bkt, *last_bkt;
1361         struct rte_hash_bucket *cur_bkt;
1362         int pos;
1363         int32_t ret, i;
1364         uint16_t short_sig;
1365
1366         short_sig = get_short_sig(sig);
1367         prim_bucket_idx = get_prim_bucket_index(h, sig);
1368         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1369         prim_bkt = &h->buckets[prim_bucket_idx];
1370
1371         __hash_rw_writer_lock(h);
1372         /* look for key in primary bucket */
1373         ret = search_and_remove(h, key, prim_bkt, short_sig, &pos);
1374         if (ret != -1) {
1375                 __rte_hash_compact_ll(prim_bkt, pos);
1376                 last_bkt = prim_bkt->next;
1377                 prev_bkt = prim_bkt;
1378                 goto return_bkt;
1379         }
1380
1381         /* Calculate secondary hash */
1382         sec_bkt = &h->buckets[sec_bucket_idx];
1383
1384         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1385                 ret = search_and_remove(h, key, cur_bkt, short_sig, &pos);
1386                 if (ret != -1) {
1387                         __rte_hash_compact_ll(cur_bkt, pos);
1388                         last_bkt = sec_bkt->next;
1389                         prev_bkt = sec_bkt;
1390                         goto return_bkt;
1391                 }
1392         }
1393
1394         __hash_rw_writer_unlock(h);
1395         return -ENOENT;
1396
1397 /* Search the last extendable bucket to see if it is empty and can be recycled */
1398 return_bkt:
1399         if (!last_bkt) {
1400                 __hash_rw_writer_unlock(h);
1401                 return ret;
1402         }
1403         while (last_bkt->next) {
1404                 prev_bkt = last_bkt;
1405                 last_bkt = last_bkt->next;
1406         }
1407
1408         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1409                 if (last_bkt->key_idx[i] != EMPTY_SLOT)
1410                         break;
1411         }
1412         /* If the last bucket is empty, unlink and recycle it */
1413         if (i == RTE_HASH_BUCKET_ENTRIES) {
1414                 prev_bkt->next = last_bkt->next = NULL;
1415                 uint32_t index = last_bkt - h->buckets_ext + 1;
1416                 rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
1417         }
1418
1419         __hash_rw_writer_unlock(h);
1420         return ret;
1421 }
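
/*
 * The index enqueued to h->free_ext_bkts above is 1-based
 * (last_bkt - h->buckets_ext + 1), so index 0 never appears in the ring.
 * Illustrative round trip on the allocation side (a sketch only; the exact
 * linking code lives in the add path):
 *
 *   void *ext_bkt_id;
 *   if (rte_ring_sc_dequeue(h->free_ext_bkts, &ext_bkt_id) == 0) {
 *           struct rte_hash_bucket *bkt =
 *                   &h->buckets_ext[(uint32_t)(uintptr_t)ext_bkt_id - 1];
 *           // ... link bkt at the tail of the bucket chain ...
 *   }
 */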
1422
1423 int32_t
1424 rte_hash_del_key_with_hash(const struct rte_hash *h,
1425                         const void *key, hash_sig_t sig)
1426 {
1427         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1428         return __rte_hash_del_key_with_hash(h, key, sig);
1429 }
1430
1431 int32_t
1432 rte_hash_del_key(const struct rte_hash *h, const void *key)
1433 {
1434         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1435         return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
1436 }
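
/*
 * When the caller already has the key's signature (for example from an
 * earlier rte_hash_hash() call), the *_with_hash variants avoid hashing the
 * key a second time.  Illustrative sketch (h and key are assumed to exist):
 *
 *   hash_sig_t sig = rte_hash_hash(h, &key);
 *   if (rte_hash_lookup_with_hash(h, &key, sig) >= 0)
 *           rte_hash_del_key_with_hash(h, &key, sig);
 */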
1437
1438 int
1439 rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
1440                                void **key)
1441 {
1442         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1443
1444         struct rte_hash_key *k, *keys = h->key_store;
1445         k = (struct rte_hash_key *) ((char *) keys + (position + 1) *
1446                                      h->key_entry_size);
1447         *key = k->key;
1448
1449         if (position !=
1450             __rte_hash_lookup_with_hash(h, *key, rte_hash_hash(h, *key),
1451                                         NULL)) {
1452                 return -ENOENT;
1453         }
1454
1455         return 0;
1456 }
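
/*
 * Illustrative use of position-based key retrieval; pos is assumed to be a
 * position previously returned by an add or lookup call for a key that is
 * still present, and log_key() is an application-side assumption:
 *
 *   void *stored_key;
 *   if (rte_hash_get_key_with_position(h, pos, &stored_key) == 0)
 *           log_key(stored_key);    // application-defined
 */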
1457
1458 int __rte_experimental
1459 rte_hash_free_key_with_position(const struct rte_hash *h,
1460                                 const int32_t position)
1461 {
1462         RETURN_IF_TRUE(((h == NULL) || (position == EMPTY_SLOT)), -EINVAL);
1463
1464         unsigned int lcore_id, n_slots;
1465         struct lcore_cache *cached_free_slots;
1466         const int32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
1467
1468         /* Out of bounds */
1469         if (position >= total_entries)
1470                 return -EINVAL;
1471
1472         if (h->use_local_cache) {
1473                 lcore_id = rte_lcore_id();
1474                 cached_free_slots = &h->local_free_slots[lcore_id];
1475                 /* Cache full: flush a batch of slots back to the global ring. */
1476                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1477                         /* Need to enqueue the free slots in global ring. */
1478                         n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1479                                                 cached_free_slots->objs,
1480                                                 LCORE_CACHE_SIZE, NULL);
1481                         cached_free_slots->len -= n_slots;
1482                 }
1483                 /* Put index of new free slot in cache. */
1484                 cached_free_slots->objs[cached_free_slots->len] =
1485                                         (void *)((uintptr_t)position);
1486                 cached_free_slots->len++;
1487         } else {
1488                 rte_ring_sp_enqueue(h->free_slots,
1489                                 (void *)((uintptr_t)position));
1490         }
1491
1492         return 0;
1493 }
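
/*
 * Typical caller-side sequence when the table was created with
 * RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL: the key slot is not reclaimed by
 * the delete, so the caller frees it once readers have quiesced.  A sketch,
 * with the grace-period mechanism left application-defined:
 *
 *   int32_t pos = rte_hash_del_key(h, &key);
 *   if (pos >= 0) {
 *           // Lock-free readers may still reference the key slot; wait
 *           // for them before returning the slot to the free list.
 *           wait_for_readers_to_quiesce();    // application-defined
 *           rte_hash_free_key_with_position(h, pos);
 *   }
 */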
1494
1495 static inline void
1496 compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
1497                         const struct rte_hash_bucket *prim_bkt,
1498                         const struct rte_hash_bucket *sec_bkt,
1499                         uint16_t sig,
1500                         enum rte_hash_sig_compare_function sig_cmp_fn)
1501 {
1502         unsigned int i;
1503
1504         /* For the match masks, the low bit of each 2-bit pair indicates a match */
1505         switch (sig_cmp_fn) {
1506 #ifdef RTE_MACHINE_CPUFLAG_SSE2
1507         case RTE_HASH_COMPARE_SSE:
1508                 /* Compare all signatures in the bucket */
1509                 *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1510                                 _mm_load_si128(
1511                                         (__m128i const *)prim_bkt->sig_current),
1512                                 _mm_set1_epi16(sig)));
1513                 /* Compare all signatures in the bucket */
1514                 *sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1515                                 _mm_load_si128(
1516                                         (__m128i const *)sec_bkt->sig_current),
1517                                 _mm_set1_epi16(sig)));
1518                 break;
1519 #endif
1520         default:
1521                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1522                         *prim_hash_matches |=
1523                                 ((sig == prim_bkt->sig_current[i]) << (i << 1));
1524                         *sec_hash_matches |=
1525                                 ((sig == sec_bkt->sig_current[i]) << (i << 1));
1526                 }
1527         }
1528 }
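
/*
 * The hit masks produced above use two bits per bucket entry, so entry i
 * maps to bits (2 * i) and (2 * i + 1).  The lookup functions below walk a
 * mask with the pattern sketched here:
 *
 *   while (hitmask) {
 *           uint32_t entry = __builtin_ctzl(hitmask) >> 1;   // entry index
 *           // ... verify the key stored at bucket->key_idx[entry] ...
 *           hitmask &= ~(3ULL << (entry << 1));              // clear pair
 *   }
 */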
1529
1530 #define PREFETCH_OFFSET 4
1531 static inline void
1532 __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
1533                         int32_t num_keys, int32_t *positions,
1534                         uint64_t *hit_mask, void *data[])
1535 {
1536         uint64_t hits = 0;
1537         int32_t i;
1538         int32_t ret;
1539         uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
1540         uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
1541         uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
1542         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
1543         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1544         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1545         uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1546         uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1547         struct rte_hash_bucket *cur_bkt, *next_bkt;
1548         void *pdata[RTE_HASH_LOOKUP_BULK_MAX];
1549         uint32_t cnt_b, cnt_a;
1550
1551         /* Prefetch first keys */
1552         for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
1553                 rte_prefetch0(keys[i]);
1554
1555         /*
1556          * Prefetch the rest of the keys, calculate the primary and
1557          * secondary bucket indexes, and prefetch both buckets
1558          */
1559         for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
1560                 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
1561
1562                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1563
1564                 sig[i] = get_short_sig(prim_hash[i]);
1565                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1566                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1567
1568                 primary_bkt[i] = &h->buckets[prim_index[i]];
1569                 secondary_bkt[i] = &h->buckets[sec_index[i]];
1570
1571                 rte_prefetch0(primary_bkt[i]);
1572                 rte_prefetch0(secondary_bkt[i]);
1573         }
1574
1575         /* Calculate and prefetch rest of the buckets */
1576         for (; i < num_keys; i++) {
1577                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1578
1579                 sig[i] = get_short_sig(prim_hash[i]);
1580                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1581                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1582
1583                 primary_bkt[i] = &h->buckets[prim_index[i]];
1584                 secondary_bkt[i] = &h->buckets[sec_index[i]];
1585
1586                 rte_prefetch0(primary_bkt[i]);
1587                 rte_prefetch0(secondary_bkt[i]);
1588         }
1589
1590         __hash_rw_reader_lock(h);
1591         do {
1592                 /* Load the table change counter before the lookup
1593                  * starts. Acquire semantics will make sure that
1594                  * loads in compare_signatures are not hoisted.
1595                  */
1596                 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1597                                         __ATOMIC_ACQUIRE);
1598
1599                 /* Compare signatures and prefetch key slot of first hit */
1600                 for (i = 0; i < num_keys; i++) {
1601                         compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1602                                 primary_bkt[i], secondary_bkt[i],
1603                                 sig[i], h->sig_cmp_fn);
1604
1605                         if (prim_hitmask[i]) {
1606                                 uint32_t first_hit =
1607                                                 __builtin_ctzl(prim_hitmask[i])
1608                                                 >> 1;
1609                                 uint32_t key_idx =
1610                                         primary_bkt[i]->key_idx[first_hit];
1611                                 const struct rte_hash_key *key_slot =
1612                                         (const struct rte_hash_key *)(
1613                                         (const char *)h->key_store +
1614                                         key_idx * h->key_entry_size);
1615                                 rte_prefetch0(key_slot);
1616                                 continue;
1617                         }
1618
1619                         if (sec_hitmask[i]) {
1620                                 uint32_t first_hit =
1621                                                 __builtin_ctzl(sec_hitmask[i])
1622                                                 >> 1;
1623                                 uint32_t key_idx =
1624                                         secondary_bkt[i]->key_idx[first_hit];
1625                                 const struct rte_hash_key *key_slot =
1626                                         (const struct rte_hash_key *)(
1627                                         (const char *)h->key_store +
1628                                         key_idx * h->key_entry_size);
1629                                 rte_prefetch0(key_slot);
1630                         }
1631                 }
1632
1633                 /* Compare keys, first hits in primary first */
1634                 for (i = 0; i < num_keys; i++) {
1635                         positions[i] = -ENOENT;
1636                         while (prim_hitmask[i]) {
1637                                 uint32_t hit_index =
1638                                                 __builtin_ctzl(prim_hitmask[i])
1639                                                 >> 1;
1640                                 uint32_t key_idx =
1641                                 __atomic_load_n(
1642                                         &primary_bkt[i]->key_idx[hit_index],
1643                                         __ATOMIC_ACQUIRE);
1644                                 const struct rte_hash_key *key_slot =
1645                                         (const struct rte_hash_key *)(
1646                                         (const char *)h->key_store +
1647                                         key_idx * h->key_entry_size);
1648
1649                                 if (key_idx != EMPTY_SLOT)
1650                                         pdata[i] = __atomic_load_n(
1651                                                         &key_slot->pdata,
1652                                                         __ATOMIC_ACQUIRE);
1653                                 /*
1654                                  * If key index is 0, do not compare key,
1655                                  * as it is checking the dummy slot
1656                                  */
1657                                 if (!!key_idx &
1658                                         !rte_hash_cmp_eq(
1659                                                 key_slot->key, keys[i], h)) {
1660                                         if (data != NULL)
1661                                                 data[i] = pdata[i];
1662
1663                                         hits |= 1ULL << i;
1664                                         positions[i] = key_idx - 1;
1665                                         goto next_key;
1666                                 }
1667                                 prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
1668                         }
1669
1670                         while (sec_hitmask[i]) {
1671                                 uint32_t hit_index =
1672                                                 __builtin_ctzl(sec_hitmask[i])
1673                                                 >> 1;
1674                                 uint32_t key_idx =
1675                                 __atomic_load_n(
1676                                         &secondary_bkt[i]->key_idx[hit_index],
1677                                         __ATOMIC_ACQUIRE);
1678                                 const struct rte_hash_key *key_slot =
1679                                         (const struct rte_hash_key *)(
1680                                         (const char *)h->key_store +
1681                                         key_idx * h->key_entry_size);
1682
1683                                 if (key_idx != EMPTY_SLOT)
1684                                         pdata[i] = __atomic_load_n(
1685                                                         &key_slot->pdata,
1686                                                         __ATOMIC_ACQUIRE);
1687                                 /*
1688                                  * If key index is 0, do not compare key,
1689                                  * as it is checking the dummy slot
1690                                  */
1691
1692                                 if (!!key_idx &
1693                                         !rte_hash_cmp_eq(
1694                                                 key_slot->key, keys[i], h)) {
1695                                         if (data != NULL)
1696                                                 data[i] = pdata[i];
1697
1698                                         hits |= 1ULL << i;
1699                                         positions[i] = key_idx - 1;
1700                                         goto next_key;
1701                                 }
1702                                 sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
1703                         }
1704 next_key:
1705                         continue;
1706                 }
1707
1708                 /* The loads of sig_current in compare_signatures
1709                  * should not move below the load from tbl_chng_cnt.
1710                  */
1711                 __atomic_thread_fence(__ATOMIC_ACQUIRE);
1712                 /* Re-read the table change counter to check if the
1713                  * table has changed during search. If yes, re-do
1714                  * the search.
1715                  * This load must not be hoisted above the earlier
1716                  * loads: the acquire loads of cnt_b and of the
1717                  * primary and secondary key indexes ensure it is
1718                  * not reordered before them.
1719                  */
1720                 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
1721                                         __ATOMIC_ACQUIRE);
1722         } while (cnt_b != cnt_a);
1723
1724         /* All keys found, or ext table not enabled: skip the ext buckets */
1725         if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
1726                 if (hit_mask != NULL)
1727                         *hit_mask = hits;
1728                 __hash_rw_reader_unlock(h);
1729                 return;
1730         }
1731
1732         /* need to check ext buckets for match */
1733         for (i = 0; i < num_keys; i++) {
1734                 if ((hits & (1ULL << i)) != 0)
1735                         continue;
1736                 next_bkt = secondary_bkt[i]->next;
1737                 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
1738                         if (data != NULL)
1739                                 ret = search_one_bucket(h, keys[i],
1740                                                 sig[i], &data[i], cur_bkt);
1741                         else
1742                                 ret = search_one_bucket(h, keys[i],
1743                                                 sig[i], NULL, cur_bkt);
1744                         if (ret != -1) {
1745                                 positions[i] = ret;
1746                                 hits |= 1ULL << i;
1747                                 break;
1748                         }
1749                 }
1750         }
1751
1752         __hash_rw_reader_unlock(h);
1753
1754         if (hit_mask != NULL)
1755                 *hit_mask = hits;
1756 }
1757
1758 int
1759 rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
1760                       uint32_t num_keys, int32_t *positions)
1761 {
1762         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
1763                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
1764                         (positions == NULL)), -EINVAL);
1765
1766         __rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL);
1767         return 0;
1768 }
1769
1770 int
1771 rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
1772                       uint32_t num_keys, uint64_t *hit_mask, void *data[])
1773 {
1774         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
1775                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
1776                         (hit_mask == NULL)), -EINVAL);
1777
1778         int32_t positions[num_keys];
1779
1780         __rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);
1781
1782         /* Return number of hits */
1783         return __builtin_popcountl(*hit_mask);
1784 }
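
/*
 * Illustrative bulk data lookup; BURST and process() are application-side
 * assumptions and BURST must not exceed RTE_HASH_LOOKUP_BULK_MAX:
 *
 *   const void *keys[BURST];
 *   void *data[BURST];
 *   uint64_t hit_mask = 0;
 *   int hits = rte_hash_lookup_bulk_data(h, keys, BURST, &hit_mask, data);
 *   for (uint32_t i = 0; hits > 0 && i < BURST; i++)
 *           if (hit_mask & (1ULL << i))
 *                   process(keys[i], data[i]);    // application-defined
 */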
1785
1786 int32_t
1787 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
1788 {
1789         uint32_t bucket_idx, idx, position;
1790         struct rte_hash_key *next_key;
1791
1792         RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
1793
1794         const uint32_t total_entries_main = h->num_buckets *
1795                                                         RTE_HASH_BUCKET_ENTRIES;
1796         const uint32_t total_entries = total_entries_main << 1;
1797
1798         /* Past the end of the main table: continue into the extendable buckets */
1799         if (*next >= total_entries_main)
1800                 goto extend_table;
1801
1802         /* Calculate bucket and index of current iterator */
1803         bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
1804         idx = *next % RTE_HASH_BUCKET_ENTRIES;
1805
1806         /* If current position is empty, go to the next one */
1807         while ((position = __atomic_load_n(&h->buckets[bucket_idx].key_idx[idx],
1808                                         __ATOMIC_ACQUIRE)) == EMPTY_SLOT) {
1809                 (*next)++;
1810                 /* End of table */
1811                 if (*next == total_entries_main)
1812                         goto extend_table;
1813                 bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
1814                 idx = *next % RTE_HASH_BUCKET_ENTRIES;
1815         }
1816
1817         __hash_rw_reader_lock(h);
1818         next_key = (struct rte_hash_key *) ((char *)h->key_store +
1819                                 position * h->key_entry_size);
1820         /* Return key and data */
1821         *key = next_key->key;
1822         *data = next_key->pdata;
1823
1824         __hash_rw_reader_unlock(h);
1825
1826         /* Increment iterator */
1827         (*next)++;
1828
1829         return position - 1;
1830
1831 /* Begin to iterate extendable buckets */
1832 extend_table:
1833         /* Out of bounds of the whole table, or ext buckets are not enabled */
1834         if (*next >= total_entries || !h->ext_table_support)
1835                 return -ENOENT;
1836
1837         bucket_idx = (*next - total_entries_main) / RTE_HASH_BUCKET_ENTRIES;
1838         idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
1839
1840         while ((position = h->buckets_ext[bucket_idx].key_idx[idx]) == EMPTY_SLOT) {
1841                 (*next)++;
1842                 if (*next == total_entries)
1843                         return -ENOENT;
1844                 bucket_idx = (*next - total_entries_main) /
1845                                                 RTE_HASH_BUCKET_ENTRIES;
1846                 idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
1847         }
1848         __hash_rw_reader_lock(h);
1849         next_key = (struct rte_hash_key *) ((char *)h->key_store +
1850                                 position * h->key_entry_size);
1851         /* Return key and data */
1852         *key = next_key->key;
1853         *data = next_key->pdata;
1854
1855         __hash_rw_reader_unlock(h);
1856
1857         /* Increment iterator */
1858         (*next)++;
1859         return position - 1;
1860 }
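
/*
 * Illustrative full-table iteration; visit() is an application-side
 * assumption:
 *
 *   const void *key;
 *   void *data;
 *   uint32_t next = 0;
 *   while (rte_hash_iterate(h, &key, &data, &next) >= 0)
 *           visit(key, data);    // application-defined
 */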