hash: fix key slot size accuracy
[dpdk.git] / lib / librte_hash / rte_cuckoo_hash.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <string.h>
6 #include <stdint.h>
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdarg.h>
10 #include <sys/queue.h>
11
12 #include <rte_common.h>
13 #include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
14 #include <rte_log.h>
15 #include <rte_memcpy.h>
16 #include <rte_prefetch.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_malloc.h>
19 #include <rte_eal.h>
20 #include <rte_eal_memconfig.h>
21 #include <rte_per_lcore.h>
22 #include <rte_errno.h>
23 #include <rte_string_fns.h>
24 #include <rte_cpuflags.h>
25 #include <rte_rwlock.h>
26 #include <rte_spinlock.h>
27 #include <rte_ring.h>
28 #include <rte_compat.h>
29 #include <rte_pause.h>
30
31 #include "rte_hash.h"
32 #include "rte_cuckoo_hash.h"
33
34 #if defined(RTE_ARCH_X86)
35 #include "rte_cuckoo_hash_x86.h"
36 #endif
37
38 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
39
40 static struct rte_tailq_elem rte_hash_tailq = {
41         .name = "RTE_HASH",
42 };
43 EAL_REGISTER_TAILQ(rte_hash_tailq)
44
45 struct rte_hash *
46 rte_hash_find_existing(const char *name)
47 {
48         struct rte_hash *h = NULL;
49         struct rte_tailq_entry *te;
50         struct rte_hash_list *hash_list;
51
52         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
53
54         rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
55         TAILQ_FOREACH(te, hash_list, next) {
56                 h = (struct rte_hash *) te->data;
57                 if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
58                         break;
59         }
60         rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
61
62         if (te == NULL) {
63                 rte_errno = ENOENT;
64                 return NULL;
65         }
66         return h;
67 }
68
69 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
70 {
71         h->cmp_jump_table_idx = KEY_CUSTOM;
72         h->rte_hash_custom_cmp_eq = func;
73 }
74
75 static inline int
76 rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
77 {
78         if (h->cmp_jump_table_idx == KEY_CUSTOM)
79                 return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len);
80         else
81                 return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
82 }
83
84 struct rte_hash *
85 rte_hash_create(const struct rte_hash_parameters *params)
86 {
87         struct rte_hash *h = NULL;
88         struct rte_tailq_entry *te = NULL;
89         struct rte_hash_list *hash_list;
90         struct rte_ring *r = NULL;
91         char hash_name[RTE_HASH_NAMESIZE];
92         void *k = NULL;
93         void *buckets = NULL;
94         char ring_name[RTE_RING_NAMESIZE];
95         unsigned num_key_slots;
96         unsigned hw_trans_mem_support = 0;
97         unsigned i;
98         rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
99
100         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
101
102         if (params == NULL) {
103                 RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
104                 return NULL;
105         }
106
107         /* Check for valid parameters */
108         if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
109                         (params->entries < RTE_HASH_BUCKET_ENTRIES) ||
110                         (params->key_len == 0)) {
111                 rte_errno = EINVAL;
112                 RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
113                 return NULL;
114         }
115
116         /* Check extra flags field to check extra options. */
117         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
118                 hw_trans_mem_support = 1;
119
120         /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
121         if (hw_trans_mem_support)
122                 /*
123                  * Increase number of slots by total number of indices
124                  * that can be stored in the lcore caches
125                  * except for the first cache
126                  */
127                 num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
128                                         (LCORE_CACHE_SIZE - 1) + 1;
129         else
130                 num_key_slots = params->entries + 1;
131
132         snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
133         /* Create ring (Dummy slot index is not enqueued) */
134         r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
135                         params->socket_id, 0);
136         if (r == NULL) {
137                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
138                 goto err;
139         }
140
141         snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
142
143         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
144
145         /* guarantee there's no existing: this is normally already checked
146          * by ring creation above */
147         TAILQ_FOREACH(te, hash_list, next) {
148                 h = (struct rte_hash *) te->data;
149                 if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
150                         break;
151         }
152         h = NULL;
153         if (te != NULL) {
154                 rte_errno = EEXIST;
155                 te = NULL;
156                 goto err_unlock;
157         }
158
159         te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
160         if (te == NULL) {
161                 RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
162                 goto err_unlock;
163         }
164
165         h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
166                                         RTE_CACHE_LINE_SIZE, params->socket_id);
167
168         if (h == NULL) {
169                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
170                 goto err_unlock;
171         }
172
173         const uint32_t num_buckets = rte_align32pow2(params->entries)
174                                         / RTE_HASH_BUCKET_ENTRIES;
175
176         buckets = rte_zmalloc_socket(NULL,
177                                 num_buckets * sizeof(struct rte_hash_bucket),
178                                 RTE_CACHE_LINE_SIZE, params->socket_id);
179
180         if (buckets == NULL) {
181                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
182                 goto err_unlock;
183         }
184
185         const uint32_t key_entry_size = sizeof(struct rte_hash_key) + params->key_len;
186         const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
187
188         k = rte_zmalloc_socket(NULL, key_tbl_size,
189                         RTE_CACHE_LINE_SIZE, params->socket_id);
190
191         if (k == NULL) {
192                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
193                 goto err_unlock;
194         }
195
196 /*
197  * If x86 architecture is used, select appropriate compare function,
198  * which may use x86 intrinsics, otherwise use memcmp
199  */
200 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
201         /* Select function to compare keys */
202         switch (params->key_len) {
203         case 16:
204                 h->cmp_jump_table_idx = KEY_16_BYTES;
205                 break;
206         case 32:
207                 h->cmp_jump_table_idx = KEY_32_BYTES;
208                 break;
209         case 48:
210                 h->cmp_jump_table_idx = KEY_48_BYTES;
211                 break;
212         case 64:
213                 h->cmp_jump_table_idx = KEY_64_BYTES;
214                 break;
215         case 80:
216                 h->cmp_jump_table_idx = KEY_80_BYTES;
217                 break;
218         case 96:
219                 h->cmp_jump_table_idx = KEY_96_BYTES;
220                 break;
221         case 112:
222                 h->cmp_jump_table_idx = KEY_112_BYTES;
223                 break;
224         case 128:
225                 h->cmp_jump_table_idx = KEY_128_BYTES;
226                 break;
227         default:
228                 /* If key is not multiple of 16, use generic memcmp */
229                 h->cmp_jump_table_idx = KEY_OTHER_BYTES;
230         }
231 #else
232         h->cmp_jump_table_idx = KEY_OTHER_BYTES;
233 #endif
234
235         if (hw_trans_mem_support) {
236                 h->local_free_slots = rte_zmalloc_socket(NULL,
237                                 sizeof(struct lcore_cache) * RTE_MAX_LCORE,
238                                 RTE_CACHE_LINE_SIZE, params->socket_id);
239         }
240
241         /* Default hash function */
242 #if defined(RTE_ARCH_X86)
243         default_hash_func = (rte_hash_function)rte_hash_crc;
244 #elif defined(RTE_ARCH_ARM64)
245         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
246                 default_hash_func = (rte_hash_function)rte_hash_crc;
247 #endif
248         /* Setup hash context */
249         snprintf(h->name, sizeof(h->name), "%s", params->name);
250         h->entries = params->entries;
251         h->key_len = params->key_len;
252         h->key_entry_size = key_entry_size;
253         h->hash_func_init_val = params->hash_func_init_val;
254
255         h->num_buckets = num_buckets;
256         h->bucket_bitmask = h->num_buckets - 1;
257         h->buckets = buckets;
258         h->hash_func = (params->hash_func == NULL) ?
259                 default_hash_func : params->hash_func;
260         h->key_store = k;
261         h->free_slots = r;
262         h->hw_trans_mem_support = hw_trans_mem_support;
263
264 #if defined(RTE_ARCH_X86)
265         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
266                 h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2;
267         else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
268                 h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
269         else
270 #endif
271                 h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
272
273         /* Turn on multi-writer only with explicit flat from user and TM
274          * support.
275          */
276         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
277                 if (h->hw_trans_mem_support) {
278                         h->add_key = ADD_KEY_MULTIWRITER_TM;
279                 } else {
280                         h->add_key = ADD_KEY_MULTIWRITER;
281                         h->multiwriter_lock = rte_malloc(NULL,
282                                                         sizeof(rte_spinlock_t),
283                                                         RTE_CACHE_LINE_SIZE);
284                         if (h->multiwriter_lock == NULL)
285                                 goto err_unlock;
286
287                         rte_spinlock_init(h->multiwriter_lock);
288                 }
289         } else
290                 h->add_key = ADD_KEY_SINGLEWRITER;
291
292         /* Populate free slots ring. Entry zero is reserved for key misses. */
293         for (i = 1; i < num_key_slots; i++)
294                 rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
295
296         te->data = (void *) h;
297         TAILQ_INSERT_TAIL(hash_list, te, next);
298         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
299
300         return h;
301 err_unlock:
302         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
303 err:
304         rte_ring_free(r);
305         rte_free(te);
306         rte_free(h);
307         rte_free(buckets);
308         rte_free(k);
309         return NULL;
310 }
311
312 void
313 rte_hash_free(struct rte_hash *h)
314 {
315         struct rte_tailq_entry *te;
316         struct rte_hash_list *hash_list;
317
318         if (h == NULL)
319                 return;
320
321         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
322
323         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
324
325         /* find out tailq entry */
326         TAILQ_FOREACH(te, hash_list, next) {
327                 if (te->data == (void *) h)
328                         break;
329         }
330
331         if (te == NULL) {
332                 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
333                 return;
334         }
335
336         TAILQ_REMOVE(hash_list, te, next);
337
338         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
339
340         if (h->hw_trans_mem_support)
341                 rte_free(h->local_free_slots);
342
343         if (h->add_key == ADD_KEY_MULTIWRITER)
344                 rte_free(h->multiwriter_lock);
345         rte_ring_free(h->free_slots);
346         rte_free(h->key_store);
347         rte_free(h->buckets);
348         rte_free(h);
349         rte_free(te);
350 }
351
352 hash_sig_t
353 rte_hash_hash(const struct rte_hash *h, const void *key)
354 {
355         /* calc hash result by key */
356         return h->hash_func(key, h->key_len, h->hash_func_init_val);
357 }
358
359 /* Calc the secondary hash value from the primary hash value of a given key */
360 static inline hash_sig_t
361 rte_hash_secondary_hash(const hash_sig_t primary_hash)
362 {
363         static const unsigned all_bits_shift = 12;
364         static const unsigned alt_bits_xor = 0x5bd1e995;
365
366         uint32_t tag = primary_hash >> all_bits_shift;
367
368         return primary_hash ^ ((tag + 1) * alt_bits_xor);
369 }
370
371 void
372 rte_hash_reset(struct rte_hash *h)
373 {
374         void *ptr;
375         uint32_t tot_ring_cnt, i;
376
377         if (h == NULL)
378                 return;
379
380         memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
381         memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
382
383         /* clear the free ring */
384         while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
385                 rte_pause();
386
387         /* Repopulate the free slots ring. Entry zero is reserved for key misses */
388         if (h->hw_trans_mem_support)
389                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
390                                         (LCORE_CACHE_SIZE - 1);
391         else
392                 tot_ring_cnt = h->entries;
393
394         for (i = 1; i < tot_ring_cnt + 1; i++)
395                 rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
396
397         if (h->hw_trans_mem_support) {
398                 /* Reset local caches per lcore */
399                 for (i = 0; i < RTE_MAX_LCORE; i++)
400                         h->local_free_slots[i].len = 0;
401         }
402 }
403
/*
 * Cuckoo displacement: free one slot in @bkt by pushing one of its
 * entries to that entry's alternative bucket, recursing if the
 * alternative bucket is itself full.
 *
 * @nr_pushes caps the total recursion depth across the whole insert
 * (bounded by RTE_HASH_MAX_PUSHES) to avoid unbounded displacement
 * chains or cycles.
 *
 * Returns the index within @bkt of the entry that was moved out
 * (the caller may reuse that slot), or -ENOSPC when no entry could
 * be displaced.
 */
static inline int
make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt,
		unsigned int *nr_pushes)
{
	unsigned i, j;
	int ret;
	uint32_t next_bucket_idx;
	struct rte_hash_bucket *next_bkt[RTE_HASH_BUCKET_ENTRIES];

	/*
	 * Push existing item (search for bucket with space in
	 * alternative locations) to its alternative location
	 */
	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
		/* Search for space in alternative locations */
		next_bucket_idx = bkt->sig_alt[i] & h->bucket_bitmask;
		next_bkt[i] = &h->buckets[next_bucket_idx];
		for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) {
			if (next_bkt[i]->key_idx[j] == EMPTY_SLOT)
				break;
		}

		if (j != RTE_HASH_BUCKET_ENTRIES)
			break;
	}

	/* Alternative location has spare room (end of recursive function) */
	if (i != RTE_HASH_BUCKET_ENTRIES) {
		/* The entry's alt bucket becomes its current one, so its
		 * current/alt signatures swap roles at the new location.
		 */
		next_bkt[i]->sig_alt[j] = bkt->sig_current[i];
		next_bkt[i]->sig_current[j] = bkt->sig_alt[i];
		next_bkt[i]->key_idx[j] = bkt->key_idx[i];
		return i;
	}

	/* Pick entry that has not been pushed yet */
	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++)
		if (bkt->flag[i] == 0)
			break;

	/* All entries have been pushed, so entry cannot be added */
	if (i == RTE_HASH_BUCKET_ENTRIES || ++(*nr_pushes) > RTE_HASH_MAX_PUSHES)
		return -ENOSPC;

	/* Set flag to indicate that this entry is going to be pushed */
	bkt->flag[i] = 1;

	/* Need room in alternative bucket to insert the pushed entry */
	ret = make_space_bucket(h, next_bkt[i], nr_pushes);
	/*
	 * After recursive function.
	 * Clear flags and insert the pushed entry
	 * in its alternative location if successful,
	 * or return error
	 */
	bkt->flag[i] = 0;
	if (ret >= 0) {
		next_bkt[i]->sig_alt[ret] = bkt->sig_current[i];
		next_bkt[i]->sig_current[ret] = bkt->sig_alt[i];
		next_bkt[i]->key_idx[ret] = bkt->key_idx[i];
		return i;
	} else
		return ret;

}
469
470 /*
471  * Function called to enqueue back an index in the cache/ring,
472  * as slot has not being used and it can be used in the
473  * next addition attempt.
474  */
475 static inline void
476 enqueue_slot_back(const struct rte_hash *h,
477                 struct lcore_cache *cached_free_slots,
478                 void *slot_id)
479 {
480         if (h->hw_trans_mem_support) {
481                 cached_free_slots->objs[cached_free_slots->len] = slot_id;
482                 cached_free_slots->len++;
483         } else
484                 rte_ring_sp_enqueue(h->free_slots, slot_id);
485 }
486
/*
 * Core insert: add @key (primary signature @sig) with user data @data.
 *
 * Steps: take the multi-writer lock if configured; reserve a key slot
 * from the per-lcore cache or the global ring; if the key already
 * exists in either candidate bucket, update its data in place and give
 * the reserved slot back; otherwise write the key and try, in order,
 * a free slot in the primary bucket, then cuckoo displacement (HTM
 * path on x86 when enabled, software path otherwise).
 *
 * Returns the slot index minus the dummy slot (>= 0) on success or
 * update, -ENOSPC when no slot can be obtained or displacement fails.
 * NOTE: the "failure" label is also the exit for the successful
 * update-in-place case; it only releases the lock and returns ret.
 */
static inline int32_t
__rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
						hash_sig_t sig, void *data)
{
	hash_sig_t alt_hash;
	uint32_t prim_bucket_idx, sec_bucket_idx;
	unsigned i;
	struct rte_hash_bucket *prim_bkt, *sec_bkt;
	struct rte_hash_key *new_k, *k, *keys = h->key_store;
	void *slot_id = NULL;
	uint32_t new_idx;
	int ret;
	unsigned n_slots;
	unsigned lcore_id;
	struct lcore_cache *cached_free_slots = NULL;
	unsigned int nr_pushes = 0;

	if (h->add_key == ADD_KEY_MULTIWRITER)
		rte_spinlock_lock(h->multiwriter_lock);

	prim_bucket_idx = sig & h->bucket_bitmask;
	prim_bkt = &h->buckets[prim_bucket_idx];
	rte_prefetch0(prim_bkt);

	alt_hash = rte_hash_secondary_hash(sig);
	sec_bucket_idx = alt_hash & h->bucket_bitmask;
	sec_bkt = &h->buckets[sec_bucket_idx];
	rte_prefetch0(sec_bkt);

	/* Get a new slot for storing the new key */
	if (h->hw_trans_mem_support) {
		lcore_id = rte_lcore_id();
		cached_free_slots = &h->local_free_slots[lcore_id];
		/* Try to get a free slot from the local cache */
		if (cached_free_slots->len == 0) {
			/* Need to get another burst of free slots from global ring */
			n_slots = rte_ring_mc_dequeue_burst(h->free_slots,
					cached_free_slots->objs,
					LCORE_CACHE_SIZE, NULL);
			if (n_slots == 0) {
				ret = -ENOSPC;
				goto failure;
			}

			cached_free_slots->len += n_slots;
		}

		/* Get a free slot from the local cache */
		cached_free_slots->len--;
		slot_id = cached_free_slots->objs[cached_free_slots->len];
	} else {
		if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
			ret = -ENOSPC;
			goto failure;
		}
	}

	/* slot_id doubles as the key-store index (slot 0 is the dummy). */
	new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
	rte_prefetch0(new_k);
	new_idx = (uint32_t)((uintptr_t) slot_id);

	/* Check if key is already inserted in primary location */
	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
		/* Both signatures must match before paying for a full
		 * key comparison.
		 */
		if (prim_bkt->sig_current[i] == sig &&
				prim_bkt->sig_alt[i] == alt_hash) {
			k = (struct rte_hash_key *) ((char *)keys +
					prim_bkt->key_idx[i] * h->key_entry_size);
			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
				/* Enqueue index of free slot back in the ring. */
				enqueue_slot_back(h, cached_free_slots, slot_id);
				/* Update data */
				k->pdata = data;
				/*
				 * Return index where key is stored,
				 * subtracting the first dummy index
				 */
				ret = prim_bkt->key_idx[i] - 1;
				goto failure;
			}
		}
	}

	/* Check if key is already inserted in secondary location */
	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
		if (sec_bkt->sig_alt[i] == sig &&
				sec_bkt->sig_current[i] == alt_hash) {
			k = (struct rte_hash_key *) ((char *)keys +
					sec_bkt->key_idx[i] * h->key_entry_size);
			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
				/* Enqueue index of free slot back in the ring. */
				enqueue_slot_back(h, cached_free_slots, slot_id);
				/* Update data */
				k->pdata = data;
				/*
				 * Return index where key is stored,
				 * subtracting the first dummy index
				 */
				ret = sec_bkt->key_idx[i] - 1;
				goto failure;
			}
		}
	}

	/* Copy key */
	rte_memcpy(new_k->key, key, h->key_len);
	new_k->pdata = data;

#if defined(RTE_ARCH_X86) /* currently only x86 support HTM */
	if (h->add_key == ADD_KEY_MULTIWRITER_TM) {
		/* HTM insert path: transactional, returns early on success. */
		ret = rte_hash_cuckoo_insert_mw_tm(prim_bkt,
				sig, alt_hash, new_idx);
		if (ret >= 0)
			return new_idx - 1;

		/* Primary bucket full, need to make space for new entry */
		ret = rte_hash_cuckoo_make_space_mw_tm(h, prim_bkt, sig,
							alt_hash, new_idx);

		if (ret >= 0)
			return new_idx - 1;

		/* Also search secondary bucket to get better occupancy */
		ret = rte_hash_cuckoo_make_space_mw_tm(h, sec_bkt, sig,
							alt_hash, new_idx);

		if (ret >= 0)
			return new_idx - 1;
	} else {
#endif
		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
			/* Check if slot is available */
			if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
				prim_bkt->sig_current[i] = sig;
				prim_bkt->sig_alt[i] = alt_hash;
				prim_bkt->key_idx[i] = new_idx;
				break;
			}
		}

		if (i != RTE_HASH_BUCKET_ENTRIES) {
			if (h->add_key == ADD_KEY_MULTIWRITER)
				rte_spinlock_unlock(h->multiwriter_lock);
			return new_idx - 1;
		}

		/* Primary bucket full, need to make space for new entry
		 * After recursive function.
		 * Insert the new entry in the position of the pushed entry
		 * if successful or return error and
		 * store the new slot back in the ring
		 */
		ret = make_space_bucket(h, prim_bkt, &nr_pushes);
		if (ret >= 0) {
			prim_bkt->sig_current[ret] = sig;
			prim_bkt->sig_alt[ret] = alt_hash;
			prim_bkt->key_idx[ret] = new_idx;
			if (h->add_key == ADD_KEY_MULTIWRITER)
				rte_spinlock_unlock(h->multiwriter_lock);
			return new_idx - 1;
		}
#if defined(RTE_ARCH_X86)
	}
#endif
	/* Error in addition, store new slot back in the ring and return error */
	enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx));

failure:
	if (h->add_key == ADD_KEY_MULTIWRITER)
		rte_spinlock_unlock(h->multiwriter_lock);
	return ret;
}
658
659 int32_t
660 rte_hash_add_key_with_hash(const struct rte_hash *h,
661                         const void *key, hash_sig_t sig)
662 {
663         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
664         return __rte_hash_add_key_with_hash(h, key, sig, 0);
665 }
666
/*
 * Add @key, computing its signature internally, with no user data.
 * Returns the key slot index (>= 0) on success, -EINVAL on NULL
 * arguments, or a negative error from the core insert (e.g. -ENOSPC).
 */
int32_t
rte_hash_add_key(const struct rte_hash *h, const void *key)
{
	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
	/* Pass NULL (not integer 0) for the unused void *data argument. */
	return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), NULL);
}
673
674 int
675 rte_hash_add_key_with_hash_data(const struct rte_hash *h,
676                         const void *key, hash_sig_t sig, void *data)
677 {
678         int ret;
679
680         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
681         ret = __rte_hash_add_key_with_hash(h, key, sig, data);
682         if (ret >= 0)
683                 return 0;
684         else
685                 return ret;
686 }
687
688 int
689 rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
690 {
691         int ret;
692
693         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
694
695         ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data);
696         if (ret >= 0)
697                 return 0;
698         else
699                 return ret;
700 }
/*
 * Core lookup: find @key using its primary signature @sig.
 *
 * Probes the primary bucket first (matching on sig_current with a
 * non-empty slot), then the secondary bucket (matching on both the
 * alternative signature and the stored original signature). On a hit,
 * optionally writes the stored user data pointer through @data.
 *
 * Returns the key slot index minus the dummy slot (>= 0) on a hit,
 * -ENOENT when the key is not in the table.
 */
static inline int32_t
__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
					hash_sig_t sig, void **data)
{
	uint32_t bucket_idx;
	hash_sig_t alt_hash;
	unsigned i;
	struct rte_hash_bucket *bkt;
	struct rte_hash_key *k, *keys = h->key_store;

	bucket_idx = sig & h->bucket_bitmask;
	bkt = &h->buckets[bucket_idx];

	/* Check if key is in primary location */
	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
		/* Signature match is a cheap filter before the full
		 * key comparison.
		 */
		if (bkt->sig_current[i] == sig &&
				bkt->key_idx[i] != EMPTY_SLOT) {
			k = (struct rte_hash_key *) ((char *)keys +
					bkt->key_idx[i] * h->key_entry_size);
			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
				if (data != NULL)
					*data = k->pdata;
				/*
				 * Return index where key is stored,
				 * subtracting the first dummy index
				 */
				return bkt->key_idx[i] - 1;
			}
		}
	}

	/* Calculate secondary hash */
	alt_hash = rte_hash_secondary_hash(sig);
	bucket_idx = alt_hash & h->bucket_bitmask;
	bkt = &h->buckets[bucket_idx];

	/* Check if key is in secondary location */
	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
		/* Entries displaced here carry alt_hash as their current
		 * signature and the original sig as their alternative.
		 */
		if (bkt->sig_current[i] == alt_hash &&
				bkt->sig_alt[i] == sig) {
			k = (struct rte_hash_key *) ((char *)keys +
					bkt->key_idx[i] * h->key_entry_size);
			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
				if (data != NULL)
					*data = k->pdata;
				/*
				 * Return index where key is stored,
				 * subtracting the first dummy index
				 */
				return bkt->key_idx[i] - 1;
			}
		}
	}

	return -ENOENT;
}
757
/*
 * Look up @key using a caller-precomputed signature @sig.
 * Returns the key slot index (>= 0), -EINVAL on NULL arguments,
 * or -ENOENT when the key is absent. No user data is returned.
 */
int32_t
rte_hash_lookup_with_hash(const struct rte_hash *h,
			const void *key, hash_sig_t sig)
{
	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
	return __rte_hash_lookup_with_hash(h, key, sig, NULL);
}
765
/*
 * Look up @key, computing its signature internally.
 * Returns the key slot index (>= 0), -EINVAL on NULL arguments,
 * or -ENOENT when the key is absent. No user data is returned.
 */
int32_t
rte_hash_lookup(const struct rte_hash *h, const void *key)
{
	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
	return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL);
}
772
/*
 * Look up @key using a caller-precomputed signature @sig and store the
 * associated user data pointer through @data on a hit.
 * Returns the key slot index (>= 0), -EINVAL on NULL h/key, or -ENOENT.
 */
int
rte_hash_lookup_with_hash_data(const struct rte_hash *h,
			const void *key, hash_sig_t sig, void **data)
{
	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
	return __rte_hash_lookup_with_hash(h, key, sig, data);
}
780
/*
 * Look up @key (signature computed internally) and store the associated
 * user data pointer through @data on a hit.
 * Returns the key slot index (>= 0), -EINVAL on NULL h/key, or -ENOENT.
 */
int
rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
{
	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
	return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
}
787
/*
 * Release bucket slot @i of @bkt: clear its signatures and return its
 * key-slot index to the free pool (per-lcore cache when TM support is
 * on, global ring otherwise). The caller remains responsible for
 * clearing bkt->key_idx[i] itself.
 */
static inline void
remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
{
	unsigned lcore_id, n_slots;
	struct lcore_cache *cached_free_slots;

	bkt->sig_current[i] = NULL_SIGNATURE;
	bkt->sig_alt[i] = NULL_SIGNATURE;
	if (h->hw_trans_mem_support) {
		lcore_id = rte_lcore_id();
		cached_free_slots = &h->local_free_slots[lcore_id];
		/* Cache full, need to free it. */
		if (cached_free_slots->len == LCORE_CACHE_SIZE) {
			/* Need to enqueue the free slots in global ring. */
			n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
						cached_free_slots->objs,
						LCORE_CACHE_SIZE, NULL);
			cached_free_slots->len -= n_slots;
		}
		/* Put index of new free slot in cache. */
		cached_free_slots->objs[cached_free_slots->len] =
				(void *)((uintptr_t)bkt->key_idx[i]);
		cached_free_slots->len++;
	} else {
		rte_ring_sp_enqueue(h->free_slots,
				(void *)((uintptr_t)bkt->key_idx[i]));
	}
}
816
817 static inline int32_t
818 __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
819                                                 hash_sig_t sig)
820 {
821         uint32_t bucket_idx;
822         hash_sig_t alt_hash;
823         unsigned i;
824         struct rte_hash_bucket *bkt;
825         struct rte_hash_key *k, *keys = h->key_store;
826         int32_t ret;
827
828         bucket_idx = sig & h->bucket_bitmask;
829         bkt = &h->buckets[bucket_idx];
830
831         /* Check if key is in primary location */
832         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
833                 if (bkt->sig_current[i] == sig &&
834                                 bkt->key_idx[i] != EMPTY_SLOT) {
835                         k = (struct rte_hash_key *) ((char *)keys +
836                                         bkt->key_idx[i] * h->key_entry_size);
837                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
838                                 remove_entry(h, bkt, i);
839
840                                 /*
841                                  * Return index where key is stored,
842                                  * subtracting the first dummy index
843                                  */
844                                 ret = bkt->key_idx[i] - 1;
845                                 bkt->key_idx[i] = EMPTY_SLOT;
846                                 return ret;
847                         }
848                 }
849         }
850
851         /* Calculate secondary hash */
852         alt_hash = rte_hash_secondary_hash(sig);
853         bucket_idx = alt_hash & h->bucket_bitmask;
854         bkt = &h->buckets[bucket_idx];
855
856         /* Check if key is in secondary location */
857         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
858                 if (bkt->sig_current[i] == alt_hash &&
859                                 bkt->key_idx[i] != EMPTY_SLOT) {
860                         k = (struct rte_hash_key *) ((char *)keys +
861                                         bkt->key_idx[i] * h->key_entry_size);
862                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
863                                 remove_entry(h, bkt, i);
864
865                                 /*
866                                  * Return index where key is stored,
867                                  * subtracting the first dummy index
868                                  */
869                                 ret = bkt->key_idx[i] - 1;
870                                 bkt->key_idx[i] = EMPTY_SLOT;
871                                 return ret;
872                         }
873                 }
874         }
875
876         return -ENOENT;
877 }
878
/*
 * Delete @key using the caller-precomputed hash @sig.
 * NOTE(review): RETURN_IF_TRUE may compile to a no-op outside debug
 * builds -- verify in rte_cuckoo_hash.h.
 */
int32_t
rte_hash_del_key_with_hash(const struct rte_hash *h,
			const void *key, hash_sig_t sig)
{
	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
	return __rte_hash_del_key_with_hash(h, key, sig);
}
886
/*
 * Delete @key, computing its hash internally.
 * NOTE(review): RETURN_IF_TRUE may compile to a no-op outside debug
 * builds -- verify in rte_cuckoo_hash.h.
 */
int32_t
rte_hash_del_key(const struct rte_hash *h, const void *key)
{
	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
	return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
}
893
894 int
895 rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
896                                void **key)
897 {
898         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
899
900         struct rte_hash_key *k, *keys = h->key_store;
901         k = (struct rte_hash_key *) ((char *) keys + (position + 1) *
902                                      h->key_entry_size);
903         *key = k->key;
904
905         if (position !=
906             __rte_hash_lookup_with_hash(h, *key, rte_hash_hash(h, *key),
907                                         NULL)) {
908                 return -ENOENT;
909         }
910
911         return 0;
912 }
913
914 static inline void
915 compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
916                         const struct rte_hash_bucket *prim_bkt,
917                         const struct rte_hash_bucket *sec_bkt,
918                         hash_sig_t prim_hash, hash_sig_t sec_hash,
919                         enum rte_hash_sig_compare_function sig_cmp_fn)
920 {
921         unsigned int i;
922
923         switch (sig_cmp_fn) {
924 #ifdef RTE_MACHINE_CPUFLAG_AVX2
925         case RTE_HASH_COMPARE_AVX2:
926                 *prim_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
927                                 _mm256_load_si256(
928                                         (__m256i const *)prim_bkt->sig_current),
929                                 _mm256_set1_epi32(prim_hash)));
930                 *sec_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
931                                 _mm256_load_si256(
932                                         (__m256i const *)sec_bkt->sig_current),
933                                 _mm256_set1_epi32(sec_hash)));
934                 break;
935 #endif
936 #ifdef RTE_MACHINE_CPUFLAG_SSE2
937         case RTE_HASH_COMPARE_SSE:
938                 /* Compare the first 4 signatures in the bucket */
939                 *prim_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
940                                 _mm_load_si128(
941                                         (__m128i const *)prim_bkt->sig_current),
942                                 _mm_set1_epi32(prim_hash)));
943                 *prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
944                                 _mm_load_si128(
945                                         (__m128i const *)&prim_bkt->sig_current[4]),
946                                 _mm_set1_epi32(prim_hash)))) << 4;
947                 /* Compare the first 4 signatures in the bucket */
948                 *sec_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
949                                 _mm_load_si128(
950                                         (__m128i const *)sec_bkt->sig_current),
951                                 _mm_set1_epi32(sec_hash)));
952                 *sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
953                                 _mm_load_si128(
954                                         (__m128i const *)&sec_bkt->sig_current[4]),
955                                 _mm_set1_epi32(sec_hash)))) << 4;
956                 break;
957 #endif
958         default:
959                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
960                         *prim_hash_matches |=
961                                 ((prim_hash == prim_bkt->sig_current[i]) << i);
962                         *sec_hash_matches |=
963                                 ((sec_hash == sec_bkt->sig_current[i]) << i);
964                 }
965         }
966
967 }
968
#define PREFETCH_OFFSET 4
/*
 * Bulk lookup of up to RTE_HASH_LOOKUP_BULK_MAX keys, organized as a
 * software pipeline to hide memory latency:
 *   1) prefetch the first PREFETCH_OFFSET keys;
 *   2) for each key, compute primary/secondary hashes and prefetch both
 *      candidate buckets (while prefetching keys PREFETCH_OFFSET ahead);
 *   3) compare signatures in both buckets and prefetch the key slot of
 *      the first signature hit;
 *   4) compare full keys, walking the remaining signature hits
 *      (primary bucket first, then secondary).
 * For each key i: positions[i] is set to the key index or -ENOENT, bit i
 * of *hit_mask is set on a hit (if hit_mask != NULL), and data[i] gets
 * the stored data pointer (if data != NULL).
 */
static inline void
__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
			int32_t num_keys, int32_t *positions,
			uint64_t *hit_mask, void *data[])
{
	uint64_t hits = 0;
	int32_t i;
	uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
	uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX];
	const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
	const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
	/* Hit masks must start at zero: compare_signatures' scalar path
	 * ORs into them. */
	uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
	uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};

	/* Prefetch first keys */
	for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
		rte_prefetch0(keys[i]);

	/*
	 * Prefetch rest of the keys, calculate primary and
	 * secondary bucket and prefetch them
	 */
	for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
		rte_prefetch0(keys[i + PREFETCH_OFFSET]);

		prim_hash[i] = rte_hash_hash(h, keys[i]);
		sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);

		primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
		secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];

		rte_prefetch0(primary_bkt[i]);
		rte_prefetch0(secondary_bkt[i]);
	}

	/* Calculate and prefetch rest of the buckets */
	for (; i < num_keys; i++) {
		prim_hash[i] = rte_hash_hash(h, keys[i]);
		sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);

		primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
		secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];

		rte_prefetch0(primary_bkt[i]);
		rte_prefetch0(secondary_bkt[i]);
	}

	/* Compare signatures and prefetch key slot of first hit */
	for (i = 0; i < num_keys; i++) {
		compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
				primary_bkt[i], secondary_bkt[i],
				prim_hash[i], sec_hash[i], h->sig_cmp_fn);

		if (prim_hitmask[i]) {
			/* Prefetch only the lowest-index hit; further hits
			 * (if any) are handled in the key-compare loop. */
			uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]);
			uint32_t key_idx = primary_bkt[i]->key_idx[first_hit];
			const struct rte_hash_key *key_slot =
				(const struct rte_hash_key *)(
				(const char *)h->key_store +
				key_idx * h->key_entry_size);
			rte_prefetch0(key_slot);
			continue;
		}

		if (sec_hitmask[i]) {
			uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]);
			uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit];
			const struct rte_hash_key *key_slot =
				(const struct rte_hash_key *)(
				(const char *)h->key_store +
				key_idx * h->key_entry_size);
			rte_prefetch0(key_slot);
		}
	}

	/* Compare keys, first hits in primary first */
	for (i = 0; i < num_keys; i++) {
		positions[i] = -ENOENT;
		while (prim_hitmask[i]) {
			uint32_t hit_index = __builtin_ctzl(prim_hitmask[i]);

			uint32_t key_idx = primary_bkt[i]->key_idx[hit_index];
			const struct rte_hash_key *key_slot =
				(const struct rte_hash_key *)(
				(const char *)h->key_store +
				key_idx * h->key_entry_size);
			/*
			 * If key index is 0, do not compare key,
			 * as it is checking the dummy slot
			 */
			/* Bitwise & (not &&) keeps this branch-free: the
			 * compare still runs, but its result is masked out
			 * when key_idx == 0 (the dummy slot). */
			if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
				if (data != NULL)
					data[i] = key_slot->pdata;

				hits |= 1ULL << i;
				/* -1 converts key-store index to the public
				 * position (dummy slot excluded). */
				positions[i] = key_idx - 1;
				goto next_key;
			}
			/* Signature hit but key mismatch: clear the bit and
			 * try the next candidate. */
			prim_hitmask[i] &= ~(1 << (hit_index));
		}

		while (sec_hitmask[i]) {
			uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]);

			uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index];
			const struct rte_hash_key *key_slot =
				(const struct rte_hash_key *)(
				(const char *)h->key_store +
				key_idx * h->key_entry_size);
			/*
			 * If key index is 0, do not compare key,
			 * as it is checking the dummy slot
			 */

			if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
				if (data != NULL)
					data[i] = key_slot->pdata;

				hits |= 1ULL << i;
				positions[i] = key_idx - 1;
				goto next_key;
			}
			sec_hitmask[i] &= ~(1 << (hit_index));
		}

next_key:
		continue;
	}

	if (hit_mask != NULL)
		*hit_mask = hits;
}
1102
/*
 * Bulk lookup writing per-key positions only (no data pointers, no hit
 * mask). num_keys is bounded by RTE_HASH_LOOKUP_BULK_MAX.
 * NOTE(review): RETURN_IF_TRUE may compile to a no-op outside debug
 * builds -- verify in rte_cuckoo_hash.h.
 */
int
rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
		      uint32_t num_keys, int32_t *positions)
{
	RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
			(num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
			(positions == NULL)), -EINVAL);

	__rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL);
	return 0;
}
1114
/*
 * Bulk lookup returning data pointers and a hit mask; the per-key
 * positions are computed into a stack scratch array (VLA is bounded by
 * the RTE_HASH_LOOKUP_BULK_MAX check above it) and discarded.
 * Returns the number of hits (population count of *hit_mask).
 * NOTE(review): RETURN_IF_TRUE may compile to a no-op outside debug
 * builds -- verify in rte_cuckoo_hash.h.
 */
int
rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
		      uint32_t num_keys, uint64_t *hit_mask, void *data[])
{
	RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
			(num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
			(hit_mask == NULL)), -EINVAL);

	int32_t positions[num_keys];

	__rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);

	/* Return number of hits */
	return __builtin_popcountl(*hit_mask);
}
1130
1131 int32_t
1132 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
1133 {
1134         uint32_t bucket_idx, idx, position;
1135         struct rte_hash_key *next_key;
1136
1137         RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
1138
1139         const uint32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
1140         /* Out of bounds */
1141         if (*next >= total_entries)
1142                 return -ENOENT;
1143
1144         /* Calculate bucket and index of current iterator */
1145         bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
1146         idx = *next % RTE_HASH_BUCKET_ENTRIES;
1147
1148         /* If current position is empty, go to the next one */
1149         while (h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) {
1150                 (*next)++;
1151                 /* End of table */
1152                 if (*next == total_entries)
1153                         return -ENOENT;
1154                 bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
1155                 idx = *next % RTE_HASH_BUCKET_ENTRIES;
1156         }
1157
1158         /* Get position of entry in key table */
1159         position = h->buckets[bucket_idx].key_idx[idx];
1160         next_key = (struct rte_hash_key *) ((char *)h->key_store +
1161                                 position * h->key_entry_size);
1162         /* Return key and data */
1163         *key = next_key->key;
1164         *data = next_key->pdata;
1165
1166         /* Increment iterator */
1167         (*next)++;
1168
1169         return position - 1;
1170 }