hash: fix multiwriter lock memory allocation
[dpdk.git] / lib / librte_hash / rte_cuckoo_hash.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <string.h>
6 #include <stdint.h>
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdarg.h>
10 #include <sys/queue.h>
11
12 #include <rte_common.h>
13 #include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
14 #include <rte_log.h>
15 #include <rte_memcpy.h>
16 #include <rte_prefetch.h>
17 #include <rte_branch_prediction.h>
18 #include <rte_malloc.h>
19 #include <rte_eal.h>
20 #include <rte_eal_memconfig.h>
21 #include <rte_per_lcore.h>
22 #include <rte_errno.h>
23 #include <rte_string_fns.h>
24 #include <rte_cpuflags.h>
25 #include <rte_rwlock.h>
26 #include <rte_spinlock.h>
27 #include <rte_ring.h>
28 #include <rte_compat.h>
29 #include <rte_pause.h>
30
31 #include "rte_hash.h"
32 #include "rte_cuckoo_hash.h"
33
34 #if defined(RTE_ARCH_X86)
35 #include "rte_cuckoo_hash_x86.h"
36 #endif
37
38 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
39
40 static struct rte_tailq_elem rte_hash_tailq = {
41         .name = "RTE_HASH",
42 };
43 EAL_REGISTER_TAILQ(rte_hash_tailq)
44
45 struct rte_hash *
46 rte_hash_find_existing(const char *name)
47 {
48         struct rte_hash *h = NULL;
49         struct rte_tailq_entry *te;
50         struct rte_hash_list *hash_list;
51
52         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
53
54         rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
55         TAILQ_FOREACH(te, hash_list, next) {
56                 h = (struct rte_hash *) te->data;
57                 if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
58                         break;
59         }
60         rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
61
62         if (te == NULL) {
63                 rte_errno = ENOENT;
64                 return NULL;
65         }
66         return h;
67 }
68
69 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
70 {
71         h->cmp_jump_table_idx = KEY_CUSTOM;
72         h->rte_hash_custom_cmp_eq = func;
73 }
74
75 static inline int
76 rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
77 {
78         if (h->cmp_jump_table_idx == KEY_CUSTOM)
79                 return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len);
80         else
81                 return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
82 }
83
84 struct rte_hash *
85 rte_hash_create(const struct rte_hash_parameters *params)
86 {
87         struct rte_hash *h = NULL;
88         struct rte_tailq_entry *te = NULL;
89         struct rte_hash_list *hash_list;
90         struct rte_ring *r = NULL;
91         char hash_name[RTE_HASH_NAMESIZE];
92         void *k = NULL;
93         void *buckets = NULL;
94         char ring_name[RTE_RING_NAMESIZE];
95         unsigned num_key_slots;
96         unsigned hw_trans_mem_support = 0;
97         unsigned i;
98         rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
99
100         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
101
102         if (params == NULL) {
103                 RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
104                 return NULL;
105         }
106
107         /* Check for valid parameters */
108         if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
109                         (params->entries < RTE_HASH_BUCKET_ENTRIES) ||
110                         (params->key_len == 0)) {
111                 rte_errno = EINVAL;
112                 RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
113                 return NULL;
114         }
115
116         /* Check extra flags field to check extra options. */
117         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
118                 hw_trans_mem_support = 1;
119
120         /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
121         if (hw_trans_mem_support)
122                 /*
123                  * Increase number of slots by total number of indices
124                  * that can be stored in the lcore caches
125                  * except for the first cache
126                  */
127                 num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
128                                         LCORE_CACHE_SIZE + 1;
129         else
130                 num_key_slots = params->entries + 1;
131
132         snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
133         /* Create ring (Dummy slot index is not enqueued) */
134         r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots - 1),
135                         params->socket_id, 0);
136         if (r == NULL) {
137                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
138                 goto err;
139         }
140
141         snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
142
143         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
144
145         /* guarantee there's no existing: this is normally already checked
146          * by ring creation above */
147         TAILQ_FOREACH(te, hash_list, next) {
148                 h = (struct rte_hash *) te->data;
149                 if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
150                         break;
151         }
152         h = NULL;
153         if (te != NULL) {
154                 rte_errno = EEXIST;
155                 te = NULL;
156                 goto err_unlock;
157         }
158
159         te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
160         if (te == NULL) {
161                 RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
162                 goto err_unlock;
163         }
164
165         h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
166                                         RTE_CACHE_LINE_SIZE, params->socket_id);
167
168         if (h == NULL) {
169                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
170                 goto err_unlock;
171         }
172
173         const uint32_t num_buckets = rte_align32pow2(params->entries)
174                                         / RTE_HASH_BUCKET_ENTRIES;
175
176         buckets = rte_zmalloc_socket(NULL,
177                                 num_buckets * sizeof(struct rte_hash_bucket),
178                                 RTE_CACHE_LINE_SIZE, params->socket_id);
179
180         if (buckets == NULL) {
181                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
182                 goto err_unlock;
183         }
184
185         const uint32_t key_entry_size = sizeof(struct rte_hash_key) + params->key_len;
186         const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
187
188         k = rte_zmalloc_socket(NULL, key_tbl_size,
189                         RTE_CACHE_LINE_SIZE, params->socket_id);
190
191         if (k == NULL) {
192                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
193                 goto err_unlock;
194         }
195
196 /*
197  * If x86 architecture is used, select appropriate compare function,
198  * which may use x86 intrinsics, otherwise use memcmp
199  */
200 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
201         /* Select function to compare keys */
202         switch (params->key_len) {
203         case 16:
204                 h->cmp_jump_table_idx = KEY_16_BYTES;
205                 break;
206         case 32:
207                 h->cmp_jump_table_idx = KEY_32_BYTES;
208                 break;
209         case 48:
210                 h->cmp_jump_table_idx = KEY_48_BYTES;
211                 break;
212         case 64:
213                 h->cmp_jump_table_idx = KEY_64_BYTES;
214                 break;
215         case 80:
216                 h->cmp_jump_table_idx = KEY_80_BYTES;
217                 break;
218         case 96:
219                 h->cmp_jump_table_idx = KEY_96_BYTES;
220                 break;
221         case 112:
222                 h->cmp_jump_table_idx = KEY_112_BYTES;
223                 break;
224         case 128:
225                 h->cmp_jump_table_idx = KEY_128_BYTES;
226                 break;
227         default:
228                 /* If key is not multiple of 16, use generic memcmp */
229                 h->cmp_jump_table_idx = KEY_OTHER_BYTES;
230         }
231 #else
232         h->cmp_jump_table_idx = KEY_OTHER_BYTES;
233 #endif
234
235         if (hw_trans_mem_support) {
236                 h->local_free_slots = rte_zmalloc_socket(NULL,
237                                 sizeof(struct lcore_cache) * RTE_MAX_LCORE,
238                                 RTE_CACHE_LINE_SIZE, params->socket_id);
239         }
240
241         /* Default hash function */
242 #if defined(RTE_ARCH_X86)
243         default_hash_func = (rte_hash_function)rte_hash_crc;
244 #elif defined(RTE_ARCH_ARM64)
245         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
246                 default_hash_func = (rte_hash_function)rte_hash_crc;
247 #endif
248         /* Setup hash context */
249         snprintf(h->name, sizeof(h->name), "%s", params->name);
250         h->entries = params->entries;
251         h->key_len = params->key_len;
252         h->key_entry_size = key_entry_size;
253         h->hash_func_init_val = params->hash_func_init_val;
254
255         h->num_buckets = num_buckets;
256         h->bucket_bitmask = h->num_buckets - 1;
257         h->buckets = buckets;
258         h->hash_func = (params->hash_func == NULL) ?
259                 default_hash_func : params->hash_func;
260         h->key_store = k;
261         h->free_slots = r;
262         h->hw_trans_mem_support = hw_trans_mem_support;
263
264 #if defined(RTE_ARCH_X86)
265         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
266                 h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2;
267         else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
268                 h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
269         else
270 #endif
271                 h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
272
273         /* Turn on multi-writer only with explicit flat from user and TM
274          * support.
275          */
276         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
277                 if (h->hw_trans_mem_support) {
278                         h->add_key = ADD_KEY_MULTIWRITER_TM;
279                 } else {
280                         h->add_key = ADD_KEY_MULTIWRITER;
281                         h->multiwriter_lock = rte_malloc(NULL,
282                                                         sizeof(rte_spinlock_t),
283                                                         RTE_CACHE_LINE_SIZE);
284                         if (h->multiwriter_lock == NULL)
285                                 goto err_unlock;
286
287                         rte_spinlock_init(h->multiwriter_lock);
288                 }
289         } else
290                 h->add_key = ADD_KEY_SINGLEWRITER;
291
292         /* Populate free slots ring. Entry zero is reserved for key misses. */
293         for (i = 1; i < params->entries + 1; i++)
294                 rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
295
296         te->data = (void *) h;
297         TAILQ_INSERT_TAIL(hash_list, te, next);
298         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
299
300         return h;
301 err_unlock:
302         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
303 err:
304         rte_ring_free(r);
305         rte_free(te);
306         rte_free(h);
307         rte_free(buckets);
308         rte_free(k);
309         return NULL;
310 }
311
312 void
313 rte_hash_free(struct rte_hash *h)
314 {
315         struct rte_tailq_entry *te;
316         struct rte_hash_list *hash_list;
317
318         if (h == NULL)
319                 return;
320
321         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
322
323         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
324
325         /* find out tailq entry */
326         TAILQ_FOREACH(te, hash_list, next) {
327                 if (te->data == (void *) h)
328                         break;
329         }
330
331         if (te == NULL) {
332                 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
333                 return;
334         }
335
336         TAILQ_REMOVE(hash_list, te, next);
337
338         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
339
340         if (h->hw_trans_mem_support)
341                 rte_free(h->local_free_slots);
342
343         if (h->add_key == ADD_KEY_MULTIWRITER)
344                 rte_free(h->multiwriter_lock);
345         rte_ring_free(h->free_slots);
346         rte_free(h->key_store);
347         rte_free(h->buckets);
348         rte_free(h);
349         rte_free(te);
350 }
351
352 hash_sig_t
353 rte_hash_hash(const struct rte_hash *h, const void *key)
354 {
355         /* calc hash result by key */
356         return h->hash_func(key, h->key_len, h->hash_func_init_val);
357 }
358
359 /* Calc the secondary hash value from the primary hash value of a given key */
360 static inline hash_sig_t
361 rte_hash_secondary_hash(const hash_sig_t primary_hash)
362 {
363         static const unsigned all_bits_shift = 12;
364         static const unsigned alt_bits_xor = 0x5bd1e995;
365
366         uint32_t tag = primary_hash >> all_bits_shift;
367
368         return primary_hash ^ ((tag + 1) * alt_bits_xor);
369 }
370
371 void
372 rte_hash_reset(struct rte_hash *h)
373 {
374         void *ptr;
375         unsigned i;
376
377         if (h == NULL)
378                 return;
379
380         memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
381         memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
382
383         /* clear the free ring */
384         while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
385                 rte_pause();
386
387         /* Repopulate the free slots ring. Entry zero is reserved for key misses */
388         for (i = 1; i < h->entries + 1; i++)
389                 rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
390
391         if (h->hw_trans_mem_support) {
392                 /* Reset local caches per lcore */
393                 for (i = 0; i < RTE_MAX_LCORE; i++)
394                         h->local_free_slots[i].len = 0;
395         }
396 }
397
398 /* Search for an entry that can be pushed to its alternative location */
399 static inline int
400 make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt,
401                 unsigned int *nr_pushes)
402 {
403         unsigned i, j;
404         int ret;
405         uint32_t next_bucket_idx;
406         struct rte_hash_bucket *next_bkt[RTE_HASH_BUCKET_ENTRIES];
407
408         /*
409          * Push existing item (search for bucket with space in
410          * alternative locations) to its alternative location
411          */
412         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
413                 /* Search for space in alternative locations */
414                 next_bucket_idx = bkt->sig_alt[i] & h->bucket_bitmask;
415                 next_bkt[i] = &h->buckets[next_bucket_idx];
416                 for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) {
417                         if (next_bkt[i]->key_idx[j] == EMPTY_SLOT)
418                                 break;
419                 }
420
421                 if (j != RTE_HASH_BUCKET_ENTRIES)
422                         break;
423         }
424
425         /* Alternative location has spare room (end of recursive function) */
426         if (i != RTE_HASH_BUCKET_ENTRIES) {
427                 next_bkt[i]->sig_alt[j] = bkt->sig_current[i];
428                 next_bkt[i]->sig_current[j] = bkt->sig_alt[i];
429                 next_bkt[i]->key_idx[j] = bkt->key_idx[i];
430                 return i;
431         }
432
433         /* Pick entry that has not been pushed yet */
434         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++)
435                 if (bkt->flag[i] == 0)
436                         break;
437
438         /* All entries have been pushed, so entry cannot be added */
439         if (i == RTE_HASH_BUCKET_ENTRIES || ++(*nr_pushes) > RTE_HASH_MAX_PUSHES)
440                 return -ENOSPC;
441
442         /* Set flag to indicate that this entry is going to be pushed */
443         bkt->flag[i] = 1;
444
445         /* Need room in alternative bucket to insert the pushed entry */
446         ret = make_space_bucket(h, next_bkt[i], nr_pushes);
447         /*
448          * After recursive function.
449          * Clear flags and insert the pushed entry
450          * in its alternative location if successful,
451          * or return error
452          */
453         bkt->flag[i] = 0;
454         if (ret >= 0) {
455                 next_bkt[i]->sig_alt[ret] = bkt->sig_current[i];
456                 next_bkt[i]->sig_current[ret] = bkt->sig_alt[i];
457                 next_bkt[i]->key_idx[ret] = bkt->key_idx[i];
458                 return i;
459         } else
460                 return ret;
461
462 }
463
464 /*
465  * Function called to enqueue back an index in the cache/ring,
466  * as slot has not being used and it can be used in the
467  * next addition attempt.
468  */
469 static inline void
470 enqueue_slot_back(const struct rte_hash *h,
471                 struct lcore_cache *cached_free_slots,
472                 void *slot_id)
473 {
474         if (h->hw_trans_mem_support) {
475                 cached_free_slots->objs[cached_free_slots->len] = slot_id;
476                 cached_free_slots->len++;
477         } else
478                 rte_ring_sp_enqueue(h->free_slots, slot_id);
479 }
480
481 static inline int32_t
482 __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
483                                                 hash_sig_t sig, void *data)
484 {
485         hash_sig_t alt_hash;
486         uint32_t prim_bucket_idx, sec_bucket_idx;
487         unsigned i;
488         struct rte_hash_bucket *prim_bkt, *sec_bkt;
489         struct rte_hash_key *new_k, *k, *keys = h->key_store;
490         void *slot_id = NULL;
491         uint32_t new_idx;
492         int ret;
493         unsigned n_slots;
494         unsigned lcore_id;
495         struct lcore_cache *cached_free_slots = NULL;
496         unsigned int nr_pushes = 0;
497
498         if (h->add_key == ADD_KEY_MULTIWRITER)
499                 rte_spinlock_lock(h->multiwriter_lock);
500
501         prim_bucket_idx = sig & h->bucket_bitmask;
502         prim_bkt = &h->buckets[prim_bucket_idx];
503         rte_prefetch0(prim_bkt);
504
505         alt_hash = rte_hash_secondary_hash(sig);
506         sec_bucket_idx = alt_hash & h->bucket_bitmask;
507         sec_bkt = &h->buckets[sec_bucket_idx];
508         rte_prefetch0(sec_bkt);
509
510         /* Get a new slot for storing the new key */
511         if (h->hw_trans_mem_support) {
512                 lcore_id = rte_lcore_id();
513                 cached_free_slots = &h->local_free_slots[lcore_id];
514                 /* Try to get a free slot from the local cache */
515                 if (cached_free_slots->len == 0) {
516                         /* Need to get another burst of free slots from global ring */
517                         n_slots = rte_ring_mc_dequeue_burst(h->free_slots,
518                                         cached_free_slots->objs,
519                                         LCORE_CACHE_SIZE, NULL);
520                         if (n_slots == 0) {
521                                 ret = -ENOSPC;
522                                 goto failure;
523                         }
524
525                         cached_free_slots->len += n_slots;
526                 }
527
528                 /* Get a free slot from the local cache */
529                 cached_free_slots->len--;
530                 slot_id = cached_free_slots->objs[cached_free_slots->len];
531         } else {
532                 if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
533                         ret = -ENOSPC;
534                         goto failure;
535                 }
536         }
537
538         new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
539         rte_prefetch0(new_k);
540         new_idx = (uint32_t)((uintptr_t) slot_id);
541
542         /* Check if key is already inserted in primary location */
543         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
544                 if (prim_bkt->sig_current[i] == sig &&
545                                 prim_bkt->sig_alt[i] == alt_hash) {
546                         k = (struct rte_hash_key *) ((char *)keys +
547                                         prim_bkt->key_idx[i] * h->key_entry_size);
548                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
549                                 /* Enqueue index of free slot back in the ring. */
550                                 enqueue_slot_back(h, cached_free_slots, slot_id);
551                                 /* Update data */
552                                 k->pdata = data;
553                                 /*
554                                  * Return index where key is stored,
555                                  * subtracting the first dummy index
556                                  */
557                                 ret = prim_bkt->key_idx[i] - 1;
558                                 goto failure;
559                         }
560                 }
561         }
562
563         /* Check if key is already inserted in secondary location */
564         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
565                 if (sec_bkt->sig_alt[i] == sig &&
566                                 sec_bkt->sig_current[i] == alt_hash) {
567                         k = (struct rte_hash_key *) ((char *)keys +
568                                         sec_bkt->key_idx[i] * h->key_entry_size);
569                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
570                                 /* Enqueue index of free slot back in the ring. */
571                                 enqueue_slot_back(h, cached_free_slots, slot_id);
572                                 /* Update data */
573                                 k->pdata = data;
574                                 /*
575                                  * Return index where key is stored,
576                                  * subtracting the first dummy index
577                                  */
578                                 ret = sec_bkt->key_idx[i] - 1;
579                                 goto failure;
580                         }
581                 }
582         }
583
584         /* Copy key */
585         rte_memcpy(new_k->key, key, h->key_len);
586         new_k->pdata = data;
587
588 #if defined(RTE_ARCH_X86) /* currently only x86 support HTM */
589         if (h->add_key == ADD_KEY_MULTIWRITER_TM) {
590                 ret = rte_hash_cuckoo_insert_mw_tm(prim_bkt,
591                                 sig, alt_hash, new_idx);
592                 if (ret >= 0)
593                         return new_idx - 1;
594
595                 /* Primary bucket full, need to make space for new entry */
596                 ret = rte_hash_cuckoo_make_space_mw_tm(h, prim_bkt, sig,
597                                                         alt_hash, new_idx);
598
599                 if (ret >= 0)
600                         return new_idx - 1;
601
602                 /* Also search secondary bucket to get better occupancy */
603                 ret = rte_hash_cuckoo_make_space_mw_tm(h, sec_bkt, sig,
604                                                         alt_hash, new_idx);
605
606                 if (ret >= 0)
607                         return new_idx - 1;
608         } else {
609 #endif
610                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
611                         /* Check if slot is available */
612                         if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
613                                 prim_bkt->sig_current[i] = sig;
614                                 prim_bkt->sig_alt[i] = alt_hash;
615                                 prim_bkt->key_idx[i] = new_idx;
616                                 break;
617                         }
618                 }
619
620                 if (i != RTE_HASH_BUCKET_ENTRIES) {
621                         if (h->add_key == ADD_KEY_MULTIWRITER)
622                                 rte_spinlock_unlock(h->multiwriter_lock);
623                         return new_idx - 1;
624                 }
625
626                 /* Primary bucket full, need to make space for new entry
627                  * After recursive function.
628                  * Insert the new entry in the position of the pushed entry
629                  * if successful or return error and
630                  * store the new slot back in the ring
631                  */
632                 ret = make_space_bucket(h, prim_bkt, &nr_pushes);
633                 if (ret >= 0) {
634                         prim_bkt->sig_current[ret] = sig;
635                         prim_bkt->sig_alt[ret] = alt_hash;
636                         prim_bkt->key_idx[ret] = new_idx;
637                         if (h->add_key == ADD_KEY_MULTIWRITER)
638                                 rte_spinlock_unlock(h->multiwriter_lock);
639                         return new_idx - 1;
640                 }
641 #if defined(RTE_ARCH_X86)
642         }
643 #endif
644         /* Error in addition, store new slot back in the ring and return error */
645         enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx));
646
647 failure:
648         if (h->add_key == ADD_KEY_MULTIWRITER)
649                 rte_spinlock_unlock(h->multiwriter_lock);
650         return ret;
651 }
652
653 int32_t
654 rte_hash_add_key_with_hash(const struct rte_hash *h,
655                         const void *key, hash_sig_t sig)
656 {
657         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
658         return __rte_hash_add_key_with_hash(h, key, sig, 0);
659 }
660
661 int32_t
662 rte_hash_add_key(const struct rte_hash *h, const void *key)
663 {
664         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
665         return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0);
666 }
667
668 int
669 rte_hash_add_key_with_hash_data(const struct rte_hash *h,
670                         const void *key, hash_sig_t sig, void *data)
671 {
672         int ret;
673
674         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
675         ret = __rte_hash_add_key_with_hash(h, key, sig, data);
676         if (ret >= 0)
677                 return 0;
678         else
679                 return ret;
680 }
681
682 int
683 rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
684 {
685         int ret;
686
687         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
688
689         ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data);
690         if (ret >= 0)
691                 return 0;
692         else
693                 return ret;
694 }
695 static inline int32_t
696 __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
697                                         hash_sig_t sig, void **data)
698 {
699         uint32_t bucket_idx;
700         hash_sig_t alt_hash;
701         unsigned i;
702         struct rte_hash_bucket *bkt;
703         struct rte_hash_key *k, *keys = h->key_store;
704
705         bucket_idx = sig & h->bucket_bitmask;
706         bkt = &h->buckets[bucket_idx];
707
708         /* Check if key is in primary location */
709         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
710                 if (bkt->sig_current[i] == sig &&
711                                 bkt->key_idx[i] != EMPTY_SLOT) {
712                         k = (struct rte_hash_key *) ((char *)keys +
713                                         bkt->key_idx[i] * h->key_entry_size);
714                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
715                                 if (data != NULL)
716                                         *data = k->pdata;
717                                 /*
718                                  * Return index where key is stored,
719                                  * subtracting the first dummy index
720                                  */
721                                 return bkt->key_idx[i] - 1;
722                         }
723                 }
724         }
725
726         /* Calculate secondary hash */
727         alt_hash = rte_hash_secondary_hash(sig);
728         bucket_idx = alt_hash & h->bucket_bitmask;
729         bkt = &h->buckets[bucket_idx];
730
731         /* Check if key is in secondary location */
732         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
733                 if (bkt->sig_current[i] == alt_hash &&
734                                 bkt->sig_alt[i] == sig) {
735                         k = (struct rte_hash_key *) ((char *)keys +
736                                         bkt->key_idx[i] * h->key_entry_size);
737                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
738                                 if (data != NULL)
739                                         *data = k->pdata;
740                                 /*
741                                  * Return index where key is stored,
742                                  * subtracting the first dummy index
743                                  */
744                                 return bkt->key_idx[i] - 1;
745                         }
746                 }
747         }
748
749         return -ENOENT;
750 }
751
752 int32_t
753 rte_hash_lookup_with_hash(const struct rte_hash *h,
754                         const void *key, hash_sig_t sig)
755 {
756         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
757         return __rte_hash_lookup_with_hash(h, key, sig, NULL);
758 }
759
760 int32_t
761 rte_hash_lookup(const struct rte_hash *h, const void *key)
762 {
763         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
764         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL);
765 }
766
767 int
768 rte_hash_lookup_with_hash_data(const struct rte_hash *h,
769                         const void *key, hash_sig_t sig, void **data)
770 {
771         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
772         return __rte_hash_lookup_with_hash(h, key, sig, data);
773 }
774
775 int
776 rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
777 {
778         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
779         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
780 }
781
782 static inline void
783 remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
784 {
785         unsigned lcore_id, n_slots;
786         struct lcore_cache *cached_free_slots;
787
788         bkt->sig_current[i] = NULL_SIGNATURE;
789         bkt->sig_alt[i] = NULL_SIGNATURE;
790         if (h->hw_trans_mem_support) {
791                 lcore_id = rte_lcore_id();
792                 cached_free_slots = &h->local_free_slots[lcore_id];
793                 /* Cache full, need to free it. */
794                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
795                         /* Need to enqueue the free slots in global ring. */
796                         n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
797                                                 cached_free_slots->objs,
798                                                 LCORE_CACHE_SIZE, NULL);
799                         cached_free_slots->len -= n_slots;
800                 }
801                 /* Put index of new free slot in cache. */
802                 cached_free_slots->objs[cached_free_slots->len] =
803                                 (void *)((uintptr_t)bkt->key_idx[i]);
804                 cached_free_slots->len++;
805         } else {
806                 rte_ring_sp_enqueue(h->free_slots,
807                                 (void *)((uintptr_t)bkt->key_idx[i]));
808         }
809 }
810
811 static inline int32_t
812 __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
813                                                 hash_sig_t sig)
814 {
815         uint32_t bucket_idx;
816         hash_sig_t alt_hash;
817         unsigned i;
818         struct rte_hash_bucket *bkt;
819         struct rte_hash_key *k, *keys = h->key_store;
820         int32_t ret;
821
822         bucket_idx = sig & h->bucket_bitmask;
823         bkt = &h->buckets[bucket_idx];
824
825         /* Check if key is in primary location */
826         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
827                 if (bkt->sig_current[i] == sig &&
828                                 bkt->key_idx[i] != EMPTY_SLOT) {
829                         k = (struct rte_hash_key *) ((char *)keys +
830                                         bkt->key_idx[i] * h->key_entry_size);
831                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
832                                 remove_entry(h, bkt, i);
833
834                                 /*
835                                  * Return index where key is stored,
836                                  * subtracting the first dummy index
837                                  */
838                                 ret = bkt->key_idx[i] - 1;
839                                 bkt->key_idx[i] = EMPTY_SLOT;
840                                 return ret;
841                         }
842                 }
843         }
844
845         /* Calculate secondary hash */
846         alt_hash = rte_hash_secondary_hash(sig);
847         bucket_idx = alt_hash & h->bucket_bitmask;
848         bkt = &h->buckets[bucket_idx];
849
850         /* Check if key is in secondary location */
851         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
852                 if (bkt->sig_current[i] == alt_hash &&
853                                 bkt->key_idx[i] != EMPTY_SLOT) {
854                         k = (struct rte_hash_key *) ((char *)keys +
855                                         bkt->key_idx[i] * h->key_entry_size);
856                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
857                                 remove_entry(h, bkt, i);
858
859                                 /*
860                                  * Return index where key is stored,
861                                  * subtracting the first dummy index
862                                  */
863                                 ret = bkt->key_idx[i] - 1;
864                                 bkt->key_idx[i] = EMPTY_SLOT;
865                                 return ret;
866                         }
867                 }
868         }
869
870         return -ENOENT;
871 }
872
873 int32_t
874 rte_hash_del_key_with_hash(const struct rte_hash *h,
875                         const void *key, hash_sig_t sig)
876 {
877         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
878         return __rte_hash_del_key_with_hash(h, key, sig);
879 }
880
881 int32_t
882 rte_hash_del_key(const struct rte_hash *h, const void *key)
883 {
884         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
885         return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
886 }
887
888 int
889 rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
890                                void **key)
891 {
892         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
893
894         struct rte_hash_key *k, *keys = h->key_store;
895         k = (struct rte_hash_key *) ((char *) keys + (position + 1) *
896                                      h->key_entry_size);
897         *key = k->key;
898
899         if (position !=
900             __rte_hash_lookup_with_hash(h, *key, rte_hash_hash(h, *key),
901                                         NULL)) {
902                 return -ENOENT;
903         }
904
905         return 0;
906 }
907
908 static inline void
909 compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
910                         const struct rte_hash_bucket *prim_bkt,
911                         const struct rte_hash_bucket *sec_bkt,
912                         hash_sig_t prim_hash, hash_sig_t sec_hash,
913                         enum rte_hash_sig_compare_function sig_cmp_fn)
914 {
915         unsigned int i;
916
917         switch (sig_cmp_fn) {
918 #ifdef RTE_MACHINE_CPUFLAG_AVX2
919         case RTE_HASH_COMPARE_AVX2:
920                 *prim_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
921                                 _mm256_load_si256(
922                                         (__m256i const *)prim_bkt->sig_current),
923                                 _mm256_set1_epi32(prim_hash)));
924                 *sec_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
925                                 _mm256_load_si256(
926                                         (__m256i const *)sec_bkt->sig_current),
927                                 _mm256_set1_epi32(sec_hash)));
928                 break;
929 #endif
930 #ifdef RTE_MACHINE_CPUFLAG_SSE2
931         case RTE_HASH_COMPARE_SSE:
932                 /* Compare the first 4 signatures in the bucket */
933                 *prim_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
934                                 _mm_load_si128(
935                                         (__m128i const *)prim_bkt->sig_current),
936                                 _mm_set1_epi32(prim_hash)));
937                 *prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
938                                 _mm_load_si128(
939                                         (__m128i const *)&prim_bkt->sig_current[4]),
940                                 _mm_set1_epi32(prim_hash)))) << 4;
941                 /* Compare the first 4 signatures in the bucket */
942                 *sec_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
943                                 _mm_load_si128(
944                                         (__m128i const *)sec_bkt->sig_current),
945                                 _mm_set1_epi32(sec_hash)));
946                 *sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
947                                 _mm_load_si128(
948                                         (__m128i const *)&sec_bkt->sig_current[4]),
949                                 _mm_set1_epi32(sec_hash)))) << 4;
950                 break;
951 #endif
952         default:
953                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
954                         *prim_hash_matches |=
955                                 ((prim_hash == prim_bkt->sig_current[i]) << i);
956                         *sec_hash_matches |=
957                                 ((sec_hash == sec_bkt->sig_current[i]) << i);
958                 }
959         }
960
961 }
962
963 #define PREFETCH_OFFSET 4
964 static inline void
965 __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
966                         int32_t num_keys, int32_t *positions,
967                         uint64_t *hit_mask, void *data[])
968 {
969         uint64_t hits = 0;
970         int32_t i;
971         uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
972         uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX];
973         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
974         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
975         uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
976         uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
977
978         /* Prefetch first keys */
979         for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
980                 rte_prefetch0(keys[i]);
981
982         /*
983          * Prefetch rest of the keys, calculate primary and
984          * secondary bucket and prefetch them
985          */
986         for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
987                 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
988
989                 prim_hash[i] = rte_hash_hash(h, keys[i]);
990                 sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
991
992                 primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
993                 secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];
994
995                 rte_prefetch0(primary_bkt[i]);
996                 rte_prefetch0(secondary_bkt[i]);
997         }
998
999         /* Calculate and prefetch rest of the buckets */
1000         for (; i < num_keys; i++) {
1001                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1002                 sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
1003
1004                 primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
1005                 secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];
1006
1007                 rte_prefetch0(primary_bkt[i]);
1008                 rte_prefetch0(secondary_bkt[i]);
1009         }
1010
1011         /* Compare signatures and prefetch key slot of first hit */
1012         for (i = 0; i < num_keys; i++) {
1013                 compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1014                                 primary_bkt[i], secondary_bkt[i],
1015                                 prim_hash[i], sec_hash[i], h->sig_cmp_fn);
1016
1017                 if (prim_hitmask[i]) {
1018                         uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]);
1019                         uint32_t key_idx = primary_bkt[i]->key_idx[first_hit];
1020                         const struct rte_hash_key *key_slot =
1021                                 (const struct rte_hash_key *)(
1022                                 (const char *)h->key_store +
1023                                 key_idx * h->key_entry_size);
1024                         rte_prefetch0(key_slot);
1025                         continue;
1026                 }
1027
1028                 if (sec_hitmask[i]) {
1029                         uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]);
1030                         uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit];
1031                         const struct rte_hash_key *key_slot =
1032                                 (const struct rte_hash_key *)(
1033                                 (const char *)h->key_store +
1034                                 key_idx * h->key_entry_size);
1035                         rte_prefetch0(key_slot);
1036                 }
1037         }
1038
1039         /* Compare keys, first hits in primary first */
1040         for (i = 0; i < num_keys; i++) {
1041                 positions[i] = -ENOENT;
1042                 while (prim_hitmask[i]) {
1043                         uint32_t hit_index = __builtin_ctzl(prim_hitmask[i]);
1044
1045                         uint32_t key_idx = primary_bkt[i]->key_idx[hit_index];
1046                         const struct rte_hash_key *key_slot =
1047                                 (const struct rte_hash_key *)(
1048                                 (const char *)h->key_store +
1049                                 key_idx * h->key_entry_size);
1050                         /*
1051                          * If key index is 0, do not compare key,
1052                          * as it is checking the dummy slot
1053                          */
1054                         if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
1055                                 if (data != NULL)
1056                                         data[i] = key_slot->pdata;
1057
1058                                 hits |= 1ULL << i;
1059                                 positions[i] = key_idx - 1;
1060                                 goto next_key;
1061                         }
1062                         prim_hitmask[i] &= ~(1 << (hit_index));
1063                 }
1064
1065                 while (sec_hitmask[i]) {
1066                         uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]);
1067
1068                         uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index];
1069                         const struct rte_hash_key *key_slot =
1070                                 (const struct rte_hash_key *)(
1071                                 (const char *)h->key_store +
1072                                 key_idx * h->key_entry_size);
1073                         /*
1074                          * If key index is 0, do not compare key,
1075                          * as it is checking the dummy slot
1076                          */
1077
1078                         if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
1079                                 if (data != NULL)
1080                                         data[i] = key_slot->pdata;
1081
1082                                 hits |= 1ULL << i;
1083                                 positions[i] = key_idx - 1;
1084                                 goto next_key;
1085                         }
1086                         sec_hitmask[i] &= ~(1 << (hit_index));
1087                 }
1088
1089 next_key:
1090                 continue;
1091         }
1092
1093         if (hit_mask != NULL)
1094                 *hit_mask = hits;
1095 }
1096
1097 int
1098 rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
1099                       uint32_t num_keys, int32_t *positions)
1100 {
1101         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
1102                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
1103                         (positions == NULL)), -EINVAL);
1104
1105         __rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL);
1106         return 0;
1107 }
1108
1109 int
1110 rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
1111                       uint32_t num_keys, uint64_t *hit_mask, void *data[])
1112 {
1113         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
1114                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
1115                         (hit_mask == NULL)), -EINVAL);
1116
1117         int32_t positions[num_keys];
1118
1119         __rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);
1120
1121         /* Return number of hits */
1122         return __builtin_popcountl(*hit_mask);
1123 }
1124
1125 int32_t
1126 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
1127 {
1128         uint32_t bucket_idx, idx, position;
1129         struct rte_hash_key *next_key;
1130
1131         RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
1132
1133         const uint32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
1134         /* Out of bounds */
1135         if (*next >= total_entries)
1136                 return -ENOENT;
1137
1138         /* Calculate bucket and index of current iterator */
1139         bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
1140         idx = *next % RTE_HASH_BUCKET_ENTRIES;
1141
1142         /* If current position is empty, go to the next one */
1143         while (h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) {
1144                 (*next)++;
1145                 /* End of table */
1146                 if (*next == total_entries)
1147                         return -ENOENT;
1148                 bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
1149                 idx = *next % RTE_HASH_BUCKET_ENTRIES;
1150         }
1151
1152         /* Get position of entry in key table */
1153         position = h->buckets[bucket_idx].key_idx[idx];
1154         next_key = (struct rte_hash_key *) ((char *)h->key_store +
1155                                 position * h->key_entry_size);
1156         /* Return key and data */
1157         *key = next_key->key;
1158         *data = next_key->pdata;
1159
1160         /* Increment iterator */
1161         (*next)++;
1162
1163         return position - 1;
1164 }