/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_prefetch.h>
#include <rte_random.h>

#include "rte_member.h"
#include "rte_member_ht.h"

#if defined(RTE_ARCH_X86)
#include "rte_member_x86.h"
#endif

/* Search a bucket for an entry with tmp_sig and update its set_id */
static inline int
update_entry_search(uint32_t bucket_id, member_sig_t tmp_sig,
		struct member_ht_bucket *buckets,
		member_set_t set_id)
{
	uint32_t i;

	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (buckets[bucket_id].sigs[i] == tmp_sig) {
			buckets[bucket_id].sets[i] = set_id;
			return 1;
		}
	}
	return 0;
}

/* Search a bucket for an in-use entry with tmp_sig and report its set_id */
static inline int
search_bucket_single(uint32_t bucket_id, member_sig_t tmp_sig,
		struct member_ht_bucket *buckets,
		member_set_t *set_id)
{
	uint32_t iter;

	for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
		if (tmp_sig == buckets[bucket_id].sigs[iter] &&
				buckets[bucket_id].sets[iter] !=
				RTE_MEMBER_NO_MATCH) {
			*set_id = buckets[bucket_id].sets[iter];
			return 1;
		}
	}
	return 0;
}

/* Collect up to matches_per_key set ids matching tmp_sig from one bucket */
static inline void
search_bucket_multi(uint32_t bucket_id, member_sig_t tmp_sig,
		struct member_ht_bucket *buckets,
		uint32_t *counter,
		uint32_t matches_per_key,
		member_set_t *set_id)
{
	uint32_t iter;

	for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
		if (tmp_sig == buckets[bucket_id].sigs[iter] &&
				buckets[bucket_id].sets[iter] !=
				RTE_MEMBER_NO_MATCH) {
			set_id[*counter] = buckets[bucket_id].sets[iter];
			(*counter)++;
			if (*counter >= matches_per_key)
				return;
		}
	}
}

int
rte_member_create_ht(struct rte_member_setsum *ss,
		const struct rte_member_parameters *params)
{
	uint32_t i, j;
	uint32_t size_bucket_t;
	uint32_t num_entries = rte_align32pow2(params->num_keys);

	if ((num_entries > RTE_MEMBER_ENTRIES_MAX) ||
			!rte_is_power_of_2(RTE_MEMBER_BUCKET_ENTRIES) ||
			num_entries < RTE_MEMBER_BUCKET_ENTRIES) {
		rte_errno = EINVAL;
		RTE_MEMBER_LOG(ERR,
			"Membership HT create with invalid parameters\n");
		return -EINVAL;
	}

	uint32_t num_buckets = num_entries / RTE_MEMBER_BUCKET_ENTRIES;

	size_bucket_t = sizeof(struct member_ht_bucket);

	struct member_ht_bucket *buckets = rte_zmalloc_socket(NULL,
			num_buckets * size_bucket_t,
			RTE_CACHE_LINE_SIZE, ss->socket_id);

	if (buckets == NULL) {
		RTE_MEMBER_LOG(ERR, "memory allocation failed for HT "
				"setsummary\n");
		return -ENOMEM;
	}

	ss->table = buckets;
	ss->bucket_cnt = num_buckets;
	ss->bucket_mask = num_buckets - 1;
	ss->cache = params->is_cache;

	for (i = 0; i < num_buckets; i++) {
		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
			buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
	}
#if defined(RTE_ARCH_X86)
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
			RTE_MEMBER_BUCKET_ENTRIES == 16)
		ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2;
	else
#endif
		ss->sig_cmp_fn = RTE_MEMBER_COMPARE_SCALAR;

	RTE_MEMBER_LOG(DEBUG, "Hash table based filter created, "
			"the table has %u entries, %u buckets\n",
			num_entries, num_buckets);
	return 0;
}
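
/*
 * Editor's note: a minimal sketch of how this backend is typically reached
 * through the public membership API (illustrative only; the field values
 * below are assumptions, not requirements of this file):
 *
 *	struct rte_member_parameters params = {
 *		.name = "ht_example",
 *		.type = RTE_MEMBER_TYPE_HT,
 *		.is_cache = 0,
 *		.num_keys = 1024,
 *		.key_len = 16,
 *		.prim_hash_seed = 1,
 *		.sec_hash_seed = 2,
 *		.socket_id = 0,
 *	};
 *	struct rte_member_setsum *setsum = rte_member_create(&params);
 *	rte_member_add(setsum, key, 1);       - dispatches to rte_member_add_ht()
 *	rte_member_lookup(setsum, key, &id);  - dispatches to rte_member_lookup_ht()
 */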

static inline void
get_buckets_index(const struct rte_member_setsum *ss, const void *key,
		uint32_t *prim_bkt, uint32_t *sec_bkt, member_sig_t *sig)
{
	uint32_t first_hash = MEMBER_HASH_FUNC(key, ss->key_len,
					ss->prim_hash_seed);
	uint32_t sec_hash = MEMBER_HASH_FUNC(&first_hash, sizeof(uint32_t),
					ss->sec_hash_seed);
	/*
	 * We use the first hash value for the signature, and the second hash
	 * value to derive the primary and secondary bucket locations.
	 *
	 * For non-cache mode, we use the lower bits of the second hash for
	 * the primary bucket location, then xor the primary bucket location
	 * with the signature to get the secondary bucket location. This is
	 * the "partial-key cuckoo hashing" scheme proposed in B. Fan et al.,
	 * "Cuckoo Filter: Practically Better Than Bloom". The benefit of the
	 * xor is that either bucket location can be derived from the other
	 * one and the signature alone, which non-cache mode's eviction and
	 * deletion process requires, without storing the alternative hash
	 * value or the full key.
	 *
	 * For cache mode, we use the lower bits for the primary bucket
	 * location and the higher bits for the secondary bucket location. In
	 * cache mode, keys are simply overwritten if the bucket is full. We
	 * do not use xor here since the lower and higher bits are more
	 * independent hash values and thus should provide a slightly better
	 * table load.
	 */
	*sig = first_hash;
	if (ss->cache) {
		*prim_bkt = sec_hash & ss->bucket_mask;
		*sec_bkt = (sec_hash >> 16) & ss->bucket_mask;
	} else {
		*prim_bkt = sec_hash & ss->bucket_mask;
		*sec_bkt = (*prim_bkt ^ *sig) & ss->bucket_mask;
	}
}
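
/*
 * Editor's note, a worked example of the xor scheme above (values are
 * illustrative): with bucket_mask = 0xFF, sig = 0x1234 and
 * prim = sec_hash & 0xFF = 0x56, the secondary bucket is
 * sec = (0x56 ^ 0x1234) & 0xFF = 0x62. Starting from the secondary
 * bucket, (0x62 ^ 0x1234) & 0xFF = 0x56 recovers the primary one, so
 * either bucket index plus the stored signature is enough to locate the
 * alternative bucket during eviction or deletion.
 */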

/* Return 1 and fill *set_id on a hit, 0 on a miss */
int
rte_member_lookup_ht(const struct rte_member_setsum *ss,
		const void *key, member_set_t *set_id)
{
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;

	*set_id = RTE_MEMBER_NO_MATCH;
	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	switch (ss->sig_cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
	case RTE_MEMBER_COMPARE_AVX2:
		if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets,
				set_id) ||
				search_bucket_single_avx(sec_bucket, tmp_sig,
					buckets, set_id))
			return 1;
		break;
#endif
	default:
		if (search_bucket_single(prim_bucket, tmp_sig, buckets,
				set_id) ||
				search_bucket_single(sec_bucket, tmp_sig,
					buckets, set_id))
			return 1;
	}

	return 0;
}

uint32_t
rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss,
		const void **keys, uint32_t num_keys, member_set_t *set_id)
{
	uint32_t i;
	uint32_t num_matches = 0;
	struct member_ht_bucket *buckets = ss->table;
	member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];

	/* Compute all bucket locations first so the prefetches can overlap */
	for (i = 0; i < num_keys; i++) {
		get_buckets_index(ss, keys[i], &prim_buckets[i],
				&sec_buckets[i], &tmp_sig[i]);
		rte_prefetch0(&buckets[prim_buckets[i]]);
		rte_prefetch0(&buckets[sec_buckets[i]]);
	}

	for (i = 0; i < num_keys; i++) {
		switch (ss->sig_cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
		case RTE_MEMBER_COMPARE_AVX2:
			if (search_bucket_single_avx(prim_buckets[i],
					tmp_sig[i], buckets, &set_id[i]) ||
					search_bucket_single_avx(sec_buckets[i],
					tmp_sig[i], buckets, &set_id[i]))
				num_matches++;
			else
				set_id[i] = RTE_MEMBER_NO_MATCH;
			break;
#endif
		default:
			if (search_bucket_single(prim_buckets[i], tmp_sig[i],
					buckets, &set_id[i]) ||
					search_bucket_single(sec_buckets[i],
					tmp_sig[i], buckets, &set_id[i]))
				num_matches++;
			else
				set_id[i] = RTE_MEMBER_NO_MATCH;
		}
	}
	return num_matches;
}

uint32_t
rte_member_lookup_multi_ht(const struct rte_member_setsum *ss,
		const void *key, uint32_t match_per_key,
		member_set_t *set_id)
{
	uint32_t num_matches = 0;
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;

	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	switch (ss->sig_cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
	case RTE_MEMBER_COMPARE_AVX2:
		search_bucket_multi_avx(prim_bucket, tmp_sig, buckets,
				&num_matches, match_per_key, set_id);
		if (num_matches < match_per_key)
			search_bucket_multi_avx(sec_bucket, tmp_sig,
				buckets, &num_matches, match_per_key, set_id);
		break;
#endif
	default:
		search_bucket_multi(prim_bucket, tmp_sig, buckets,
				&num_matches, match_per_key, set_id);
		if (num_matches < match_per_key)
			search_bucket_multi(sec_bucket, tmp_sig,
				buckets, &num_matches, match_per_key, set_id);
	}
	return num_matches;
}

uint32_t
rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss,
		const void **keys, uint32_t num_keys, uint32_t match_per_key,
		uint32_t *match_count,
		member_set_t *set_ids)
{
	uint32_t i;
	uint32_t num_matches = 0;
	struct member_ht_bucket *buckets = ss->table;
	uint32_t match_cnt_tmp;
	member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];

	/* Compute all bucket locations first so the prefetches can overlap */
	for (i = 0; i < num_keys; i++) {
		get_buckets_index(ss, keys[i], &prim_buckets[i],
				&sec_buckets[i], &tmp_sig[i]);
		rte_prefetch0(&buckets[prim_buckets[i]]);
		rte_prefetch0(&buckets[sec_buckets[i]]);
	}
	for (i = 0; i < num_keys; i++) {
		match_cnt_tmp = 0;

		switch (ss->sig_cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
		case RTE_MEMBER_COMPARE_AVX2:
			search_bucket_multi_avx(prim_buckets[i], tmp_sig[i],
				buckets, &match_cnt_tmp, match_per_key,
				&set_ids[i*match_per_key]);
			if (match_cnt_tmp < match_per_key)
				search_bucket_multi_avx(sec_buckets[i],
					tmp_sig[i], buckets, &match_cnt_tmp,
					match_per_key,
					&set_ids[i*match_per_key]);
			match_count[i] = match_cnt_tmp;
			if (match_cnt_tmp != 0)
				num_matches++;
			break;
#endif
		default:
			search_bucket_multi(prim_buckets[i], tmp_sig[i],
				buckets, &match_cnt_tmp, match_per_key,
				&set_ids[i*match_per_key]);
			if (match_cnt_tmp < match_per_key)
				search_bucket_multi(sec_buckets[i], tmp_sig[i],
					buckets, &match_cnt_tmp, match_per_key,
					&set_ids[i*match_per_key]);
			match_count[i] = match_cnt_tmp;
			if (match_cnt_tmp != 0)
				num_matches++;
		}
	}
	return num_matches;
}

/* Insert into the first free slot of the primary or secondary bucket */
static inline int
try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
		member_sig_t sig, member_set_t set_id)
{
	int i;
	/* If not full then insert into one slot */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (buckets[prim].sets[i] == RTE_MEMBER_NO_MATCH) {
			buckets[prim].sigs[i] = sig;
			buckets[prim].sets[i] = set_id;
			return 0;
		}
	}
	/* If prim failed, we need to access the secondary bucket */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (buckets[sec].sets[i] == RTE_MEMBER_NO_MATCH) {
			buckets[sec].sigs[i] = sig;
			buckets[sec].sets[i] = set_id;
			return 0;
		}
	}
	return -1;
}

/* Update an existing entry with a matching signature, if there is one */
static inline int
try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
		member_sig_t sig, member_set_t set_id,
		enum rte_member_sig_compare_function cmp_fn)
{
	switch (cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
	case RTE_MEMBER_COMPARE_AVX2:
		if (update_entry_search_avx(prim, sig, buckets, set_id) ||
				update_entry_search_avx(sec, sig, buckets,
					set_id))
			return 0;
		break;
#endif
	default:
		if (update_entry_search(prim, sig, buckets, set_id) ||
				update_entry_search(sec, sig, buckets,
					set_id))
			return 0;
	}
	return -1;
}

static inline int
evict_from_bucket(void)
{
	/*
	 * For now, we randomly pick one entry to evict. Masking with
	 * (RTE_MEMBER_BUCKET_ENTRIES - 1) is uniform only because the entry
	 * count is a power of two, which rte_member_create_ht() enforces.
	 */
	return rte_rand() & (RTE_MEMBER_BUCKET_ENTRIES - 1);
}

/*
 * This function is similar to the cuckoo hash make_space function in the
 * hash library.
 */
static inline int
make_space_bucket(const struct rte_member_setsum *ss, uint32_t bkt_idx,
		unsigned int *nr_pushes)
{
	unsigned int i, j;
	int ret;
	struct member_ht_bucket *buckets = ss->table;
	uint32_t next_bucket_idx;
	struct member_ht_bucket *next_bkt[RTE_MEMBER_BUCKET_ENTRIES];
	struct member_ht_bucket *bkt = &buckets[bkt_idx];
	/* MSB is set to indicate if an entry has been already pushed */
	member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);

	/*
	 * Push an existing item (search for a bucket with space in its
	 * alternative location) to its alternative location
	 */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		/* Search for space in alternative locations */
		next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
		next_bkt[i] = &buckets[next_bucket_idx];
		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) {
			if (next_bkt[i]->sets[j] == RTE_MEMBER_NO_MATCH)
				break;
		}

		if (j != RTE_MEMBER_BUCKET_ENTRIES)
			break;
	}

	/* Alternative location has spare room (end of recursion) */
	if (i != RTE_MEMBER_BUCKET_ENTRIES) {
		next_bkt[i]->sigs[j] = bkt->sigs[i];
		next_bkt[i]->sets[j] = bkt->sets[i];
		return i;
	}

	/* Pick an entry that has not been pushed yet */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++)
		if ((bkt->sets[i] & flag_mask) == 0)
			break;

	/* All entries have been pushed, so the entry cannot be added */
	if (i == RTE_MEMBER_BUCKET_ENTRIES ||
			++(*nr_pushes) > RTE_MEMBER_MAX_PUSHES)
		return -ENOSPC;

	next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
	/* Set the flag to indicate that this entry is going to be pushed */
	bkt->sets[i] |= flag_mask;

	/* Need room in the alternative bucket to insert the pushed entry */
	ret = make_space_bucket(ss, next_bucket_idx, nr_pushes);
	/*
	 * After the recursive call: clear the flag, and insert the pushed
	 * entry in its alternative location if the recursion succeeded,
	 * or return the error
	 */
	bkt->sets[i] &= ~flag_mask;
	if (ret >= 0) {
		next_bkt[i]->sigs[ret] = bkt->sigs[i];
		next_bkt[i]->sets[ret] = bkt->sets[i];
		return i;
	} else
		return ret;
}
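
/*
 * Editor's note, an illustrative trace of make_space_bucket() (bucket
 * numbers are made up): suppose bucket 5 is full and its entry 0 has
 * signature s with (s ^ 5) & bucket_mask == 9. If bucket 9 has a free
 * slot j, entry 0 is copied into it and slot 0 of bucket 5 is handed back
 * to the caller. If bucket 9 is also full, entry 0 is tagged with
 * flag_mask and the function recurses on bucket 9; the tag keeps a cycle
 * from pushing the same entry twice, and RTE_MEMBER_MAX_PUSHES bounds the
 * total number of displacements.
 */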

int
rte_member_add_ht(const struct rte_member_setsum *ss,
		const void *key, member_set_t set_id)
{
	int ret;
	unsigned int nr_pushes = 0;
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;
	member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);

	if (set_id == RTE_MEMBER_NO_MATCH || (set_id & flag_mask) != 0)
		return -EINVAL;

	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	/*
	 * For a cache-based setsummary, we try overwriting (updating) an
	 * existing entry with the same signature first. In cache mode we
	 * allow false negatives and only cache the most recent keys.
	 *
	 * For non-cache mode, we do not update an existing entry with the
	 * same signature, because if two keys with the same signature kept
	 * updating each other, false negatives could occur, which is not the
	 * expected behavior for a non-cache setsummary.
	 */
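	/*
	 * Editor's note, an illustrative case for the rule above: if keys K1
	 * and K2 collide on the same signature, letting K2's add overwrite
	 * the shared entry would retarget K1's matches, and a later delete of
	 * K2 would remove the entry and turn lookups of K1 into false
	 * negatives; non-cache mode therefore inserts a second entry instead.
	 */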
	if (ss->cache) {
		ret = try_update(buckets, prim_bucket, sec_bucket, tmp_sig,
				set_id, ss->sig_cmp_fn);
		if (ret != -1)
			return ret;
	}
	/* If not full then insert into one slot */
	ret = try_insert(buckets, prim_bucket, sec_bucket, tmp_sig, set_id);
	if (ret != -1)
		return ret;

	/* Randomly pick prim or sec for recursive displacement */
	uint32_t select_bucket = (tmp_sig & 1U) ? prim_bucket : sec_bucket;

	if (ss->cache) {
		ret = evict_from_bucket();
		buckets[select_bucket].sigs[ret] = tmp_sig;
		buckets[select_bucket].sets[ret] = set_id;
		return 0;
	}

	ret = make_space_bucket(ss, select_bucket, &nr_pushes);
	if (ret >= 0) {
		buckets[select_bucket].sigs[ret] = tmp_sig;
		buckets[select_bucket].sets[ret] = set_id;
		ret = 0;
	}
	return ret;
}

void
rte_member_free_ht(struct rte_member_setsum *ss)
{
	rte_free(ss->table);
}

int
rte_member_delete_ht(const struct rte_member_setsum *ss, const void *key,
		member_set_t set_id)
{
	int i;
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;

	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (tmp_sig == buckets[prim_bucket].sigs[i] &&
				set_id == buckets[prim_bucket].sets[i]) {
			buckets[prim_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
			return 0;
		}
	}

	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (tmp_sig == buckets[sec_bucket].sigs[i] &&
				set_id == buckets[sec_bucket].sets[i]) {
			buckets[sec_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
			return 0;
		}
	}
	return -ENOENT;
}

void
rte_member_reset_ht(const struct rte_member_setsum *ss)
{
	uint32_t i, j;
	struct member_ht_bucket *buckets = ss->table;

	for (i = 0; i < ss->bucket_cnt; i++) {
		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
			buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
	}
}