4 * Copyright(c) 2017 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <rte_errno.h>
35 #include <rte_malloc.h>
36 #include <rte_prefetch.h>
37 #include <rte_random.h>
40 #include "rte_member.h"
41 #include "rte_member_ht.h"
43 #if defined(RTE_ARCH_X86)
44 #include "rte_member_x86.h"
47 /* Search bucket for entry with tmp_sig and update set_id */
/*
 * Scans the slots of buckets[bucket_id]; on a slot whose stored signature
 * equals tmp_sig, that slot's set ID is overwritten with set_id. Only the
 * signature is compared -- the slot's current set ID is not inspected.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 * try_update() uses the result as a boolean, so presumably non-zero means
 * "an entry was updated" -- confirm against the full file.
 */
49 update_entry_search(uint32_t bucket_id, member_sig_t tmp_sig,
50 struct member_ht_bucket *buckets,
55 for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
56 if (buckets[bucket_id].sigs[i] == tmp_sig) {
57 buckets[bucket_id].sets[i] = set_id;
/*
 * Single-match scalar search of one bucket.
 *
 * Scans the slots of buckets[bucket_id] for one whose signature equals
 * tmp_sig and whose set ID is not RTE_MEMBER_NO_MATCH (i.e. the slot is in
 * use). On such a slot the stored set ID is copied to *set_id.
 *
 * NOTE(review): the return statements are not visible in this excerpt; the
 * lookup callers treat the result as a boolean "found" flag -- confirm.
 */
65 search_bucket_single(uint32_t bucket_id, member_sig_t tmp_sig,
66 struct member_ht_bucket *buckets,
71 for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
72 if (tmp_sig == buckets[bucket_id].sigs[iter] &&
73 buckets[bucket_id].sets[iter] !=
74 RTE_MEMBER_NO_MATCH) {
75 *set_id = buckets[bucket_id].sets[iter];
/*
 * Multi-match scalar search of one bucket.
 *
 * For every slot of buckets[bucket_id] whose signature equals tmp_sig and
 * whose set ID is not RTE_MEMBER_NO_MATCH, the stored set ID is written to
 * set_id[*counter]. The scan is cut short once *counter has reached
 * matches_per_key.
 *
 * NOTE(review): the declarations of counter and set_id and the statement
 * that advances *counter are not visible in this excerpt; presumably
 * *counter is incremented after each stored match -- confirm.
 */
83 search_bucket_multi(uint32_t bucket_id, member_sig_t tmp_sig,
84 struct member_ht_bucket *buckets,
86 uint32_t matches_per_key,
91 for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
92 if (tmp_sig == buckets[bucket_id].sigs[iter] &&
93 buckets[bucket_id].sets[iter] !=
94 RTE_MEMBER_NO_MATCH) {
95 set_id[*counter] = buckets[bucket_id].sets[iter];
97 if (*counter >= matches_per_key)
/*
 * Set up the hash-table backing store of a setsummary.
 *
 * params->num_keys is rounded up to a power of two to give the entry count.
 * Creation is rejected when that count exceeds RTE_MEMBER_ENTRIES_MAX, is
 * smaller than one bucket, or when RTE_MEMBER_BUCKET_ENTRIES is not a power
 * of two (the bucket mask and the random eviction index both rely on it).
 * The bucket array is allocated zeroed and cache-line aligned on
 * ss->socket_id, every slot's set ID is initialised to RTE_MEMBER_NO_MATCH,
 * and the signature compare function is chosen (AVX2 when the CPU reports
 * it and a bucket holds 16 entries, scalar otherwise).
 *
 * NOTE(review): the return statements and some assignments to ss are not
 * visible in this excerpt.
 */
104 rte_member_create_ht(struct rte_member_setsum *ss,
105 const struct rte_member_parameters *params)
108 uint32_t size_bucket_t;
109 uint32_t num_entries = rte_align32pow2(params->num_keys);
111 if ((num_entries > RTE_MEMBER_ENTRIES_MAX) ||
112 !rte_is_power_of_2(RTE_MEMBER_BUCKET_ENTRIES) ||
113 num_entries < RTE_MEMBER_BUCKET_ENTRIES) {
116 "Membership HT create with invalid parameters\n");
120 uint32_t num_buckets = num_entries / RTE_MEMBER_BUCKET_ENTRIES;
122 size_bucket_t = sizeof(struct member_ht_bucket);
124 struct member_ht_bucket *buckets = rte_zmalloc_socket(NULL,
/*
 * Both operands are uint32_t: widen before multiplying so the byte count
 * cannot wrap around in 32 bits for very large tables.
 */
125 (size_t)num_buckets * size_bucket_t,
126 RTE_CACHE_LINE_SIZE, ss->socket_id);
128 if (buckets == NULL) {
129 RTE_MEMBER_LOG(ERR, "memory allocation failed for HT "
135 ss->bucket_cnt = num_buckets;
/* num_buckets is a power of two, so (num_buckets - 1) is a valid index mask */
136 ss->bucket_mask = num_buckets - 1;
137 ss->cache = params->is_cache;
/* An all-zero set ID is not assumed to mean "empty": mark each slot explicitly */
139 for (i = 0; i < num_buckets; i++) {
140 for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
141 buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
143 #if defined(RTE_ARCH_X86)
144 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
145 RTE_MEMBER_BUCKET_ENTRIES == 16)
146 ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2;
149 ss->sig_cmp_fn = RTE_MEMBER_COMPARE_SCALAR;
151 RTE_MEMBER_LOG(DEBUG, "Hash table based filter created, "
152 "the table has %u entries, %u buckets\n",
153 num_entries, num_buckets);
/*
 * Derive the signature and the two candidate bucket indices for a key.
 *
 * The key is hashed once over ss->key_len bytes (first_hash); that 32-bit
 * value is hashed again (sec_hash). Both bucket indices are masked with
 * ss->bucket_mask. Two derivations of the secondary index are visible below:
 *   - (sec_hash >> 16) & mask   -- upper bits of the second hash;
 *   - (*prim_bkt ^ *sig) & mask -- xor of primary index and signature.
 * The primary index is sec_hash & mask in both.
 *
 * NOTE(review): the hash seed arguments, the assignment to *sig and the
 * condition selecting between the two derivations are not visible in this
 * excerpt; per the text below, the first form is for cache mode and the xor
 * form for non-cache mode -- confirm against the full file.
 */
158 get_buckets_index(const struct rte_member_setsum *ss, const void *key,
159 uint32_t *prim_bkt, uint32_t *sec_bkt, member_sig_t *sig)
161 uint32_t first_hash = MEMBER_HASH_FUNC(key, ss->key_len,
163 uint32_t sec_hash = MEMBER_HASH_FUNC(&first_hash, sizeof(uint32_t),
166 * We use the first hash value for the signature, and the second hash
167 * value to derive the primary and secondary bucket locations.
169 * For non-cache mode, we use the lower bits for the primary bucket
170 * location. Then we xor primary bucket location and the signature
171 * to get the secondary bucket location. This is called "partial-key
172 * cuckoo hashing" proposed by B. Fan, et al's paper
173 * "Cuckoo Filter: Practically Better Than Bloom". The benefit to use
174 * xor is that one could derive the alternative bucket location
175 * by only using the current bucket location and the signature. This is
176 * generally required by non-cache mode's eviction and deletion
177 * process without the need to store alternative hash value nor the full
180 * For cache mode, we use the lower bits for the primary bucket
181 * location and the higher bits for the secondary bucket location. In
182 * cache mode, keys are simply overwritten if bucket is full. We do not
183 * use xor since lower/higher bits are more independent hash values thus
184 * should provide slightly better table load.
188 *prim_bkt = sec_hash & ss->bucket_mask;
189 *sec_bkt = (sec_hash >> 16) & ss->bucket_mask;
191 *prim_bkt = sec_hash & ss->bucket_mask;
192 *sec_bkt = (*prim_bkt ^ *sig) & ss->bucket_mask;
/*
 * Look up a single key and report the first matching set ID.
 *
 * *set_id is preset to RTE_MEMBER_NO_MATCH, the key's signature and two
 * bucket indices are computed, and the primary bucket is searched before
 * the secondary one. The AVX2 search helper is compiled in only when both
 * RTE_ARCH_X86 and RTE_MACHINE_CPUFLAG_AVX2 are defined; the other visible
 * path uses the scalar search_bucket_single().
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
197 rte_member_lookup_ht(const struct rte_member_setsum *ss,
198 const void *key, member_set_t *set_id)
200 uint32_t prim_bucket, sec_bucket;
201 member_sig_t tmp_sig;
202 struct member_ht_bucket *buckets = ss->table;
204 *set_id = RTE_MEMBER_NO_MATCH;
205 get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
207 switch (ss->sig_cmp_fn) {
208 #if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
209 case RTE_MEMBER_COMPARE_AVX2:
210 if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets,
212 search_bucket_single_avx(sec_bucket, tmp_sig,
218 if (search_bucket_single(prim_bucket, tmp_sig, buckets,
220 search_bucket_single(sec_bucket, tmp_sig,
/*
 * Look up num_keys keys, writing one result per key into set_id[].
 *
 * Pass 1 computes the signature and both bucket indices of every key and
 * issues a prefetch for both buckets. Pass 2 searches the primary then the
 * secondary bucket of each key; set_id[i] is set to RTE_MEMBER_NO_MATCH on
 * the path visible after each search pair (presumably the "not found"
 * branch -- the else lines are not visible in this excerpt).
 *
 * NOTE(review): the three scratch arrays hold RTE_MEMBER_LOOKUP_BULK_MAX
 * elements and no check of num_keys against that limit is visible here --
 * confirm that callers enforce it.
 * NOTE(review): the statements updating/returning num_matches are not
 * visible in this excerpt.
 */
229 rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss,
230 const void **keys, uint32_t num_keys, member_set_t *set_id)
233 uint32_t num_matches = 0;
234 struct member_ht_bucket *buckets = ss->table;
235 member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
236 uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
237 uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
/* Pass 1: hash every key and start pulling its buckets into cache */
239 for (i = 0; i < num_keys; i++) {
240 get_buckets_index(ss, keys[i], &prim_buckets[i],
241 &sec_buckets[i], &tmp_sig[i]);
242 rte_prefetch0(&buckets[prim_buckets[i]]);
243 rte_prefetch0(&buckets[sec_buckets[i]]);
/* Pass 2: search the buckets of each key */
246 for (i = 0; i < num_keys; i++) {
247 switch (ss->sig_cmp_fn) {
248 #if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
249 case RTE_MEMBER_COMPARE_AVX2:
250 if (search_bucket_single_avx(prim_buckets[i],
251 tmp_sig[i], buckets, &set_id[i]) ||
252 search_bucket_single_avx(sec_buckets[i],
253 tmp_sig[i], buckets, &set_id[i]))
256 set_id[i] = RTE_MEMBER_NO_MATCH;
260 if (search_bucket_single(prim_buckets[i], tmp_sig[i],
261 buckets, &set_id[i]) ||
262 search_bucket_single(sec_buckets[i],
263 tmp_sig[i], buckets, &set_id[i]))
266 set_id[i] = RTE_MEMBER_NO_MATCH;
/*
 * Look up one key and collect up to match_per_key matching set IDs.
 *
 * The primary bucket is searched first, with num_matches (starting at 0)
 * passed as the running counter; the secondary bucket is searched only
 * while num_matches is still below match_per_key. Matches are written into
 * set_id[] by the search helpers.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably num_matches is returned -- confirm.
 */
273 rte_member_lookup_multi_ht(const struct rte_member_setsum *ss,
274 const void *key, uint32_t match_per_key,
275 member_set_t *set_id)
277 uint32_t num_matches = 0;
278 uint32_t prim_bucket, sec_bucket;
279 member_sig_t tmp_sig;
280 struct member_ht_bucket *buckets = ss->table;
282 get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
284 switch (ss->sig_cmp_fn) {
285 #if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
286 case RTE_MEMBER_COMPARE_AVX2:
287 search_bucket_multi_avx(prim_bucket, tmp_sig, buckets,
288 &num_matches, match_per_key, set_id);
289 if (num_matches < match_per_key)
290 search_bucket_multi_avx(sec_bucket, tmp_sig,
291 buckets, &num_matches, match_per_key, set_id);
295 search_bucket_multi(prim_bucket, tmp_sig, buckets, &num_matches,
296 match_per_key, set_id);
297 if (num_matches < match_per_key)
298 search_bucket_multi(sec_bucket, tmp_sig,
299 buckets, &num_matches, match_per_key, set_id);
/*
 * Multi-match lookup for num_keys keys.
 *
 * Pass 1 computes the signature and both bucket indices of every key and
 * prefetches both buckets. Pass 2 searches the primary bucket of key i and,
 * while fewer than match_per_key matches were found, its secondary bucket.
 * The matches of key i are written starting at set_ids[i * match_per_key],
 * so set_ids is used as num_keys consecutive groups of match_per_key
 * entries; the per-key match count is stored in match_count[i].
 *
 * NOTE(review): the per-key initialisation of match_cnt_tmp, the
 * statements following "match_cnt_tmp != 0" and the return are not visible
 * in this excerpt; presumably the counter is reset to 0 for each key and
 * num_matches counts keys with at least one match -- confirm.
 * NOTE(review): the scratch arrays hold RTE_MEMBER_LOOKUP_BULK_MAX elements
 * and no check of num_keys against that limit is visible here -- confirm
 * that callers enforce it.
 */
305 rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss,
306 const void **keys, uint32_t num_keys, uint32_t match_per_key,
307 uint32_t *match_count,
308 member_set_t *set_ids)
311 uint32_t num_matches = 0;
312 struct member_ht_bucket *buckets = ss->table;
313 uint32_t match_cnt_tmp;
314 member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
315 uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
316 uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
/* Pass 1: hash every key and start pulling its buckets into cache */
318 for (i = 0; i < num_keys; i++) {
319 get_buckets_index(ss, keys[i], &prim_buckets[i],
320 &sec_buckets[i], &tmp_sig[i]);
321 rte_prefetch0(&buckets[prim_buckets[i]]);
322 rte_prefetch0(&buckets[sec_buckets[i]]);
/* Pass 2: collect the matches of each key into its own output group */
324 for (i = 0; i < num_keys; i++) {
327 switch (ss->sig_cmp_fn) {
328 #if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
329 case RTE_MEMBER_COMPARE_AVX2:
330 search_bucket_multi_avx(prim_buckets[i], tmp_sig[i],
331 buckets, &match_cnt_tmp, match_per_key,
332 &set_ids[i*match_per_key]);
333 if (match_cnt_tmp < match_per_key)
334 search_bucket_multi_avx(sec_buckets[i],
335 tmp_sig[i], buckets, &match_cnt_tmp,
337 &set_ids[i*match_per_key]);
338 match_count[i] = match_cnt_tmp;
339 if (match_cnt_tmp != 0)
344 search_bucket_multi(prim_buckets[i], tmp_sig[i],
345 buckets, &match_cnt_tmp, match_per_key,
346 &set_ids[i*match_per_key]);
347 if (match_cnt_tmp < match_per_key)
348 search_bucket_multi(sec_buckets[i], tmp_sig[i],
349 buckets, &match_cnt_tmp, match_per_key,
350 &set_ids[i*match_per_key]);
351 match_count[i] = match_cnt_tmp;
352 if (match_cnt_tmp != 0)
/*
 * Try to place (sig, set_id) into a free slot without displacing anything.
 *
 * A slot counts as free when its set ID equals RTE_MEMBER_NO_MATCH. The
 * primary bucket is scanned first; the secondary bucket is scanned
 * afterwards. In either bucket the signature and the set ID are both
 * written into the free slot found.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * rte_member_add_ht() stores the result in ret, presumably -1 when both
 * buckets are full -- confirm.
 */
360 try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
361 member_sig_t sig, member_set_t set_id)
364 /* If not full then insert into one slot */
365 for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
366 if (buckets[prim].sets[i] == RTE_MEMBER_NO_MATCH) {
367 buckets[prim].sigs[i] = sig;
368 buckets[prim].sets[i] = set_id;
372 /* If prim failed, we need to access second bucket */
373 for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
374 if (buckets[sec].sets[i] == RTE_MEMBER_NO_MATCH) {
375 buckets[sec].sigs[i] = sig;
376 buckets[sec].sets[i] = set_id;
/*
 * Try to overwrite the set ID of an existing entry carrying signature sig.
 *
 * The primary bucket is tried before the secondary one. The AVX2 helper is
 * compiled in only when both RTE_ARCH_X86 and RTE_MACHINE_CPUFLAG_AVX2 are
 * defined; the other visible path uses the scalar update_entry_search().
 *
 * NOTE(review): the switch on cmp_fn and the return statements are not
 * visible in this excerpt; rte_member_add_ht() stores the result in ret --
 * confirm the exact return values against the full file.
 */
384 try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
385 member_sig_t sig, member_set_t set_id,
386 enum rte_member_sig_compare_function cmp_fn)
389 #if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
390 case RTE_MEMBER_COMPARE_AVX2:
391 if (update_entry_search_avx(prim, sig, buckets, set_id) ||
392 update_entry_search_avx(sec, sig, buckets,
398 if (update_entry_search(prim, sig, buckets, set_id) ||
399 update_entry_search(sec, sig, buckets,
/*
 * Choose the slot index of the entry to evict from a full bucket.
 *
 * Returns rte_rand() masked with (RTE_MEMBER_BUCKET_ENTRIES - 1). The mask
 * yields an index in [0, RTE_MEMBER_BUCKET_ENTRIES) only because
 * rte_member_create_ht() rejects a bucket size that is not a power of two.
 */
407 evict_from_bucket(void)
409 /* For now, we randomly pick one entry to evict */
410 return rte_rand() & (RTE_MEMBER_BUCKET_ENTRIES - 1);
/*
 * Free one slot in bucket bkt_idx by moving an entry to its alternative
 * bucket, recursing when the alternative bucket is full as well.
 *
 * The alternative bucket of an entry is (signature ^ current index) &
 * ss->bucket_mask. The most significant bit of an entry's set ID (flag_mask)
 * is used as a temporary "already being pushed" marker while the recursion
 * is in flight and is cleared again afterwards.
 *
 * Step 1: for each entry of the bucket, look for an empty slot
 *         (RTE_MEMBER_NO_MATCH) in its alternative bucket; if one exists the
 *         entry is copied there.
 * Step 2: otherwise pick the first entry whose flag is clear. The attempt
 *         is abandoned when every entry is flagged or when *nr_pushes, which
 *         is incremented here, exceeds RTE_MEMBER_MAX_PUSHES.
 * Step 3: flag the chosen entry, recurse on its alternative bucket, clear
 *         the flag, and copy the entry into slot ret of that bucket.
 *
 * NOTE(review): the break/return statements and the condition guarding the
 * final copy are not visible in this excerpt; presumably the function
 * returns the index of the freed slot, or a negative value on failure, and
 * the copy happens only when ret is non-negative -- confirm.
 */
414 * This function is similar to the cuckoo hash make_space function in hash
418 make_space_bucket(const struct rte_member_setsum *ss, uint32_t bkt_idx,
419 unsigned int *nr_pushes)
423 struct member_ht_bucket *buckets = ss->table;
424 uint32_t next_bucket_idx;
425 struct member_ht_bucket *next_bkt[RTE_MEMBER_BUCKET_ENTRIES];
426 struct member_ht_bucket *bkt = &buckets[bkt_idx];
427 /* MSB is set to indicate if an entry has been already pushed */
428 member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);
431 * Push existing item (search for bucket with space in
432 * alternative locations) to its alternative location
434 for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
435 /* Search for space in alternative locations */
436 next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
437 next_bkt[i] = &buckets[next_bucket_idx];
438 for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) {
439 if (next_bkt[i]->sets[j] == RTE_MEMBER_NO_MATCH)
443 if (j != RTE_MEMBER_BUCKET_ENTRIES)
447 /* Alternative location has spare room (end of recursive function) */
448 if (i != RTE_MEMBER_BUCKET_ENTRIES) {
449 next_bkt[i]->sigs[j] = bkt->sigs[i];
450 next_bkt[i]->sets[j] = bkt->sets[i];
454 /* Pick entry that has not been pushed yet */
455 for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++)
456 if ((bkt->sets[i] & flag_mask) == 0)
459 /* All entries have been pushed, so entry cannot be added */
460 if (i == RTE_MEMBER_BUCKET_ENTRIES ||
461 ++(*nr_pushes) > RTE_MEMBER_MAX_PUSHES)
464 next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
465 /* Set flag to indicate that this entry is going to be pushed */
466 bkt->sets[i] |= flag_mask;
468 /* Need room in alternative bucket to insert the pushed entry */
469 ret = make_space_bucket(ss, next_bucket_idx, nr_pushes);
471 * After recursive function.
472 * Clear flags and insert the pushed entry
473 * in its alternative location if successful,
476 bkt->sets[i] &= ~flag_mask;
478 next_bkt[i]->sigs[ret] = bkt->sigs[i];
479 next_bkt[i]->sets[ret] = bkt->sets[i];
/*
 * Insert a key with its set ID into the hash-table setsummary.
 *
 * set_id is rejected when it equals RTE_MEMBER_NO_MATCH or has its most
 * significant bit set: that bit is reserved as the in-flight marker used by
 * make_space_bucket(). The steps visible below are, in order:
 *   1. try_update()  - overwrite an entry carrying the same signature;
 *   2. try_insert()  - use a free slot in the primary or secondary bucket;
 *   3. pick one of the two buckets from the signature's low bit, then either
 *      overwrite a random slot (evict_from_bucket()) or displace entries
 *      with make_space_bucket() and store the new entry in the freed slot.
 *
 * NOTE(review): the conditions guarding these steps and the return
 * statements are not visible in this excerpt; per the text below, step 1
 * and the random eviction apply to cache mode only -- confirm.
 */
486 rte_member_add_ht(const struct rte_member_setsum *ss,
487 const void *key, member_set_t set_id)
490 unsigned int nr_pushes = 0;
491 uint32_t prim_bucket, sec_bucket;
492 member_sig_t tmp_sig;
493 struct member_ht_bucket *buckets = ss->table;
494 member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);
496 if (set_id == RTE_MEMBER_NO_MATCH || (set_id & flag_mask) != 0)
499 get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
502 * If it is cache based setsummary, we try overwriting (updating)
503 * existing entry with the same signature first. In cache mode, we allow
504 * false negatives and only cache the most recent keys.
506 * For non-cache mode, we do not update existing entry with the same
507 * signature. This is because if two keys with same signature update
508 * each other, false negative may happen, which is not the expected
509 * behavior for non-cache setsummary.
512 ret = try_update(buckets, prim_bucket, sec_bucket, tmp_sig,
513 set_id, ss->sig_cmp_fn);
517 /* If not full then insert into one slot */
518 ret = try_insert(buckets, prim_bucket, sec_bucket, tmp_sig, set_id);
522 /* Random pick prim or sec for recursive displacement */
/*
 * The signature's low bit is the coin flip, so this must be a bitwise AND.
 * A logical AND is true for every non-zero signature and would select the
 * primary bucket almost always.
 */
523 uint32_t select_bucket = (tmp_sig & 1U) ? prim_bucket : sec_bucket;
525 ret = evict_from_bucket();
526 buckets[select_bucket].sigs[ret] = tmp_sig;
527 buckets[select_bucket].sets[ret] = set_id;
531 ret = make_space_bucket(ss, select_bucket, &nr_pushes);
533 buckets[select_bucket].sigs[ret] = tmp_sig;
534 buckets[select_bucket].sets[ret] = set_id;
/*
 * Release entry point for a hash-table based setsummary.
 * NOTE(review): only the signature is visible in this excerpt; presumably
 * the body frees the bucket array held in ss->table -- confirm against the
 * full file.
 */
542 rte_member_free_ht(struct rte_member_setsum *ss)
/*
 * Remove the entry stored for (key, set_id).
 *
 * The key's signature and bucket indices are recomputed; the primary bucket
 * is scanned first and the secondary bucket afterwards for a slot whose
 * signature equals the key's signature AND whose set ID equals set_id. The
 * slot is released by setting its set ID to RTE_MEMBER_NO_MATCH; the stored
 * signature is left in place.
 *
 * NOTE(review): the declaration of the set_id parameter and the return
 * statements are not visible in this excerpt.
 */
548 rte_member_delete_ht(const struct rte_member_setsum *ss, const void *key,
552 uint32_t prim_bucket, sec_bucket;
553 member_sig_t tmp_sig;
554 struct member_ht_bucket *buckets = ss->table;
556 get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
558 for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
559 if (tmp_sig == buckets[prim_bucket].sigs[i] &&
560 set_id == buckets[prim_bucket].sets[i]) {
561 buckets[prim_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
566 for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
567 if (tmp_sig == buckets[sec_bucket].sigs[i] &&
568 set_id == buckets[sec_bucket].sets[i]) {
569 buckets[sec_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
/*
 * Empty the table without releasing it.
 *
 * Every slot of all ss->bucket_cnt buckets has its set ID set to
 * RTE_MEMBER_NO_MATCH, which is the value the insert and search code treats
 * as "slot unused". Stored signatures are not cleared.
 */
577 rte_member_reset_ht(const struct rte_member_setsum *ss)
580 struct member_ht_bucket *buckets = ss->table;
582 for (i = 0; i < ss->bucket_cnt; i++) {
583 for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
584 buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;