/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2017 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_prefetch.h>
#include <rte_random.h>

#include "rte_member.h"
#include "rte_member_ht.h"

/* Search bucket for an entry with tmp_sig and update its set_id */
static inline int
update_entry_search(uint32_t bucket_id, member_sig_t tmp_sig,
		struct member_ht_bucket *buckets,
		member_set_t set_id)
{
	uint32_t i;

	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (buckets[bucket_id].sigs[i] == tmp_sig) {
			buckets[bucket_id].sets[i] = set_id;
			return 1;
		}
	}
	return 0;
}

static inline int
search_bucket_single(uint32_t bucket_id, member_sig_t tmp_sig,
		struct member_ht_bucket *buckets,
		member_set_t *set_id)
{
	uint32_t iter;

	for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
		if (tmp_sig == buckets[bucket_id].sigs[iter] &&
				buckets[bucket_id].sets[iter] !=
				RTE_MEMBER_NO_MATCH) {
			*set_id = buckets[bucket_id].sets[iter];
			return 1;
		}
	}
	return 0;
}

static inline void
search_bucket_multi(uint32_t bucket_id, member_sig_t tmp_sig,
		struct member_ht_bucket *buckets,
		uint32_t *counter,
		uint32_t matches_per_key,
		member_set_t *set_id)
{
	uint32_t iter;

	for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
		if (tmp_sig == buckets[bucket_id].sigs[iter] &&
				buckets[bucket_id].sets[iter] !=
				RTE_MEMBER_NO_MATCH) {
			set_id[*counter] = buckets[bucket_id].sets[iter];
			(*counter)++;
			if (*counter >= matches_per_key)
				return;
		}
	}
}

int
rte_member_create_ht(struct rte_member_setsum *ss,
		const struct rte_member_parameters *params)
{
	uint32_t i, j;
	uint32_t size_bucket_t;
	uint32_t num_entries = rte_align32pow2(params->num_keys);

	if ((num_entries > RTE_MEMBER_ENTRIES_MAX) ||
			!rte_is_power_of_2(RTE_MEMBER_BUCKET_ENTRIES) ||
			num_entries < RTE_MEMBER_BUCKET_ENTRIES) {
		rte_errno = EINVAL;
		RTE_MEMBER_LOG(ERR,
			"Membership HT create with invalid parameters\n");
		return -EINVAL;
	}

	uint32_t num_buckets = num_entries / RTE_MEMBER_BUCKET_ENTRIES;

	size_bucket_t = sizeof(struct member_ht_bucket);

	struct member_ht_bucket *buckets = rte_zmalloc_socket(NULL,
			num_buckets * size_bucket_t,
			RTE_CACHE_LINE_SIZE, ss->socket_id);

	if (buckets == NULL) {
		RTE_MEMBER_LOG(ERR, "memory allocation failed for HT "
					"setsummary\n");
		return -ENOMEM;
	}

	ss->table = buckets;
	ss->bucket_cnt = num_buckets;
	ss->bucket_mask = num_buckets - 1;
	ss->cache = params->is_cache;

	for (i = 0; i < num_buckets; i++) {
		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
			buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
	}

	RTE_MEMBER_LOG(DEBUG, "Hash table based filter created, "
			"the table has %u entries, %u buckets\n",
			num_entries, num_buckets);
	return 0;
}

static inline void
get_buckets_index(const struct rte_member_setsum *ss, const void *key,
		uint32_t *prim_bkt, uint32_t *sec_bkt, member_sig_t *sig)
{
	uint32_t first_hash = MEMBER_HASH_FUNC(key, ss->key_len,
						ss->prim_hash_seed);
	uint32_t sec_hash = MEMBER_HASH_FUNC(&first_hash, sizeof(uint32_t),
						ss->sec_hash_seed);
	/*
	 * We use the first hash value for the signature, and the second hash
	 * value to derive the primary and secondary bucket locations.
	 *
	 * For non-cache mode, we use the lower bits for the primary bucket
	 * location, then xor the primary bucket location and the signature
	 * to get the secondary bucket location. This is the "partial-key
	 * cuckoo hashing" proposed in B. Fan et al.'s paper
	 * "Cuckoo Filter: Practically Better Than Bloom". The benefit of
	 * using xor is that the alternative bucket location can be derived
	 * from only the current bucket location and the signature, which is
	 * exactly what non-cache mode's eviction and deletion process needs,
	 * without storing the alternative hash value or the full key.
	 *
	 * For cache mode, we use the lower bits for the primary bucket
	 * location and the higher bits for the secondary bucket location. In
	 * cache mode, keys are simply overwritten if the bucket is full. We
	 * do not use xor here since the lower/higher bits are more
	 * independent hash values and thus should provide a slightly better
	 * table load.
	 */
	*sig = first_hash;
	if (ss->cache) {
		*prim_bkt = sec_hash & ss->bucket_mask;
		*sec_bkt = (sec_hash >> 16) & ss->bucket_mask;
	} else {
		*prim_bkt = sec_hash & ss->bucket_mask;
		*sec_bkt = (*prim_bkt ^ *sig) & ss->bucket_mask;
	}
}

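/*
 * Illustrative sketch (not part of the original DPDK source): the xor
 * mapping above is an involution, so either bucket of a pair can recover
 * its partner from the stored signature alone. This hypothetical helper,
 * member_alt_bucket_sketch(), shows the round trip that the non-cache
 * eviction and deletion paths rely on: applying it twice yields the
 * original index, because (x ^ s) ^ s == x and bucket_mask is a power of
 * two minus one.
 */
static inline uint32_t
member_alt_bucket_sketch(const struct rte_member_setsum *ss,
		uint32_t bkt_idx, member_sig_t sig)
{
	/*
	 * For any already-masked index:
	 * bkt_idx == member_alt_bucket_sketch(ss,
	 *		member_alt_bucket_sketch(ss, bkt_idx, sig), sig)
	 */
	return (bkt_idx ^ sig) & ss->bucket_mask;
}
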
int
rte_member_lookup_ht(const struct rte_member_setsum *ss,
		const void *key, member_set_t *set_id)
{
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;

	*set_id = RTE_MEMBER_NO_MATCH;
	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	if (search_bucket_single(prim_bucket, tmp_sig, buckets,
			set_id) ||
			search_bucket_single(sec_bucket, tmp_sig,
				buckets, set_id))
		return 1;

	return 0;
}

uint32_t
rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss,
		const void **keys, uint32_t num_keys, member_set_t *set_id)
{
	uint32_t i;
	uint32_t num_matches = 0;
	struct member_ht_bucket *buckets = ss->table;
	member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];

	for (i = 0; i < num_keys; i++) {
		get_buckets_index(ss, keys[i], &prim_buckets[i],
				&sec_buckets[i], &tmp_sig[i]);
		rte_prefetch0(&buckets[prim_buckets[i]]);
		rte_prefetch0(&buckets[sec_buckets[i]]);
	}

	for (i = 0; i < num_keys; i++) {
		if (search_bucket_single(prim_buckets[i], tmp_sig[i],
				buckets, &set_id[i]) ||
				search_bucket_single(sec_buckets[i],
					tmp_sig[i], buckets, &set_id[i]))
			num_matches++;
		else
			set_id[i] = RTE_MEMBER_NO_MATCH;
	}
	return num_matches;
}

uint32_t
rte_member_lookup_multi_ht(const struct rte_member_setsum *ss,
		const void *key, uint32_t match_per_key,
		member_set_t *set_id)
{
	uint32_t num_matches = 0;
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;

	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	search_bucket_multi(prim_bucket, tmp_sig, buckets, &num_matches,
			match_per_key, set_id);
	if (num_matches < match_per_key)
		search_bucket_multi(sec_bucket, tmp_sig,
			buckets, &num_matches, match_per_key, set_id);
	return num_matches;
}

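/*
 * Usage sketch (illustrative, not part of the original source): collect up
 * to four matching set ids for a single key. The bound of 4 is an arbitrary
 * caller-side choice, not a library constant.
 */
static inline uint32_t
member_multi_lookup_sketch(const struct rte_member_setsum *ss,
		const void *key)
{
	member_set_t ids[4];

	/* Returns how many slots of ids[] were filled (0 if no match) */
	return rte_member_lookup_multi_ht(ss, key, 4, ids);
}
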
uint32_t
rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss,
		const void **keys, uint32_t num_keys, uint32_t match_per_key,
		uint32_t *match_count,
		member_set_t *set_ids)
{
	uint32_t i;
	uint32_t num_matches = 0;
	struct member_ht_bucket *buckets = ss->table;
	uint32_t match_cnt_tmp;
	member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];

	for (i = 0; i < num_keys; i++) {
		get_buckets_index(ss, keys[i], &prim_buckets[i],
				&sec_buckets[i], &tmp_sig[i]);
		rte_prefetch0(&buckets[prim_buckets[i]]);
		rte_prefetch0(&buckets[sec_buckets[i]]);
	}
	for (i = 0; i < num_keys; i++) {
		match_cnt_tmp = 0;

		search_bucket_multi(prim_buckets[i], tmp_sig[i],
			buckets, &match_cnt_tmp, match_per_key,
			&set_ids[i * match_per_key]);
		if (match_cnt_tmp < match_per_key)
			search_bucket_multi(sec_buckets[i], tmp_sig[i],
				buckets, &match_cnt_tmp, match_per_key,
				&set_ids[i * match_per_key]);
		match_count[i] = match_cnt_tmp;
		if (match_cnt_tmp != 0)
			num_matches++;
	}
	return num_matches;
}

static inline int
try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
		member_sig_t sig, member_set_t set_id)
{
	int i;
	/* If not full then insert into one slot */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (buckets[prim].sets[i] == RTE_MEMBER_NO_MATCH) {
			buckets[prim].sigs[i] = sig;
			buckets[prim].sets[i] = set_id;
			return 0;
		}
	}
	/* If prim failed, we need to access the secondary bucket */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (buckets[sec].sets[i] == RTE_MEMBER_NO_MATCH) {
			buckets[sec].sigs[i] = sig;
			buckets[sec].sets[i] = set_id;
			return 0;
		}
	}
	return -1;
}

static inline int
try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
		member_sig_t sig, member_set_t set_id)
{
	if (update_entry_search(prim, sig, buckets, set_id) ||
			update_entry_search(sec, sig, buckets, set_id))
		return 0;
	return -1;
}

static inline int
evict_from_bucket(void)
{
	/* For now, we randomly pick one entry to evict */
	return rte_rand() & (RTE_MEMBER_BUCKET_ENTRIES - 1);
}

/*
 * This function is similar to the cuckoo hash make_space function in the
 * hash library
 */
static inline int
make_space_bucket(const struct rte_member_setsum *ss, uint32_t bkt_idx,
			unsigned int *nr_pushes)
{
	unsigned int i, j;
	int ret;
	struct member_ht_bucket *buckets = ss->table;
	uint32_t next_bucket_idx;
	struct member_ht_bucket *next_bkt[RTE_MEMBER_BUCKET_ENTRIES];
	struct member_ht_bucket *bkt = &buckets[bkt_idx];
	/* MSB is set to indicate that an entry has already been pushed */
	member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);

	/*
	 * Push an existing item (search for an entry whose alternative
	 * location has space) to its alternative location
	 */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		/* Search for space in alternative locations */
		next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
		next_bkt[i] = &buckets[next_bucket_idx];
		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) {
			if (next_bkt[i]->sets[j] == RTE_MEMBER_NO_MATCH)
				break;
		}

		if (j != RTE_MEMBER_BUCKET_ENTRIES)
			break;
	}

	/* An alternative location has spare room (end of the recursion) */
	if (i != RTE_MEMBER_BUCKET_ENTRIES) {
		next_bkt[i]->sigs[j] = bkt->sigs[i];
		next_bkt[i]->sets[j] = bkt->sets[i];
		return i;
	}

	/* Pick an entry that has not been pushed yet */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++)
		if ((bkt->sets[i] & flag_mask) == 0)
			break;

	/* All entries have been pushed, so the entry cannot be added */
	if (i == RTE_MEMBER_BUCKET_ENTRIES ||
			++(*nr_pushes) > RTE_MEMBER_MAX_PUSHES)
		return -ENOSPC;

	next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
	/* Set the flag to indicate that this entry is going to be pushed */
	bkt->sets[i] |= flag_mask;

	/* Need room in the alternative bucket to insert the pushed entry */
	ret = make_space_bucket(ss, next_bucket_idx, nr_pushes);
	/*
	 * After the recursive call: clear the flag, then insert the pushed
	 * entry in its alternative location if successful, or return the
	 * error
	 */
	bkt->sets[i] &= ~flag_mask;
	if (ret >= 0) {
		next_bkt[i]->sigs[ret] = bkt->sigs[i];
		next_bkt[i]->sets[ret] = bkt->sets[i];
		return i;
	} else
		return ret;
}

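/*
 * Illustrative sketch (not part of the original source): the in-band MSB
 * flag used by make_space_bucket() in isolation. Valid set ids never have
 * the MSB set (rte_member_add_ht() below rejects them), so the flag can be
 * carried inside sets[] to mark entries already on the displacement path,
 * avoiding a separate visited array during the recursion.
 */
static inline void
member_push_flag_sketch(member_set_t *set)
{
	const member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);

	if ((*set & flag_mask) != 0)
		return;			/* already on the displacement path */
	*set |= flag_mask;		/* mark before recursing */
	/* ... recursive displacement would happen here ... */
	*set &= ~flag_mask;		/* clear; the real set id is intact */
}
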
int
rte_member_add_ht(const struct rte_member_setsum *ss,
		const void *key, member_set_t set_id)
{
	int ret;
	unsigned int nr_pushes = 0;
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;
	member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);

	if (set_id == RTE_MEMBER_NO_MATCH || (set_id & flag_mask) != 0)
		return -EINVAL;

	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	/*
	 * If it is a cache-based setsummary, we try overwriting (updating)
	 * an existing entry with the same signature first. In cache mode, we
	 * allow false negatives and only cache the most recent keys.
	 *
	 * For non-cache mode, we do not update an existing entry with the
	 * same signature. This is because if two keys with the same
	 * signature update each other, a false negative may happen, which
	 * is not the expected behavior for a non-cache setsummary.
	 */
	if (ss->cache) {
		ret = try_update(buckets, prim_bucket, sec_bucket, tmp_sig,
					set_id);
		if (ret != -1)
			return ret;
	}
	/* If not full then insert into one slot */
	ret = try_insert(buckets, prim_bucket, sec_bucket, tmp_sig, set_id);
	if (ret != -1)
		return ret;

	/* Randomly pick prim or sec for recursive displacement */
	uint32_t select_bucket = (tmp_sig & 1U) ? prim_bucket : sec_bucket;
	if (ss->cache) {
		ret = evict_from_bucket();
		buckets[select_bucket].sigs[ret] = tmp_sig;
		buckets[select_bucket].sets[ret] = set_id;
		return 0;
	}

	ret = make_space_bucket(ss, select_bucket, &nr_pushes);
	if (ret >= 0) {
		buckets[select_bucket].sigs[ret] = tmp_sig;
		buckets[select_bucket].sets[ret] = set_id;
		ret = 1;
	}

	return ret;
}

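/*
 * Usage sketch (illustrative, not part of the original source): a typical
 * add-then-lookup round trip on an already-created setsummary. Set id 1 is
 * an arbitrary example value; the key object must be ss->key_len bytes.
 */
static inline void
member_add_lookup_sketch(const struct rte_member_setsum *ss,
		const void *key)
{
	member_set_t set;

	if (rte_member_add_ht(ss, key, 1) < 0)
		return;			/* invalid set id or table full */
	rte_member_lookup_ht(ss, key, &set);
	/*
	 * set is normally 1 here; in cache mode the entry may already have
	 * been evicted, and a colliding signature may report another id.
	 */
}
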
void
rte_member_free_ht(struct rte_member_setsum *ss)
{
	rte_free(ss->table);
}

int
rte_member_delete_ht(const struct rte_member_setsum *ss, const void *key,
		member_set_t set_id)
{
	int i;
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;

	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (tmp_sig == buckets[prim_bucket].sigs[i] &&
				set_id == buckets[prim_bucket].sets[i]) {
			buckets[prim_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
			return 0;
		}
	}

	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (tmp_sig == buckets[sec_bucket].sigs[i] &&
				set_id == buckets[sec_bucket].sets[i]) {
			buckets[sec_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
			return 0;
		}
	}
	return -ENOENT;
}

void
rte_member_reset_ht(const struct rte_member_setsum *ss)
{
	uint32_t i, j;
	struct member_ht_bucket *buckets = ss->table;

	for (i = 0; i < ss->bucket_cnt; i++) {
		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
			buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
	}
}