member: implement HT mode
[dpdk.git] / lib / librte_member / rte_member_ht.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2017 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <rte_errno.h>
35 #include <rte_malloc.h>
36 #include <rte_prefetch.h>
37 #include <rte_random.h>
38 #include <rte_log.h>
39
40 #include "rte_member.h"
41 #include "rte_member_ht.h"
42
43 /* Search bucket for entry with tmp_sig and update set_id */
44 static inline int
45 update_entry_search(uint32_t bucket_id, member_sig_t tmp_sig,
46                 struct member_ht_bucket *buckets,
47                 member_set_t set_id)
48 {
49         uint32_t i;
50
51         for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
52                 if (buckets[bucket_id].sigs[i] == tmp_sig) {
53                         buckets[bucket_id].sets[i] = set_id;
54                         return 1;
55                 }
56         }
57         return 0;
58 }
59
60 static inline int
61 search_bucket_single(uint32_t bucket_id, member_sig_t tmp_sig,
62                 struct member_ht_bucket *buckets,
63                 member_set_t *set_id)
64 {
65         uint32_t iter;
66
67         for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
68                 if (tmp_sig == buckets[bucket_id].sigs[iter] &&
69                                 buckets[bucket_id].sets[iter] !=
70                                 RTE_MEMBER_NO_MATCH) {
71                         *set_id = buckets[bucket_id].sets[iter];
72                         return 1;
73                 }
74         }
75         return 0;
76 }
77
78 static inline void
79 search_bucket_multi(uint32_t bucket_id, member_sig_t tmp_sig,
80                 struct member_ht_bucket *buckets,
81                 uint32_t *counter,
82                 uint32_t matches_per_key,
83                 member_set_t *set_id)
84 {
85         uint32_t iter;
86
87         for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
88                 if (tmp_sig == buckets[bucket_id].sigs[iter] &&
89                                 buckets[bucket_id].sets[iter] !=
90                                 RTE_MEMBER_NO_MATCH) {
91                         set_id[*counter] = buckets[bucket_id].sets[iter];
92                         (*counter)++;
93                         if (*counter >= matches_per_key)
94                                 return;
95                 }
96         }
97 }
98
99 int
100 rte_member_create_ht(struct rte_member_setsum *ss,
101                 const struct rte_member_parameters *params)
102 {
103         uint32_t i, j;
104         uint32_t size_bucket_t;
105         uint32_t num_entries = rte_align32pow2(params->num_keys);
106
107         if ((num_entries > RTE_MEMBER_ENTRIES_MAX) ||
108                         !rte_is_power_of_2(RTE_MEMBER_BUCKET_ENTRIES) ||
109                         num_entries < RTE_MEMBER_BUCKET_ENTRIES) {
110                 rte_errno = EINVAL;
111                 RTE_MEMBER_LOG(ERR,
112                         "Membership HT create with invalid parameters\n");
113                 return -EINVAL;
114         }
115
116         uint32_t num_buckets = num_entries / RTE_MEMBER_BUCKET_ENTRIES;
117
118         size_bucket_t = sizeof(struct member_ht_bucket);
119
120         struct member_ht_bucket *buckets = rte_zmalloc_socket(NULL,
121                         num_buckets * size_bucket_t,
122                         RTE_CACHE_LINE_SIZE, ss->socket_id);
123
124         if (buckets == NULL) {
125                 RTE_MEMBER_LOG(ERR, "memory allocation failed for HT "
126                                                 "setsummary\n");
127                 return -ENOMEM;
128         }
129
130         ss->table = buckets;
131         ss->bucket_cnt = num_buckets;
132         ss->bucket_mask = num_buckets - 1;
133         ss->cache = params->is_cache;
134
135         for (i = 0; i < num_buckets; i++) {
136                 for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
137                         buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
138         }
139
140         RTE_MEMBER_LOG(DEBUG, "Hash table based filter created, "
141                         "the table has %u entries, %u buckets\n",
142                         num_entries, num_buckets);
143         return 0;
144 }
145
146 static inline void
147 get_buckets_index(const struct rte_member_setsum *ss, const void *key,
148                 uint32_t *prim_bkt, uint32_t *sec_bkt, member_sig_t *sig)
149 {
150         uint32_t first_hash = MEMBER_HASH_FUNC(key, ss->key_len,
151                                                 ss->prim_hash_seed);
152         uint32_t sec_hash = MEMBER_HASH_FUNC(&first_hash, sizeof(uint32_t),
153                                                 ss->sec_hash_seed);
154         /*
155          * We use the first hash value for the signature, and the second hash
156          * value to derive the primary and secondary bucket locations.
157          *
158          * For non-cache mode, we use the lower bits for the primary bucket
159          * location. Then we xor primary bucket location and the signature
160          * to get the secondary bucket location. This is called "partial-key
161          * cuckoo hashing" proposed by B. Fan, et al's paper
162          * "Cuckoo Filter: Practically Better Than Bloom". The benefit to use
163          * xor is that one could derive the alternative bucket location
164          * by only using the current bucket location and the signature. This is
165          * generally required by non-cache mode's eviction and deletion
166          * process without the need to store alternative hash value nor the full
167          * key.
168          *
169          * For cache mode, we use the lower bits for the primary bucket
170          * location and the higher bits for the secondary bucket location. In
171          * cache mode, keys are simply overwritten if bucket is full. We do not
172          * use xor since lower/higher bits are more independent hash values thus
173          * should provide slightly better table load.
174          */
175         *sig = first_hash;
176         if (ss->cache) {
177                 *prim_bkt = sec_hash & ss->bucket_mask;
178                 *sec_bkt =  (sec_hash >> 16) & ss->bucket_mask;
179         } else {
180                 *prim_bkt = sec_hash & ss->bucket_mask;
181                 *sec_bkt =  (*prim_bkt ^ *sig) & ss->bucket_mask;
182         }
183 }
184
185 int
186 rte_member_lookup_ht(const struct rte_member_setsum *ss,
187                 const void *key, member_set_t *set_id)
188 {
189         uint32_t prim_bucket, sec_bucket;
190         member_sig_t tmp_sig;
191         struct member_ht_bucket *buckets = ss->table;
192
193         *set_id = RTE_MEMBER_NO_MATCH;
194         get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
195
196         if (search_bucket_single(prim_bucket, tmp_sig, buckets,
197                         set_id) ||
198                         search_bucket_single(sec_bucket, tmp_sig,
199                                 buckets, set_id))
200                 return 1;
201
202         return 0;
203 }
204
205 uint32_t
206 rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss,
207                 const void **keys, uint32_t num_keys, member_set_t *set_id)
208 {
209         uint32_t i;
210         uint32_t num_matches = 0;
211         struct member_ht_bucket *buckets = ss->table;
212         member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
213         uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
214         uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
215
216         for (i = 0; i < num_keys; i++) {
217                 get_buckets_index(ss, keys[i], &prim_buckets[i],
218                                 &sec_buckets[i], &tmp_sig[i]);
219                 rte_prefetch0(&buckets[prim_buckets[i]]);
220                 rte_prefetch0(&buckets[sec_buckets[i]]);
221         }
222
223         for (i = 0; i < num_keys; i++) {
224                 if (search_bucket_single(prim_buckets[i], tmp_sig[i],
225                                 buckets, &set_id[i]) ||
226                                 search_bucket_single(sec_buckets[i],
227                                 tmp_sig[i], buckets, &set_id[i]))
228                         num_matches++;
229                 else
230                         set_id[i] = RTE_MEMBER_NO_MATCH;
231         }
232         return num_matches;
233 }
234
235 uint32_t
236 rte_member_lookup_multi_ht(const struct rte_member_setsum *ss,
237                 const void *key, uint32_t match_per_key,
238                 member_set_t *set_id)
239 {
240         uint32_t num_matches = 0;
241         uint32_t prim_bucket, sec_bucket;
242         member_sig_t tmp_sig;
243         struct member_ht_bucket *buckets = ss->table;
244
245         get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
246
247         search_bucket_multi(prim_bucket, tmp_sig, buckets, &num_matches,
248                          match_per_key, set_id);
249         if (num_matches < match_per_key)
250                 search_bucket_multi(sec_bucket, tmp_sig,
251                         buckets, &num_matches, match_per_key, set_id);
252         return num_matches;
253 }
254
255 uint32_t
256 rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss,
257                 const void **keys, uint32_t num_keys, uint32_t match_per_key,
258                 uint32_t *match_count,
259                 member_set_t *set_ids)
260 {
261         uint32_t i;
262         uint32_t num_matches = 0;
263         struct member_ht_bucket *buckets = ss->table;
264         uint32_t match_cnt_tmp;
265         member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
266         uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
267         uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
268
269         for (i = 0; i < num_keys; i++) {
270                 get_buckets_index(ss, keys[i], &prim_buckets[i],
271                                 &sec_buckets[i], &tmp_sig[i]);
272                 rte_prefetch0(&buckets[prim_buckets[i]]);
273                 rte_prefetch0(&buckets[sec_buckets[i]]);
274         }
275         for (i = 0; i < num_keys; i++) {
276                 match_cnt_tmp = 0;
277
278                 search_bucket_multi(prim_buckets[i], tmp_sig[i],
279                         buckets, &match_cnt_tmp, match_per_key,
280                         &set_ids[i*match_per_key]);
281                 if (match_cnt_tmp < match_per_key)
282                         search_bucket_multi(sec_buckets[i], tmp_sig[i],
283                                 buckets, &match_cnt_tmp, match_per_key,
284                                 &set_ids[i*match_per_key]);
285                 match_count[i] = match_cnt_tmp;
286                 if (match_cnt_tmp != 0)
287                         num_matches++;
288         }
289         return num_matches;
290 }
291
292 static inline int
293 try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
294                 member_sig_t sig, member_set_t set_id)
295 {
296         int i;
297         /* If not full then insert into one slot */
298         for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
299                 if (buckets[prim].sets[i] == RTE_MEMBER_NO_MATCH) {
300                         buckets[prim].sigs[i] = sig;
301                         buckets[prim].sets[i] = set_id;
302                         return 0;
303                 }
304         }
305         /* If prim failed, we need to access second bucket */
306         for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
307                 if (buckets[sec].sets[i] == RTE_MEMBER_NO_MATCH) {
308                         buckets[sec].sigs[i] = sig;
309                         buckets[sec].sets[i] = set_id;
310                         return 0;
311                 }
312         }
313         return -1;
314 }
315
316 static inline int
317 try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
318                 member_sig_t sig, member_set_t set_id)
319 {
320         if (update_entry_search(prim, sig, buckets, set_id) ||
321                 update_entry_search(sec, sig, buckets, set_id))
322                 return 0;
323         return -1;
324 }
325
326 static inline int
327 evict_from_bucket(void)
328 {
329         /* For now, we randomly pick one entry to evict */
330         return rte_rand() & (RTE_MEMBER_BUCKET_ENTRIES - 1);
331 }
332
333 /*
334  * This function is similar to the cuckoo hash make_space function in hash
335  * library
336  */
337 static inline int
338 make_space_bucket(const struct rte_member_setsum *ss, uint32_t bkt_idx,
339                         unsigned int *nr_pushes)
340 {
341         unsigned int i, j;
342         int ret;
343         struct member_ht_bucket *buckets = ss->table;
344         uint32_t next_bucket_idx;
345         struct member_ht_bucket *next_bkt[RTE_MEMBER_BUCKET_ENTRIES];
346         struct member_ht_bucket *bkt = &buckets[bkt_idx];
347         /* MSB is set to indicate if an entry has been already pushed */
348         member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);
349
350         /*
351          * Push existing item (search for bucket with space in
352          * alternative locations) to its alternative location
353          */
354         for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
355                 /* Search for space in alternative locations */
356                 next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
357                 next_bkt[i] = &buckets[next_bucket_idx];
358                 for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) {
359                         if (next_bkt[i]->sets[j] == RTE_MEMBER_NO_MATCH)
360                                 break;
361                 }
362
363                 if (j != RTE_MEMBER_BUCKET_ENTRIES)
364                         break;
365         }
366
367         /* Alternative location has spare room (end of recursive function) */
368         if (i != RTE_MEMBER_BUCKET_ENTRIES) {
369                 next_bkt[i]->sigs[j] = bkt->sigs[i];
370                 next_bkt[i]->sets[j] = bkt->sets[i];
371                 return i;
372         }
373
374         /* Pick entry that has not been pushed yet */
375         for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++)
376                 if ((bkt->sets[i] & flag_mask) == 0)
377                         break;
378
379         /* All entries have been pushed, so entry cannot be added */
380         if (i == RTE_MEMBER_BUCKET_ENTRIES ||
381                         ++(*nr_pushes) > RTE_MEMBER_MAX_PUSHES)
382                 return -ENOSPC;
383
384         next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
385         /* Set flag to indicate that this entry is going to be pushed */
386         bkt->sets[i] |= flag_mask;
387
388         /* Need room in alternative bucket to insert the pushed entry */
389         ret = make_space_bucket(ss, next_bucket_idx, nr_pushes);
390         /*
391          * After recursive function.
392          * Clear flags and insert the pushed entry
393          * in its alternative location if successful,
394          * or return error
395          */
396         bkt->sets[i] &= ~flag_mask;
397         if (ret >= 0) {
398                 next_bkt[i]->sigs[ret] = bkt->sigs[i];
399                 next_bkt[i]->sets[ret] = bkt->sets[i];
400                 return i;
401         } else
402                 return ret;
403 }
404
405 int
406 rte_member_add_ht(const struct rte_member_setsum *ss,
407                 const void *key, member_set_t set_id)
408 {
409         int ret;
410         unsigned int nr_pushes = 0;
411         uint32_t prim_bucket, sec_bucket;
412         member_sig_t tmp_sig;
413         struct member_ht_bucket *buckets = ss->table;
414         member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);
415
416         if (set_id == RTE_MEMBER_NO_MATCH || (set_id & flag_mask) != 0)
417                 return -EINVAL;
418
419         get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
420
421         /*
422          * If it is cache based setsummary, we try overwriting (updating)
423          * existing entry with the same signature first. In cache mode, we allow
424          * false negatives and only cache the most recent keys.
425          *
426          * For non-cache mode, we do not update existing entry with the same
427          * signature. This is because if two keys with same signature update
428          * each other, false negative may happen, which is not the expected
429          * behavior for non-cache setsummary.
430          */
431         if (ss->cache) {
432                 ret = try_update(buckets, prim_bucket, sec_bucket, tmp_sig,
433                                         set_id);
434                 if (ret != -1)
435                         return ret;
436         }
437         /* If not full then insert into one slot */
438         ret = try_insert(buckets, prim_bucket, sec_bucket, tmp_sig, set_id);
439         if (ret != -1)
440                 return ret;
441
442         /* Random pick prim or sec for recursive displacement */
443         uint32_t select_bucket = (tmp_sig && 1U) ? prim_bucket : sec_bucket;
444         if (ss->cache) {
445                 ret = evict_from_bucket();
446                 buckets[select_bucket].sigs[ret] = tmp_sig;
447                 buckets[select_bucket].sets[ret] = set_id;
448                 return 1;
449         }
450
451         ret = make_space_bucket(ss, select_bucket, &nr_pushes);
452         if (ret >= 0) {
453                 buckets[select_bucket].sigs[ret] = tmp_sig;
454                 buckets[select_bucket].sets[ret] = set_id;
455                 ret = 1;
456         }
457
458         return ret;
459 }
460
461 void
462 rte_member_free_ht(struct rte_member_setsum *ss)
463 {
464         rte_free(ss->table);
465 }
466
467 int
468 rte_member_delete_ht(const struct rte_member_setsum *ss, const void *key,
469                 member_set_t set_id)
470 {
471         int i;
472         uint32_t prim_bucket, sec_bucket;
473         member_sig_t tmp_sig;
474         struct member_ht_bucket *buckets = ss->table;
475
476         get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
477
478         for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
479                 if (tmp_sig == buckets[prim_bucket].sigs[i] &&
480                                 set_id == buckets[prim_bucket].sets[i]) {
481                         buckets[prim_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
482                         return 0;
483                 }
484         }
485
486         for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
487                 if (tmp_sig == buckets[sec_bucket].sigs[i] &&
488                                 set_id == buckets[sec_bucket].sets[i]) {
489                         buckets[sec_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
490                         return 0;
491                 }
492         }
493         return -ENOENT;
494 }
495
496 void
497 rte_member_reset_ht(const struct rte_member_setsum *ss)
498 {
499         uint32_t i, j;
500         struct member_ht_bucket *buckets = ss->table;
501
502         for (i = 0; i < ss->bucket_cnt; i++) {
503                 for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
504                         buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
505         }
506 }