add prefix to cache line macros
[dpdk.git] / lib / librte_table / rte_table_hash_key32.c
1 /*-
2  *       BSD LICENSE
3  *
4  *       Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *       All rights reserved.
6  *
7  *       Redistribution and use in source and binary forms, with or without
8  *       modification, are permitted provided that the following conditions
9  *       are met:
10  *
11  *      * Redistributions of source code must retain the above copyright
12  *               notice, this list of conditions and the following disclaimer.
13  *      * Redistributions in binary form must reproduce the above copyright
14  *               notice, this list of conditions and the following disclaimer in
15  *               the documentation and/or other materials provided with the
16  *               distribution.
17  *      * Neither the name of Intel Corporation nor the names of its
18  *               contributors may be used to endorse or promote products derived
19  *               from this software without specific prior written permission.
20  *
21  *       THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *       "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *       LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *       A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *       OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *       SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *       LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *       DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *       THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *       (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *       OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <string.h>
34 #include <stdio.h>
35
36 #include <rte_common.h>
37 #include <rte_mbuf.h>
38 #include <rte_malloc.h>
39 #include <rte_log.h>
40
41 #include "rte_table_hash.h"
42 #include "rte_lru.h"
43
/* Key size, in bytes, handled by the hash tables of this file */
#define RTE_TABLE_HASH_KEY_SIZE 32

/*
 * Bit 0 of a stored signature. It is OR-ed into every signature written to a
 * bucket slot, so an occupied slot never holds the value 0.
 */
#define RTE_BUCKET_ENTRY_VALID 0x1LLU
47
/*
 * Bucket with 4 slots for 32-byte keys. The header and the keys are followed
 * in memory by the entry data of the 4 slots (entry_size bytes each).
 */
struct rte_bucket_4_32 {
	/* Cache line 0 */

	/*
	 * Slots 0..3 hold the signature of the stored key (0 = free slot).
	 * Slot 4 is never written by this file: lookup_key32_cmp() reports
	 * position 4 on a miss and the lookup stages then read signature[4].
	 */
	uint64_t signature[5];
	/* LRU state, handled through the lru_pos()/lru_update() helpers */
	uint64_t lru_list;
	/* Next bucket of the chain (extendible bucket tables) */
	struct rte_bucket_4_32 *next;
	/* Set to 1 when a bucket is linked through next */
	uint64_t next_valid;

	/* Cache lines 1 and 2 */

	/* One 32-byte key (4 x 64-bit words) per slot */
	uint64_t key[4][4];

	/* Cache line 3 */

	/* Entry data of the 4 slots */
	uint8_t data[0];
};
61
62 struct rte_table_hash {
63         /* Input parameters */
64         uint32_t n_buckets;
65         uint32_t n_entries_per_bucket;
66         uint32_t key_size;
67         uint32_t entry_size;
68         uint32_t bucket_size;
69         uint32_t signature_offset;
70         uint32_t key_offset;
71         rte_table_hash_op_hash f_hash;
72         uint64_t seed;
73
74         /* Extendible buckets */
75         uint32_t n_buckets_ext;
76         uint32_t stack_pos;
77         uint32_t *stack;
78
79         /* Lookup table */
80         uint8_t memory[0] __rte_cache_aligned;
81 };
82
83 static int
84 check_params_create_lru(struct rte_table_hash_key32_lru_params *params) {
85         /* n_entries */
86         if (params->n_entries == 0) {
87                 RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
88                 return -EINVAL;
89         }
90
91         /* signature offset */
92         if ((params->signature_offset & 0x3) != 0) {
93                 RTE_LOG(ERR, TABLE, "%s: invalid signature offset\n", __func__);
94                 return -EINVAL;
95         }
96
97         /* key offset */
98         if ((params->key_offset & 0x7) != 0) {
99                 RTE_LOG(ERR, TABLE, "%s: invalid key offset\n", __func__);
100                 return -EINVAL;
101         }
102
103         /* f_hash */
104         if (params->f_hash == NULL) {
105                 RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
106                         __func__);
107                 return -EINVAL;
108         }
109
110         return 0;
111 }
112
113 static void *
114 rte_table_hash_create_key32_lru(void *params,
115                 int socket_id,
116                 uint32_t entry_size)
117 {
118         struct rte_table_hash_key32_lru_params *p =
119                 (struct rte_table_hash_key32_lru_params *) params;
120         struct rte_table_hash *f;
121         uint32_t n_buckets, n_entries_per_bucket, key_size, bucket_size_cl;
122         uint32_t total_size, i;
123
124         /* Check input parameters */
125         if ((check_params_create_lru(p) != 0) ||
126                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
127                 ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) {
128                 return NULL;
129         }
130         n_entries_per_bucket = 4;
131         key_size = 32;
132
133         /* Memory allocation */
134         n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
135                 n_entries_per_bucket);
136         bucket_size_cl = (sizeof(struct rte_bucket_4_32) + n_entries_per_bucket
137                 * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
138         total_size = sizeof(struct rte_table_hash) + n_buckets *
139                 bucket_size_cl * RTE_CACHE_LINE_SIZE;
140
141         f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
142         if (f == NULL) {
143                 RTE_LOG(ERR, TABLE,
144                         "%s: Cannot allocate %u bytes for hash table\n",
145                         __func__, total_size);
146                 return NULL;
147         }
148         RTE_LOG(INFO, TABLE,
149                 "%s: Hash table memory footprint is %u bytes\n", __func__,
150                 total_size);
151
152         /* Memory initialization */
153         f->n_buckets = n_buckets;
154         f->n_entries_per_bucket = n_entries_per_bucket;
155         f->key_size = key_size;
156         f->entry_size = entry_size;
157         f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
158         f->signature_offset = p->signature_offset;
159         f->key_offset = p->key_offset;
160         f->f_hash = p->f_hash;
161         f->seed = p->seed;
162
163         for (i = 0; i < n_buckets; i++) {
164                 struct rte_bucket_4_32 *bucket;
165
166                 bucket = (struct rte_bucket_4_32 *) &f->memory[i *
167                         f->bucket_size];
168                 bucket->lru_list = 0x0000000100020003LLU;
169         }
170
171         return f;
172 }
173
174 static int
175 rte_table_hash_free_key32_lru(void *table)
176 {
177         struct rte_table_hash *f = (struct rte_table_hash *) table;
178
179         /* Check input parameters */
180         if (f == NULL) {
181                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
182                 return -EINVAL;
183         }
184
185         rte_free(f);
186         return 0;
187 }
188
189 static int
190 rte_table_hash_entry_add_key32_lru(
191         void *table,
192         void *key,
193         void *entry,
194         int *key_found,
195         void **entry_ptr)
196 {
197         struct rte_table_hash *f = (struct rte_table_hash *) table;
198         struct rte_bucket_4_32 *bucket;
199         uint64_t signature, pos;
200         uint32_t bucket_index, i;
201
202         signature = f->f_hash(key, f->key_size, f->seed);
203         bucket_index = signature & (f->n_buckets - 1);
204         bucket = (struct rte_bucket_4_32 *)
205                 &f->memory[bucket_index * f->bucket_size];
206         signature |= RTE_BUCKET_ENTRY_VALID;
207
208         /* Key is present in the bucket */
209         for (i = 0; i < 4; i++) {
210                 uint64_t bucket_signature = bucket->signature[i];
211                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
212
213                 if ((bucket_signature == signature) &&
214                         (memcmp(key, bucket_key, f->key_size) == 0)) {
215                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
216
217                         memcpy(bucket_data, entry, f->entry_size);
218                         lru_update(bucket, i);
219                         *key_found = 1;
220                         *entry_ptr = (void *) bucket_data;
221                         return 0;
222                 }
223         }
224
225         /* Key is not present in the bucket */
226         for (i = 0; i < 4; i++) {
227                 uint64_t bucket_signature = bucket->signature[i];
228                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
229
230                 if (bucket_signature == 0) {
231                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
232
233                         bucket->signature[i] = signature;
234                         memcpy(bucket_key, key, f->key_size);
235                         memcpy(bucket_data, entry, f->entry_size);
236                         lru_update(bucket, i);
237                         *key_found = 0;
238                         *entry_ptr = (void *) bucket_data;
239
240                         return 0;
241                 }
242         }
243
244         /* Bucket full: replace LRU entry */
245         pos = lru_pos(bucket);
246         bucket->signature[pos] = signature;
247         memcpy(bucket->key[pos], key, f->key_size);
248         memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
249         lru_update(bucket, pos);
250         *key_found      = 0;
251         *entry_ptr = (void *) &bucket->data[pos * f->entry_size];
252
253         return 0;
254 }
255
256 static int
257 rte_table_hash_entry_delete_key32_lru(
258         void *table,
259         void *key,
260         int *key_found,
261         void *entry)
262 {
263         struct rte_table_hash *f = (struct rte_table_hash *) table;
264         struct rte_bucket_4_32 *bucket;
265         uint64_t signature;
266         uint32_t bucket_index, i;
267
268         signature = f->f_hash(key, f->key_size, f->seed);
269         bucket_index = signature & (f->n_buckets - 1);
270         bucket = (struct rte_bucket_4_32 *)
271                 &f->memory[bucket_index * f->bucket_size];
272         signature |= RTE_BUCKET_ENTRY_VALID;
273
274         /* Key is present in the bucket */
275         for (i = 0; i < 4; i++) {
276                 uint64_t bucket_signature = bucket->signature[i];
277                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
278
279                 if ((bucket_signature == signature) &&
280                         (memcmp(key, bucket_key, f->key_size) == 0)) {
281                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
282
283                         bucket->signature[i] = 0;
284                         *key_found = 1;
285                         if (entry)
286                                 memcpy(entry, bucket_data, f->entry_size);
287
288                         return 0;
289                 }
290         }
291
292         /* Key is not present in the bucket */
293         *key_found = 0;
294         return 0;
295 }
296
297 static int
298 check_params_create_ext(struct rte_table_hash_key32_ext_params *params) {
299         /* n_entries */
300         if (params->n_entries == 0) {
301                 RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
302                 return -EINVAL;
303         }
304
305         /* n_entries_ext */
306         if (params->n_entries_ext == 0) {
307                 RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
308                 return -EINVAL;
309         }
310
311         /* signature offset */
312         if ((params->signature_offset & 0x3) != 0) {
313                 RTE_LOG(ERR, TABLE, "%s: invalid signature offset\n", __func__);
314                 return -EINVAL;
315         }
316
317         /* key offset */
318         if ((params->key_offset & 0x7) != 0) {
319                 RTE_LOG(ERR, TABLE, "%s: invalid key offset\n", __func__);
320                 return -EINVAL;
321         }
322
323         /* f_hash */
324         if (params->f_hash == NULL) {
325                 RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
326                         __func__);
327                 return -EINVAL;
328         }
329
330         return 0;
331 }
332
333 static void *
334 rte_table_hash_create_key32_ext(void *params,
335         int socket_id,
336         uint32_t entry_size)
337 {
338         struct rte_table_hash_key32_ext_params *p =
339                         (struct rte_table_hash_key32_ext_params *) params;
340         struct rte_table_hash *f;
341         uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket;
342         uint32_t key_size, bucket_size_cl, stack_size_cl, total_size, i;
343
344         /* Check input parameters */
345         if ((check_params_create_ext(p) != 0) ||
346                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
347                 ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0))
348                 return NULL;
349
350         n_entries_per_bucket = 4;
351         key_size = 32;
352
353         /* Memory allocation */
354         n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
355                 n_entries_per_bucket);
356         n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) /
357                 n_entries_per_bucket;
358         bucket_size_cl = (sizeof(struct rte_bucket_4_32) + n_entries_per_bucket
359                 * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
360         stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1)
361                 / RTE_CACHE_LINE_SIZE;
362         total_size = sizeof(struct rte_table_hash) +
363                 ((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) *
364                 RTE_CACHE_LINE_SIZE;
365
366         f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
367         if (f == NULL) {
368                 RTE_LOG(ERR, TABLE,
369                         "%s: Cannot allocate %u bytes for hash table\n",
370                         __func__, total_size);
371                 return NULL;
372         }
373         RTE_LOG(INFO, TABLE,
374                 "%s: Hash table memory footprint is %u bytes\n", __func__,
375                 total_size);
376
377         /* Memory initialization */
378         f->n_buckets = n_buckets;
379         f->n_entries_per_bucket = n_entries_per_bucket;
380         f->key_size = key_size;
381         f->entry_size = entry_size;
382         f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
383         f->signature_offset = p->signature_offset;
384         f->key_offset = p->key_offset;
385         f->f_hash = p->f_hash;
386         f->seed = p->seed;
387
388         f->n_buckets_ext = n_buckets_ext;
389         f->stack_pos = n_buckets_ext;
390         f->stack = (uint32_t *)
391                 &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];
392
393         for (i = 0; i < n_buckets_ext; i++)
394                 f->stack[i] = i;
395
396         return f;
397 }
398
399 static int
400 rte_table_hash_free_key32_ext(void *table)
401 {
402         struct rte_table_hash *f = (struct rte_table_hash *) table;
403
404         /* Check input parameters */
405         if (f == NULL) {
406                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
407                 return -EINVAL;
408         }
409
410         rte_free(f);
411         return 0;
412 }
413
414 static int
415 rte_table_hash_entry_add_key32_ext(
416         void *table,
417         void *key,
418         void *entry,
419         int *key_found,
420         void **entry_ptr)
421 {
422         struct rte_table_hash *f = (struct rte_table_hash *) table;
423         struct rte_bucket_4_32 *bucket0, *bucket, *bucket_prev;
424         uint64_t signature;
425         uint32_t bucket_index, i;
426
427         signature = f->f_hash(key, f->key_size, f->seed);
428         bucket_index = signature & (f->n_buckets - 1);
429         bucket0 = (struct rte_bucket_4_32 *)
430                         &f->memory[bucket_index * f->bucket_size];
431         signature |= RTE_BUCKET_ENTRY_VALID;
432
433         /* Key is present in the bucket */
434         for (bucket = bucket0; bucket != NULL; bucket = bucket->next) {
435                 for (i = 0; i < 4; i++) {
436                         uint64_t bucket_signature = bucket->signature[i];
437                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
438
439                         if ((bucket_signature == signature) &&
440                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
441                                 uint8_t *bucket_data = &bucket->data[i *
442                                         f->entry_size];
443
444                                 memcpy(bucket_data, entry, f->entry_size);
445                                 *key_found = 1;
446                                 *entry_ptr = (void *) bucket_data;
447
448                                 return 0;
449                         }
450                 }
451         }
452
453         /* Key is not present in the bucket */
454         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
455                 bucket_prev = bucket, bucket = bucket->next)
456                 for (i = 0; i < 4; i++) {
457                         uint64_t bucket_signature = bucket->signature[i];
458                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
459
460                         if (bucket_signature == 0) {
461                                 uint8_t *bucket_data = &bucket->data[i *
462                                         f->entry_size];
463
464                                 bucket->signature[i] = signature;
465                                 memcpy(bucket_key, key, f->key_size);
466                                 memcpy(bucket_data, entry, f->entry_size);
467                                 *key_found = 0;
468                                 *entry_ptr = (void *) bucket_data;
469
470                                 return 0;
471                         }
472                 }
473
474         /* Bucket full: extend bucket */
475         if (f->stack_pos > 0) {
476                 bucket_index = f->stack[--f->stack_pos];
477
478                 bucket = (struct rte_bucket_4_32 *)
479                         &f->memory[(f->n_buckets + bucket_index) *
480                         f->bucket_size];
481                 bucket_prev->next = bucket;
482                 bucket_prev->next_valid = 1;
483
484                 bucket->signature[0] = signature;
485                 memcpy(bucket->key[0], key, f->key_size);
486                 memcpy(&bucket->data[0], entry, f->entry_size);
487                 *key_found = 0;
488                 *entry_ptr = (void *) &bucket->data[0];
489                 return 0;
490         }
491
492         return -ENOSPC;
493 }
494
495 static int
496 rte_table_hash_entry_delete_key32_ext(
497         void *table,
498         void *key,
499         int *key_found,
500         void *entry)
501 {
502         struct rte_table_hash *f = (struct rte_table_hash *) table;
503         struct rte_bucket_4_32 *bucket0, *bucket, *bucket_prev;
504         uint64_t signature;
505         uint32_t bucket_index, i;
506
507         signature = f->f_hash(key, f->key_size, f->seed);
508         bucket_index = signature & (f->n_buckets - 1);
509         bucket0 = (struct rte_bucket_4_32 *)
510                 &f->memory[bucket_index * f->bucket_size];
511         signature |= RTE_BUCKET_ENTRY_VALID;
512
513         /* Key is present in the bucket */
514         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
515                 bucket_prev = bucket, bucket = bucket->next)
516                 for (i = 0; i < 4; i++) {
517                         uint64_t bucket_signature = bucket->signature[i];
518                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
519
520                         if ((bucket_signature == signature) &&
521                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
522                                 uint8_t *bucket_data = &bucket->data[i *
523                                         f->entry_size];
524
525                                 bucket->signature[i] = 0;
526                                 *key_found = 1;
527                                 if (entry)
528                                         memcpy(entry, bucket_data,
529                                                 f->entry_size);
530
531                                 if ((bucket->signature[0] == 0) &&
532                                                 (bucket->signature[1] == 0) &&
533                                                 (bucket->signature[2] == 0) &&
534                                                 (bucket->signature[3] == 0) &&
535                                                 (bucket_prev != NULL)) {
536                                         bucket_prev->next = bucket->next;
537                                         bucket_prev->next_valid =
538                                                 bucket->next_valid;
539
540                                         memset(bucket, 0,
541                                                 sizeof(struct rte_bucket_4_32));
542                                         bucket_index = (bucket -
543                                                 ((struct rte_bucket_4_32 *)
544                                                 f->memory)) - f->n_buckets;
545                                         f->stack[f->stack_pos++] = bucket_index;
546                                 }
547
548                                 return 0;
549                         }
550                 }
551
552         /* Key is not present in the bucket */
553         *key_found = 0;
554         return 0;
555 }
556
/*
 * Compare the 32-byte key key_in (4 x 64-bit words) against the 4 keys of
 * bucket. pos is set to the index of the slot that is valid (bit 0 of its
 * signature set) and holds an identical key; when several slots qualify the
 * highest index wins. pos is set to 4 when no slot qualifies.
 *
 * A slot accumulator is 0 only when the slot is valid and the 4 key words
 * are all equal.
 */
#define lookup_key32_cmp(key_in, bucket, pos) \
{ \
	uint64_t mismatch[4]; \
 \
	mismatch[0] = ((~bucket->signature[0]) & 1) | \
		(key_in[0] ^ bucket->key[0][0]) | \
		(key_in[1] ^ bucket->key[0][1]) | \
		(key_in[2] ^ bucket->key[0][2]) | \
		(key_in[3] ^ bucket->key[0][3]); \
 \
	mismatch[1] = ((~bucket->signature[1]) & 1) | \
		(key_in[0] ^ bucket->key[1][0]) | \
		(key_in[1] ^ bucket->key[1][1]) | \
		(key_in[2] ^ bucket->key[1][2]) | \
		(key_in[3] ^ bucket->key[1][3]); \
 \
	mismatch[2] = ((~bucket->signature[2]) & 1) | \
		(key_in[0] ^ bucket->key[2][0]) | \
		(key_in[1] ^ bucket->key[2][1]) | \
		(key_in[2] ^ bucket->key[2][2]) | \
		(key_in[3] ^ bucket->key[2][3]); \
 \
	mismatch[3] = ((~bucket->signature[3]) & 1) | \
		(key_in[0] ^ bucket->key[3][0]) | \
		(key_in[1] ^ bucket->key[3][1]) | \
		(key_in[2] ^ bucket->key[3][2]) | \
		(key_in[3] ^ bucket->key[3][3]); \
 \
	pos = (mismatch[3] == 0) ? 3 : \
		(mismatch[2] == 0) ? 2 : \
		(mismatch[1] == 0) ? 1 : \
		(mismatch[0] == 0) ? 0 : 4; \
}
601
/*
 * Pipeline stage 0, one packet: take the packet of the lowest set bit of
 * pkts_mask (the bit is cleared from pkts_mask), load its mbuf pointer into
 * mbuf0 and prefetch the mbuf meta-data.
 */
#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask) \
{ \
	pkt0_index = __builtin_ctzll(pkts_mask); \
	pkts_mask &= ~(1LLU << pkt0_index); \
 \
	mbuf0 = pkts[pkt0_index]; \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, 0)); \
}
613
/*
 * Pipeline stage 1, one packet: read the signature from the mbuf meta-data
 * (at f->signature_offset), derive the bucket from it, store the bucket
 * address into bucket1 and prefetch the first 3 cache lines of the bucket.
 */
#define lookup1_stage1(mbuf1, bucket1, f) \
{ \
	uint64_t signature; \
	uint32_t bucket_index; \
 \
	signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset); \
	bucket_index = signature & (f->n_buckets - 1); \
	bucket1 = (struct rte_bucket_4_32 *) \
		&f->memory[bucket_index * f->bucket_size]; \
 \
	rte_prefetch0(bucket1); \
	rte_prefetch0((void *)(((uintptr_t) bucket1) + \
		RTE_CACHE_LINE_SIZE)); \
	rte_prefetch0((void *)(((uintptr_t) bucket1) + \
		2 * RTE_CACHE_LINE_SIZE)); \
}
627
/*
 * Pipeline stage 2, one packet, LRU table: compare the packet key (read from
 * the mbuf meta-data at f->key_offset) with the keys of bucket2.
 *
 * Bit pkt2_index of pkts_mask_out is set on a hit. entries[pkt2_index] is
 * written and prefetched in every case; on a miss pos is 4, so the address
 * is not a valid entry. The LRU state of the bucket is updated with pos.
 */
#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2, \
	pkts_mask_out, entries, f) \
{ \
	void *a; \
	uint64_t *key; \
	uint32_t pos; \
 \
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset); \
	lookup_key32_cmp(key, bucket2, pos); \
 \
	/* signature[pos] has bit 0 set only for a valid slot */ \
	pkts_mask_out |= (bucket2->signature[pos] & 1LLU) << pkt2_index; \
 \
	a = (void *) &bucket2->data[pos * f->entry_size]; \
	rte_prefetch0(a); \
	entries[pkt2_index] = a; \
	lru_update(bucket2, pos); \
}
648
/*
 * Pipeline stage 2, one packet, extendible bucket table: compare the packet
 * key (read from the mbuf meta-data at f->key_offset) with the keys of
 * bucket2.
 *
 * Bit pkt2_index of pkts_mask_out is set on a hit and entries[pkt2_index] is
 * written and prefetched in every case. On a miss with a chained bucket
 * (next_valid set), bit pkt2_index of buckets_mask is set. The next bucket
 * and the key pointer are always saved in buckets[] and keys[].
 */
#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out, \
	entries, buckets_mask, buckets, keys, f) \
{ \
	void *a; \
	uint64_t pkt_mask; \
	uint64_t *key; \
	uint32_t pos; \
 \
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset); \
	lookup_key32_cmp(key, bucket2, pos); \
 \
	/* signature[pos] has bit 0 set only for a valid slot */ \
	pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index; \
	pkts_mask_out |= pkt_mask; \
 \
	a = (void *) &bucket2->data[pos * f->entry_size]; \
	rte_prefetch0(a); \
	entries[pkt2_index] = a; \
 \
	/* Miss in this bucket and chain not exhausted: keep searching */ \
	buckets_mask |= (~pkt_mask) & (bucket2->next_valid << pkt2_index); \
	buckets[pkt2_index] = bucket2->next; \
	keys[pkt2_index] = key; \
}
675
/*
 * Continue the lookup of packet pkt_index in the chained bucket saved in
 * buckets[pkt_index], with the key saved in keys[pkt_index].
 *
 * Bit pkt_index of pkts_mask_out is set on a hit and entries[pkt_index] is
 * written and prefetched in every case. On a miss with a further chained
 * bucket (next_valid set), bit pkt_index of buckets_mask is set. The next
 * bucket is prefetched (3 cache lines) and saved in buckets[pkt_index].
 */
#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, \
	entries, buckets_mask, f) \
{ \
	struct rte_bucket_4_32 *bucket, *bucket_next; \
	void *a; \
	uint64_t pkt_mask; \
	uint64_t *key; \
	uint32_t pos; \
 \
	bucket = buckets[pkt_index]; \
	key = keys[pkt_index]; \
	lookup_key32_cmp(key, bucket, pos); \
 \
	/* signature[pos] has bit 0 set only for a valid slot */ \
	pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index; \
	pkts_mask_out |= pkt_mask; \
 \
	a = (void *) &bucket->data[pos * f->entry_size]; \
	rte_prefetch0(a); \
	entries[pkt_index] = a; \
 \
	/* Miss in this bucket and chain not exhausted: keep searching */ \
	buckets_mask |= (~pkt_mask) & (bucket->next_valid << pkt_index); \
 \
	bucket_next = bucket->next; \
	rte_prefetch0(bucket_next); \
	rte_prefetch0((void *)(((uintptr_t) bucket_next) + \
		RTE_CACHE_LINE_SIZE)); \
	rte_prefetch0((void *)(((uintptr_t) bucket_next) + \
		2 * RTE_CACHE_LINE_SIZE)); \
	buckets[pkt_index] = bucket_next; \
	keys[pkt_index] = key; \
}
707
/*
 * Pipeline stage 0 (two packets): take the two lowest set bits out of
 * pkts_mask, fetch the matching mbufs and prefetch their metadata.
 *
 *   pkt00_index, pkt01_index - lvalues receiving the two packet indexes
 *   mbuf00, mbuf01           - lvalues receiving pkts[] at those indexes
 *   pkts                     - array of mbuf pointers
 *   pkts_mask                - lvalue; the two selected bits are cleared
 *
 * Precondition: pkts_mask has at least two bits set. __builtin_ctzll() has an
 * undefined result for a zero argument; use
 * lookup2_stage0_with_odd_support() when only one packet may remain.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays correct as the body of an unbraced if/else.
 */
#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, \
	pkts, pkts_mask) \
do { \
	uint64_t pkt00_mask, pkt01_mask; \
	\
	(pkt00_index) = __builtin_ctzll(pkts_mask); \
	pkt00_mask = 1LLU << (pkt00_index); \
	(pkts_mask) &= ~pkt00_mask; \
	\
	(mbuf00) = (pkts)[pkt00_index]; \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0)); \
	\
	(pkt01_index) = __builtin_ctzll(pkts_mask); \
	pkt01_mask = 1LLU << (pkt01_index); \
	(pkts_mask) &= ~pkt01_mask; \
	\
	(mbuf01) = (pkts)[pkt01_index]; \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0)); \
} while (0)
727
/*
 * Pipeline stage 0 (two packets) that tolerates a single remaining packet.
 *
 * Behaves like lookup2_stage0(), except that when pkts_mask becomes empty
 * after the first packet has been taken, the second slot is filled with the
 * same packet again (pkt01_index == pkt00_index, mbuf01 == mbuf00).
 *
 *   pkt00_index, pkt01_index - lvalues receiving the two packet indexes
 *   mbuf00, mbuf01           - lvalues receiving pkts[] at those indexes
 *   pkts                     - array of mbuf pointers
 *   pkts_mask                - lvalue; the selected bits are cleared
 *
 * Precondition: pkts_mask has at least one bit set.
 *
 * __builtin_ctzll() is only evaluated on a non-zero mask, because its result
 * is undefined for a zero argument. The body is wrapped in do { } while (0)
 * so the macro expands to exactly one statement and stays correct as the body
 * of an unbraced if/else.
 */
#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, \
	mbuf00, mbuf01, pkts, pkts_mask) \
do { \
	uint64_t pkt00_mask, pkt01_mask; \
	\
	(pkt00_index) = __builtin_ctzll(pkts_mask); \
	pkt00_mask = 1LLU << (pkt00_index); \
	(pkts_mask) &= ~pkt00_mask; \
	\
	(mbuf00) = (pkts)[pkt00_index]; \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0)); \
	\
	/* No second packet left: process the first one twice. */ \
	if ((pkts_mask) == 0) \
		(pkt01_index) = (pkt00_index); \
	else \
		(pkt01_index) = __builtin_ctzll(pkts_mask); \
	\
	pkt01_mask = 1LLU << (pkt01_index); \
	(pkts_mask) &= ~pkt01_mask; \
	\
	(mbuf01) = (pkts)[pkt01_index]; \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0)); \
} while (0)
750
/*
 * Pipeline stage 1 (two packets): locate the bucket of each packet and start
 * prefetching it.
 *
 * For each packet the 32-bit signature is read from the mbuf metadata at
 * f->signature_offset, masked with (f->n_buckets - 1) to get a bucket index,
 * and turned into a pointer into f->memory using f->bucket_size as the
 * stride. Three consecutive cache lines starting at the bucket are
 * prefetched.
 *
 *   mbuf10, mbuf11     - the two packets
 *   bucket10, bucket11 - lvalues receiving the bucket pointers
 *   f                  - table pointer
 *
 * NOTE(review): the index mask assumes f->n_buckets is a power of two -
 * confirm against the table create function.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays correct as the body of an unbraced if/else.
 */
#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f) \
do { \
	uint64_t signature10, signature11; \
	uint32_t bucket10_index, bucket11_index; \
	\
	signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, \
		(f)->signature_offset); \
	bucket10_index = signature10 & ((f)->n_buckets - 1); \
	(bucket10) = (struct rte_bucket_4_32 *) \
		&(f)->memory[bucket10_index * (f)->bucket_size]; \
	rte_prefetch0(bucket10); \
	rte_prefetch0((void *)(((uintptr_t) (bucket10)) + \
		RTE_CACHE_LINE_SIZE)); \
	rte_prefetch0((void *)(((uintptr_t) (bucket10)) + \
		2 * RTE_CACHE_LINE_SIZE)); \
	\
	signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, \
		(f)->signature_offset); \
	bucket11_index = signature11 & ((f)->n_buckets - 1); \
	(bucket11) = (struct rte_bucket_4_32 *) \
		&(f)->memory[bucket11_index * (f)->bucket_size]; \
	rte_prefetch0(bucket11); \
	rte_prefetch0((void *)(((uintptr_t) (bucket11)) + \
		RTE_CACHE_LINE_SIZE)); \
	rte_prefetch0((void *)(((uintptr_t) (bucket11)) + \
		2 * RTE_CACHE_LINE_SIZE)); \
} while (0)
772
/*
 * Pipeline stage 2 (two packets), LRU flavour: compare keys, record results
 * and update the bucket LRU state.
 *
 * For each packet the key pointer is taken from the mbuf metadata at
 * f->key_offset and handed to lookup_key32_cmp() together with the bucket to
 * obtain a position. Bit 0 of the signature at that position becomes the
 * packet's bit in pkts_mask_out, the address of the data at that position is
 * prefetched and stored in entries[], and lru_update() is called for the
 * bucket and position.
 *
 *   pkt20_index, pkt21_index - packet slots / mask bit positions
 *   mbuf20, mbuf21           - the two packets
 *   bucket20, bucket21       - their buckets, as found by stage 1
 *   pkts_mask_out            - lvalue accumulating the per-packet result bits
 *   entries                  - per-packet result pointers
 *   f                        - table pointer (key_offset, entry_size read)
 *
 * lookup_key32_cmp() and lru_update() are defined elsewhere in this file;
 * NOTE(review): their behaviour on a miss is not visible here - confirm.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays correct as the body of an unbraced if/else.
 */
#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, \
	bucket20, bucket21, pkts_mask_out, entries, f) \
do { \
	void *a20, *a21; \
	uint64_t pkt20_mask, pkt21_mask; \
	uint64_t *key20, *key21; \
	uint32_t pos20, pos21; \
	\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, (f)->key_offset); \
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, (f)->key_offset); \
	\
	lookup_key32_cmp(key20, bucket20, pos20); \
	lookup_key32_cmp(key21, bucket21, pos21); \
	\
	pkt20_mask = ((bucket20)->signature[pos20] & 1LLU) << (pkt20_index); \
	pkt21_mask = ((bucket21)->signature[pos21] & 1LLU) << (pkt21_index); \
	(pkts_mask_out) |= pkt20_mask | pkt21_mask; \
	\
	a20 = (void *) &(bucket20)->data[pos20 * (f)->entry_size]; \
	a21 = (void *) &(bucket21)->data[pos21 * (f)->entry_size]; \
	rte_prefetch0(a20); \
	rte_prefetch0(a21); \
	(entries)[pkt20_index] = a20; \
	(entries)[pkt21_index] = a21; \
	lru_update(bucket20, pos20); \
	lru_update(bucket21, pos21); \
} while (0)
800
/*
 * Pipeline stage 2 (two packets), chained-bucket flavour: compare keys,
 * record results and remember which packets must continue in bucket->next.
 *
 * For each packet the key pointer is taken from the mbuf metadata at
 * f->key_offset and handed to lookup_key32_cmp() together with the bucket to
 * obtain a position. Bit 0 of the signature at that position becomes the
 * packet's bit in pkts_mask_out, and the address of the data at that position
 * is prefetched and stored in entries[]. The packet's bit is added to
 * buckets_mask when its result bit is clear and bucket->next_valid is set;
 * bucket->next and the key pointer are always saved in buckets[] / keys[] so
 * a later lookup_grinder() pass can pick them up.
 *
 *   pkt20_index, pkt21_index - packet slots / mask bit positions
 *   mbuf20, mbuf21           - the two packets
 *   bucket20, bucket21       - their buckets, as found by stage 1
 *   pkts_mask_out            - lvalue accumulating the per-packet result bits
 *   entries                  - per-packet result pointers
 *   buckets_mask             - lvalue accumulating packets that still have a
 *                              next bucket to search
 *   buckets, keys            - per-packet saved next bucket and key pointer
 *   f                        - table pointer (key_offset, entry_size read)
 *
 * lookup_key32_cmp() is defined elsewhere in this file; NOTE(review): its
 * behaviour on a miss is not visible here - confirm.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays correct as the body of an unbraced if/else.
 */
#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
	bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f) \
do { \
	struct rte_bucket_4_32 *bucket20_next, *bucket21_next; \
	void *a20, *a21; \
	uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask; \
	uint64_t *key20, *key21; \
	uint32_t pos20, pos21; \
	\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, (f)->key_offset); \
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, (f)->key_offset); \
	\
	lookup_key32_cmp(key20, bucket20, pos20); \
	lookup_key32_cmp(key21, bucket21, pos21); \
	\
	pkt20_mask = ((bucket20)->signature[pos20] & 1LLU) << (pkt20_index); \
	pkt21_mask = ((bucket21)->signature[pos21] & 1LLU) << (pkt21_index); \
	(pkts_mask_out) |= pkt20_mask | pkt21_mask; \
	\
	a20 = (void *) &(bucket20)->data[pos20 * (f)->entry_size]; \
	a21 = (void *) &(bucket21)->data[pos21 * (f)->entry_size]; \
	rte_prefetch0(a20); \
	rte_prefetch0(a21); \
	(entries)[pkt20_index] = a20; \
	(entries)[pkt21_index] = a21; \
	\
	/* Only packets whose bit was not set above move on to the next bucket. */ \
	bucket20_mask = (~pkt20_mask) & \
		((bucket20)->next_valid << (pkt20_index)); \
	bucket21_mask = (~pkt21_mask) & \
		((bucket21)->next_valid << (pkt21_index)); \
	(buckets_mask) |= bucket20_mask | bucket21_mask; \
	bucket20_next = (bucket20)->next; \
	bucket21_next = (bucket21)->next; \
	(buckets)[pkt20_index] = bucket20_next; \
	(buckets)[pkt21_index] = bucket21_next; \
	(keys)[pkt20_index] = key20; \
	(keys)[pkt21_index] = key21; \
} while (0)
837
/*
 * Burst lookup for the key32 LRU table.
 *
 * table           - table handle, used as a struct rte_table_hash
 * pkts            - array of mbuf pointers, indexed by bit position
 * pkts_mask       - one bit per packet to look up
 * lookup_hit_mask - output; receives the result mask accumulated by the
 *                   stage-2 macros
 * entries         - output; per-packet entry pointers written by the stage-2
 *                   macros
 *
 * Always returns 0.
 *
 * Bursts of five or more packets go through a three-stage software pipeline
 * that handles two packets per stage: stage 0 picks packets and prefetches
 * their metadata, stage 1 locates and prefetches buckets, stage 2 compares
 * keys and records results. Smaller bursts are processed one packet at a time
 * with the lookup1_* macros defined elsewhere in this file.
 */
static int
rte_table_hash_lookup_key32_lru(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_hash *f = table;
	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
	struct rte_bucket_4_32 *bucket10, *bucket11, *bucket20, *bucket21;
	uint32_t pkt00_index, pkt01_index;
	uint32_t pkt10_index, pkt11_index;
	uint32_t pkt20_index, pkt21_index;
	uint64_t pkts_mask_out = 0;

	/* Fewer than 5 packets cannot fill the pipeline: go one by one. */
	if (__builtin_popcountll(pkts_mask) < 5) {
		while (pkts_mask != 0) {
			struct rte_bucket_4_32 *bucket;
			struct rte_mbuf *mbuf;
			uint32_t pkt_index;

			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
			lookup1_stage1(mbuf, bucket, f);
			lookup1_stage2_lru(pkt_index, mbuf, bucket,
				pkts_mask_out, entries, f);
		}

		*lookup_hit_mask = pkts_mask_out;
		return 0;
	}

	/*
	 * Fill: push the first two packet pairs into the pipeline.
	 */

	/* Stage 0, first pair */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask);

	/* Advance: stage 0 -> stage 1 */
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;

	/* Stage 0, second pair */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask);

	/* Stage 1, first pair */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/*
	 * Run: all three stages are busy while packets remain.
	 */
	while (pkts_mask != 0) {
		/* Advance: stage 1 -> stage 2 */
		pkt20_index = pkt10_index;
		pkt21_index = pkt11_index;
		mbuf20 = mbuf10;
		mbuf21 = mbuf11;
		bucket20 = bucket10;
		bucket21 = bucket11;

		/* Advance: stage 0 -> stage 1 */
		pkt10_index = pkt00_index;
		pkt11_index = pkt01_index;
		mbuf10 = mbuf00;
		mbuf11 = mbuf01;

		/* Stage 0 (copes with a single remaining packet) */
		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
			mbuf00, mbuf01, pkts, pkts_mask);

		/* Stage 1 */
		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

		/* Stage 2 */
		lookup2_stage2_lru(pkt20_index, pkt21_index,
			mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out,
			entries, f);
	}

	/*
	 * Flush: drain the two pairs still inside the pipeline.
	 */

	/* Advance: stage 1 -> stage 2 */
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	bucket20 = bucket10;
	bucket21 = bucket11;

	/* Advance: stage 0 -> stage 1 */
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;

	/* Stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/* Stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index,
		mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out, entries, f);

	/* Advance: stage 1 -> stage 2 (last pair) */
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	bucket20 = bucket10;
	bucket21 = bucket11;

	/* Stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index,
		mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out, entries, f);

	*lookup_hit_mask = pkts_mask_out;
	return 0;
}
959
960 static int
961 rte_table_hash_lookup_key32_ext(
962         void *table,
963         struct rte_mbuf **pkts,
964         uint64_t pkts_mask,
965         uint64_t *lookup_hit_mask,
966         void **entries)
967 {
968         struct rte_table_hash *f = (struct rte_table_hash *) table;
969         struct rte_bucket_4_32 *bucket10, *bucket11, *bucket20, *bucket21;
970         struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
971         uint32_t pkt00_index, pkt01_index, pkt10_index;
972         uint32_t pkt11_index, pkt20_index, pkt21_index;
973         uint64_t pkts_mask_out = 0, buckets_mask = 0;
974         struct rte_bucket_4_32 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
975         uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
976
977         /* Cannot run the pipeline with less than 5 packets */
978         if (__builtin_popcountll(pkts_mask) < 5) {
979                 for ( ; pkts_mask; ) {
980                         struct rte_bucket_4_32 *bucket;
981                         struct rte_mbuf *mbuf;
982                         uint32_t pkt_index;
983
984                         lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
985                         lookup1_stage1(mbuf, bucket, f);
986                         lookup1_stage2_ext(pkt_index, mbuf, bucket,
987                                 pkts_mask_out, entries, buckets_mask, buckets,
988                                 keys, f);
989                 }
990
991                 goto grind_next_buckets;
992         }
993
994         /*
995          * Pipeline fill
996          *
997          */
998         /* Pipeline stage 0 */
999         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
1000                 pkts_mask);
1001
1002         /* Pipeline feed */
1003         mbuf10 = mbuf00;
1004         mbuf11 = mbuf01;
1005         pkt10_index = pkt00_index;
1006         pkt11_index = pkt01_index;
1007
1008         /* Pipeline stage 0 */
1009         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
1010                 pkts_mask);
1011
1012         /* Pipeline stage 1 */
1013         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1014
1015         /*
1016          * Pipeline run
1017          *
1018          */
1019         for ( ; pkts_mask; ) {
1020                 /* Pipeline feed */
1021                 bucket20 = bucket10;
1022                 bucket21 = bucket11;
1023                 mbuf20 = mbuf10;
1024                 mbuf21 = mbuf11;
1025                 mbuf10 = mbuf00;
1026                 mbuf11 = mbuf01;
1027                 pkt20_index = pkt10_index;
1028                 pkt21_index = pkt11_index;
1029                 pkt10_index = pkt00_index;
1030                 pkt11_index = pkt01_index;
1031
1032                 /* Pipeline stage 0 */
1033                 lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
1034                         mbuf00, mbuf01, pkts, pkts_mask);
1035
1036                 /* Pipeline stage 1 */
1037                 lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1038
1039                 /* Pipeline stage 2 */
1040                 lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1041                         bucket20, bucket21, pkts_mask_out, entries,
1042                         buckets_mask, buckets, keys, f);
1043         }
1044
1045         /*
1046          * Pipeline flush
1047          *
1048          */
1049         /* Pipeline feed */
1050         bucket20 = bucket10;
1051         bucket21 = bucket11;
1052         mbuf20 = mbuf10;
1053         mbuf21 = mbuf11;
1054         mbuf10 = mbuf00;
1055         mbuf11 = mbuf01;
1056         pkt20_index = pkt10_index;
1057         pkt21_index = pkt11_index;
1058         pkt10_index = pkt00_index;
1059         pkt11_index = pkt01_index;
1060
1061         /* Pipeline stage 1 */
1062         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1063
1064         /* Pipeline stage 2 */
1065         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1066                 bucket20, bucket21, pkts_mask_out, entries,
1067                 buckets_mask, buckets, keys, f);
1068
1069         /* Pipeline feed */
1070         bucket20 = bucket10;
1071         bucket21 = bucket11;
1072         mbuf20 = mbuf10;
1073         mbuf21 = mbuf11;
1074         pkt20_index = pkt10_index;
1075         pkt21_index = pkt11_index;
1076
1077         /* Pipeline stage 2 */
1078         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1079                 bucket20, bucket21, pkts_mask_out, entries,
1080                 buckets_mask, buckets, keys, f);
1081
1082 grind_next_buckets:
1083         /* Grind next buckets */
1084         for ( ; buckets_mask; ) {
1085                 uint64_t buckets_mask_next = 0;
1086
1087                 for ( ; buckets_mask; ) {
1088                         uint64_t pkt_mask;
1089                         uint32_t pkt_index;
1090
1091                         pkt_index = __builtin_ctzll(buckets_mask);
1092                         pkt_mask = 1LLU << pkt_index;
1093                         buckets_mask &= ~pkt_mask;
1094
1095                         lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
1096                                 entries, buckets_mask_next, f);
1097                 }
1098
1099                 buckets_mask = buckets_mask_next;
1100         }
1101
1102         *lookup_hit_mask = pkts_mask_out;
1103         return 0;
1104 } /* rte_table_hash_lookup_key32_ext() */
1105
1106 struct rte_table_ops rte_table_hash_key32_lru_ops = {
1107         .f_create = rte_table_hash_create_key32_lru,
1108         .f_free = rte_table_hash_free_key32_lru,
1109         .f_add = rte_table_hash_entry_add_key32_lru,
1110         .f_delete = rte_table_hash_entry_delete_key32_lru,
1111         .f_lookup = rte_table_hash_lookup_key32_lru,
1112 };
1113
1114 struct rte_table_ops rte_table_hash_key32_ext_ops = {
1115         .f_create = rte_table_hash_create_key32_ext,
1116         .f_free = rte_table_hash_free_key32_ext,
1117         .f_add = rte_table_hash_entry_add_key32_ext,
1118         .f_delete = rte_table_hash_entry_delete_key32_ext,
1119         .f_lookup = rte_table_hash_lookup_key32_ext,
1120 };