lib: fix cache alignment of structures
[dpdk.git] / lib / librte_table / rte_table_hash_key16.c
1 /*-
2  *       BSD LICENSE
3  *
4  *       Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *       All rights reserved.
6  *
7  *       Redistribution and use in source and binary forms, with or without
8  *       modification, are permitted provided that the following conditions
9  *       are met:
10  *
11  *      * Redistributions of source code must retain the above copyright
12  *               notice, this list of conditions and the following disclaimer.
13  *      * Redistributions in binary form must reproduce the above copyright
14  *               notice, this list of conditions and the following disclaimer in
15  *               the documentation and/or other materials provided with the
16  *               distribution.
17  *      * Neither the name of Intel Corporation nor the names of its
18  *               contributors may be used to endorse or promote products derived
19  *               from this software without specific prior written permission.
20  *
21  *       THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *       "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *       LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *       A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *       OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *       SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *       LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *       DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *       THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *       (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *       OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <string.h>
34 #include <stdio.h>
35
36 #include <rte_common.h>
37 #include <rte_mbuf.h>
38 #include <rte_memory.h>
39 #include <rte_malloc.h>
40 #include <rte_log.h>
41
42 #include "rte_table_hash.h"
43 #include "rte_lru.h"
44
/*
 * Key length in bytes. Not referenced in this part of the file; the create
 * functions below set key_size to the same value (16).
 */
#define RTE_TABLE_HASH_KEY_SIZE		16

/*
 * Bit 0 of a stored signature. It is OR-ed into every signature on insert,
 * so an occupied slot never holds signature 0, and the lookup macros test
 * this bit to decide whether a slot is in use.
 */
#define RTE_BUCKET_ENTRY_VALID		0x1LLU
48
/*
 * Hash bucket holding up to four 16-byte keys and their entries.
 *
 * The "Cache line N" markers assume 64-byte cache lines and 8-byte pointers.
 */
struct rte_bucket_4_16 {
	/* Cache line 0 */

	/*
	 * signature[0..3]: signature of the key stored in that slot, with bit 0
	 * set; 0 means the slot is free.
	 * signature[4]: extra slot read by the lookup macros when no key matched
	 * (pos == 4); it is expected to stay zero so that a miss yields an empty
	 * packet mask.
	 */
	uint64_t signature[4 + 1];
	/* Presumably maintained by the lru_*() helpers from rte_lru.h. */
	uint64_t lru_list;
	/* Next bucket of the chain (extendible bucket tables), or NULL. */
	struct rte_bucket_4_16 *next;
	/* Set to 1 when next points to a chained bucket. */
	uint64_t next_valid;

	/* Cache line 1 */

	/* key[i] is the 16-byte key of slot i, stored as two 64-bit words. */
	uint64_t key[4][2];

	/* Cache line 2 */

	/*
	 * Entry payloads: slot i starts at data[i * entry_size]. Declared as a
	 * C99 flexible array member; sizeof() still covers the header only.
	 */
	uint8_t data[];
};
62
63 struct rte_table_hash {
64         /* Input parameters */
65         uint32_t n_buckets;
66         uint32_t n_entries_per_bucket;
67         uint32_t key_size;
68         uint32_t entry_size;
69         uint32_t bucket_size;
70         uint32_t signature_offset;
71         uint32_t key_offset;
72         rte_table_hash_op_hash f_hash;
73         uint64_t seed;
74
75         /* Extendible buckets */
76         uint32_t n_buckets_ext;
77         uint32_t stack_pos;
78         uint32_t *stack;
79
80         /* Lookup table */
81         uint8_t memory[0] __rte_cache_aligned;
82 };
83
84 static int
85 check_params_create_lru(struct rte_table_hash_key16_lru_params *params) {
86         /* n_entries */
87         if (params->n_entries == 0) {
88                 RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
89                 return -EINVAL;
90         }
91
92         /* signature offset */
93         if ((params->signature_offset & 0x3) != 0) {
94                 RTE_LOG(ERR, TABLE, "%s: invalid signature_offset\n", __func__);
95                 return -EINVAL;
96         }
97
98         /* key offset */
99         if ((params->key_offset & 0x7) != 0) {
100                 RTE_LOG(ERR, TABLE, "%s: invalid key_offset\n", __func__);
101                 return -EINVAL;
102         }
103
104         /* f_hash */
105         if (params->f_hash == NULL) {
106                 RTE_LOG(ERR, TABLE,
107                         "%s: f_hash function pointer is NULL\n", __func__);
108                 return -EINVAL;
109         }
110
111         return 0;
112 }
113
114 static void *
115 rte_table_hash_create_key16_lru(void *params,
116                 int socket_id,
117                 uint32_t entry_size)
118 {
119         struct rte_table_hash_key16_lru_params *p =
120                         (struct rte_table_hash_key16_lru_params *) params;
121         struct rte_table_hash *f;
122         uint32_t n_buckets, n_entries_per_bucket,
123                         key_size, bucket_size_cl, total_size, i;
124
125         /* Check input parameters */
126         if ((check_params_create_lru(p) != 0) ||
127                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
128                 ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0))
129                 return NULL;
130         n_entries_per_bucket = 4;
131         key_size = 16;
132
133         /* Memory allocation */
134         n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
135                 n_entries_per_bucket);
136         bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket
137                 * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
138         total_size = sizeof(struct rte_table_hash) + n_buckets *
139                 bucket_size_cl * RTE_CACHE_LINE_SIZE;
140
141         f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
142         if (f == NULL) {
143                 RTE_LOG(ERR, TABLE,
144                 "%s: Cannot allocate %u bytes for hash table\n",
145                 __func__, total_size);
146                 return NULL;
147         }
148         RTE_LOG(INFO, TABLE,
149                 "%s: Hash table memory footprint is %u bytes\n",
150                 __func__, total_size);
151
152         /* Memory initialization */
153         f->n_buckets = n_buckets;
154         f->n_entries_per_bucket = n_entries_per_bucket;
155         f->key_size = key_size;
156         f->entry_size = entry_size;
157         f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
158         f->signature_offset = p->signature_offset;
159         f->key_offset = p->key_offset;
160         f->f_hash = p->f_hash;
161         f->seed = p->seed;
162
163         for (i = 0; i < n_buckets; i++) {
164                 struct rte_bucket_4_16 *bucket;
165
166                 bucket = (struct rte_bucket_4_16 *) &f->memory[i *
167                         f->bucket_size];
168                 lru_init(bucket);
169         }
170
171         return f;
172 }
173
174 static int
175 rte_table_hash_free_key16_lru(void *table)
176 {
177         struct rte_table_hash *f = (struct rte_table_hash *) table;
178
179         /* Check input parameters */
180         if (f == NULL) {
181                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
182                 return -EINVAL;
183         }
184
185         rte_free(f);
186         return 0;
187 }
188
189 static int
190 rte_table_hash_entry_add_key16_lru(
191         void *table,
192         void *key,
193         void *entry,
194         int *key_found,
195         void **entry_ptr)
196 {
197         struct rte_table_hash *f = (struct rte_table_hash *) table;
198         struct rte_bucket_4_16 *bucket;
199         uint64_t signature, pos;
200         uint32_t bucket_index, i;
201
202         signature = f->f_hash(key, f->key_size, f->seed);
203         bucket_index = signature & (f->n_buckets - 1);
204         bucket = (struct rte_bucket_4_16 *)
205                         &f->memory[bucket_index * f->bucket_size];
206         signature |= RTE_BUCKET_ENTRY_VALID;
207
208         /* Key is present in the bucket */
209         for (i = 0; i < 4; i++) {
210                 uint64_t bucket_signature = bucket->signature[i];
211                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
212
213                 if ((bucket_signature == signature) &&
214                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
215                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
216
217                         memcpy(bucket_data, entry, f->entry_size);
218                         lru_update(bucket, i);
219                         *key_found = 1;
220                         *entry_ptr = (void *) bucket_data;
221                         return 0;
222                 }
223         }
224
225         /* Key is not present in the bucket */
226         for (i = 0; i < 4; i++) {
227                 uint64_t bucket_signature = bucket->signature[i];
228                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
229
230                 if (bucket_signature == 0) {
231                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
232
233                         bucket->signature[i] = signature;
234                         memcpy(bucket_key, key, f->key_size);
235                         memcpy(bucket_data, entry, f->entry_size);
236                         lru_update(bucket, i);
237                         *key_found = 0;
238                         *entry_ptr = (void *) bucket_data;
239
240                         return 0;
241                 }
242         }
243
244         /* Bucket full: replace LRU entry */
245         pos = lru_pos(bucket);
246         bucket->signature[pos] = signature;
247         memcpy(bucket->key[pos], key, f->key_size);
248         memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
249         lru_update(bucket, pos);
250         *key_found = 0;
251         *entry_ptr = (void *) &bucket->data[pos * f->entry_size];
252
253         return 0;
254 }
255
256 static int
257 rte_table_hash_entry_delete_key16_lru(
258         void *table,
259         void *key,
260         int *key_found,
261         void *entry)
262 {
263         struct rte_table_hash *f = (struct rte_table_hash *) table;
264         struct rte_bucket_4_16 *bucket;
265         uint64_t signature;
266         uint32_t bucket_index, i;
267
268         signature = f->f_hash(key, f->key_size, f->seed);
269         bucket_index = signature & (f->n_buckets - 1);
270         bucket = (struct rte_bucket_4_16 *)
271                         &f->memory[bucket_index * f->bucket_size];
272         signature |= RTE_BUCKET_ENTRY_VALID;
273
274         /* Key is present in the bucket */
275         for (i = 0; i < 4; i++) {
276                 uint64_t bucket_signature = bucket->signature[i];
277                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
278
279                 if ((bucket_signature == signature) &&
280                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
281                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
282
283                         bucket->signature[i] = 0;
284                         *key_found = 1;
285                         if (entry)
286                                 memcpy(entry, bucket_data, f->entry_size);
287                         return 0;
288                 }
289         }
290
291         /* Key is not present in the bucket */
292         *key_found = 0;
293         return 0;
294 }
295
296 static int
297 check_params_create_ext(struct rte_table_hash_key16_ext_params *params) {
298         /* n_entries */
299         if (params->n_entries == 0) {
300                 RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
301                 return -EINVAL;
302         }
303
304         /* n_entries_ext */
305         if (params->n_entries_ext == 0) {
306                 RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
307                 return -EINVAL;
308         }
309
310         /* signature offset */
311         if ((params->signature_offset & 0x3) != 0) {
312                 RTE_LOG(ERR, TABLE, "%s: invalid signature offset\n", __func__);
313                 return -EINVAL;
314         }
315
316         /* key offset */
317         if ((params->key_offset & 0x7) != 0) {
318                 RTE_LOG(ERR, TABLE, "%s: invalid key offset\n", __func__);
319                 return -EINVAL;
320         }
321
322         /* f_hash */
323         if (params->f_hash == NULL) {
324                 RTE_LOG(ERR, TABLE,
325                         "%s: f_hash function pointer is NULL\n", __func__);
326                 return -EINVAL;
327         }
328
329         return 0;
330 }
331
332 static void *
333 rte_table_hash_create_key16_ext(void *params,
334                 int socket_id,
335                 uint32_t entry_size)
336 {
337         struct rte_table_hash_key16_ext_params *p =
338                         (struct rte_table_hash_key16_ext_params *) params;
339         struct rte_table_hash *f;
340         uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket, key_size,
341                         bucket_size_cl, stack_size_cl, total_size, i;
342
343         /* Check input parameters */
344         if ((check_params_create_ext(p) != 0) ||
345                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
346                 ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0))
347                 return NULL;
348
349         n_entries_per_bucket = 4;
350         key_size = 16;
351
352         /* Memory allocation */
353         n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
354                 n_entries_per_bucket);
355         n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) /
356                 n_entries_per_bucket;
357         bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket
358                 * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
359         stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1)
360                 / RTE_CACHE_LINE_SIZE;
361         total_size = sizeof(struct rte_table_hash) +
362                 ((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) *
363                 RTE_CACHE_LINE_SIZE;
364
365         f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
366         if (f == NULL) {
367                 RTE_LOG(ERR, TABLE,
368                         "%s: Cannot allocate %u bytes for hash table\n",
369                         __func__, total_size);
370                 return NULL;
371         }
372         RTE_LOG(INFO, TABLE,
373                 "%s: Hash table memory footprint is %u bytes\n",
374                 __func__, total_size);
375
376         /* Memory initialization */
377         f->n_buckets = n_buckets;
378         f->n_entries_per_bucket = n_entries_per_bucket;
379         f->key_size = key_size;
380         f->entry_size = entry_size;
381         f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
382         f->signature_offset = p->signature_offset;
383         f->key_offset = p->key_offset;
384         f->f_hash = p->f_hash;
385         f->seed = p->seed;
386
387         f->n_buckets_ext = n_buckets_ext;
388         f->stack_pos = n_buckets_ext;
389         f->stack = (uint32_t *)
390                 &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];
391
392         for (i = 0; i < n_buckets_ext; i++)
393                 f->stack[i] = i;
394
395         return f;
396 }
397
398 static int
399 rte_table_hash_free_key16_ext(void *table)
400 {
401         struct rte_table_hash *f = (struct rte_table_hash *) table;
402
403         /* Check input parameters */
404         if (f == NULL) {
405                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
406                 return -EINVAL;
407         }
408
409         rte_free(f);
410         return 0;
411 }
412
413 static int
414 rte_table_hash_entry_add_key16_ext(
415         void *table,
416         void *key,
417         void *entry,
418         int *key_found,
419         void **entry_ptr)
420 {
421         struct rte_table_hash *f = (struct rte_table_hash *) table;
422         struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev;
423         uint64_t signature;
424         uint32_t bucket_index, i;
425
426         signature = f->f_hash(key, f->key_size, f->seed);
427         bucket_index = signature & (f->n_buckets - 1);
428         bucket0 = (struct rte_bucket_4_16 *)
429                         &f->memory[bucket_index * f->bucket_size];
430         signature |= RTE_BUCKET_ENTRY_VALID;
431
432         /* Key is present in the bucket */
433         for (bucket = bucket0; bucket != NULL; bucket = bucket->next)
434                 for (i = 0; i < 4; i++) {
435                         uint64_t bucket_signature = bucket->signature[i];
436                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
437
438                         if ((bucket_signature == signature) &&
439                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
440                                 uint8_t *bucket_data = &bucket->data[i *
441                                         f->entry_size];
442
443                                 memcpy(bucket_data, entry, f->entry_size);
444                                 *key_found = 1;
445                                 *entry_ptr = (void *) bucket_data;
446                                 return 0;
447                         }
448                 }
449
450         /* Key is not present in the bucket */
451         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
452                          bucket_prev = bucket, bucket = bucket->next)
453                 for (i = 0; i < 4; i++) {
454                         uint64_t bucket_signature = bucket->signature[i];
455                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
456
457                         if (bucket_signature == 0) {
458                                 uint8_t *bucket_data = &bucket->data[i *
459                                         f->entry_size];
460
461                                 bucket->signature[i] = signature;
462                                 memcpy(bucket_key, key, f->key_size);
463                                 memcpy(bucket_data, entry, f->entry_size);
464                                 *key_found = 0;
465                                 *entry_ptr = (void *) bucket_data;
466
467                                 return 0;
468                         }
469                 }
470
471         /* Bucket full: extend bucket */
472         if (f->stack_pos > 0) {
473                 bucket_index = f->stack[--f->stack_pos];
474
475                 bucket = (struct rte_bucket_4_16 *) &f->memory[(f->n_buckets +
476                         bucket_index) * f->bucket_size];
477                 bucket_prev->next = bucket;
478                 bucket_prev->next_valid = 1;
479
480                 bucket->signature[0] = signature;
481                 memcpy(bucket->key[0], key, f->key_size);
482                 memcpy(&bucket->data[0], entry, f->entry_size);
483                 *key_found = 0;
484                 *entry_ptr = (void *) &bucket->data[0];
485                 return 0;
486         }
487
488         return -ENOSPC;
489 }
490
491 static int
492 rte_table_hash_entry_delete_key16_ext(
493         void *table,
494         void *key,
495         int *key_found,
496         void *entry)
497 {
498         struct rte_table_hash *f = (struct rte_table_hash *) table;
499         struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev;
500         uint64_t signature;
501         uint32_t bucket_index, i;
502
503         signature = f->f_hash(key, f->key_size, f->seed);
504         bucket_index = signature & (f->n_buckets - 1);
505         bucket0 = (struct rte_bucket_4_16 *)
506                 &f->memory[bucket_index * f->bucket_size];
507         signature |= RTE_BUCKET_ENTRY_VALID;
508
509         /* Key is present in the bucket */
510         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
511                 bucket_prev = bucket, bucket = bucket->next)
512                 for (i = 0; i < 4; i++) {
513                         uint64_t bucket_signature = bucket->signature[i];
514                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
515
516                         if ((bucket_signature == signature) &&
517                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
518                                 uint8_t *bucket_data = &bucket->data[i *
519                                         f->entry_size];
520
521                                 bucket->signature[i] = 0;
522                                 *key_found = 1;
523                                 if (entry)
524                                         memcpy(entry, bucket_data,
525                                         f->entry_size);
526
527                                 if ((bucket->signature[0] == 0) &&
528                                         (bucket->signature[1] == 0) &&
529                                         (bucket->signature[2] == 0) &&
530                                         (bucket->signature[3] == 0) &&
531                                         (bucket_prev != NULL)) {
532                                         bucket_prev->next = bucket->next;
533                                         bucket_prev->next_valid =
534                                                 bucket->next_valid;
535
536                                         memset(bucket, 0,
537                                                 sizeof(struct rte_bucket_4_16));
538                                         bucket_index = (bucket -
539                                                 ((struct rte_bucket_4_16 *)
540                                                 f->memory)) - f->n_buckets;
541                                         f->stack[f->stack_pos++] = bucket_index;
542                                 }
543
544                                 return 0;
545                         }
546                 }
547
548         /* Key is not present in the bucket */
549         *key_found = 0;
550         return 0;
551 }
552
/*
 * Compare the 16-byte key key_in (two 64-bit words) against the four slots
 * of bucket and store the matching slot index in pos.
 *
 * or[i] is zero only when slot i has its valid bit (bit 0 of the signature)
 * set and both key words are equal. pos is left at 4 when no slot matches;
 * when several slots match, the highest index wins.
 */
#define lookup_key16_cmp(key_in, bucket, pos)				\
{									\
	uint64_t or[4];							\
									\
	or[0] = (key_in[0] ^ bucket->key[0][0]) |			\
		(key_in[1] ^ bucket->key[0][1]) |			\
		((~bucket->signature[0]) & 1);				\
	or[1] = (key_in[0] ^ bucket->key[1][0]) |			\
		(key_in[1] ^ bucket->key[1][1]) |			\
		((~bucket->signature[1]) & 1);				\
	or[2] = (key_in[0] ^ bucket->key[2][0]) |			\
		(key_in[1] ^ bucket->key[2][1]) |			\
		((~bucket->signature[2]) & 1);				\
	or[3] = (key_in[0] ^ bucket->key[3][0]) |			\
		(key_in[1] ^ bucket->key[3][1]) |			\
		((~bucket->signature[3]) & 1);				\
									\
	pos = 4;							\
	if (or[0] == 0)							\
		pos = 0;						\
	if (or[1] == 0)							\
		pos = 1;						\
	if (or[2] == 0)							\
		pos = 2;						\
	if (or[3] == 0)							\
		pos = 3;						\
}
589
/*
 * Single-packet pipeline, stage 0: take the lowest pending packet out of
 * pkts_mask, store its index in pkt0_index and its mbuf in mbuf0, and
 * prefetch the start of the mbuf meta-data.
 *
 * pkts_mask must not be zero (__builtin_ctzll() is undefined for 0).
 */
#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask)		\
{									\
	pkt0_index = __builtin_ctzll(pkts_mask);			\
	pkts_mask &= ~(1LLU << pkt0_index);				\
									\
	mbuf0 = pkts[pkt0_index];					\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, 0));		\
}
601
/*
 * Single-packet pipeline, stage 1: read the signature from the meta-data of
 * mbuf1 at f->signature_offset, select the bucket from its low bits, store
 * the bucket pointer in bucket1 and prefetch the bucket's first two cache
 * lines.
 */
#define lookup1_stage1(mbuf1, bucket1, f)				\
{									\
	uint32_t bucket_index;						\
									\
	bucket_index =							\
		RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset) &	\
		(f->n_buckets - 1);					\
	bucket1 = (struct rte_bucket_4_16 *)				\
		&f->memory[bucket_index * f->bucket_size];		\
									\
	rte_prefetch0(bucket1);						\
	rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\
}
614
/*
 * Single-packet pipeline, stage 2 (LRU tables): compare the packet key read
 * from the meta-data of mbuf2 at f->key_offset with the keys of bucket2.
 *
 * Bit pkt2_index of pkts_mask_out is set when a valid slot matched.
 * entries[pkt2_index] is always written: on a miss pos is 4, so it points
 * just past the fourth entry and signature[4] supplies the (zero) hit bit.
 * The bucket's LRU state is then updated with pos.
 */
#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,			\
		pkts_mask_out, entries, f)				\
{									\
	void *a;							\
	uint64_t *key;							\
	uint32_t pos;							\
									\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);	\
	lookup_key16_cmp(key, bucket2, pos);				\
									\
	pkts_mask_out |= (bucket2->signature[pos] & 1LLU) << pkt2_index;\
									\
	a = (void *) &bucket2->data[pos * f->entry_size];		\
	rte_prefetch0(a);						\
	entries[pkt2_index] = a;					\
									\
	lru_update(bucket2, pos);					\
}
635
/*
 * Single-packet pipeline, stage 2 (extendible bucket tables): compare the
 * packet key read from the meta-data of mbuf2 at f->key_offset with the
 * keys of bucket2.
 *
 * Bit pkt2_index of pkts_mask_out is set on a hit and entries[pkt2_index]
 * is written either way. On a miss, when bucket2 has a chained bucket
 * (next_valid), bit pkt2_index of buckets_mask is set. buckets[pkt2_index]
 * and keys[pkt2_index] are always written with the next bucket and the key
 * pointer.
 */
#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out, entries, \
	buckets_mask, buckets, keys, f)					\
{									\
	void *a;							\
	uint64_t pkt_mask;						\
	uint64_t *key;							\
	uint32_t pos;							\
									\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);	\
	lookup_key16_cmp(key, bucket2, pos);				\
									\
	/* Hit bit of this packet (signature[4] is read on a miss) */	\
	pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;	\
	pkts_mask_out |= pkt_mask;					\
									\
	a = (void *) &bucket2->data[pos * f->entry_size];		\
	rte_prefetch0(a);						\
	entries[pkt2_index] = a;					\
									\
	/* Miss with a chained bucket: the chain still has to be walked */\
	buckets_mask |= (~pkt_mask) & (bucket2->next_valid << pkt2_index);\
	buckets[pkt2_index] = bucket2->next;				\
	keys[pkt2_index] = key;						\
}
662
/*
 * Chain walker for extendible bucket tables: process one more bucket of the
 * chain for packet pkt_index, using the bucket and key pointers saved in
 * buckets[pkt_index] and keys[pkt_index].
 *
 * Bit pkt_index of pkts_mask_out is set on a hit and entries[pkt_index] is
 * written either way. On a miss, when the bucket has a chained bucket
 * (next_valid), bit pkt_index of buckets_mask is set. The next bucket is
 * prefetched (first two cache lines) and saved back in buckets[pkt_index].
 */
#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,\
	buckets_mask, f)						\
{									\
	struct rte_bucket_4_16 *bucket, *bucket_next;			\
	void *a;							\
	uint64_t pkt_mask;						\
	uint64_t *key;							\
	uint32_t pos;							\
									\
	bucket = buckets[pkt_index];					\
	key = keys[pkt_index];						\
	lookup_key16_cmp(key, bucket, pos);				\
									\
	/* Hit bit of this packet (signature[4] is read on a miss) */	\
	pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index;	\
	pkts_mask_out |= pkt_mask;					\
									\
	a = (void *) &bucket->data[pos * f->entry_size];		\
	rte_prefetch0(a);						\
	entries[pkt_index] = a;						\
									\
	/* Miss with a chained bucket: keep this packet in the grinder */\
	buckets_mask |= (~pkt_mask) & (bucket->next_valid << pkt_index);\
									\
	bucket_next = bucket->next;					\
	rte_prefetch0(bucket_next);					\
	rte_prefetch0((void *)(((uintptr_t) bucket_next) + RTE_CACHE_LINE_SIZE));\
	buckets[pkt_index] = bucket_next;				\
	keys[pkt_index] = key;						\
}
692
/*
 * Two-packet pipeline, stage 0: take the two lowest pending packets out of
 * pkts_mask, store their indices in pkt00_index / pkt01_index and their
 * mbufs in mbuf00 / mbuf01, and prefetch the start of each mbuf's meta-data.
 *
 * pkts_mask must have at least two bits set (__builtin_ctzll() is undefined
 * for 0).
 */
#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,	\
		pkts, pkts_mask)					\
{									\
	/* First packet */						\
	pkt00_index = __builtin_ctzll(pkts_mask);			\
	pkts_mask &= ~(1LLU << pkt00_index);				\
									\
	mbuf00 = pkts[pkt00_index];					\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));		\
									\
	/* Second packet */						\
	pkt01_index = __builtin_ctzll(pkts_mask);			\
	pkts_mask &= ~(1LLU << pkt01_index);				\
									\
	mbuf01 = pkts[pkt01_index];					\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));		\
}
712
/*
 * Pipeline stage 0, two packets per invocation, tolerating a single
 * remaining packet.
 *
 * The lowest set bit of pkts_mask is popped into pkt00_index / mbuf00. If a
 * further bit remains it is popped into pkt01_index / mbuf01; otherwise the
 * second slot duplicates the first one (pkt01_index == pkt00_index), so the
 * later stages always have two packets to work on. pkts_mask must be
 * non-zero on entry.
 *
 * __builtin_ctzll() is only invoked on a non-zero mask: its result is
 * undefined for a zero argument.
 */
#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
                mbuf00, mbuf01, pkts, pkts_mask)                \
{                                                               \
        uint64_t pkt00_mask, pkt01_mask;                        \
                                                                \
        pkt00_index = __builtin_ctzll(pkts_mask);               \
        pkt00_mask = 1LLU << pkt00_index;                       \
        pkts_mask &= ~pkt00_mask;                               \
                                                                \
        mbuf00 = pkts[pkt00_index];                             \
        rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));  \
                                                                \
        /* No packet left: replay the first one in slot 01 */   \
        if (pkts_mask != 0)                                     \
                pkt01_index = __builtin_ctzll(pkts_mask);       \
        else                                                    \
                pkt01_index = pkt00_index;                      \
        pkt01_mask = 1LLU << pkt01_index;                       \
        pkts_mask &= ~pkt01_mask;                               \
                                                                \
        mbuf01 = pkts[pkt01_index];                             \
        rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));  \
}
734
/*
 * Pipeline stage 1, two packets per invocation.
 *
 * For each mbuf, the 32-bit value at metadata offset f->signature_offset is
 * masked with (f->n_buckets - 1) to obtain a bucket index (presumably
 * n_buckets is a power of two -- confirm at table creation). The bucket
 * address inside f->memory (index * f->bucket_size) is stored in
 * bucket10 / bucket11, and both that address and the address one
 * RTE_CACHE_LINE_SIZE further are prefetched.
 */
#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)           \
{                                                                       \
        uint64_t sig10, sig11;                                          \
        uint32_t slot10, slot11;                                        \
                                                                        \
        /* Signature -> bucket index, for both packets */               \
        sig10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);  \
        slot10 = sig10 & (f->n_buckets - 1);                            \
        sig11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);  \
        slot11 = sig11 & (f->n_buckets - 1);                            \
                                                                        \
        /* Locate and prefetch the bucket of the first packet */        \
        bucket10 = (struct rte_bucket_4_16 *)                           \
                &f->memory[slot10 * f->bucket_size];                    \
        rte_prefetch0(bucket10);                                        \
        rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\
                                                                        \
        /* Locate and prefetch the bucket of the second packet */       \
        bucket11 = (struct rte_bucket_4_16 *)                           \
                &f->memory[slot11 * f->bucket_size];                    \
        rte_prefetch0(bucket11);                                        \
        rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\
}
754
/*
 * Pipeline stage 2 (LRU table), two packets per invocation.
 *
 * For each packet the key pointer is taken from the mbuf metadata at
 * f->key_offset and lookup_key16_cmp() selects a position inside the bucket.
 * Bit 0 of the signature word at that position becomes the packet's bit in
 * pkts_mask_out. The entry address (bucket data + pos * f->entry_size) is
 * prefetched and written to entries[pkt_index] whether or not the hit bit is
 * set. Finally lru_update() is invoked on each bucket with the selected
 * position.
 */
#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,    \
                bucket20, bucket21, pkts_mask_out, entries, f)          \
{                                                                       \
        void *entry20, *entry21;                                        \
        uint64_t hit20, hit21;                                          \
        uint64_t *key20, *key21;                                        \
        uint32_t pos20, pos21;                                          \
                                                                        \
        key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);    \
        key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);    \
                                                                        \
        lookup_key16_cmp(key20, bucket20, pos20);                       \
        lookup_key16_cmp(key21, bucket21, pos21);                       \
                                                                        \
        /* First packet: hit bit and entry pointer */                   \
        hit20 = (bucket20->signature[pos20] & 1LLU) << pkt20_index;     \
        entry20 = (void *) &bucket20->data[pos20 * f->entry_size];      \
        rte_prefetch0(entry20);                                         \
        entries[pkt20_index] = entry20;                                 \
                                                                        \
        /* Second packet: hit bit and entry pointer */                  \
        hit21 = (bucket21->signature[pos21] & 1LLU) << pkt21_index;     \
        entry21 = (void *) &bucket21->data[pos21 * f->entry_size];      \
        rte_prefetch0(entry21);                                         \
        entries[pkt21_index] = entry21;                                 \
                                                                        \
        pkts_mask_out |= hit20 | hit21;                                 \
                                                                        \
        lru_update(bucket20, pos20);                                    \
        lru_update(bucket21, pos21);                                    \
}
782
/*
 * Pipeline stage 2 (extendible bucket table), two packets per invocation.
 *
 * For each packet the key pointer is taken from the mbuf metadata at
 * f->key_offset and lookup_key16_cmp() selects a position inside the bucket.
 * Bit 0 of the signature word at that position becomes the packet's bit in
 * pkts_mask_out, and the entry address is prefetched and written to
 * entries[pkt_index] whether or not the hit bit is set.
 *
 * A packet whose hit bit is clear and whose bucket has next_valid set gets
 * its bit set in buckets_mask. buckets[pkt_index] and keys[pkt_index] are
 * always written (bucket->next and the key pointer) so that a later pass can
 * continue the search in the next bucket.
 */
#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
        bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f) \
{                                                                       \
        void *entry20, *entry21;                                        \
        uint64_t hit20, hit21;                                          \
        uint64_t *key20, *key21;                                        \
        uint32_t pos20, pos21;                                          \
                                                                        \
        key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);    \
        key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);    \
                                                                        \
        lookup_key16_cmp(key20, bucket20, pos20);                       \
        lookup_key16_cmp(key21, bucket21, pos21);                       \
                                                                        \
        /* Hit bits of both packets */                                  \
        hit20 = (bucket20->signature[pos20] & 1LLU) << pkt20_index;     \
        hit21 = (bucket21->signature[pos21] & 1LLU) << pkt21_index;     \
        pkts_mask_out |= hit20 | hit21;                                 \
                                                                        \
        /* Entry pointers, stored regardless of hit or miss */          \
        entry20 = (void *) &bucket20->data[pos20 * f->entry_size];      \
        rte_prefetch0(entry20);                                         \
        entries[pkt20_index] = entry20;                                 \
        entry21 = (void *) &bucket21->data[pos21 * f->entry_size];      \
        rte_prefetch0(entry21);                                         \
        entries[pkt21_index] = entry21;                                 \
                                                                        \
        /* Misses with a valid next bucket stay pending */              \
        buckets_mask |= (~hit20) & (bucket20->next_valid << pkt20_index);\
        buckets_mask |= (~hit21) & (bucket21->next_valid << pkt21_index);\
        buckets[pkt20_index] = bucket20->next;                          \
        buckets[pkt21_index] = bucket21->next;                          \
        keys[pkt20_index] = key20;                                      \
        keys[pkt21_index] = key21;                                      \
}
819
/*
 * Burst lookup for the 16-byte key hash table, LRU variant.
 *
 * table           - table handle, used as a struct rte_table_hash pointer
 * pkts            - mbuf array, indexed by bit position in pkts_mask
 * pkts_mask       - bit mask of the packets to look up
 * lookup_hit_mask - output: receives the mask accumulated by the stage 2
 *                   macros (one bit per packet reported as a hit)
 * entries         - output array filled in by the stage 2 macros, indexed
 *                   by packet position
 *
 * Bursts of fewer than 5 packets are handled one packet at a time with the
 * lookup1_* macros. Larger bursts go through a three-stage software
 * pipeline that processes two packets per stage. Always returns 0.
 */
static int
rte_table_hash_lookup_key16_lru(
        void *table,
        struct rte_mbuf **pkts,
        uint64_t pkts_mask,
        uint64_t *lookup_hit_mask,
        void **entries)
{
        struct rte_table_hash *f = table;
        struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
        struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
        uint32_t pkt00_index, pkt01_index;
        uint32_t pkt10_index, pkt11_index;
        uint32_t pkt20_index, pkt21_index;
        uint64_t pkts_mask_out = 0;

        /* The pipeline needs at least 5 packets; otherwise go one by one */
        if (__builtin_popcountll(pkts_mask) < 5) {
                while (pkts_mask) {
                        struct rte_bucket_4_16 *bucket;
                        struct rte_mbuf *mbuf;
                        uint32_t pkt_index;

                        lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
                        lookup1_stage1(mbuf, bucket, f);
                        lookup1_stage2_lru(pkt_index, mbuf, bucket,
                                pkts_mask_out, entries, f);
                }

                *lookup_hit_mask = pkts_mask_out;
                return 0;
        }

        /*
         * Pipeline fill: run stage 0 twice and stage 1 once, so that every
         * stage has a packet pair when the main loop starts.
         */
        lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
                pkts_mask);

        /* Feed: stage 0 results move to stage 1 */
        mbuf10 = mbuf00;
        pkt10_index = pkt00_index;
        mbuf11 = mbuf01;
        pkt11_index = pkt01_index;

        lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
                pkts_mask);

        lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

        /*
         * Pipeline run: one iteration per remaining packet pair. The last
         * pair may hold a single packet, hence the odd-support stage 0.
         */
        while (pkts_mask) {
                /* Feed: stage 1 -> stage 2, then stage 0 -> stage 1 */
                bucket20 = bucket10;
                bucket21 = bucket11;
                mbuf20 = mbuf10;
                mbuf10 = mbuf00;
                mbuf21 = mbuf11;
                mbuf11 = mbuf01;
                pkt20_index = pkt10_index;
                pkt10_index = pkt00_index;
                pkt21_index = pkt11_index;
                pkt11_index = pkt01_index;

                /* Stage 0 */
                lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
                        mbuf00, mbuf01, pkts, pkts_mask);

                /* Stage 1 */
                lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

                /* Stage 2 */
                lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
                        bucket20, bucket21, pkts_mask_out, entries, f);
        }

        /*
         * Pipeline flush, step 1: no new packets enter; the pair in
         * stage 0 goes through stage 1 and the pair in stage 1 through
         * stage 2.
         */
        bucket20 = bucket10;
        bucket21 = bucket11;
        mbuf20 = mbuf10;
        mbuf10 = mbuf00;
        mbuf21 = mbuf11;
        mbuf11 = mbuf01;
        pkt20_index = pkt10_index;
        pkt10_index = pkt00_index;
        pkt21_index = pkt11_index;
        pkt11_index = pkt01_index;

        lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

        lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
                bucket20, bucket21, pkts_mask_out, entries, f);

        /* Pipeline flush, step 2: the final pair goes through stage 2 */
        bucket20 = bucket10;
        bucket21 = bucket11;
        mbuf20 = mbuf10;
        mbuf21 = mbuf11;
        pkt20_index = pkt10_index;
        pkt21_index = pkt11_index;

        lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
                bucket20, bucket21, pkts_mask_out, entries, f);

        *lookup_hit_mask = pkts_mask_out;
        return 0;
} /* rte_table_hash_lookup_key16_lru() */
940
941 static int
942 rte_table_hash_lookup_key16_ext(
943         void *table,
944         struct rte_mbuf **pkts,
945         uint64_t pkts_mask,
946         uint64_t *lookup_hit_mask,
947         void **entries)
948 {
949         struct rte_table_hash *f = (struct rte_table_hash *) table;
950         struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
951         struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
952         uint32_t pkt00_index, pkt01_index, pkt10_index;
953         uint32_t pkt11_index, pkt20_index, pkt21_index;
954         uint64_t pkts_mask_out = 0, buckets_mask = 0;
955         struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
956         uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
957
958         /* Cannot run the pipeline with less than 5 packets */
959         if (__builtin_popcountll(pkts_mask) < 5) {
960                 for ( ; pkts_mask; ) {
961                         struct rte_bucket_4_16 *bucket;
962                         struct rte_mbuf *mbuf;
963                         uint32_t pkt_index;
964
965                         lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
966                         lookup1_stage1(mbuf, bucket, f);
967                         lookup1_stage2_ext(pkt_index, mbuf, bucket,
968                                 pkts_mask_out, entries, buckets_mask,
969                                 buckets, keys, f);
970                 }
971
972                 goto grind_next_buckets;
973         }
974
975         /*
976          * Pipeline fill
977          *
978          */
979         /* Pipeline stage 0 */
980         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
981                 pkts_mask);
982
983         /* Pipeline feed */
984         mbuf10 = mbuf00;
985         mbuf11 = mbuf01;
986         pkt10_index = pkt00_index;
987         pkt11_index = pkt01_index;
988
989         /* Pipeline stage 0 */
990         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
991                 pkts_mask);
992
993         /* Pipeline stage 1 */
994         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
995
996         /*
997          * Pipeline run
998          *
999          */
1000         for ( ; pkts_mask; ) {
1001                 /* Pipeline feed */
1002                 bucket20 = bucket10;
1003                 bucket21 = bucket11;
1004                 mbuf20 = mbuf10;
1005                 mbuf21 = mbuf11;
1006                 mbuf10 = mbuf00;
1007                 mbuf11 = mbuf01;
1008                 pkt20_index = pkt10_index;
1009                 pkt21_index = pkt11_index;
1010                 pkt10_index = pkt00_index;
1011                 pkt11_index = pkt01_index;
1012
1013                 /* Pipeline stage 0 */
1014                 lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
1015                         mbuf00, mbuf01, pkts, pkts_mask);
1016
1017                 /* Pipeline stage 1 */
1018                 lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1019
1020                 /* Pipeline stage 2 */
1021                 lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1022                         bucket20, bucket21, pkts_mask_out, entries,
1023                         buckets_mask, buckets, keys, f);
1024         }
1025
1026         /*
1027          * Pipeline flush
1028          *
1029          */
1030         /* Pipeline feed */
1031         bucket20 = bucket10;
1032         bucket21 = bucket11;
1033         mbuf20 = mbuf10;
1034         mbuf21 = mbuf11;
1035         mbuf10 = mbuf00;
1036         mbuf11 = mbuf01;
1037         pkt20_index = pkt10_index;
1038         pkt21_index = pkt11_index;
1039         pkt10_index = pkt00_index;
1040         pkt11_index = pkt01_index;
1041
1042         /* Pipeline stage 1 */
1043         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1044
1045         /* Pipeline stage 2 */
1046         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1047                 bucket20, bucket21, pkts_mask_out, entries,
1048                 buckets_mask, buckets, keys, f);
1049
1050         /* Pipeline feed */
1051         bucket20 = bucket10;
1052         bucket21 = bucket11;
1053         mbuf20 = mbuf10;
1054         mbuf21 = mbuf11;
1055         pkt20_index = pkt10_index;
1056         pkt21_index = pkt11_index;
1057
1058         /* Pipeline stage 2 */
1059         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1060                 bucket20, bucket21, pkts_mask_out, entries,
1061                 buckets_mask, buckets, keys, f);
1062
1063 grind_next_buckets:
1064         /* Grind next buckets */
1065         for ( ; buckets_mask; ) {
1066                 uint64_t buckets_mask_next = 0;
1067
1068                 for ( ; buckets_mask; ) {
1069                         uint64_t pkt_mask;
1070                         uint32_t pkt_index;
1071
1072                         pkt_index = __builtin_ctzll(buckets_mask);
1073                         pkt_mask = 1LLU << pkt_index;
1074                         buckets_mask &= ~pkt_mask;
1075
1076                         lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
1077                                 entries, buckets_mask_next, f);
1078                 }
1079
1080                 buckets_mask = buckets_mask_next;
1081         }
1082
1083         *lookup_hit_mask = pkts_mask_out;
1084         return 0;
1085 } /* rte_table_hash_lookup_key16_ext() */
1086
1087 struct rte_table_ops rte_table_hash_key16_lru_ops = {
1088         .f_create = rte_table_hash_create_key16_lru,
1089         .f_free = rte_table_hash_free_key16_lru,
1090         .f_add = rte_table_hash_entry_add_key16_lru,
1091         .f_delete = rte_table_hash_entry_delete_key16_lru,
1092         .f_lookup = rte_table_hash_lookup_key16_lru,
1093 };
1094
1095 struct rte_table_ops rte_table_hash_key16_ext_ops = {
1096         .f_create = rte_table_hash_create_key16_ext,
1097         .f_free = rte_table_hash_free_key16_ext,
1098         .f_add = rte_table_hash_entry_add_key16_ext,
1099         .f_delete = rte_table_hash_entry_delete_key16_ext,
1100         .f_lookup = rte_table_hash_lookup_key16_ext,
1101 };