add prefix to cache line macros
[dpdk.git] / lib / librte_table / rte_table_hash_key16.c
1 /*-
2  *       BSD LICENSE
3  *
4  *       Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *       All rights reserved.
6  *
7  *       Redistribution and use in source and binary forms, with or without
8  *       modification, are permitted provided that the following conditions
9  *       are met:
10  *
11  *      * Redistributions of source code must retain the above copyright
12  *               notice, this list of conditions and the following disclaimer.
13  *      * Redistributions in binary form must reproduce the above copyright
14  *               notice, this list of conditions and the following disclaimer in
15  *               the documentation and/or other materials provided with the
16  *               distribution.
17  *      * Neither the name of Intel Corporation nor the names of its
18  *               contributors may be used to endorse or promote products derived
19  *               from this software without specific prior written permission.
20  *
21  *       THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *       "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *       LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *       A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *       OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *       SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *       LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *       DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *       THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *       (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *       OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <string.h>
34 #include <stdio.h>
35
36 #include <rte_common.h>
37 #include <rte_mbuf.h>
38 #include <rte_malloc.h>
39 #include <rte_log.h>
40
41 #include "rte_table_hash.h"
42 #include "rte_lru.h"
43
/* Size in bytes of the fixed-length keys handled by this table type */
#define RTE_TABLE_HASH_KEY_SIZE						16

/*
 * Bit 0 of a slot signature. It is OR-ed into every signature stored by the
 * add operations and tested by the lookup macros, so a signature of zero
 * always denotes a free slot.
 */
#define RTE_BUCKET_ENTRY_VALID						0x1LLU

/*
 * Bucket holding up to four 16-byte keys.
 *
 * The fixed part spans two cache lines (with 64-byte lines and 8-byte
 * pointers): slot signatures and chaining state first, then the four keys.
 * The per-slot entry data follows immediately; slot i owns the entry_size
 * bytes starting at data[i * entry_size].
 */
struct rte_bucket_4_16 {
	/* Cache line 0 */
	/*
	 * One signature per slot plus a fifth element. The add/delete code in
	 * this file never writes element 4; the lookup macros read it when no
	 * slot matches (pos == 4), which yields a cleared valid bit.
	 */
	uint64_t signature[4 + 1];
	/* State owned by the lru_init/lru_pos/lru_update helpers (rte_lru.h) */
	uint64_t lru_list;
	/* Next bucket of the chain (extendible bucket flavour only) */
	struct rte_bucket_4_16 *next;
	/* 1 when next points to a chained bucket, 0 otherwise */
	uint64_t next_valid;

	/* Cache line 1 */
	uint64_t key[4][2];

	/* Cache line 2 */
	/* C99 flexible array member: storage is sized by the table creator */
	uint8_t data[];
};
61
62 struct rte_table_hash {
63         /* Input parameters */
64         uint32_t n_buckets;
65         uint32_t n_entries_per_bucket;
66         uint32_t key_size;
67         uint32_t entry_size;
68         uint32_t bucket_size;
69         uint32_t signature_offset;
70         uint32_t key_offset;
71         rte_table_hash_op_hash f_hash;
72         uint64_t seed;
73
74         /* Extendible buckets */
75         uint32_t n_buckets_ext;
76         uint32_t stack_pos;
77         uint32_t *stack;
78
79         /* Lookup table */
80         uint8_t memory[0] __rte_cache_aligned;
81 };
82
83 static int
84 check_params_create_lru(struct rte_table_hash_key16_lru_params *params) {
85         /* n_entries */
86         if (params->n_entries == 0) {
87                 RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
88                 return -EINVAL;
89         }
90
91         /* signature offset */
92         if ((params->signature_offset & 0x3) != 0) {
93                 RTE_LOG(ERR, TABLE, "%s: invalid signature_offset\n", __func__);
94                 return -EINVAL;
95         }
96
97         /* key offset */
98         if ((params->key_offset & 0x7) != 0) {
99                 RTE_LOG(ERR, TABLE, "%s: invalid key_offset\n", __func__);
100                 return -EINVAL;
101         }
102
103         /* f_hash */
104         if (params->f_hash == NULL) {
105                 RTE_LOG(ERR, TABLE,
106                         "%s: f_hash function pointer is NULL\n", __func__);
107                 return -EINVAL;
108         }
109
110         return 0;
111 }
112
113 static void *
114 rte_table_hash_create_key16_lru(void *params,
115                 int socket_id,
116                 uint32_t entry_size)
117 {
118         struct rte_table_hash_key16_lru_params *p =
119                         (struct rte_table_hash_key16_lru_params *) params;
120         struct rte_table_hash *f;
121         uint32_t n_buckets, n_entries_per_bucket,
122                         key_size, bucket_size_cl, total_size, i;
123
124         /* Check input parameters */
125         if ((check_params_create_lru(p) != 0) ||
126                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
127                 ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0))
128                 return NULL;
129         n_entries_per_bucket = 4;
130         key_size = 16;
131
132         /* Memory allocation */
133         n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
134                 n_entries_per_bucket);
135         bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket
136                 * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
137         total_size = sizeof(struct rte_table_hash) + n_buckets *
138                 bucket_size_cl * RTE_CACHE_LINE_SIZE;
139
140         f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
141         if (f == NULL) {
142                 RTE_LOG(ERR, TABLE,
143                 "%s: Cannot allocate %u bytes for hash table\n",
144                 __func__, total_size);
145                 return NULL;
146         }
147         RTE_LOG(INFO, TABLE,
148                 "%s: Hash table memory footprint is %u bytes\n",
149                 __func__, total_size);
150
151         /* Memory initialization */
152         f->n_buckets = n_buckets;
153         f->n_entries_per_bucket = n_entries_per_bucket;
154         f->key_size = key_size;
155         f->entry_size = entry_size;
156         f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
157         f->signature_offset = p->signature_offset;
158         f->key_offset = p->key_offset;
159         f->f_hash = p->f_hash;
160         f->seed = p->seed;
161
162         for (i = 0; i < n_buckets; i++) {
163                 struct rte_bucket_4_16 *bucket;
164
165                 bucket = (struct rte_bucket_4_16 *) &f->memory[i *
166                         f->bucket_size];
167                 lru_init(bucket);
168         }
169
170         return f;
171 }
172
173 static int
174 rte_table_hash_free_key16_lru(void *table)
175 {
176         struct rte_table_hash *f = (struct rte_table_hash *) table;
177
178         /* Check input parameters */
179         if (f == NULL) {
180                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
181                 return -EINVAL;
182         }
183
184         rte_free(f);
185         return 0;
186 }
187
188 static int
189 rte_table_hash_entry_add_key16_lru(
190         void *table,
191         void *key,
192         void *entry,
193         int *key_found,
194         void **entry_ptr)
195 {
196         struct rte_table_hash *f = (struct rte_table_hash *) table;
197         struct rte_bucket_4_16 *bucket;
198         uint64_t signature, pos;
199         uint32_t bucket_index, i;
200
201         signature = f->f_hash(key, f->key_size, f->seed);
202         bucket_index = signature & (f->n_buckets - 1);
203         bucket = (struct rte_bucket_4_16 *)
204                         &f->memory[bucket_index * f->bucket_size];
205         signature |= RTE_BUCKET_ENTRY_VALID;
206
207         /* Key is present in the bucket */
208         for (i = 0; i < 4; i++) {
209                 uint64_t bucket_signature = bucket->signature[i];
210                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
211
212                 if ((bucket_signature == signature) &&
213                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
214                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
215
216                         memcpy(bucket_data, entry, f->entry_size);
217                         lru_update(bucket, i);
218                         *key_found = 1;
219                         *entry_ptr = (void *) bucket_data;
220                         return 0;
221                 }
222         }
223
224         /* Key is not present in the bucket */
225         for (i = 0; i < 4; i++) {
226                 uint64_t bucket_signature = bucket->signature[i];
227                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
228
229                 if (bucket_signature == 0) {
230                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
231
232                         bucket->signature[i] = signature;
233                         memcpy(bucket_key, key, f->key_size);
234                         memcpy(bucket_data, entry, f->entry_size);
235                         lru_update(bucket, i);
236                         *key_found = 0;
237                         *entry_ptr = (void *) bucket_data;
238
239                         return 0;
240                 }
241         }
242
243         /* Bucket full: replace LRU entry */
244         pos = lru_pos(bucket);
245         bucket->signature[pos] = signature;
246         memcpy(bucket->key[pos], key, f->key_size);
247         memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
248         lru_update(bucket, pos);
249         *key_found = 0;
250         *entry_ptr = (void *) &bucket->data[pos * f->entry_size];
251
252         return 0;
253 }
254
255 static int
256 rte_table_hash_entry_delete_key16_lru(
257         void *table,
258         void *key,
259         int *key_found,
260         void *entry)
261 {
262         struct rte_table_hash *f = (struct rte_table_hash *) table;
263         struct rte_bucket_4_16 *bucket;
264         uint64_t signature;
265         uint32_t bucket_index, i;
266
267         signature = f->f_hash(key, f->key_size, f->seed);
268         bucket_index = signature & (f->n_buckets - 1);
269         bucket = (struct rte_bucket_4_16 *)
270                         &f->memory[bucket_index * f->bucket_size];
271         signature |= RTE_BUCKET_ENTRY_VALID;
272
273         /* Key is present in the bucket */
274         for (i = 0; i < 4; i++) {
275                 uint64_t bucket_signature = bucket->signature[i];
276                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
277
278                 if ((bucket_signature == signature) &&
279                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
280                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
281
282                         bucket->signature[i] = 0;
283                         *key_found = 1;
284                         if (entry)
285                                 memcpy(entry, bucket_data, f->entry_size);
286                         return 0;
287                 }
288         }
289
290         /* Key is not present in the bucket */
291         *key_found = 0;
292         return 0;
293 }
294
295 static int
296 check_params_create_ext(struct rte_table_hash_key16_ext_params *params) {
297         /* n_entries */
298         if (params->n_entries == 0) {
299                 RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
300                 return -EINVAL;
301         }
302
303         /* n_entries_ext */
304         if (params->n_entries_ext == 0) {
305                 RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
306                 return -EINVAL;
307         }
308
309         /* signature offset */
310         if ((params->signature_offset & 0x3) != 0) {
311                 RTE_LOG(ERR, TABLE, "%s: invalid signature offset\n", __func__);
312                 return -EINVAL;
313         }
314
315         /* key offset */
316         if ((params->key_offset & 0x7) != 0) {
317                 RTE_LOG(ERR, TABLE, "%s: invalid key offset\n", __func__);
318                 return -EINVAL;
319         }
320
321         /* f_hash */
322         if (params->f_hash == NULL) {
323                 RTE_LOG(ERR, TABLE,
324                         "%s: f_hash function pointer is NULL\n", __func__);
325                 return -EINVAL;
326         }
327
328         return 0;
329 }
330
331 static void *
332 rte_table_hash_create_key16_ext(void *params,
333                 int socket_id,
334                 uint32_t entry_size)
335 {
336         struct rte_table_hash_key16_ext_params *p =
337                         (struct rte_table_hash_key16_ext_params *) params;
338         struct rte_table_hash *f;
339         uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket, key_size,
340                         bucket_size_cl, stack_size_cl, total_size, i;
341
342         /* Check input parameters */
343         if ((check_params_create_ext(p) != 0) ||
344                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
345                 ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0))
346                 return NULL;
347
348         n_entries_per_bucket = 4;
349         key_size = 16;
350
351         /* Memory allocation */
352         n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
353                 n_entries_per_bucket);
354         n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) /
355                 n_entries_per_bucket;
356         bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket
357                 * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
358         stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1)
359                 / RTE_CACHE_LINE_SIZE;
360         total_size = sizeof(struct rte_table_hash) +
361                 ((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) *
362                 RTE_CACHE_LINE_SIZE;
363
364         f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
365         if (f == NULL) {
366                 RTE_LOG(ERR, TABLE,
367                         "%s: Cannot allocate %u bytes for hash table\n",
368                         __func__, total_size);
369                 return NULL;
370         }
371         RTE_LOG(INFO, TABLE,
372                 "%s: Hash table memory footprint is %u bytes\n",
373                 __func__, total_size);
374
375         /* Memory initialization */
376         f->n_buckets = n_buckets;
377         f->n_entries_per_bucket = n_entries_per_bucket;
378         f->key_size = key_size;
379         f->entry_size = entry_size;
380         f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
381         f->signature_offset = p->signature_offset;
382         f->key_offset = p->key_offset;
383         f->f_hash = p->f_hash;
384         f->seed = p->seed;
385
386         f->n_buckets_ext = n_buckets_ext;
387         f->stack_pos = n_buckets_ext;
388         f->stack = (uint32_t *)
389                 &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];
390
391         for (i = 0; i < n_buckets_ext; i++)
392                 f->stack[i] = i;
393
394         return f;
395 }
396
397 static int
398 rte_table_hash_free_key16_ext(void *table)
399 {
400         struct rte_table_hash *f = (struct rte_table_hash *) table;
401
402         /* Check input parameters */
403         if (f == NULL) {
404                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
405                 return -EINVAL;
406         }
407
408         rte_free(f);
409         return 0;
410 }
411
412 static int
413 rte_table_hash_entry_add_key16_ext(
414         void *table,
415         void *key,
416         void *entry,
417         int *key_found,
418         void **entry_ptr)
419 {
420         struct rte_table_hash *f = (struct rte_table_hash *) table;
421         struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev;
422         uint64_t signature;
423         uint32_t bucket_index, i;
424
425         signature = f->f_hash(key, f->key_size, f->seed);
426         bucket_index = signature & (f->n_buckets - 1);
427         bucket0 = (struct rte_bucket_4_16 *)
428                         &f->memory[bucket_index * f->bucket_size];
429         signature |= RTE_BUCKET_ENTRY_VALID;
430
431         /* Key is present in the bucket */
432         for (bucket = bucket0; bucket != NULL; bucket = bucket->next)
433                 for (i = 0; i < 4; i++) {
434                         uint64_t bucket_signature = bucket->signature[i];
435                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
436
437                         if ((bucket_signature == signature) &&
438                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
439                                 uint8_t *bucket_data = &bucket->data[i *
440                                         f->entry_size];
441
442                                 memcpy(bucket_data, entry, f->entry_size);
443                                 *key_found = 1;
444                                 *entry_ptr = (void *) bucket_data;
445                                 return 0;
446                         }
447                 }
448
449         /* Key is not present in the bucket */
450         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
451                          bucket_prev = bucket, bucket = bucket->next)
452                 for (i = 0; i < 4; i++) {
453                         uint64_t bucket_signature = bucket->signature[i];
454                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
455
456                         if (bucket_signature == 0) {
457                                 uint8_t *bucket_data = &bucket->data[i *
458                                         f->entry_size];
459
460                                 bucket->signature[i] = signature;
461                                 memcpy(bucket_key, key, f->key_size);
462                                 memcpy(bucket_data, entry, f->entry_size);
463                                 *key_found = 0;
464                                 *entry_ptr = (void *) bucket_data;
465
466                                 return 0;
467                         }
468                 }
469
470         /* Bucket full: extend bucket */
471         if (f->stack_pos > 0) {
472                 bucket_index = f->stack[--f->stack_pos];
473
474                 bucket = (struct rte_bucket_4_16 *) &f->memory[(f->n_buckets +
475                         bucket_index) * f->bucket_size];
476                 bucket_prev->next = bucket;
477                 bucket_prev->next_valid = 1;
478
479                 bucket->signature[0] = signature;
480                 memcpy(bucket->key[0], key, f->key_size);
481                 memcpy(&bucket->data[0], entry, f->entry_size);
482                 *key_found = 0;
483                 *entry_ptr = (void *) &bucket->data[0];
484                 return 0;
485         }
486
487         return -ENOSPC;
488 }
489
490 static int
491 rte_table_hash_entry_delete_key16_ext(
492         void *table,
493         void *key,
494         int *key_found,
495         void *entry)
496 {
497         struct rte_table_hash *f = (struct rte_table_hash *) table;
498         struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev;
499         uint64_t signature;
500         uint32_t bucket_index, i;
501
502         signature = f->f_hash(key, f->key_size, f->seed);
503         bucket_index = signature & (f->n_buckets - 1);
504         bucket0 = (struct rte_bucket_4_16 *)
505                 &f->memory[bucket_index * f->bucket_size];
506         signature |= RTE_BUCKET_ENTRY_VALID;
507
508         /* Key is present in the bucket */
509         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
510                 bucket_prev = bucket, bucket = bucket->next)
511                 for (i = 0; i < 4; i++) {
512                         uint64_t bucket_signature = bucket->signature[i];
513                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
514
515                         if ((bucket_signature == signature) &&
516                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
517                                 uint8_t *bucket_data = &bucket->data[i *
518                                         f->entry_size];
519
520                                 bucket->signature[i] = 0;
521                                 *key_found = 1;
522                                 if (entry)
523                                         memcpy(entry, bucket_data,
524                                         f->entry_size);
525
526                                 if ((bucket->signature[0] == 0) &&
527                                         (bucket->signature[1] == 0) &&
528                                         (bucket->signature[2] == 0) &&
529                                         (bucket->signature[3] == 0) &&
530                                         (bucket_prev != NULL)) {
531                                         bucket_prev->next = bucket->next;
532                                         bucket_prev->next_valid =
533                                                 bucket->next_valid;
534
535                                         memset(bucket, 0,
536                                                 sizeof(struct rte_bucket_4_16));
537                                         bucket_index = (bucket -
538                                                 ((struct rte_bucket_4_16 *)
539                                                 f->memory)) - f->n_buckets;
540                                         f->stack[f->stack_pos++] = bucket_index;
541                                 }
542
543                                 return 0;
544                         }
545                 }
546
547         /* Key is not present in the bucket */
548         *key_found = 0;
549         return 0;
550 }
551
/*
 * Search the four slots of bucket for the 16-byte key at key_in (two 64-bit
 * words) and store the result in pos: the index of a slot whose key is
 * equal and whose signature has bit 0 set, or 4 when there is none. When
 * several slots qualify, the highest index is reported.
 *
 * The comparison is written without early exits: for each slot, or[i] is
 * zero only when both key words are equal and the slot is in use.
 */
#define lookup_key16_cmp(key_in, bucket, pos)			\
{								\
	uint64_t xor[4][2], or[4], signature[4];		\
								\
	/* Slot 0 */						\
	signature[0] = (~bucket->signature[0]) & 1;		\
	xor[0][0] = key_in[0] ^ bucket->key[0][0];		\
	xor[0][1] = key_in[1] ^ bucket->key[0][1];		\
	or[0] = xor[0][0] | xor[0][1] | signature[0];		\
								\
	/* Slot 1 */						\
	signature[1] = (~bucket->signature[1]) & 1;		\
	xor[1][0] = key_in[0] ^ bucket->key[1][0];		\
	xor[1][1] = key_in[1] ^ bucket->key[1][1];		\
	or[1] = xor[1][0] | xor[1][1] | signature[1];		\
								\
	/* Slot 2 */						\
	signature[2] = (~bucket->signature[2]) & 1;		\
	xor[2][0] = key_in[0] ^ bucket->key[2][0];		\
	xor[2][1] = key_in[1] ^ bucket->key[2][1];		\
	or[2] = xor[2][0] | xor[2][1] | signature[2];		\
								\
	/* Slot 3 */						\
	signature[3] = (~bucket->signature[3]) & 1;		\
	xor[3][0] = key_in[0] ^ bucket->key[3][0];		\
	xor[3][1] = key_in[1] ^ bucket->key[3][1];		\
	or[3] = xor[3][0] | xor[3][1] | signature[3];		\
								\
	/* Later tests override earlier ones; 4 means no match */\
	pos = 4;						\
	if (or[0] == 0)						\
		pos = 0;					\
	if (or[1] == 0)						\
		pos = 1;					\
	if (or[2] == 0)						\
		pos = 2;					\
	if (or[3] == 0)						\
		pos = 3;					\
}
588
/*
 * Pipeline stage 0, one packet: take the lowest set bit of pkts_mask as the
 * packet index (pkt0_index), fetch that packet from pkts into mbuf0, start
 * prefetching its metadata and clear the bit in pkts_mask.
 */
#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask)	\
{								\
	uint64_t pkt_mask;					\
								\
	/* Lowest pending packet */				\
	pkt0_index = __builtin_ctzll(pkts_mask);		\
	mbuf0 = pkts[pkt0_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, 0));	\
								\
	/* Mark the packet as taken */				\
	pkt_mask = 1LLU << pkt0_index;				\
	pkts_mask &= ~pkt_mask;					\
}
600
/*
 * Pipeline stage 1, one packet: read the 32-bit signature from the metadata
 * of mbuf1 at f->signature_offset, derive the bucket from it (bucket1) and
 * start prefetching the first two cache lines of that bucket.
 */
#define lookup1_stage1(mbuf1, bucket1, f)			\
{								\
	uint64_t signature;					\
	uint32_t bucket_index;					\
								\
	signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);\
	bucket_index = signature & (f->n_buckets - 1);		\
	bucket1 = (struct rte_bucket_4_16 *)			\
		(f->memory + bucket_index * f->bucket_size);	\
								\
	/* Signatures/chaining state, then the keys */		\
	rte_prefetch0(bucket1);					\
	rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\
}
613
/*
 * Pipeline stage 2, one packet, LRU flavour: compare the key found in the
 * metadata of mbuf2 (at f->key_offset) with the keys of bucket2, set bit
 * pkt2_index of pkts_mask_out on a hit, store the address of the selected
 * entry data in entries[pkt2_index], prefetch it and update the LRU state
 * of the bucket.
 *
 * On a miss pos is 4: signature[4] supplies the cleared hit bit and the
 * stored address points past the data of the fourth slot.
 */
#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,		\
		pkts_mask_out, entries, f)			\
{								\
	void *a;						\
	uint64_t pkt_mask;					\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
	lookup_key16_cmp(key, bucket2, pos);			\
								\
	/* Entry data of the selected slot */			\
	a = (void *) &bucket2->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt2_index] = a;				\
								\
	/* Hit bit comes from the valid bit of the slot */	\
	pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	lru_update(bucket2, pos);				\
}
634
/*
 * Pipeline stage 2, one packet, extendible bucket flavour: compare the key
 * found in the metadata of mbuf2 (at f->key_offset) with the keys of
 * bucket2, set bit pkt2_index of pkts_mask_out on a hit and store the
 * address of the selected entry data in entries[pkt2_index].
 *
 * The next bucket of the chain and the key pointer are always recorded in
 * buckets[pkt2_index] / keys[pkt2_index]; bit pkt2_index of buckets_mask is
 * set only on a miss in a bucket that has a valid successor.
 */
#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out, entries, \
	buckets_mask, buckets, keys, f)				\
{								\
	struct rte_bucket_4_16 *bucket_next;			\
	void *a;						\
	uint64_t pkt_mask, bucket_mask;				\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
	lookup_key16_cmp(key, bucket2, pos);			\
								\
	/* Entry data of the selected slot */			\
	a = (void *) &bucket2->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt2_index] = a;				\
								\
	/* Hit bit comes from the valid bit of the slot */	\
	pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	/* Remember where to continue if this was a miss */	\
	bucket_next = bucket2->next;				\
	buckets[pkt2_index] = bucket_next;			\
	keys[pkt2_index] = key;					\
	bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\
	buckets_mask |= bucket_mask;				\
}
661
/*
 * Continue the lookup of packet pkt_index in the chained bucket recorded in
 * buckets[pkt_index], using the key pointer recorded in keys[pkt_index].
 *
 * On a hit, bit pkt_index of pkts_mask_out is set; entries[pkt_index] always
 * receives the address of the selected entry data. The successor of the
 * bucket is prefetched and recorded in buckets[pkt_index]; bit pkt_index of
 * buckets_mask is set only on a miss in a bucket that has a valid successor.
 */
#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,\
	buckets_mask, f)					\
{								\
	struct rte_bucket_4_16 *bucket, *bucket_next;		\
	void *a;						\
	uint64_t pkt_mask, bucket_mask;				\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	/* State saved by the previous step for this packet */	\
	bucket = buckets[pkt_index];				\
	key = keys[pkt_index];					\
	lookup_key16_cmp(key, bucket, pos);			\
								\
	/* Entry data of the selected slot */			\
	a = (void *) &bucket->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt_index] = a;					\
								\
	/* Hit bit comes from the valid bit of the slot */	\
	pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	/* Prefetch and remember the successor */		\
	bucket_next = bucket->next;				\
	rte_prefetch0(bucket_next);				\
	rte_prefetch0((void *)(((uintptr_t) bucket_next) + RTE_CACHE_LINE_SIZE));\
	buckets[pkt_index] = bucket_next;			\
	keys[pkt_index] = key;					\
	bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\
	buckets_mask |= bucket_mask;				\
}
691
/*
 * Pipeline stage 0, two packets: take the two lowest set bits of pkts_mask
 * as packet indexes (pkt00_index, then pkt01_index), fetch the packets from
 * pkts into mbuf00/mbuf01, start prefetching their metadata and clear both
 * bits in pkts_mask. The second index is taken after the first bit has been
 * cleared.
 */
#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
		pkts, pkts_mask)				\
{								\
	uint64_t pkt00_mask, pkt01_mask;			\
								\
	/* First packet */					\
	pkt00_index = __builtin_ctzll(pkts_mask);		\
	mbuf00 = pkts[pkt00_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));	\
	pkt00_mask = 1LLU << pkt00_index;			\
	pkts_mask &= ~pkt00_mask;				\
								\
	/* Second packet */					\
	pkt01_index = __builtin_ctzll(pkts_mask);		\
	mbuf01 = pkts[pkt01_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));	\
	pkt01_mask = 1LLU << pkt01_index;			\
	pkts_mask &= ~pkt01_mask;				\
}
711
/*
 * Pipeline stage 0, two packets per invocation, tolerating a mask with a
 * single bit left.
 *
 * Behaves like lookup2_stage0(), except that when only one bit remains in
 * pkts_mask the second slot (pkt01_index / mbuf01) is made a duplicate of
 * the first one, so the later stages always have two valid packets to work
 * on.
 *
 * pkts_mask is updated in place and must have at least one bit set on
 * entry.
 *
 * __builtin_ctzll() has an undefined result for a zero argument, so it is
 * only evaluated for the second slot when the mask is still non-zero.
 *
 * The body is wrapped in do { } while (0) so that "macro(...);" is exactly
 * one statement, including inside an unbraced if/else.
 */
#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
                mbuf00, mbuf01, pkts, pkts_mask)                \
do {                                                            \
        uint64_t pkt00_mask, pkt01_mask;                        \
                                                                \
        /* First packet: lowest set bit of the mask */          \
        (pkt00_index) = __builtin_ctzll(pkts_mask);             \
        pkt00_mask = 1LLU << (pkt00_index);                     \
        (pkts_mask) &= ~pkt00_mask;                             \
                                                                \
        (mbuf00) = (pkts)[(pkt00_index)];                       \
        rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR((mbuf00), 0));\
                                                                \
        /* Second packet: next set bit, or repeat the first */  \
        if (pkts_mask)                                          \
                (pkt01_index) = __builtin_ctzll(pkts_mask);     \
        else                                                    \
                (pkt01_index) = (pkt00_index);                  \
        pkt01_mask = 1LLU << (pkt01_index);                     \
        (pkts_mask) &= ~pkt01_mask;                             \
                                                                \
        (mbuf01) = (pkts)[(pkt01_index)];                       \
        rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR((mbuf01), 0));\
} while (0)
733
/*
 * Pipeline stage 1, two packets per invocation.
 *
 * For each of mbuf10 / mbuf11:
 *  - reads a 32-bit signature from the mbuf metadata at
 *    f->signature_offset,
 *  - masks it with (f->n_buckets - 1) to obtain a bucket index,
 *  - stores the address f->memory + index * f->bucket_size in
 *    bucket10 / bucket11,
 *  - prefetches that address and the one RTE_CACHE_LINE_SIZE bytes after
 *    it.
 *
 * NOTE(review): the masking only selects a valid bucket when n_buckets is
 * a power of two, and the index * bucket_size product is evaluated in the
 * operands' own types; confirm both against the table create code.
 *
 * The body is wrapped in do { } while (0) so that "macro(...);" is exactly
 * one statement, including inside an unbraced if/else.
 */
#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)   \
do {                                                            \
        uint64_t signature10, signature11;                      \
        uint32_t bucket10_index, bucket11_index;                \
                                                                \
        signature10 = RTE_MBUF_METADATA_UINT32(mbuf10,          \
                (f)->signature_offset);                         \
        bucket10_index = signature10 & ((f)->n_buckets - 1);    \
        (bucket10) = (struct rte_bucket_4_16 *)                 \
                &(f)->memory[bucket10_index * (f)->bucket_size];\
        rte_prefetch0(bucket10);                                \
        rte_prefetch0((void *)(((uintptr_t) (bucket10)) +       \
                RTE_CACHE_LINE_SIZE));                          \
                                                                \
        signature11 = RTE_MBUF_METADATA_UINT32(mbuf11,          \
                (f)->signature_offset);                         \
        bucket11_index = signature11 & ((f)->n_buckets - 1);    \
        (bucket11) = (struct rte_bucket_4_16 *)                 \
                &(f)->memory[bucket11_index * (f)->bucket_size];\
        rte_prefetch0(bucket11);                                \
        rte_prefetch0((void *)(((uintptr_t) (bucket11)) +       \
                RTE_CACHE_LINE_SIZE));                          \
} while (0)
753
/*
 * Pipeline stage 2 for the LRU table flavour, two packets per invocation.
 *
 * For each packet (mbuf20 with bucket20, mbuf21 with bucket21):
 *  - takes the key pointer from the mbuf metadata at f->key_offset,
 *  - runs lookup_key16_cmp() (defined elsewhere in this file) on the key
 *    and the bucket to obtain a slot position pos,
 *  - ORs bit 0 of bucket->signature[pos], shifted to the packet's bit
 *    position, into pkts_mask_out,
 *  - stores &bucket->data[pos * f->entry_size] in entries[pkt_index] and
 *    prefetches it; this is done whether or not the signature bit is set,
 *  - calls lru_update() (defined elsewhere) for the bucket and position.
 *
 * The body is wrapped in do { } while (0) so that "macro(...);" is exactly
 * one statement, including inside an unbraced if/else.
 */
#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
                bucket20, bucket21, pkts_mask_out, entries, f)  \
do {                                                            \
        void *a20, *a21;                                        \
        uint64_t pkt20_mask, pkt21_mask;                        \
        uint64_t *key20, *key21;                                \
        uint32_t pos20, pos21;                                  \
                                                                \
        key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, (f)->key_offset);\
        key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, (f)->key_offset);\
                                                                \
        lookup_key16_cmp(key20, bucket20, pos20);               \
        lookup_key16_cmp(key21, bucket21, pos21);               \
                                                                \
        /* Bit 0 of the selected signature is the per-packet result */\
        pkt20_mask = ((bucket20)->signature[pos20] & 1LLU) <<   \
                (pkt20_index);                                  \
        pkt21_mask = ((bucket21)->signature[pos21] & 1LLU) <<   \
                (pkt21_index);                                  \
        (pkts_mask_out) |= pkt20_mask | pkt21_mask;             \
                                                                \
        a20 = (void *) &(bucket20)->data[pos20 * (f)->entry_size];\
        a21 = (void *) &(bucket21)->data[pos21 * (f)->entry_size];\
        rte_prefetch0(a20);                                     \
        rte_prefetch0(a21);                                     \
        (entries)[(pkt20_index)] = a20;                         \
        (entries)[(pkt21_index)] = a21;                         \
        lru_update(bucket20, pos20);                            \
        lru_update(bucket21, pos21);                            \
} while (0)
781
/*
 * Pipeline stage 2 for the extendible-bucket table flavour, two packets
 * per invocation.
 *
 * For each packet (mbuf20 with bucket20, mbuf21 with bucket21):
 *  - takes the key pointer from the mbuf metadata at f->key_offset,
 *  - runs lookup_key16_cmp() (defined elsewhere in this file) on the key
 *    and the bucket to obtain a slot position pos,
 *  - ORs bit 0 of bucket->signature[pos], shifted to the packet's bit
 *    position, into pkts_mask_out,
 *  - stores &bucket->data[pos * f->entry_size] in entries[pkt_index] and
 *    prefetches it,
 *  - if the packet's result bit is clear and bucket->next_valid is set,
 *    sets the packet's bit in buckets_mask so that the chained bucket is
 *    examined later,
 *  - records bucket->next in buckets[pkt_index] and the key pointer in
 *    keys[pkt_index] for that later pass.
 *
 * The body is wrapped in do { } while (0) so that "macro(...);" is exactly
 * one statement, including inside an unbraced if/else.
 */
#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
        bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f) \
do {                                                            \
        struct rte_bucket_4_16 *bucket20_next, *bucket21_next;  \
        void *a20, *a21;                                        \
        uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\
        uint64_t *key20, *key21;                                \
        uint32_t pos20, pos21;                                  \
                                                                \
        key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, (f)->key_offset);\
        key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, (f)->key_offset);\
                                                                \
        lookup_key16_cmp(key20, bucket20, pos20);               \
        lookup_key16_cmp(key21, bucket21, pos21);               \
                                                                \
        /* Bit 0 of the selected signature is the per-packet result */\
        pkt20_mask = ((bucket20)->signature[pos20] & 1LLU) <<   \
                (pkt20_index);                                  \
        pkt21_mask = ((bucket21)->signature[pos21] & 1LLU) <<   \
                (pkt21_index);                                  \
        (pkts_mask_out) |= pkt20_mask | pkt21_mask;             \
                                                                \
        a20 = (void *) &(bucket20)->data[pos20 * (f)->entry_size];\
        a21 = (void *) &(bucket21)->data[pos21 * (f)->entry_size];\
        rte_prefetch0(a20);                                     \
        rte_prefetch0(a21);                                     \
        (entries)[(pkt20_index)] = a20;                         \
        (entries)[(pkt21_index)] = a21;                         \
                                                                \
        /* Result bit clear and a chained bucket exists: revisit later */\
        bucket20_mask = (~pkt20_mask) &                         \
                ((bucket20)->next_valid << (pkt20_index));      \
        bucket21_mask = (~pkt21_mask) &                         \
                ((bucket21)->next_valid << (pkt21_index));      \
        (buckets_mask) |= bucket20_mask | bucket21_mask;        \
        bucket20_next = (bucket20)->next;                       \
        bucket21_next = (bucket21)->next;                       \
        (buckets)[(pkt20_index)] = bucket20_next;               \
        (buckets)[(pkt21_index)] = bucket21_next;               \
        (keys)[(pkt20_index)] = key20;                          \
        (keys)[(pkt21_index)] = key21;                          \
} while (0)
818
/*
 * Bulk lookup handler of the key16 LRU hash table.
 *
 * table           : table instance, used as a struct rte_table_hash
 * pkts            : mbuf pointers, indexed by bit position in pkts_mask
 * pkts_mask       : bit i set means pkts[i] takes part in this lookup
 * lookup_hit_mask : output, receives the result mask built up by the
 *                   stage 2 macros (one bit per packet)
 * entries         : output array, entries[i] is written by the stage 2
 *                   macros for each processed packet i
 *
 * Always returns 0.
 *
 * Bursts with fewer than 5 packets are handled one packet at a time with
 * the lookup1_* macros. Larger bursts run through a three-stage pipeline
 * working on two packets per stage: stage 0 picks the packets and
 * prefetches their metadata, stage 1 locates and prefetches the buckets,
 * stage 2 compares the keys and records the results. When an odd number of
 * packets is left, the last packet is fed through the pipeline twice.
 */
static int
rte_table_hash_lookup_key16_lru(
        void *table,
        struct rte_mbuf **pkts,
        uint64_t pkts_mask,
        uint64_t *lookup_hit_mask,
        void **entries)
{
        struct rte_table_hash *f = table;
        struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
        struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
        uint32_t pkt00_index, pkt01_index;
        uint32_t pkt10_index, pkt11_index;
        uint32_t pkt20_index, pkt21_index;
        uint64_t pkts_mask_out = 0;

        /* Fewer than 5 packets cannot feed the pipeline: go one by one */
        if (__builtin_popcountll(pkts_mask) < 5) {
                while (pkts_mask) {
                        struct rte_bucket_4_16 *bucket;
                        struct rte_mbuf *mbuf;
                        uint32_t pkt_index;

                        lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
                        lookup1_stage1(mbuf, bucket, f);
                        lookup1_stage2_lru(pkt_index, mbuf, bucket,
                                pkts_mask_out, entries, f);
                }

                *lookup_hit_mask = pkts_mask_out;
                return 0;
        }

        /*
         * Fill the pipeline
         */

        /* Stage 0: first pair */
        lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
                pkts_mask);

        /* Shift: stage 0 -> stage 1 */
        mbuf10 = mbuf00;
        mbuf11 = mbuf01;
        pkt10_index = pkt00_index;
        pkt11_index = pkt01_index;

        /* Stage 0: second pair */
        lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
                pkts_mask);

        /* Stage 1: first pair */
        lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

        /*
         * Steady state: all three stages busy while packets remain
         */
        while (pkts_mask) {
                /* Shift: stage 1 -> stage 2 */
                bucket20 = bucket10;
                bucket21 = bucket11;
                mbuf20 = mbuf10;
                mbuf21 = mbuf11;
                pkt20_index = pkt10_index;
                pkt21_index = pkt11_index;

                /* Shift: stage 0 -> stage 1 */
                mbuf10 = mbuf00;
                mbuf11 = mbuf01;
                pkt10_index = pkt00_index;
                pkt11_index = pkt01_index;

                /* Stage 0 (copes with a single remaining packet) */
                lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
                        mbuf00, mbuf01, pkts, pkts_mask);

                /* Stage 1 */
                lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

                /* Stage 2 */
                lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
                        bucket20, bucket21, pkts_mask_out, entries, f);
        }

        /*
         * Drain the pipeline: two pairs are still in flight
         */

        /* Shift: stage 1 -> stage 2 */
        bucket20 = bucket10;
        bucket21 = bucket11;
        mbuf20 = mbuf10;
        mbuf21 = mbuf11;
        pkt20_index = pkt10_index;
        pkt21_index = pkt11_index;

        /* Shift: stage 0 -> stage 1 */
        mbuf10 = mbuf00;
        mbuf11 = mbuf01;
        pkt10_index = pkt00_index;
        pkt11_index = pkt01_index;

        /* Stage 1 */
        lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

        /* Stage 2 */
        lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
                bucket20, bucket21, pkts_mask_out, entries, f);

        /* Shift: stage 1 -> stage 2 */
        bucket20 = bucket10;
        bucket21 = bucket11;
        mbuf20 = mbuf10;
        mbuf21 = mbuf11;
        pkt20_index = pkt10_index;
        pkt21_index = pkt11_index;

        /* Stage 2: last pair */
        lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
                bucket20, bucket21, pkts_mask_out, entries, f);

        *lookup_hit_mask = pkts_mask_out;
        return 0;
} /* rte_table_hash_lookup_key16_lru() */
939
940 static int
941 rte_table_hash_lookup_key16_ext(
942         void *table,
943         struct rte_mbuf **pkts,
944         uint64_t pkts_mask,
945         uint64_t *lookup_hit_mask,
946         void **entries)
947 {
948         struct rte_table_hash *f = (struct rte_table_hash *) table;
949         struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
950         struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
951         uint32_t pkt00_index, pkt01_index, pkt10_index;
952         uint32_t pkt11_index, pkt20_index, pkt21_index;
953         uint64_t pkts_mask_out = 0, buckets_mask = 0;
954         struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
955         uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
956
957         /* Cannot run the pipeline with less than 5 packets */
958         if (__builtin_popcountll(pkts_mask) < 5) {
959                 for ( ; pkts_mask; ) {
960                         struct rte_bucket_4_16 *bucket;
961                         struct rte_mbuf *mbuf;
962                         uint32_t pkt_index;
963
964                         lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
965                         lookup1_stage1(mbuf, bucket, f);
966                         lookup1_stage2_ext(pkt_index, mbuf, bucket,
967                                 pkts_mask_out, entries, buckets_mask,
968                                 buckets, keys, f);
969                 }
970
971                 goto grind_next_buckets;
972         }
973
974         /*
975          * Pipeline fill
976          *
977          */
978         /* Pipeline stage 0 */
979         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
980                 pkts_mask);
981
982         /* Pipeline feed */
983         mbuf10 = mbuf00;
984         mbuf11 = mbuf01;
985         pkt10_index = pkt00_index;
986         pkt11_index = pkt01_index;
987
988         /* Pipeline stage 0 */
989         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
990                 pkts_mask);
991
992         /* Pipeline stage 1 */
993         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
994
995         /*
996          * Pipeline run
997          *
998          */
999         for ( ; pkts_mask; ) {
1000                 /* Pipeline feed */
1001                 bucket20 = bucket10;
1002                 bucket21 = bucket11;
1003                 mbuf20 = mbuf10;
1004                 mbuf21 = mbuf11;
1005                 mbuf10 = mbuf00;
1006                 mbuf11 = mbuf01;
1007                 pkt20_index = pkt10_index;
1008                 pkt21_index = pkt11_index;
1009                 pkt10_index = pkt00_index;
1010                 pkt11_index = pkt01_index;
1011
1012                 /* Pipeline stage 0 */
1013                 lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
1014                         mbuf00, mbuf01, pkts, pkts_mask);
1015
1016                 /* Pipeline stage 1 */
1017                 lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1018
1019                 /* Pipeline stage 2 */
1020                 lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1021                         bucket20, bucket21, pkts_mask_out, entries,
1022                         buckets_mask, buckets, keys, f);
1023         }
1024
1025         /*
1026          * Pipeline flush
1027          *
1028          */
1029         /* Pipeline feed */
1030         bucket20 = bucket10;
1031         bucket21 = bucket11;
1032         mbuf20 = mbuf10;
1033         mbuf21 = mbuf11;
1034         mbuf10 = mbuf00;
1035         mbuf11 = mbuf01;
1036         pkt20_index = pkt10_index;
1037         pkt21_index = pkt11_index;
1038         pkt10_index = pkt00_index;
1039         pkt11_index = pkt01_index;
1040
1041         /* Pipeline stage 1 */
1042         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1043
1044         /* Pipeline stage 2 */
1045         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1046                 bucket20, bucket21, pkts_mask_out, entries,
1047                 buckets_mask, buckets, keys, f);
1048
1049         /* Pipeline feed */
1050         bucket20 = bucket10;
1051         bucket21 = bucket11;
1052         mbuf20 = mbuf10;
1053         mbuf21 = mbuf11;
1054         pkt20_index = pkt10_index;
1055         pkt21_index = pkt11_index;
1056
1057         /* Pipeline stage 2 */
1058         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1059                 bucket20, bucket21, pkts_mask_out, entries,
1060                 buckets_mask, buckets, keys, f);
1061
1062 grind_next_buckets:
1063         /* Grind next buckets */
1064         for ( ; buckets_mask; ) {
1065                 uint64_t buckets_mask_next = 0;
1066
1067                 for ( ; buckets_mask; ) {
1068                         uint64_t pkt_mask;
1069                         uint32_t pkt_index;
1070
1071                         pkt_index = __builtin_ctzll(buckets_mask);
1072                         pkt_mask = 1LLU << pkt_index;
1073                         buckets_mask &= ~pkt_mask;
1074
1075                         lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
1076                                 entries, buckets_mask_next, f);
1077                 }
1078
1079                 buckets_mask = buckets_mask_next;
1080         }
1081
1082         *lookup_hit_mask = pkts_mask_out;
1083         return 0;
1084 } /* rte_table_hash_lookup_key16_ext() */
1085
1086 struct rte_table_ops rte_table_hash_key16_lru_ops = {
1087         .f_create = rte_table_hash_create_key16_lru,
1088         .f_free = rte_table_hash_free_key16_lru,
1089         .f_add = rte_table_hash_entry_add_key16_lru,
1090         .f_delete = rte_table_hash_entry_delete_key16_lru,
1091         .f_lookup = rte_table_hash_lookup_key16_lru,
1092 };
1093
1094 struct rte_table_ops rte_table_hash_key16_ext_ops = {
1095         .f_create = rte_table_hash_create_key16_ext,
1096         .f_free = rte_table_hash_free_key16_ext,
1097         .f_add = rte_table_hash_entry_add_key16_ext,
1098         .f_delete = rte_table_hash_entry_delete_key16_ext,
1099         .f_lookup = rte_table_hash_lookup_key16_ext,
1100 };