1fdb75d0d429b4d15631365dcb9df058f2212230
[dpdk.git] / lib / librte_table / rte_table_hash_key32.c
1 /*-
2  *       BSD LICENSE
3  *
4  *       Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *       All rights reserved.
6  *
7  *       Redistribution and use in source and binary forms, with or without
8  *       modification, are permitted provided that the following conditions
9  *       are met:
10  *
11  *      * Redistributions of source code must retain the above copyright
12  *               notice, this list of conditions and the following disclaimer.
13  *      * Redistributions in binary form must reproduce the above copyright
14  *               notice, this list of conditions and the following disclaimer in
15  *               the documentation and/or other materials provided with the
16  *               distribution.
17  *      * Neither the name of Intel Corporation nor the names of its
18  *               contributors may be used to endorse or promote products derived
19  *               from this software without specific prior written permission.
20  *
21  *       THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *       "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *       LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *       A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *       OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *       SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *       LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *       DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *       THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *       (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *       OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <string.h>
34 #include <stdio.h>
35
36 #include <rte_common.h>
37 #include <rte_mbuf.h>
38 #include <rte_memory.h>
39 #include <rte_malloc.h>
40 #include <rte_log.h>
41
42 #include "rte_table_hash.h"
43 #include "rte_lru.h"
44
45 #define RTE_TABLE_HASH_KEY_SIZE                                         32
46
47 #define RTE_BUCKET_ENTRY_VALID                                          0x1LLU
48
/*
 * Hash bucket with room for four 32-byte keys.
 *
 * The "cache line" remarks below hold for 64-byte cache lines and 8-byte
 * pointers; the create functions refuse to build a table when the structure
 * size is not a multiple of RTE_CACHE_LINE_SIZE.
 */
struct rte_bucket_4_32 {
        /* Cache line 0 */
        /*
         * Per-slot signature, 0 means the slot is free. Element [4] is never
         * written by the add/delete code in this file, so the lookup macros
         * read 0 from it when no key matched (pos == 4).
         */
        uint64_t signature[4 + 1];
        /* LRU bookkeeping, only touched through the lru_*() macros */
        uint64_t lru_list;
        /* Next bucket of the chain (extendible-bucket tables only) */
        struct rte_bucket_4_32 *next;
        /* 1 when 'next' points to a chained bucket, 0 otherwise */
        uint64_t next_valid;

        /* Cache lines 1 and 2 */
        /* Four keys, each stored as 4 x 64-bit words (32 bytes) */
        uint64_t key[4][4];

        /* Cache line 3 */
        /* Four entries of 'entry_size' bytes each, stored back to back */
        uint8_t data[];
};
62
63 struct rte_table_hash {
64         /* Input parameters */
65         uint32_t n_buckets;
66         uint32_t n_entries_per_bucket;
67         uint32_t key_size;
68         uint32_t entry_size;
69         uint32_t bucket_size;
70         uint32_t signature_offset;
71         uint32_t key_offset;
72         rte_table_hash_op_hash f_hash;
73         uint64_t seed;
74
75         /* Extendible buckets */
76         uint32_t n_buckets_ext;
77         uint32_t stack_pos;
78         uint32_t *stack;
79
80         /* Lookup table */
81         uint8_t memory[0] __rte_cache_aligned;
82 };
83
84 static int
85 check_params_create_lru(struct rte_table_hash_key32_lru_params *params) {
86         /* n_entries */
87         if (params->n_entries == 0) {
88                 RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
89                 return -EINVAL;
90         }
91
92         /* f_hash */
93         if (params->f_hash == NULL) {
94                 RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
95                         __func__);
96                 return -EINVAL;
97         }
98
99         return 0;
100 }
101
102 static void *
103 rte_table_hash_create_key32_lru(void *params,
104                 int socket_id,
105                 uint32_t entry_size)
106 {
107         struct rte_table_hash_key32_lru_params *p =
108                 (struct rte_table_hash_key32_lru_params *) params;
109         struct rte_table_hash *f;
110         uint32_t n_buckets, n_entries_per_bucket, key_size, bucket_size_cl;
111         uint32_t total_size, i;
112
113         /* Check input parameters */
114         if ((check_params_create_lru(p) != 0) ||
115                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
116                 ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) {
117                 return NULL;
118         }
119         n_entries_per_bucket = 4;
120         key_size = 32;
121
122         /* Memory allocation */
123         n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
124                 n_entries_per_bucket);
125         bucket_size_cl = (sizeof(struct rte_bucket_4_32) + n_entries_per_bucket
126                 * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
127         total_size = sizeof(struct rte_table_hash) + n_buckets *
128                 bucket_size_cl * RTE_CACHE_LINE_SIZE;
129
130         f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
131         if (f == NULL) {
132                 RTE_LOG(ERR, TABLE,
133                         "%s: Cannot allocate %u bytes for hash table\n",
134                         __func__, total_size);
135                 return NULL;
136         }
137         RTE_LOG(INFO, TABLE,
138                 "%s: Hash table memory footprint is %u bytes\n", __func__,
139                 total_size);
140
141         /* Memory initialization */
142         f->n_buckets = n_buckets;
143         f->n_entries_per_bucket = n_entries_per_bucket;
144         f->key_size = key_size;
145         f->entry_size = entry_size;
146         f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
147         f->signature_offset = p->signature_offset;
148         f->key_offset = p->key_offset;
149         f->f_hash = p->f_hash;
150         f->seed = p->seed;
151
152         for (i = 0; i < n_buckets; i++) {
153                 struct rte_bucket_4_32 *bucket;
154
155                 bucket = (struct rte_bucket_4_32 *) &f->memory[i *
156                         f->bucket_size];
157                 bucket->lru_list = 0x0000000100020003LLU;
158         }
159
160         return f;
161 }
162
163 static int
164 rte_table_hash_free_key32_lru(void *table)
165 {
166         struct rte_table_hash *f = (struct rte_table_hash *) table;
167
168         /* Check input parameters */
169         if (f == NULL) {
170                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
171                 return -EINVAL;
172         }
173
174         rte_free(f);
175         return 0;
176 }
177
178 static int
179 rte_table_hash_entry_add_key32_lru(
180         void *table,
181         void *key,
182         void *entry,
183         int *key_found,
184         void **entry_ptr)
185 {
186         struct rte_table_hash *f = (struct rte_table_hash *) table;
187         struct rte_bucket_4_32 *bucket;
188         uint64_t signature, pos;
189         uint32_t bucket_index, i;
190
191         signature = f->f_hash(key, f->key_size, f->seed);
192         bucket_index = signature & (f->n_buckets - 1);
193         bucket = (struct rte_bucket_4_32 *)
194                 &f->memory[bucket_index * f->bucket_size];
195         signature |= RTE_BUCKET_ENTRY_VALID;
196
197         /* Key is present in the bucket */
198         for (i = 0; i < 4; i++) {
199                 uint64_t bucket_signature = bucket->signature[i];
200                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
201
202                 if ((bucket_signature == signature) &&
203                         (memcmp(key, bucket_key, f->key_size) == 0)) {
204                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
205
206                         memcpy(bucket_data, entry, f->entry_size);
207                         lru_update(bucket, i);
208                         *key_found = 1;
209                         *entry_ptr = (void *) bucket_data;
210                         return 0;
211                 }
212         }
213
214         /* Key is not present in the bucket */
215         for (i = 0; i < 4; i++) {
216                 uint64_t bucket_signature = bucket->signature[i];
217                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
218
219                 if (bucket_signature == 0) {
220                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
221
222                         bucket->signature[i] = signature;
223                         memcpy(bucket_key, key, f->key_size);
224                         memcpy(bucket_data, entry, f->entry_size);
225                         lru_update(bucket, i);
226                         *key_found = 0;
227                         *entry_ptr = (void *) bucket_data;
228
229                         return 0;
230                 }
231         }
232
233         /* Bucket full: replace LRU entry */
234         pos = lru_pos(bucket);
235         bucket->signature[pos] = signature;
236         memcpy(bucket->key[pos], key, f->key_size);
237         memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
238         lru_update(bucket, pos);
239         *key_found      = 0;
240         *entry_ptr = (void *) &bucket->data[pos * f->entry_size];
241
242         return 0;
243 }
244
245 static int
246 rte_table_hash_entry_delete_key32_lru(
247         void *table,
248         void *key,
249         int *key_found,
250         void *entry)
251 {
252         struct rte_table_hash *f = (struct rte_table_hash *) table;
253         struct rte_bucket_4_32 *bucket;
254         uint64_t signature;
255         uint32_t bucket_index, i;
256
257         signature = f->f_hash(key, f->key_size, f->seed);
258         bucket_index = signature & (f->n_buckets - 1);
259         bucket = (struct rte_bucket_4_32 *)
260                 &f->memory[bucket_index * f->bucket_size];
261         signature |= RTE_BUCKET_ENTRY_VALID;
262
263         /* Key is present in the bucket */
264         for (i = 0; i < 4; i++) {
265                 uint64_t bucket_signature = bucket->signature[i];
266                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
267
268                 if ((bucket_signature == signature) &&
269                         (memcmp(key, bucket_key, f->key_size) == 0)) {
270                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
271
272                         bucket->signature[i] = 0;
273                         *key_found = 1;
274                         if (entry)
275                                 memcpy(entry, bucket_data, f->entry_size);
276
277                         return 0;
278                 }
279         }
280
281         /* Key is not present in the bucket */
282         *key_found = 0;
283         return 0;
284 }
285
286 static int
287 check_params_create_ext(struct rte_table_hash_key32_ext_params *params) {
288         /* n_entries */
289         if (params->n_entries == 0) {
290                 RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
291                 return -EINVAL;
292         }
293
294         /* n_entries_ext */
295         if (params->n_entries_ext == 0) {
296                 RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
297                 return -EINVAL;
298         }
299
300         /* f_hash */
301         if (params->f_hash == NULL) {
302                 RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
303                         __func__);
304                 return -EINVAL;
305         }
306
307         return 0;
308 }
309
310 static void *
311 rte_table_hash_create_key32_ext(void *params,
312         int socket_id,
313         uint32_t entry_size)
314 {
315         struct rte_table_hash_key32_ext_params *p =
316                         (struct rte_table_hash_key32_ext_params *) params;
317         struct rte_table_hash *f;
318         uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket;
319         uint32_t key_size, bucket_size_cl, stack_size_cl, total_size, i;
320
321         /* Check input parameters */
322         if ((check_params_create_ext(p) != 0) ||
323                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
324                 ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0))
325                 return NULL;
326
327         n_entries_per_bucket = 4;
328         key_size = 32;
329
330         /* Memory allocation */
331         n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
332                 n_entries_per_bucket);
333         n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) /
334                 n_entries_per_bucket;
335         bucket_size_cl = (sizeof(struct rte_bucket_4_32) + n_entries_per_bucket
336                 * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
337         stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1)
338                 / RTE_CACHE_LINE_SIZE;
339         total_size = sizeof(struct rte_table_hash) +
340                 ((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) *
341                 RTE_CACHE_LINE_SIZE;
342
343         f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
344         if (f == NULL) {
345                 RTE_LOG(ERR, TABLE,
346                         "%s: Cannot allocate %u bytes for hash table\n",
347                         __func__, total_size);
348                 return NULL;
349         }
350         RTE_LOG(INFO, TABLE,
351                 "%s: Hash table memory footprint is %u bytes\n", __func__,
352                 total_size);
353
354         /* Memory initialization */
355         f->n_buckets = n_buckets;
356         f->n_entries_per_bucket = n_entries_per_bucket;
357         f->key_size = key_size;
358         f->entry_size = entry_size;
359         f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
360         f->signature_offset = p->signature_offset;
361         f->key_offset = p->key_offset;
362         f->f_hash = p->f_hash;
363         f->seed = p->seed;
364
365         f->n_buckets_ext = n_buckets_ext;
366         f->stack_pos = n_buckets_ext;
367         f->stack = (uint32_t *)
368                 &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];
369
370         for (i = 0; i < n_buckets_ext; i++)
371                 f->stack[i] = i;
372
373         return f;
374 }
375
376 static int
377 rte_table_hash_free_key32_ext(void *table)
378 {
379         struct rte_table_hash *f = (struct rte_table_hash *) table;
380
381         /* Check input parameters */
382         if (f == NULL) {
383                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
384                 return -EINVAL;
385         }
386
387         rte_free(f);
388         return 0;
389 }
390
391 static int
392 rte_table_hash_entry_add_key32_ext(
393         void *table,
394         void *key,
395         void *entry,
396         int *key_found,
397         void **entry_ptr)
398 {
399         struct rte_table_hash *f = (struct rte_table_hash *) table;
400         struct rte_bucket_4_32 *bucket0, *bucket, *bucket_prev;
401         uint64_t signature;
402         uint32_t bucket_index, i;
403
404         signature = f->f_hash(key, f->key_size, f->seed);
405         bucket_index = signature & (f->n_buckets - 1);
406         bucket0 = (struct rte_bucket_4_32 *)
407                         &f->memory[bucket_index * f->bucket_size];
408         signature |= RTE_BUCKET_ENTRY_VALID;
409
410         /* Key is present in the bucket */
411         for (bucket = bucket0; bucket != NULL; bucket = bucket->next) {
412                 for (i = 0; i < 4; i++) {
413                         uint64_t bucket_signature = bucket->signature[i];
414                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
415
416                         if ((bucket_signature == signature) &&
417                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
418                                 uint8_t *bucket_data = &bucket->data[i *
419                                         f->entry_size];
420
421                                 memcpy(bucket_data, entry, f->entry_size);
422                                 *key_found = 1;
423                                 *entry_ptr = (void *) bucket_data;
424
425                                 return 0;
426                         }
427                 }
428         }
429
430         /* Key is not present in the bucket */
431         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
432                 bucket_prev = bucket, bucket = bucket->next)
433                 for (i = 0; i < 4; i++) {
434                         uint64_t bucket_signature = bucket->signature[i];
435                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
436
437                         if (bucket_signature == 0) {
438                                 uint8_t *bucket_data = &bucket->data[i *
439                                         f->entry_size];
440
441                                 bucket->signature[i] = signature;
442                                 memcpy(bucket_key, key, f->key_size);
443                                 memcpy(bucket_data, entry, f->entry_size);
444                                 *key_found = 0;
445                                 *entry_ptr = (void *) bucket_data;
446
447                                 return 0;
448                         }
449                 }
450
451         /* Bucket full: extend bucket */
452         if (f->stack_pos > 0) {
453                 bucket_index = f->stack[--f->stack_pos];
454
455                 bucket = (struct rte_bucket_4_32 *)
456                         &f->memory[(f->n_buckets + bucket_index) *
457                         f->bucket_size];
458                 bucket_prev->next = bucket;
459                 bucket_prev->next_valid = 1;
460
461                 bucket->signature[0] = signature;
462                 memcpy(bucket->key[0], key, f->key_size);
463                 memcpy(&bucket->data[0], entry, f->entry_size);
464                 *key_found = 0;
465                 *entry_ptr = (void *) &bucket->data[0];
466                 return 0;
467         }
468
469         return -ENOSPC;
470 }
471
472 static int
473 rte_table_hash_entry_delete_key32_ext(
474         void *table,
475         void *key,
476         int *key_found,
477         void *entry)
478 {
479         struct rte_table_hash *f = (struct rte_table_hash *) table;
480         struct rte_bucket_4_32 *bucket0, *bucket, *bucket_prev;
481         uint64_t signature;
482         uint32_t bucket_index, i;
483
484         signature = f->f_hash(key, f->key_size, f->seed);
485         bucket_index = signature & (f->n_buckets - 1);
486         bucket0 = (struct rte_bucket_4_32 *)
487                 &f->memory[bucket_index * f->bucket_size];
488         signature |= RTE_BUCKET_ENTRY_VALID;
489
490         /* Key is present in the bucket */
491         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
492                 bucket_prev = bucket, bucket = bucket->next)
493                 for (i = 0; i < 4; i++) {
494                         uint64_t bucket_signature = bucket->signature[i];
495                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
496
497                         if ((bucket_signature == signature) &&
498                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
499                                 uint8_t *bucket_data = &bucket->data[i *
500                                         f->entry_size];
501
502                                 bucket->signature[i] = 0;
503                                 *key_found = 1;
504                                 if (entry)
505                                         memcpy(entry, bucket_data,
506                                                 f->entry_size);
507
508                                 if ((bucket->signature[0] == 0) &&
509                                                 (bucket->signature[1] == 0) &&
510                                                 (bucket->signature[2] == 0) &&
511                                                 (bucket->signature[3] == 0) &&
512                                                 (bucket_prev != NULL)) {
513                                         bucket_prev->next = bucket->next;
514                                         bucket_prev->next_valid =
515                                                 bucket->next_valid;
516
517                                         memset(bucket, 0,
518                                                 sizeof(struct rte_bucket_4_32));
519                                         bucket_index = (((uint8_t *)bucket -
520                                                 (uint8_t *)f->memory)/f->bucket_size) - f->n_buckets;
521                                         f->stack[f->stack_pos++] = bucket_index;
522                                 }
523
524                                 return 0;
525                         }
526                 }
527
528         /* Key is not present in the bucket */
529         *key_found = 0;
530         return 0;
531 }
532
/*
 * Compare a 32-byte key against the four slots of a bucket.
 *
 * key_in: key to look for, addressed as four 64-bit words
 * bucket: pointer to the bucket to search
 * pos:    lvalue that receives the matching slot (0..3), or 4 when no valid
 *         slot holds the key; when several slots match, the highest wins
 *
 * A slot only matches when bit 0 of its signature is set, so a free slot
 * never matches, whatever its key bytes are. The comparison is branch-free
 * up to the final selection of 'pos'.
 */
#define lookup_key32_cmp(key_in, bucket, pos)                   \
{                                                               \
        uint64_t cmp_xor[4][4], cmp_or[4], cmp_inv[4];          \
                                                                \
        /* 1 when the slot is not in use, 0 otherwise */        \
        cmp_inv[0] = ((~(bucket)->signature[0]) & 1);           \
        cmp_inv[1] = ((~(bucket)->signature[1]) & 1);           \
        cmp_inv[2] = ((~(bucket)->signature[2]) & 1);           \
        cmp_inv[3] = ((~(bucket)->signature[3]) & 1);           \
                                                                \
        cmp_xor[0][0] = (key_in)[0] ^ (bucket)->key[0][0];      \
        cmp_xor[0][1] = (key_in)[1] ^ (bucket)->key[0][1];      \
        cmp_xor[0][2] = (key_in)[2] ^ (bucket)->key[0][2];      \
        cmp_xor[0][3] = (key_in)[3] ^ (bucket)->key[0][3];      \
                                                                \
        cmp_xor[1][0] = (key_in)[0] ^ (bucket)->key[1][0];      \
        cmp_xor[1][1] = (key_in)[1] ^ (bucket)->key[1][1];      \
        cmp_xor[1][2] = (key_in)[2] ^ (bucket)->key[1][2];      \
        cmp_xor[1][3] = (key_in)[3] ^ (bucket)->key[1][3];      \
                                                                \
        cmp_xor[2][0] = (key_in)[0] ^ (bucket)->key[2][0];      \
        cmp_xor[2][1] = (key_in)[1] ^ (bucket)->key[2][1];      \
        cmp_xor[2][2] = (key_in)[2] ^ (bucket)->key[2][2];      \
        cmp_xor[2][3] = (key_in)[3] ^ (bucket)->key[2][3];      \
                                                                \
        cmp_xor[3][0] = (key_in)[0] ^ (bucket)->key[3][0];      \
        cmp_xor[3][1] = (key_in)[1] ^ (bucket)->key[3][1];      \
        cmp_xor[3][2] = (key_in)[2] ^ (bucket)->key[3][2];      \
        cmp_xor[3][3] = (key_in)[3] ^ (bucket)->key[3][3];      \
                                                                \
        /* Zero only when all key words are equal and the slot is in use */\
        cmp_or[0] = cmp_xor[0][0] | cmp_xor[0][1] | cmp_xor[0][2] |\
                cmp_xor[0][3] | cmp_inv[0];                     \
        cmp_or[1] = cmp_xor[1][0] | cmp_xor[1][1] | cmp_xor[1][2] |\
                cmp_xor[1][3] | cmp_inv[1];                     \
        cmp_or[2] = cmp_xor[2][0] | cmp_xor[2][1] | cmp_xor[2][2] |\
                cmp_xor[2][3] | cmp_inv[2];                     \
        cmp_or[3] = cmp_xor[3][0] | cmp_xor[3][1] | cmp_xor[3][2] |\
                cmp_xor[3][3] | cmp_inv[3];                     \
                                                                \
        (pos) = 4;                                              \
        if (cmp_or[0] == 0)                                     \
                (pos) = 0;                                      \
        if (cmp_or[1] == 0)                                     \
                (pos) = 1;                                      \
        if (cmp_or[2] == 0)                                     \
                (pos) = 2;                                      \
        if (cmp_or[3] == 0)                                     \
                (pos) = 3;                                      \
}
577
/*
 * Pipeline stage 0 for one packet: pick the lowest set bit of 'pkts_mask',
 * clear it, fetch the matching mbuf and prefetch its metadata.
 *
 * pkt0_index: lvalue receiving the index of the selected packet
 * mbuf0:      lvalue receiving pkts[pkt0_index]
 * pkts:       array of mbuf pointers
 * pkts_mask:  lvalue bit mask of pending packets; must not be 0 because
 *             __builtin_ctzll(0) is undefined
 */
#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask)      \
{                                                               \
        uint64_t pkt_mask;                                      \
                                                                \
        (pkt0_index) = __builtin_ctzll(pkts_mask);              \
        pkt_mask = 1LLU << (pkt0_index);                        \
        (pkts_mask) &= ~pkt_mask;                               \
                                                                \
        (mbuf0) = (pkts)[(pkt0_index)];                         \
        rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR((mbuf0), 0)); \
}
589
/*
 * Pipeline stage 1 for one packet: read the precomputed 32-bit signature
 * from the mbuf metadata, derive the bucket and prefetch the bucket's first
 * three cache lines (header and keys).
 *
 * mbuf1:   packet whose metadata holds the signature at f->signature_offset
 * bucket1: lvalue receiving the address of the selected bucket
 * f:       pointer to the table handle
 */
#define lookup1_stage1(mbuf1, bucket1, f)                       \
{                                                               \
        uint64_t signature;                                     \
        uint32_t bucket_index;                                  \
                                                                \
        signature = RTE_MBUF_METADATA_UINT32((mbuf1), (f)->signature_offset);\
        bucket_index = signature & ((f)->n_buckets - 1);        \
        (bucket1) = (struct rte_bucket_4_32 *)                  \
                &(f)->memory[bucket_index * (f)->bucket_size];  \
        rte_prefetch0(bucket1);                                 \
        rte_prefetch0((void *)(((uintptr_t) (bucket1)) + RTE_CACHE_LINE_SIZE));\
        rte_prefetch0((void *)(((uintptr_t) (bucket1)) + 2 * RTE_CACHE_LINE_SIZE));\
}
603
/*
 * Pipeline stage 2 for one packet, LRU tables: compare the packet key with
 * the bucket, record hit/miss and the entry address, and update the bucket's
 * LRU state.
 *
 * pkt2_index:    index of the packet being processed
 * mbuf2:         packet whose metadata holds the key at f->key_offset
 * bucket2:       bucket selected for this packet
 * pkts_mask_out: lvalue bit mask; bit pkt2_index is set on a hit
 * entries:       array receiving, per packet, the address of the entry data
 * f:             pointer to the table handle
 *
 * On a miss 'pos' is 4: signature[4] supplies the 0 hit bit and the entry
 * address stored in 'entries' lies past the bucket's four entries, so
 * callers must consult pkts_mask_out before using it.
 */
#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,          \
        pkts_mask_out, entries, f)                              \
{                                                               \
        void *a;                                                \
        uint64_t pkt_mask;                                      \
        uint64_t *key;                                          \
        uint32_t pos;                                           \
                                                                \
        key = RTE_MBUF_METADATA_UINT64_PTR((mbuf2), (f)->key_offset);\
                                                                \
        lookup_key32_cmp(key, (bucket2), pos);                  \
                                                                \
        pkt_mask = ((bucket2)->signature[pos] & 1LLU) << (pkt2_index);\
        (pkts_mask_out) |= pkt_mask;                            \
                                                                \
        a = (void *) &(bucket2)->data[pos * (f)->entry_size];   \
        rte_prefetch0(a);                                       \
        (entries)[(pkt2_index)] = a;                            \
        lru_update(bucket2, pos);                               \
}
624
/*
 * Pipeline stage 2 for one packet, extendible-bucket tables: compare the
 * packet key with the first bucket of the chain, record hit/miss and the
 * entry address, and remember the next bucket for packets that missed.
 *
 * pkt2_index:    index of the packet being processed
 * mbuf2:         packet whose metadata holds the key at f->key_offset
 * bucket2:       first bucket of the chain selected for this packet
 * pkts_mask_out: lvalue bit mask; bit pkt2_index is set on a hit
 * entries:       array receiving, per packet, the address of the entry data
 * buckets_mask:  lvalue bit mask; bit pkt2_index is set when the packet
 *                missed here and the bucket has a valid successor
 * buckets:       array receiving, per packet, the next bucket of the chain
 * keys:          array receiving, per packet, the key pointer
 * f:             pointer to the table handle
 */
#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out,\
        entries, buckets_mask, buckets, keys, f)                \
{                                                               \
        struct rte_bucket_4_32 *bucket_next;                    \
        void *a;                                                \
        uint64_t pkt_mask, bucket_mask;                         \
        uint64_t *key;                                          \
        uint32_t pos;                                           \
                                                                \
        key = RTE_MBUF_METADATA_UINT64_PTR((mbuf2), (f)->key_offset);\
                                                                \
        lookup_key32_cmp(key, (bucket2), pos);                  \
                                                                \
        pkt_mask = ((bucket2)->signature[pos] & 1LLU) << (pkt2_index);\
        (pkts_mask_out) |= pkt_mask;                            \
                                                                \
        a = (void *) &(bucket2)->data[pos * (f)->entry_size];   \
        rte_prefetch0(a);                                       \
        (entries)[(pkt2_index)] = a;                            \
                                                                \
        bucket_mask = (~pkt_mask) & ((bucket2)->next_valid << (pkt2_index));\
        (buckets_mask) |= bucket_mask;                          \
        bucket_next = (bucket2)->next;                          \
        (buckets)[(pkt2_index)] = bucket_next;                  \
        (keys)[(pkt2_index)] = key;                             \
}
651
/*
 * Continue the lookup of one packet in the next bucket of its chain
 * (extendible-bucket tables).
 *
 * pkt_index:     index of the packet being processed
 * buckets:       per-packet bucket to examine; updated to the bucket after it
 * keys:          per-packet key pointer
 * pkts_mask_out: lvalue bit mask; bit pkt_index is set on a hit
 * entries:       array receiving, per packet, the address of the entry data
 * buckets_mask:  lvalue bit mask; bit pkt_index is OR-ed in when the packet
 *                missed here and the bucket has a valid successor. Bits are
 *                only ever set by this macro, never cleared.
 * f:             pointer to the table handle
 *
 * The successor's first three cache lines are prefetched unconditionally;
 * when there is no successor the prefetch addresses are derived from NULL.
 */
#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, \
        entries, buckets_mask, f)                               \
{                                                               \
        struct rte_bucket_4_32 *bucket, *bucket_next;           \
        void *a;                                                \
        uint64_t pkt_mask, bucket_mask;                         \
        uint64_t *key;                                          \
        uint32_t pos;                                           \
                                                                \
        bucket = (buckets)[(pkt_index)];                        \
        key = (keys)[(pkt_index)];                              \
                                                                \
        lookup_key32_cmp(key, bucket, pos);                     \
                                                                \
        pkt_mask = (bucket->signature[pos] & 1LLU) << (pkt_index);\
        (pkts_mask_out) |= pkt_mask;                            \
                                                                \
        a = (void *) &bucket->data[pos * (f)->entry_size];      \
        rte_prefetch0(a);                                       \
        (entries)[(pkt_index)] = a;                             \
                                                                \
        bucket_mask = (~pkt_mask) & (bucket->next_valid << (pkt_index));\
        (buckets_mask) |= bucket_mask;                          \
        bucket_next = bucket->next;                             \
        rte_prefetch0(bucket_next);                             \
        rte_prefetch0((void *)(((uintptr_t) bucket_next) + RTE_CACHE_LINE_SIZE));\
        rte_prefetch0((void *)(((uintptr_t) bucket_next) +      \
                2 * RTE_CACHE_LINE_SIZE));                      \
        (buckets)[(pkt_index)] = bucket_next;                   \
        (keys)[(pkt_index)] = key;                              \
}
683
/*
 * Pipeline stage 0 for two packets: take the two lowest set bits of
 * 'pkts_mask', clear them, fetch the matching mbufs and prefetch their
 * metadata.
 *
 * pkt00_index, pkt01_index: lvalues receiving the two packet indexes
 * mbuf00, mbuf01:           lvalues receiving the two mbuf pointers
 * pkts:                     array of mbuf pointers
 * pkts_mask:                lvalue bit mask of pending packets; must have at
 *                           least two bits set because __builtin_ctzll(0)
 *                           is undefined
 */
#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
        pkts, pkts_mask)                                        \
{                                                               \
        uint64_t pkt00_mask, pkt01_mask;                        \
                                                                \
        (pkt00_index) = __builtin_ctzll(pkts_mask);             \
        pkt00_mask = 1LLU << (pkt00_index);                     \
        (pkts_mask) &= ~pkt00_mask;                             \
                                                                \
        (mbuf00) = (pkts)[(pkt00_index)];                       \
        rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR((mbuf00), 0));\
                                                                \
        (pkt01_index) = __builtin_ctzll(pkts_mask);             \
        pkt01_mask = 1LLU << (pkt01_index);                     \
        (pkts_mask) &= ~pkt01_mask;                             \
                                                                \
        (mbuf01) = (pkts)[(pkt01_index)];                       \
        rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR((mbuf01), 0));\
}
703
/*
 * Two-packet pipeline, stage 0, tolerant of an odd number of pending
 * packets: pop the lowest set bit of pkts_mask as packet 00, then pop the
 * next set bit as packet 01. When no second bit is left, packet 01 is made
 * an alias of packet 00 (same index, same mbuf), so the later stages simply
 * process that packet twice.
 *
 * pkt00_index, pkt01_index: (out) bit positions of the packets selected.
 * mbuf00, mbuf01: (out) pkts[pkt00_index] and pkts[pkt01_index].
 * pkts: array of mbuf pointers indexed by bit position.
 * pkts_mask: (in/out) mask of pending packets; selected bits are cleared.
 *
 * The caller must guarantee that at least one bit is set in pkts_mask.
 *
 * __builtin_ctzll() has an undefined result for a zero argument, so the
 * emptiness test is done before the second bit scan rather than patching
 * the index afterwards.
 *
 * The body is wrapped in do { } while (0) so that the expansion followed by
 * the caller's semicolon is exactly one statement.
 */
#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, \
	mbuf00, mbuf01, pkts, pkts_mask) \
do { \
	uint64_t pkt00_mask, pkt01_mask; \
	\
	pkt00_index = __builtin_ctzll(pkts_mask); \
	pkt00_mask = 1LLU << pkt00_index; \
	pkts_mask &= ~pkt00_mask; \
	\
	mbuf00 = (pkts)[pkt00_index]; \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0)); \
	\
	if (pkts_mask != 0) \
		pkt01_index = __builtin_ctzll(pkts_mask); \
	else \
		pkt01_index = pkt00_index; \
	\
	pkt01_mask = 1LLU << pkt01_index; \
	pkts_mask &= ~pkt01_mask; \
	\
	mbuf01 = (pkts)[pkt01_index]; \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0)); \
} while (0)
726
/*
 * Two-packet pipeline, stage 1: for each of the two packets, read the 32-bit
 * signature stored in the mbuf metadata at f->signature_offset, derive the
 * bucket index as (signature & (f->n_buckets - 1)), locate the bucket at
 * f->memory[index * f->bucket_size] and prefetch three consecutive cache
 * lines starting at the bucket address.
 *
 * mbuf10, mbuf11: packets whose signatures are read.
 * bucket10, bucket11: (out) struct rte_bucket_4_32 pointers for the packets.
 * f: table pointer providing signature_offset, n_buckets, bucket_size and
 *   memory.
 *
 * NOTE(review): the index mask only selects a valid bucket if n_buckets is
 * a power of two - presumably enforced at table creation; confirm there.
 *
 * The body is wrapped in do { } while (0) so that the expansion followed by
 * the caller's semicolon is exactly one statement.
 */
#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f) \
do { \
	uint64_t signature10, signature11; \
	uint32_t bucket10_index, bucket11_index; \
	\
	signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, \
		(f)->signature_offset); \
	bucket10_index = signature10 & ((f)->n_buckets - 1); \
	bucket10 = (struct rte_bucket_4_32 *) \
		&(f)->memory[bucket10_index * (f)->bucket_size]; \
	rte_prefetch0(bucket10); \
	rte_prefetch0((void *)(((uintptr_t) bucket10) + \
		RTE_CACHE_LINE_SIZE)); \
	rte_prefetch0((void *)(((uintptr_t) bucket10) + \
		2 * RTE_CACHE_LINE_SIZE)); \
	\
	signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, \
		(f)->signature_offset); \
	bucket11_index = signature11 & ((f)->n_buckets - 1); \
	bucket11 = (struct rte_bucket_4_32 *) \
		&(f)->memory[bucket11_index * (f)->bucket_size]; \
	rte_prefetch0(bucket11); \
	rte_prefetch0((void *)(((uintptr_t) bucket11) + \
		RTE_CACHE_LINE_SIZE)); \
	rte_prefetch0((void *)(((uintptr_t) bucket11) + \
		2 * RTE_CACHE_LINE_SIZE)); \
} while (0)
748
/*
 * Two-packet pipeline, stage 2, LRU table flavour: for each of the two
 * packets, take the key pointer from the mbuf metadata at f->key_offset,
 * let lookup_key32_cmp() pick a slot position in the bucket, and then:
 *   - OR the low bit of bucket->signature[pos], shifted to the packet's bit
 *     position, into pkts_mask_out;
 *   - store the address of bucket->data[pos * f->entry_size] in
 *     entries[pkt_index] (written for hits and misses alike) and prefetch
 *     it;
 *   - call lru_update(bucket, pos).
 *
 * pkt20_index, pkt21_index: bit positions of the two packets.
 * mbuf20, mbuf21: the two packets.
 * bucket20, bucket21: buckets selected for them by stage 1.
 * pkts_mask_out: (in/out) accumulated hit mask.
 * entries: (out) per-packet entry pointer array.
 * f: table pointer providing key_offset and entry_size.
 *
 * The body is wrapped in do { } while (0) so that the expansion followed by
 * the caller's semicolon is exactly one statement.
 */
#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, \
	bucket20, bucket21, pkts_mask_out, entries, f) \
do { \
	void *a20, *a21; \
	uint64_t pkt20_mask, pkt21_mask; \
	uint64_t *key20, *key21; \
	uint32_t pos20, pos21; \
	\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, (f)->key_offset); \
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, (f)->key_offset); \
	\
	lookup_key32_cmp(key20, bucket20, pos20); \
	lookup_key32_cmp(key21, bucket21, pos21); \
	\
	pkt20_mask = ((bucket20)->signature[pos20] & 1LLU) << pkt20_index; \
	pkt21_mask = ((bucket21)->signature[pos21] & 1LLU) << pkt21_index; \
	pkts_mask_out |= pkt20_mask | pkt21_mask; \
	\
	a20 = (void *) &(bucket20)->data[pos20 * (f)->entry_size]; \
	a21 = (void *) &(bucket21)->data[pos21 * (f)->entry_size]; \
	rte_prefetch0(a20); \
	rte_prefetch0(a21); \
	(entries)[pkt20_index] = a20; \
	(entries)[pkt21_index] = a21; \
	lru_update(bucket20, pos20); \
	lru_update(bucket21, pos21); \
} while (0)
776
/*
 * Two-packet pipeline, stage 2, extendible-bucket table flavour: for each of
 * the two packets, take the key pointer from the mbuf metadata at
 * f->key_offset, let lookup_key32_cmp() pick a slot position in the bucket,
 * and then:
 *   - OR the low bit of bucket->signature[pos], shifted to the packet's bit
 *     position, into pkts_mask_out;
 *   - store the address of bucket->data[pos * f->entry_size] in
 *     entries[pkt_index] (written for hits and misses alike) and prefetch
 *     it;
 *   - if the packet missed and bucket->next_valid is set, set the packet's
 *     bit in buckets_mask so the chained bucket gets examined later;
 *   - record bucket->next in buckets[pkt_index] and the key pointer in
 *     keys[pkt_index] (both written unconditionally).
 *
 * pkt20_index, pkt21_index: bit positions of the two packets.
 * mbuf20, mbuf21: the two packets.
 * bucket20, bucket21: buckets selected for them by stage 1.
 * pkts_mask_out: (in/out) accumulated hit mask.
 * entries: (out) per-packet entry pointer array.
 * buckets_mask: (in/out) mask of packets that still have a bucket to visit.
 * buckets, keys: (out) per-packet next-bucket and key pointer arrays.
 * f: table pointer providing key_offset and entry_size.
 *
 * The body is wrapped in do { } while (0) so that the expansion followed by
 * the caller's semicolon is exactly one statement.
 */
#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, \
	bucket20, bucket21, pkts_mask_out, entries, buckets_mask, \
	buckets, keys, f) \
do { \
	struct rte_bucket_4_32 *bucket20_next, *bucket21_next; \
	void *a20, *a21; \
	uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask; \
	uint64_t *key20, *key21; \
	uint32_t pos20, pos21; \
	\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, (f)->key_offset); \
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, (f)->key_offset); \
	\
	lookup_key32_cmp(key20, bucket20, pos20); \
	lookup_key32_cmp(key21, bucket21, pos21); \
	\
	pkt20_mask = ((bucket20)->signature[pos20] & 1LLU) << pkt20_index; \
	pkt21_mask = ((bucket21)->signature[pos21] & 1LLU) << pkt21_index; \
	pkts_mask_out |= pkt20_mask | pkt21_mask; \
	\
	a20 = (void *) &(bucket20)->data[pos20 * (f)->entry_size]; \
	a21 = (void *) &(bucket21)->data[pos21 * (f)->entry_size]; \
	rte_prefetch0(a20); \
	rte_prefetch0(a21); \
	(entries)[pkt20_index] = a20; \
	(entries)[pkt21_index] = a21; \
	\
	bucket20_mask = (~pkt20_mask) & \
		((bucket20)->next_valid << pkt20_index); \
	bucket21_mask = (~pkt21_mask) & \
		((bucket21)->next_valid << pkt21_index); \
	buckets_mask |= bucket20_mask | bucket21_mask; \
	bucket20_next = (bucket20)->next; \
	bucket21_next = (bucket21)->next; \
	(buckets)[pkt20_index] = bucket20_next; \
	(buckets)[pkt21_index] = bucket21_next; \
	(keys)[pkt20_index] = key20; \
	(keys)[pkt21_index] = key21; \
} while (0)
813
/*
 * Burst lookup for the 32-byte key hash table, LRU flavour.
 *
 * table: table handle, used as a struct rte_table_hash *.
 * pkts: packet array indexed by bit position in pkts_mask.
 * pkts_mask: one bit per packet to look up.
 * lookup_hit_mask: (out) receives the accumulated hit mask. On the pipelined
 *   path, bit i is the low bit of the bucket signature slot that
 *   lookup_key32_cmp() selected for pkts[i].
 * entries: (out) per-packet entry pointers. On the pipelined path entries[i]
 *   is written for every packet processed, hit or miss; presumably callers
 *   should only consult it when bit i of the hit mask is set.
 *
 * Bursts with fewer than 5 packets are handled one packet at a time with the
 * lookup1_* macros. Larger bursts go through a three-stage software pipeline
 * that handles two packets per stage, so that the prefetches issued by one
 * stage have time to complete before the next stage touches the data.
 *
 * Always returns 0.
 */
static int
rte_table_hash_lookup_key32_lru(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_32 *bucket10, *bucket11;
	struct rte_bucket_4_32 *bucket20, *bucket21;
	struct rte_mbuf *mbuf00, *mbuf01;
	struct rte_mbuf *mbuf10, *mbuf11;
	struct rte_mbuf *mbuf20, *mbuf21;
	uint32_t pkt00_index, pkt01_index;
	uint32_t pkt10_index, pkt11_index;
	uint32_t pkt20_index, pkt21_index;
	uint64_t pkts_mask_out = 0;

	/*
	 * Small burst: the pipeline fill below pops four packets through
	 * lookup2_stage0(), which does not cope with an empty mask, so fall
	 * back to the single-packet stages.
	 */
	if (__builtin_popcountll(pkts_mask) < 5) {
		while (pkts_mask) {
			struct rte_bucket_4_32 *bucket;
			struct rte_mbuf *mbuf;
			uint32_t pkt_index;

			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
			lookup1_stage1(mbuf, bucket, f);
			lookup1_stage2_lru(pkt_index, mbuf, bucket,
				pkts_mask_out, entries, f);
		}

		*lookup_hit_mask = pkts_mask_out;
		return 0;
	}

	/*
	 * Pipeline fill: first pair enters stage 0.
	 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask);

	/* Advance: stage 0 -> stage 1 */
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Second pair enters stage 0, first pair runs stage 1 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask);
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/*
	 * Pipeline run: all three stages are busy while packets remain.
	 */
	while (pkts_mask) {
		/* Advance: stage 1 -> stage 2 */
		bucket20 = bucket10;
		bucket21 = bucket11;
		mbuf20 = mbuf10;
		mbuf21 = mbuf11;
		pkt20_index = pkt10_index;
		pkt21_index = pkt11_index;

		/* Advance: stage 0 -> stage 1 */
		mbuf10 = mbuf00;
		mbuf11 = mbuf01;
		pkt10_index = pkt00_index;
		pkt11_index = pkt01_index;

		/* Stage 0 (duplicates the last packet if the count is odd) */
		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
			mbuf00, mbuf01, pkts, pkts_mask);

		/* Stage 1 */
		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

		/* Stage 2 */
		lookup2_stage2_lru(pkt20_index, pkt21_index,
			mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out,
			entries, f);
	}

	/*
	 * Pipeline flush: drain the two pairs still in flight.
	 */
	/* Advance: stage 1 -> stage 2 */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;

	/* Advance: stage 0 -> stage 1 */
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/* Stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index,
		mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out, entries, f);

	/* Advance: stage 1 -> stage 2 (last pair) */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;

	/* Stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index,
		mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out, entries, f);

	*lookup_hit_mask = pkts_mask_out;
	return 0;
} /* rte_table_hash_lookup_key32_lru() */
935
936 static int
937 rte_table_hash_lookup_key32_ext(
938         void *table,
939         struct rte_mbuf **pkts,
940         uint64_t pkts_mask,
941         uint64_t *lookup_hit_mask,
942         void **entries)
943 {
944         struct rte_table_hash *f = (struct rte_table_hash *) table;
945         struct rte_bucket_4_32 *bucket10, *bucket11, *bucket20, *bucket21;
946         struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
947         uint32_t pkt00_index, pkt01_index, pkt10_index;
948         uint32_t pkt11_index, pkt20_index, pkt21_index;
949         uint64_t pkts_mask_out = 0, buckets_mask = 0;
950         struct rte_bucket_4_32 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
951         uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
952
953         /* Cannot run the pipeline with less than 5 packets */
954         if (__builtin_popcountll(pkts_mask) < 5) {
955                 for ( ; pkts_mask; ) {
956                         struct rte_bucket_4_32 *bucket;
957                         struct rte_mbuf *mbuf;
958                         uint32_t pkt_index;
959
960                         lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
961                         lookup1_stage1(mbuf, bucket, f);
962                         lookup1_stage2_ext(pkt_index, mbuf, bucket,
963                                 pkts_mask_out, entries, buckets_mask, buckets,
964                                 keys, f);
965                 }
966
967                 goto grind_next_buckets;
968         }
969
970         /*
971          * Pipeline fill
972          *
973          */
974         /* Pipeline stage 0 */
975         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
976                 pkts_mask);
977
978         /* Pipeline feed */
979         mbuf10 = mbuf00;
980         mbuf11 = mbuf01;
981         pkt10_index = pkt00_index;
982         pkt11_index = pkt01_index;
983
984         /* Pipeline stage 0 */
985         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
986                 pkts_mask);
987
988         /* Pipeline stage 1 */
989         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
990
991         /*
992          * Pipeline run
993          *
994          */
995         for ( ; pkts_mask; ) {
996                 /* Pipeline feed */
997                 bucket20 = bucket10;
998                 bucket21 = bucket11;
999                 mbuf20 = mbuf10;
1000                 mbuf21 = mbuf11;
1001                 mbuf10 = mbuf00;
1002                 mbuf11 = mbuf01;
1003                 pkt20_index = pkt10_index;
1004                 pkt21_index = pkt11_index;
1005                 pkt10_index = pkt00_index;
1006                 pkt11_index = pkt01_index;
1007
1008                 /* Pipeline stage 0 */
1009                 lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
1010                         mbuf00, mbuf01, pkts, pkts_mask);
1011
1012                 /* Pipeline stage 1 */
1013                 lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1014
1015                 /* Pipeline stage 2 */
1016                 lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1017                         bucket20, bucket21, pkts_mask_out, entries,
1018                         buckets_mask, buckets, keys, f);
1019         }
1020
1021         /*
1022          * Pipeline flush
1023          *
1024          */
1025         /* Pipeline feed */
1026         bucket20 = bucket10;
1027         bucket21 = bucket11;
1028         mbuf20 = mbuf10;
1029         mbuf21 = mbuf11;
1030         mbuf10 = mbuf00;
1031         mbuf11 = mbuf01;
1032         pkt20_index = pkt10_index;
1033         pkt21_index = pkt11_index;
1034         pkt10_index = pkt00_index;
1035         pkt11_index = pkt01_index;
1036
1037         /* Pipeline stage 1 */
1038         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1039
1040         /* Pipeline stage 2 */
1041         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1042                 bucket20, bucket21, pkts_mask_out, entries,
1043                 buckets_mask, buckets, keys, f);
1044
1045         /* Pipeline feed */
1046         bucket20 = bucket10;
1047         bucket21 = bucket11;
1048         mbuf20 = mbuf10;
1049         mbuf21 = mbuf11;
1050         pkt20_index = pkt10_index;
1051         pkt21_index = pkt11_index;
1052
1053         /* Pipeline stage 2 */
1054         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1055                 bucket20, bucket21, pkts_mask_out, entries,
1056                 buckets_mask, buckets, keys, f);
1057
1058 grind_next_buckets:
1059         /* Grind next buckets */
1060         for ( ; buckets_mask; ) {
1061                 uint64_t buckets_mask_next = 0;
1062
1063                 for ( ; buckets_mask; ) {
1064                         uint64_t pkt_mask;
1065                         uint32_t pkt_index;
1066
1067                         pkt_index = __builtin_ctzll(buckets_mask);
1068                         pkt_mask = 1LLU << pkt_index;
1069                         buckets_mask &= ~pkt_mask;
1070
1071                         lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
1072                                 entries, buckets_mask_next, f);
1073                 }
1074
1075                 buckets_mask = buckets_mask_next;
1076         }
1077
1078         *lookup_hit_mask = pkts_mask_out;
1079         return 0;
1080 } /* rte_table_hash_lookup_key32_ext() */
1081
1082 struct rte_table_ops rte_table_hash_key32_lru_ops = {
1083         .f_create = rte_table_hash_create_key32_lru,
1084         .f_free = rte_table_hash_free_key32_lru,
1085         .f_add = rte_table_hash_entry_add_key32_lru,
1086         .f_delete = rte_table_hash_entry_delete_key32_lru,
1087         .f_lookup = rte_table_hash_lookup_key32_lru,
1088 };
1089
1090 struct rte_table_ops rte_table_hash_key32_ext_ops = {
1091         .f_create = rte_table_hash_create_key32_ext,
1092         .f_free = rte_table_hash_free_key32_ext,
1093         .f_add = rte_table_hash_entry_add_key32_ext,
1094         .f_delete = rte_table_hash_entry_delete_key32_ext,
1095         .f_lookup = rte_table_hash_lookup_key32_ext,
1096 };