port: fix unaligned access to metadata
[dpdk.git] / lib / librte_table / rte_table_hash_key16.c
1 /*-
2  *       BSD LICENSE
3  *
4  *       Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *       All rights reserved.
6  *
7  *       Redistribution and use in source and binary forms, with or without
8  *       modification, are permitted provided that the following conditions
9  *       are met:
10  *
11  *      * Redistributions of source code must retain the above copyright
12  *               notice, this list of conditions and the following disclaimer.
13  *      * Redistributions in binary form must reproduce the above copyright
14  *               notice, this list of conditions and the following disclaimer in
15  *               the documentation and/or other materials provided with the
16  *               distribution.
17  *      * Neither the name of Intel Corporation nor the names of its
18  *               contributors may be used to endorse or promote products derived
19  *               from this software without specific prior written permission.
20  *
21  *       THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *       "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *       LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *       A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *       OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *       SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *       LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *       DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *       THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *       (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *       OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <string.h>
34 #include <stdio.h>
35
36 #include <rte_common.h>
37 #include <rte_mbuf.h>
38 #include <rte_memory.h>
39 #include <rte_malloc.h>
40 #include <rte_log.h>
41
42 #include "rte_table_hash.h"
43 #include "rte_lru.h"
44
/*
 * Key size, in bytes, for the tables implemented in this file (the create
 * functions below set key_size to the same value).
 */
#define RTE_TABLE_HASH_KEY_SIZE 16

/*
 * Bit 0 of a stored signature. It is OR-ed into every signature written to a
 * bucket, so an all-zero signature always denotes a free entry.
 */
#define RTE_BUCKET_ENTRY_VALID 0x1LLU
48
/*
 * Hash bucket holding up to four entries with 16-byte keys.
 *
 * The header (signatures, chaining fields and keys) is followed in memory by
 * the entry data: entry i starts at data[i * entry_size], where entry_size is
 * the per-table value stored in struct rte_table_hash.
 */
struct rte_bucket_4_16 {
	/* Cache line 0 */

	/*
	 * Signature of each entry; 0 means the entry is free. The fifth
	 * element is never written by the add paths and is the one read by
	 * the lookup macros when no key matched (pos == 4).
	 */
	uint64_t signature[4 + 1];
	/* LRU state; presumably maintained by the lru_*() helpers of rte_lru.h */
	uint64_t lru_list;
	/* Next bucket of the chain (extendible-bucket tables only) */
	struct rte_bucket_4_16 *next;
	/* Set to 1 when next points to a chained bucket */
	uint64_t next_valid;

	/* Cache line 1 */
	uint64_t key[4][2];

	/* Cache line 2 */
	uint8_t data[];
};
62
63 struct rte_table_hash {
64         /* Input parameters */
65         uint32_t n_buckets;
66         uint32_t n_entries_per_bucket;
67         uint32_t key_size;
68         uint32_t entry_size;
69         uint32_t bucket_size;
70         uint32_t signature_offset;
71         uint32_t key_offset;
72         rte_table_hash_op_hash f_hash;
73         uint64_t seed;
74
75         /* Extendible buckets */
76         uint32_t n_buckets_ext;
77         uint32_t stack_pos;
78         uint32_t *stack;
79
80         /* Lookup table */
81         uint8_t memory[0] __rte_cache_aligned;
82 };
83
84 static int
85 check_params_create_lru(struct rte_table_hash_key16_lru_params *params) {
86         /* n_entries */
87         if (params->n_entries == 0) {
88                 RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
89                 return -EINVAL;
90         }
91
92         /* f_hash */
93         if (params->f_hash == NULL) {
94                 RTE_LOG(ERR, TABLE,
95                         "%s: f_hash function pointer is NULL\n", __func__);
96                 return -EINVAL;
97         }
98
99         return 0;
100 }
101
102 static void *
103 rte_table_hash_create_key16_lru(void *params,
104                 int socket_id,
105                 uint32_t entry_size)
106 {
107         struct rte_table_hash_key16_lru_params *p =
108                         (struct rte_table_hash_key16_lru_params *) params;
109         struct rte_table_hash *f;
110         uint32_t n_buckets, n_entries_per_bucket,
111                         key_size, bucket_size_cl, total_size, i;
112
113         /* Check input parameters */
114         if ((check_params_create_lru(p) != 0) ||
115                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
116                 ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0))
117                 return NULL;
118         n_entries_per_bucket = 4;
119         key_size = 16;
120
121         /* Memory allocation */
122         n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
123                 n_entries_per_bucket);
124         bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket
125                 * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
126         total_size = sizeof(struct rte_table_hash) + n_buckets *
127                 bucket_size_cl * RTE_CACHE_LINE_SIZE;
128
129         f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
130         if (f == NULL) {
131                 RTE_LOG(ERR, TABLE,
132                 "%s: Cannot allocate %u bytes for hash table\n",
133                 __func__, total_size);
134                 return NULL;
135         }
136         RTE_LOG(INFO, TABLE,
137                 "%s: Hash table memory footprint is %u bytes\n",
138                 __func__, total_size);
139
140         /* Memory initialization */
141         f->n_buckets = n_buckets;
142         f->n_entries_per_bucket = n_entries_per_bucket;
143         f->key_size = key_size;
144         f->entry_size = entry_size;
145         f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
146         f->signature_offset = p->signature_offset;
147         f->key_offset = p->key_offset;
148         f->f_hash = p->f_hash;
149         f->seed = p->seed;
150
151         for (i = 0; i < n_buckets; i++) {
152                 struct rte_bucket_4_16 *bucket;
153
154                 bucket = (struct rte_bucket_4_16 *) &f->memory[i *
155                         f->bucket_size];
156                 lru_init(bucket);
157         }
158
159         return f;
160 }
161
162 static int
163 rte_table_hash_free_key16_lru(void *table)
164 {
165         struct rte_table_hash *f = (struct rte_table_hash *) table;
166
167         /* Check input parameters */
168         if (f == NULL) {
169                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
170                 return -EINVAL;
171         }
172
173         rte_free(f);
174         return 0;
175 }
176
177 static int
178 rte_table_hash_entry_add_key16_lru(
179         void *table,
180         void *key,
181         void *entry,
182         int *key_found,
183         void **entry_ptr)
184 {
185         struct rte_table_hash *f = (struct rte_table_hash *) table;
186         struct rte_bucket_4_16 *bucket;
187         uint64_t signature, pos;
188         uint32_t bucket_index, i;
189
190         signature = f->f_hash(key, f->key_size, f->seed);
191         bucket_index = signature & (f->n_buckets - 1);
192         bucket = (struct rte_bucket_4_16 *)
193                         &f->memory[bucket_index * f->bucket_size];
194         signature |= RTE_BUCKET_ENTRY_VALID;
195
196         /* Key is present in the bucket */
197         for (i = 0; i < 4; i++) {
198                 uint64_t bucket_signature = bucket->signature[i];
199                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
200
201                 if ((bucket_signature == signature) &&
202                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
203                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
204
205                         memcpy(bucket_data, entry, f->entry_size);
206                         lru_update(bucket, i);
207                         *key_found = 1;
208                         *entry_ptr = (void *) bucket_data;
209                         return 0;
210                 }
211         }
212
213         /* Key is not present in the bucket */
214         for (i = 0; i < 4; i++) {
215                 uint64_t bucket_signature = bucket->signature[i];
216                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
217
218                 if (bucket_signature == 0) {
219                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
220
221                         bucket->signature[i] = signature;
222                         memcpy(bucket_key, key, f->key_size);
223                         memcpy(bucket_data, entry, f->entry_size);
224                         lru_update(bucket, i);
225                         *key_found = 0;
226                         *entry_ptr = (void *) bucket_data;
227
228                         return 0;
229                 }
230         }
231
232         /* Bucket full: replace LRU entry */
233         pos = lru_pos(bucket);
234         bucket->signature[pos] = signature;
235         memcpy(bucket->key[pos], key, f->key_size);
236         memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
237         lru_update(bucket, pos);
238         *key_found = 0;
239         *entry_ptr = (void *) &bucket->data[pos * f->entry_size];
240
241         return 0;
242 }
243
244 static int
245 rte_table_hash_entry_delete_key16_lru(
246         void *table,
247         void *key,
248         int *key_found,
249         void *entry)
250 {
251         struct rte_table_hash *f = (struct rte_table_hash *) table;
252         struct rte_bucket_4_16 *bucket;
253         uint64_t signature;
254         uint32_t bucket_index, i;
255
256         signature = f->f_hash(key, f->key_size, f->seed);
257         bucket_index = signature & (f->n_buckets - 1);
258         bucket = (struct rte_bucket_4_16 *)
259                         &f->memory[bucket_index * f->bucket_size];
260         signature |= RTE_BUCKET_ENTRY_VALID;
261
262         /* Key is present in the bucket */
263         for (i = 0; i < 4; i++) {
264                 uint64_t bucket_signature = bucket->signature[i];
265                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
266
267                 if ((bucket_signature == signature) &&
268                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
269                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
270
271                         bucket->signature[i] = 0;
272                         *key_found = 1;
273                         if (entry)
274                                 memcpy(entry, bucket_data, f->entry_size);
275                         return 0;
276                 }
277         }
278
279         /* Key is not present in the bucket */
280         *key_found = 0;
281         return 0;
282 }
283
284 static int
285 check_params_create_ext(struct rte_table_hash_key16_ext_params *params) {
286         /* n_entries */
287         if (params->n_entries == 0) {
288                 RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
289                 return -EINVAL;
290         }
291
292         /* n_entries_ext */
293         if (params->n_entries_ext == 0) {
294                 RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
295                 return -EINVAL;
296         }
297
298         /* f_hash */
299         if (params->f_hash == NULL) {
300                 RTE_LOG(ERR, TABLE,
301                         "%s: f_hash function pointer is NULL\n", __func__);
302                 return -EINVAL;
303         }
304
305         return 0;
306 }
307
308 static void *
309 rte_table_hash_create_key16_ext(void *params,
310                 int socket_id,
311                 uint32_t entry_size)
312 {
313         struct rte_table_hash_key16_ext_params *p =
314                         (struct rte_table_hash_key16_ext_params *) params;
315         struct rte_table_hash *f;
316         uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket, key_size,
317                         bucket_size_cl, stack_size_cl, total_size, i;
318
319         /* Check input parameters */
320         if ((check_params_create_ext(p) != 0) ||
321                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
322                 ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0))
323                 return NULL;
324
325         n_entries_per_bucket = 4;
326         key_size = 16;
327
328         /* Memory allocation */
329         n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
330                 n_entries_per_bucket);
331         n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) /
332                 n_entries_per_bucket;
333         bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket
334                 * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
335         stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1)
336                 / RTE_CACHE_LINE_SIZE;
337         total_size = sizeof(struct rte_table_hash) +
338                 ((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) *
339                 RTE_CACHE_LINE_SIZE;
340
341         f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
342         if (f == NULL) {
343                 RTE_LOG(ERR, TABLE,
344                         "%s: Cannot allocate %u bytes for hash table\n",
345                         __func__, total_size);
346                 return NULL;
347         }
348         RTE_LOG(INFO, TABLE,
349                 "%s: Hash table memory footprint is %u bytes\n",
350                 __func__, total_size);
351
352         /* Memory initialization */
353         f->n_buckets = n_buckets;
354         f->n_entries_per_bucket = n_entries_per_bucket;
355         f->key_size = key_size;
356         f->entry_size = entry_size;
357         f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
358         f->signature_offset = p->signature_offset;
359         f->key_offset = p->key_offset;
360         f->f_hash = p->f_hash;
361         f->seed = p->seed;
362
363         f->n_buckets_ext = n_buckets_ext;
364         f->stack_pos = n_buckets_ext;
365         f->stack = (uint32_t *)
366                 &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];
367
368         for (i = 0; i < n_buckets_ext; i++)
369                 f->stack[i] = i;
370
371         return f;
372 }
373
374 static int
375 rte_table_hash_free_key16_ext(void *table)
376 {
377         struct rte_table_hash *f = (struct rte_table_hash *) table;
378
379         /* Check input parameters */
380         if (f == NULL) {
381                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
382                 return -EINVAL;
383         }
384
385         rte_free(f);
386         return 0;
387 }
388
389 static int
390 rte_table_hash_entry_add_key16_ext(
391         void *table,
392         void *key,
393         void *entry,
394         int *key_found,
395         void **entry_ptr)
396 {
397         struct rte_table_hash *f = (struct rte_table_hash *) table;
398         struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev;
399         uint64_t signature;
400         uint32_t bucket_index, i;
401
402         signature = f->f_hash(key, f->key_size, f->seed);
403         bucket_index = signature & (f->n_buckets - 1);
404         bucket0 = (struct rte_bucket_4_16 *)
405                         &f->memory[bucket_index * f->bucket_size];
406         signature |= RTE_BUCKET_ENTRY_VALID;
407
408         /* Key is present in the bucket */
409         for (bucket = bucket0; bucket != NULL; bucket = bucket->next)
410                 for (i = 0; i < 4; i++) {
411                         uint64_t bucket_signature = bucket->signature[i];
412                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
413
414                         if ((bucket_signature == signature) &&
415                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
416                                 uint8_t *bucket_data = &bucket->data[i *
417                                         f->entry_size];
418
419                                 memcpy(bucket_data, entry, f->entry_size);
420                                 *key_found = 1;
421                                 *entry_ptr = (void *) bucket_data;
422                                 return 0;
423                         }
424                 }
425
426         /* Key is not present in the bucket */
427         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
428                          bucket_prev = bucket, bucket = bucket->next)
429                 for (i = 0; i < 4; i++) {
430                         uint64_t bucket_signature = bucket->signature[i];
431                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
432
433                         if (bucket_signature == 0) {
434                                 uint8_t *bucket_data = &bucket->data[i *
435                                         f->entry_size];
436
437                                 bucket->signature[i] = signature;
438                                 memcpy(bucket_key, key, f->key_size);
439                                 memcpy(bucket_data, entry, f->entry_size);
440                                 *key_found = 0;
441                                 *entry_ptr = (void *) bucket_data;
442
443                                 return 0;
444                         }
445                 }
446
447         /* Bucket full: extend bucket */
448         if (f->stack_pos > 0) {
449                 bucket_index = f->stack[--f->stack_pos];
450
451                 bucket = (struct rte_bucket_4_16 *) &f->memory[(f->n_buckets +
452                         bucket_index) * f->bucket_size];
453                 bucket_prev->next = bucket;
454                 bucket_prev->next_valid = 1;
455
456                 bucket->signature[0] = signature;
457                 memcpy(bucket->key[0], key, f->key_size);
458                 memcpy(&bucket->data[0], entry, f->entry_size);
459                 *key_found = 0;
460                 *entry_ptr = (void *) &bucket->data[0];
461                 return 0;
462         }
463
464         return -ENOSPC;
465 }
466
467 static int
468 rte_table_hash_entry_delete_key16_ext(
469         void *table,
470         void *key,
471         int *key_found,
472         void *entry)
473 {
474         struct rte_table_hash *f = (struct rte_table_hash *) table;
475         struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev;
476         uint64_t signature;
477         uint32_t bucket_index, i;
478
479         signature = f->f_hash(key, f->key_size, f->seed);
480         bucket_index = signature & (f->n_buckets - 1);
481         bucket0 = (struct rte_bucket_4_16 *)
482                 &f->memory[bucket_index * f->bucket_size];
483         signature |= RTE_BUCKET_ENTRY_VALID;
484
485         /* Key is present in the bucket */
486         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
487                 bucket_prev = bucket, bucket = bucket->next)
488                 for (i = 0; i < 4; i++) {
489                         uint64_t bucket_signature = bucket->signature[i];
490                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
491
492                         if ((bucket_signature == signature) &&
493                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
494                                 uint8_t *bucket_data = &bucket->data[i *
495                                         f->entry_size];
496
497                                 bucket->signature[i] = 0;
498                                 *key_found = 1;
499                                 if (entry)
500                                         memcpy(entry, bucket_data,
501                                         f->entry_size);
502
503                                 if ((bucket->signature[0] == 0) &&
504                                         (bucket->signature[1] == 0) &&
505                                         (bucket->signature[2] == 0) &&
506                                         (bucket->signature[3] == 0) &&
507                                         (bucket_prev != NULL)) {
508                                         bucket_prev->next = bucket->next;
509                                         bucket_prev->next_valid =
510                                                 bucket->next_valid;
511
512                                         memset(bucket, 0,
513                                                 sizeof(struct rte_bucket_4_16));
514                                         bucket_index = (((uint8_t *)bucket -
515                                                 (uint8_t *)f->memory)/f->bucket_size) - f->n_buckets;
516                                         f->stack[f->stack_pos++] = bucket_index;
517                                 }
518
519                                 return 0;
520                         }
521                 }
522
523         /* Key is not present in the bucket */
524         *key_found = 0;
525         return 0;
526 }
527
/*
 * Compare a 16-byte key against the four keys of a bucket.
 *
 * key_in: pointer to two uint64_t words holding the key being looked up
 * bucket: pointer to the bucket to search
 * pos:    lvalue receiving the index (0..3) of the valid entry whose key
 *         equals key_in, or 4 when there is none; when several entries
 *         match, the highest index is reported
 *
 * An entry can only match when bit 0 of its signature is set. The temporaries
 * carry a k16_ prefix because the names "xor" and "or" are macros once
 * <iso646.h> is included.
 */
#define lookup_key16_cmp(key_in, bucket, pos) \
{ \
	uint64_t k16_xor[4][2], k16_or[4], k16_inv[4]; \
 \
	k16_inv[0] = (~(bucket)->signature[0]) & 1; \
	k16_inv[1] = (~(bucket)->signature[1]) & 1; \
	k16_inv[2] = (~(bucket)->signature[2]) & 1; \
	k16_inv[3] = (~(bucket)->signature[3]) & 1; \
 \
	k16_xor[0][0] = (key_in)[0] ^ (bucket)->key[0][0]; \
	k16_xor[0][1] = (key_in)[1] ^ (bucket)->key[0][1]; \
 \
	k16_xor[1][0] = (key_in)[0] ^ (bucket)->key[1][0]; \
	k16_xor[1][1] = (key_in)[1] ^ (bucket)->key[1][1]; \
 \
	k16_xor[2][0] = (key_in)[0] ^ (bucket)->key[2][0]; \
	k16_xor[2][1] = (key_in)[1] ^ (bucket)->key[2][1]; \
 \
	k16_xor[3][0] = (key_in)[0] ^ (bucket)->key[3][0]; \
	k16_xor[3][1] = (key_in)[1] ^ (bucket)->key[3][1]; \
 \
	k16_or[0] = k16_xor[0][0] | k16_xor[0][1] | k16_inv[0]; \
	k16_or[1] = k16_xor[1][0] | k16_xor[1][1] | k16_inv[1]; \
	k16_or[2] = k16_xor[2][0] | k16_xor[2][1] | k16_inv[2]; \
	k16_or[3] = k16_xor[3][0] | k16_xor[3][1] | k16_inv[3]; \
 \
	(pos) = 4; \
	if (k16_or[0] == 0) \
		(pos) = 0; \
	if (k16_or[1] == 0) \
		(pos) = 1; \
	if (k16_or[2] == 0) \
		(pos) = 2; \
	if (k16_or[3] == 0) \
		(pos) = 3; \
}
564
/*
 * Single-packet pipeline, stage 0: take the lowest set bit of pkts_mask as
 * the next packet index, clear that bit, fetch the packet and prefetch its
 * metadata. pkts_mask must not be 0 on entry.
 */
#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask) \
{ \
	pkt0_index = __builtin_ctzll(pkts_mask); \
	pkts_mask &= ~(1LLU << pkt0_index); \
 \
	mbuf0 = pkts[pkt0_index]; \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, 0)); \
}
576
/*
 * Single-packet pipeline, stage 1: read the signature from the packet
 * metadata (at f->signature_offset), derive the bucket from it and prefetch
 * the first two cache lines of that bucket.
 */
#define lookup1_stage1(mbuf1, bucket1, f) \
{ \
	uint64_t k16_sig; \
	uint32_t k16_idx; \
 \
	k16_sig = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset); \
	k16_idx = k16_sig & (f->n_buckets - 1); \
 \
	bucket1 = (struct rte_bucket_4_16 *) \
		&f->memory[k16_idx * f->bucket_size]; \
 \
	rte_prefetch0(bucket1); \
	rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE)); \
}
589
/*
 * Single-packet pipeline, stage 2 (LRU tables): compare the packet key (read
 * from the metadata at f->key_offset) with the bucket keys, set the packet
 * bit in pkts_mask_out on a hit, store the entry address in entries[] and
 * refresh the bucket LRU state.
 *
 * On a miss pos is 4: the mask bit comes from signature[4] and the stored
 * entry address is not meaningful.
 */
#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2, \
		pkts_mask_out, entries, f) \
{ \
	uint64_t *key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset); \
	void *k16_entry; \
	uint32_t pos; \
 \
	lookup_key16_cmp(key, bucket2, pos); \
 \
	pkts_mask_out |= (bucket2->signature[pos] & 1LLU) << pkt2_index; \
 \
	k16_entry = (void *) &bucket2->data[pos * f->entry_size]; \
	rte_prefetch0(k16_entry); \
	entries[pkt2_index] = k16_entry; \
	lru_update(bucket2, pos); \
}
610
/*
 * Single-packet pipeline, stage 2 (extendible-bucket tables): compare the
 * packet key with the bucket keys, set the packet bit in pkts_mask_out on a
 * hit and store the entry address in entries[].
 *
 * When the packet missed and the bucket has a valid successor, the packet bit
 * is set in buckets_mask; the successor and the key pointer are always saved
 * in buckets[] and keys[] for the follow-up pass over the bucket chains.
 */
#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out, entries, \
	buckets_mask, buckets, keys, f) \
{ \
	uint64_t *key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset); \
	uint64_t k16_hit; \
	void *k16_entry; \
	uint32_t pos; \
 \
	lookup_key16_cmp(key, bucket2, pos); \
 \
	k16_hit = (bucket2->signature[pos] & 1LLU) << pkt2_index; \
	pkts_mask_out |= k16_hit; \
 \
	k16_entry = (void *) &bucket2->data[pos * f->entry_size]; \
	rte_prefetch0(k16_entry); \
	entries[pkt2_index] = k16_entry; \
 \
	buckets_mask |= (~k16_hit) & (bucket2->next_valid << pkt2_index); \
	buckets[pkt2_index] = bucket2->next; \
	keys[pkt2_index] = key; \
}
637
/*
 * Process one packet against the bucket saved for it in buckets[]: compare
 * its saved key with the bucket keys, set the packet bit in pkts_mask_out on
 * a hit and store the entry address in entries[].
 *
 * When the packet missed and the bucket has a valid successor, the packet bit
 * is set in buckets_mask. The successor is prefetched (two cache lines) and
 * saved back in buckets[]; keys[] is rewritten with the same key pointer.
 */
#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries, \
	buckets_mask, f) \
{ \
	struct rte_bucket_4_16 *bucket = buckets[pkt_index]; \
	struct rte_bucket_4_16 *bucket_next; \
	uint64_t *key = keys[pkt_index]; \
	uint64_t k16_hit; \
	void *k16_entry; \
	uint32_t pos; \
 \
	lookup_key16_cmp(key, bucket, pos); \
 \
	k16_hit = (bucket->signature[pos] & 1LLU) << pkt_index; \
	pkts_mask_out |= k16_hit; \
 \
	k16_entry = (void *) &bucket->data[pos * f->entry_size]; \
	rte_prefetch0(k16_entry); \
	entries[pkt_index] = k16_entry; \
 \
	buckets_mask |= (~k16_hit) & (bucket->next_valid << pkt_index); \
 \
	bucket_next = bucket->next; \
	rte_prefetch0(bucket_next); \
	rte_prefetch0((void *)(((uintptr_t) bucket_next) + RTE_CACHE_LINE_SIZE)); \
	buckets[pkt_index] = bucket_next; \
	keys[pkt_index] = key; \
}
667
/*
 * Two-packet pipeline, stage 0: pop the two lowest set bits of pkts_mask as
 * packet indexes, fetch both packets and prefetch their metadata.
 * pkts_mask must have at least two bits set on entry.
 */
#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, \
		pkts, pkts_mask) \
{ \
	/* first packet of the pair */ \
	pkt00_index = __builtin_ctzll(pkts_mask); \
	pkts_mask &= ~(1LLU << pkt00_index); \
 \
	mbuf00 = pkts[pkt00_index]; \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0)); \
 \
	/* second packet of the pair */ \
	pkt01_index = __builtin_ctzll(pkts_mask); \
	pkts_mask &= ~(1LLU << pkt01_index); \
 \
	mbuf01 = pkts[pkt01_index]; \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0)); \
}
687
/*
 * Two-packet pipeline, stage 0, tolerating a single remaining packet: pop the
 * lowest set bit of pkts_mask for the first packet; if no bit is left, the
 * second packet is the same as the first one, otherwise the next lowest bit
 * is popped for it. Both packets are fetched and their metadata prefetched.
 *
 * pkts_mask must not be 0 on entry. The mask is tested before the second
 * __builtin_ctzll() call because the builtin is undefined for a zero
 * argument.
 */
#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, \
		mbuf00, mbuf01, pkts, pkts_mask) \
{ \
	uint64_t pkt00_mask, pkt01_mask; \
 \
	pkt00_index = __builtin_ctzll(pkts_mask); \
	pkt00_mask = 1LLU << pkt00_index; \
	pkts_mask &= ~pkt00_mask; \
 \
	mbuf00 = pkts[pkt00_index]; \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0)); \
 \
	if (pkts_mask == 0) \
		pkt01_index = pkt00_index; \
	else \
		pkt01_index = __builtin_ctzll(pkts_mask); \
	pkt01_mask = 1LLU << pkt01_index; \
	pkts_mask &= ~pkt01_mask; \
 \
	mbuf01 = pkts[pkt01_index]; \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0)); \
}
709
/*
 * Pipeline stage 1, two packets per call.
 *
 * For each of mbuf10 / mbuf11: read the 32-bit signature found in the mbuf
 * metadata at f->signature_offset, AND it with (f->n_buckets - 1) to get a
 * bucket index, point bucket10 / bucket11 at byte offset
 * index * f->bucket_size inside f->memory, then prefetch that address and
 * the one RTE_CACHE_LINE_SIZE bytes after it.
 *
 * NOTE(review): the AND only acts as a modulo if n_buckets is a power of
 * two -- presumably enforced at table creation; confirm there.
 */
#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)   \
{                                                               \
        {                                                       \
                uint64_t sig10 =                                \
                        RTE_MBUF_METADATA_UINT32(mbuf10,        \
                                (f)->signature_offset);         \
                uint32_t slot10 = sig10 & ((f)->n_buckets - 1); \
                                                                \
                bucket10 = (struct rte_bucket_4_16 *)           \
                        &(f)->memory[slot10 * (f)->bucket_size];\
                rte_prefetch0(bucket10);                        \
                rte_prefetch0((void *)(((uintptr_t) bucket10) + \
                        RTE_CACHE_LINE_SIZE));                  \
        }                                                       \
        {                                                       \
                uint64_t sig11 =                                \
                        RTE_MBUF_METADATA_UINT32(mbuf11,        \
                                (f)->signature_offset);         \
                uint32_t slot11 = sig11 & ((f)->n_buckets - 1); \
                                                                \
                bucket11 = (struct rte_bucket_4_16 *)           \
                        &(f)->memory[slot11 * (f)->bucket_size];\
                rte_prefetch0(bucket11);                        \
                rte_prefetch0((void *)(((uintptr_t) bucket11) + \
                        RTE_CACHE_LINE_SIZE));                  \
        }                                                       \
}
729
/*
 * Pipeline stage 2 for the LRU table, two packets per call.
 *
 * For each packet: take the key pointer from the mbuf metadata at
 * f->key_offset, run lookup_key16_cmp() against the packet's bucket to get
 * a slot position, OR bit 0 of that slot's signature into pkts_mask_out at
 * the packet's bit position, store the address of the slot's data
 * (pos * f->entry_size bytes into bucket->data) in entries[] and prefetch
 * it, then call lru_update() on the bucket with that position.
 *
 * entries[] is written for both packets regardless of the signature bit.
 */
#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
                bucket20, bucket21, pkts_mask_out, entries, f)  \
{                                                               \
        uint64_t *key20, *key21;                                \
        uint32_t pos20, pos21;                                  \
        void *entry20, *entry21;                                \
                                                                \
        key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, (f)->key_offset);\
        key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, (f)->key_offset);\
                                                                \
        /* The compare macro is expected to fill in pos20 / pos21 */\
        lookup_key16_cmp(key20, bucket20, pos20);               \
        lookup_key16_cmp(key21, bucket21, pos21);               \
                                                                \
        /* Bit 0 of the chosen signature is the per-packet result bit */\
        pkts_mask_out |=                                        \
                ((bucket20->signature[pos20] & 1LLU) << pkt20_index) |\
                ((bucket21->signature[pos21] & 1LLU) << pkt21_index);\
                                                                \
        entry20 = (void *) &bucket20->data[pos20 * (f)->entry_size];\
        entry21 = (void *) &bucket21->data[pos21 * (f)->entry_size];\
        rte_prefetch0(entry20);                                 \
        rte_prefetch0(entry21);                                 \
        entries[pkt20_index] = entry20;                         \
        entries[pkt21_index] = entry21;                         \
                                                                \
        lru_update(bucket20, pos20);                            \
        lru_update(bucket21, pos21);                            \
}
757
/*
 * Pipeline stage 2 for the extendible-bucket table, two packets per call.
 *
 * For each packet: take the key pointer from the mbuf metadata at
 * f->key_offset, run lookup_key16_cmp() against the packet's bucket to get
 * a slot position, OR bit 0 of that slot's signature into pkts_mask_out at
 * the packet's bit position, and store the address of the slot's data
 * (pos * f->entry_size bytes into bucket->data) in entries[] after
 * prefetching it.
 *
 * A packet whose result bit is clear and whose bucket has next_valid set
 * gets its bit set in buckets_mask. buckets[] and keys[] always record the
 * bucket's next pointer and the key pointer for the packet, so a later pass
 * can continue the search.
 */
#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
        bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f) \
{                                                               \
        uint64_t *key20, *key21;                                \
        uint32_t pos20, pos21;                                  \
        uint64_t hit20, hit21;                                  \
        void *entry20, *entry21;                                \
                                                                \
        key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, (f)->key_offset);\
        key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, (f)->key_offset);\
                                                                \
        /* The compare macro is expected to fill in pos20 / pos21 */\
        lookup_key16_cmp(key20, bucket20, pos20);               \
        lookup_key16_cmp(key21, bucket21, pos21);               \
                                                                \
        /* Bit 0 of the chosen signature is the per-packet result bit */\
        hit20 = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\
        hit21 = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\
        pkts_mask_out |= hit20 | hit21;                         \
                                                                \
        entry20 = (void *) &bucket20->data[pos20 * (f)->entry_size];\
        entry21 = (void *) &bucket21->data[pos21 * (f)->entry_size];\
        rte_prefetch0(entry20);                                 \
        rte_prefetch0(entry21);                                 \
        entries[pkt20_index] = entry20;                         \
        entries[pkt21_index] = entry21;                         \
                                                                \
        /* Packets without a result bit continue in the next bucket */\
        buckets_mask |=                                         \
                ((~hit20) & (bucket20->next_valid << pkt20_index)) |\
                ((~hit21) & (bucket21->next_valid << pkt21_index));\
        buckets[pkt20_index] = bucket20->next;                  \
        buckets[pkt21_index] = bucket21->next;                  \
        keys[pkt20_index] = key20;                              \
        keys[pkt21_index] = key21;                              \
}
794
/*
 * Burst lookup for the 16-byte key hash table, LRU flavour.
 *
 * table:           table handle, used as a struct rte_table_hash *
 * pkts:            packet array, indexed by the bit positions set in pkts_mask
 * pkts_mask:       bitmask selecting which pkts[] slots are looked up
 * lookup_hit_mask: output, receives the mask accumulated by the stage 2 macro
 * entries:         output, entries[i] is written for every looked-up packet i
 *
 * Bursts of fewer than 5 packets are processed one packet at a time with
 * the lookup1_* macros. Larger bursts run through a three-stage software
 * pipeline, two packets wide, built from the lookup2_* macros.
 *
 * Always returns 0.
 */
static int
rte_table_hash_lookup_key16_lru(
        void *table,
        struct rte_mbuf **pkts,
        uint64_t pkts_mask,
        uint64_t *lookup_hit_mask,
        void **entries)
{
        struct rte_table_hash *f = table;
        struct rte_mbuf *mbuf00, *mbuf01;
        struct rte_mbuf *mbuf10, *mbuf11;
        struct rte_mbuf *mbuf20, *mbuf21;
        struct rte_bucket_4_16 *bucket10, *bucket11;
        struct rte_bucket_4_16 *bucket20, *bucket21;
        uint32_t pkt00_index, pkt01_index;
        uint32_t pkt10_index, pkt11_index;
        uint32_t pkt20_index, pkt21_index;
        uint64_t pkts_mask_out = 0;

        /* Too few packets for the pipeline: handle them one by one */
        if (__builtin_popcountll(pkts_mask) < 5) {
                while (pkts_mask) {
                        struct rte_bucket_4_16 *bucket;
                        struct rte_mbuf *mbuf;
                        uint32_t pkt_index;

                        lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
                        lookup1_stage1(mbuf, bucket, f);
                        lookup1_stage2_lru(pkt_index, mbuf, bucket,
                                pkts_mask_out, entries, f);
                }

                *lookup_hit_mask = pkts_mask_out;
                return 0;
        }

        /*
         * Pipeline fill: the first pair reaches stage 1 while the second
         * pair enters stage 0.
         */

        /* Stage 0, first pair */
        lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
                pkts_mask);

        /* Advance stage 0 -> stage 1 */
        mbuf10 = mbuf00;
        mbuf11 = mbuf01;
        pkt10_index = pkt00_index;
        pkt11_index = pkt01_index;

        /* Stage 0, second pair */
        lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
                pkts_mask);

        /* Stage 1, first pair */
        lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

        /*
         * Pipeline run: all three stages are busy for as long as packets
         * remain in the mask.
         */
        while (pkts_mask) {
                /* Advance stage 1 -> stage 2, then stage 0 -> stage 1 */
                bucket20 = bucket10;
                bucket21 = bucket11;
                mbuf20 = mbuf10;
                mbuf21 = mbuf11;
                mbuf10 = mbuf00;
                mbuf11 = mbuf01;
                pkt20_index = pkt10_index;
                pkt21_index = pkt11_index;
                pkt10_index = pkt00_index;
                pkt11_index = pkt01_index;

                /* Stage 0: a lone last packet is paired with itself */
                lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
                        mbuf00, mbuf01, pkts, pkts_mask);

                /* Stage 1 */
                lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

                /* Stage 2 */
                lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
                        bucket20, bucket21, pkts_mask_out, entries, f);
        }

        /*
         * Pipeline flush: drain the two pairs still in flight.
         */

        /* Advance stage 1 -> stage 2, then stage 0 -> stage 1 */
        bucket20 = bucket10;
        bucket21 = bucket11;
        mbuf20 = mbuf10;
        mbuf21 = mbuf11;
        mbuf10 = mbuf00;
        mbuf11 = mbuf01;
        pkt20_index = pkt10_index;
        pkt21_index = pkt11_index;
        pkt10_index = pkt00_index;
        pkt11_index = pkt01_index;

        /* Stage 1 */
        lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

        /* Stage 2 */
        lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
                bucket20, bucket21, pkts_mask_out, entries, f);

        /* Advance stage 1 -> stage 2 */
        bucket20 = bucket10;
        bucket21 = bucket11;
        mbuf20 = mbuf10;
        mbuf21 = mbuf11;
        pkt20_index = pkt10_index;
        pkt21_index = pkt11_index;

        /* Stage 2, last pair */
        lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
                bucket20, bucket21, pkts_mask_out, entries, f);

        *lookup_hit_mask = pkts_mask_out;
        return 0;
} /* rte_table_hash_lookup_key16_lru() */
915
916 static int
917 rte_table_hash_lookup_key16_ext(
918         void *table,
919         struct rte_mbuf **pkts,
920         uint64_t pkts_mask,
921         uint64_t *lookup_hit_mask,
922         void **entries)
923 {
924         struct rte_table_hash *f = (struct rte_table_hash *) table;
925         struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
926         struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
927         uint32_t pkt00_index, pkt01_index, pkt10_index;
928         uint32_t pkt11_index, pkt20_index, pkt21_index;
929         uint64_t pkts_mask_out = 0, buckets_mask = 0;
930         struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
931         uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
932
933         /* Cannot run the pipeline with less than 5 packets */
934         if (__builtin_popcountll(pkts_mask) < 5) {
935                 for ( ; pkts_mask; ) {
936                         struct rte_bucket_4_16 *bucket;
937                         struct rte_mbuf *mbuf;
938                         uint32_t pkt_index;
939
940                         lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
941                         lookup1_stage1(mbuf, bucket, f);
942                         lookup1_stage2_ext(pkt_index, mbuf, bucket,
943                                 pkts_mask_out, entries, buckets_mask,
944                                 buckets, keys, f);
945                 }
946
947                 goto grind_next_buckets;
948         }
949
950         /*
951          * Pipeline fill
952          *
953          */
954         /* Pipeline stage 0 */
955         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
956                 pkts_mask);
957
958         /* Pipeline feed */
959         mbuf10 = mbuf00;
960         mbuf11 = mbuf01;
961         pkt10_index = pkt00_index;
962         pkt11_index = pkt01_index;
963
964         /* Pipeline stage 0 */
965         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
966                 pkts_mask);
967
968         /* Pipeline stage 1 */
969         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
970
971         /*
972          * Pipeline run
973          *
974          */
975         for ( ; pkts_mask; ) {
976                 /* Pipeline feed */
977                 bucket20 = bucket10;
978                 bucket21 = bucket11;
979                 mbuf20 = mbuf10;
980                 mbuf21 = mbuf11;
981                 mbuf10 = mbuf00;
982                 mbuf11 = mbuf01;
983                 pkt20_index = pkt10_index;
984                 pkt21_index = pkt11_index;
985                 pkt10_index = pkt00_index;
986                 pkt11_index = pkt01_index;
987
988                 /* Pipeline stage 0 */
989                 lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
990                         mbuf00, mbuf01, pkts, pkts_mask);
991
992                 /* Pipeline stage 1 */
993                 lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
994
995                 /* Pipeline stage 2 */
996                 lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
997                         bucket20, bucket21, pkts_mask_out, entries,
998                         buckets_mask, buckets, keys, f);
999         }
1000
1001         /*
1002          * Pipeline flush
1003          *
1004          */
1005         /* Pipeline feed */
1006         bucket20 = bucket10;
1007         bucket21 = bucket11;
1008         mbuf20 = mbuf10;
1009         mbuf21 = mbuf11;
1010         mbuf10 = mbuf00;
1011         mbuf11 = mbuf01;
1012         pkt20_index = pkt10_index;
1013         pkt21_index = pkt11_index;
1014         pkt10_index = pkt00_index;
1015         pkt11_index = pkt01_index;
1016
1017         /* Pipeline stage 1 */
1018         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1019
1020         /* Pipeline stage 2 */
1021         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1022                 bucket20, bucket21, pkts_mask_out, entries,
1023                 buckets_mask, buckets, keys, f);
1024
1025         /* Pipeline feed */
1026         bucket20 = bucket10;
1027         bucket21 = bucket11;
1028         mbuf20 = mbuf10;
1029         mbuf21 = mbuf11;
1030         pkt20_index = pkt10_index;
1031         pkt21_index = pkt11_index;
1032
1033         /* Pipeline stage 2 */
1034         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1035                 bucket20, bucket21, pkts_mask_out, entries,
1036                 buckets_mask, buckets, keys, f);
1037
1038 grind_next_buckets:
1039         /* Grind next buckets */
1040         for ( ; buckets_mask; ) {
1041                 uint64_t buckets_mask_next = 0;
1042
1043                 for ( ; buckets_mask; ) {
1044                         uint64_t pkt_mask;
1045                         uint32_t pkt_index;
1046
1047                         pkt_index = __builtin_ctzll(buckets_mask);
1048                         pkt_mask = 1LLU << pkt_index;
1049                         buckets_mask &= ~pkt_mask;
1050
1051                         lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
1052                                 entries, buckets_mask_next, f);
1053                 }
1054
1055                 buckets_mask = buckets_mask_next;
1056         }
1057
1058         *lookup_hit_mask = pkts_mask_out;
1059         return 0;
1060 } /* rte_table_hash_lookup_key16_ext() */
1061
1062 struct rte_table_ops rte_table_hash_key16_lru_ops = {
1063         .f_create = rte_table_hash_create_key16_lru,
1064         .f_free = rte_table_hash_free_key16_lru,
1065         .f_add = rte_table_hash_entry_add_key16_lru,
1066         .f_delete = rte_table_hash_entry_delete_key16_lru,
1067         .f_lookup = rte_table_hash_lookup_key16_lru,
1068 };
1069
1070 struct rte_table_ops rte_table_hash_key16_ext_ops = {
1071         .f_create = rte_table_hash_create_key16_ext,
1072         .f_free = rte_table_hash_free_key16_ext,
1073         .f_add = rte_table_hash_entry_add_key16_ext,
1074         .f_delete = rte_table_hash_entry_delete_key16_ext,
1075         .f_lookup = rte_table_hash_lookup_key16_ext,
1076 };