lib: fix cache alignment of structures
[dpdk.git] / lib / librte_table / rte_table_hash_key32.c
1 /*-
2  *       BSD LICENSE
3  *
4  *       Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *       All rights reserved.
6  *
7  *       Redistribution and use in source and binary forms, with or without
8  *       modification, are permitted provided that the following conditions
9  *       are met:
10  *
11  *      * Redistributions of source code must retain the above copyright
12  *               notice, this list of conditions and the following disclaimer.
13  *      * Redistributions in binary form must reproduce the above copyright
14  *               notice, this list of conditions and the following disclaimer in
15  *               the documentation and/or other materials provided with the
16  *               distribution.
17  *      * Neither the name of Intel Corporation nor the names of its
18  *               contributors may be used to endorse or promote products derived
19  *               from this software without specific prior written permission.
20  *
21  *       THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *       "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *       LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *       A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *       OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *       SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *       LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *       DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *       THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *       (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *       OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <string.h>
34 #include <stdio.h>
35
36 #include <rte_common.h>
37 #include <rte_mbuf.h>
38 #include <rte_memory.h>
39 #include <rte_malloc.h>
40 #include <rte_log.h>
41
42 #include "rte_table_hash.h"
43 #include "rte_lru.h"
44
/* Key size, in bytes, for this table flavor (4 x 64-bit words per key). */
#define RTE_TABLE_HASH_KEY_SIZE 32

/* Bit 0 of a stored signature; OR-ed into every signature written to a slot. */
#define RTE_BUCKET_ENTRY_VALID 0x1LLU
48
/*
 * Hash bucket with four slots for 32-byte keys.
 *
 * The fixed header (signatures, LRU state, chaining) is followed by the four
 * keys and then by the per-slot entry data, which the code in this file
 * addresses as data[slot * entry_size].
 */
struct rte_bucket_4_32 {
	/* Cache line 0 */
	/*
	 * Per-slot signatures. Zero marks a free slot; occupied slots have
	 * RTE_BUCKET_ENTRY_VALID (bit 0) set. Element [4] is never written by
	 * this file and is the one read by the lookup macros when the key
	 * comparison reports "no slot matched" (pos == 4).
	 */
	uint64_t signature[4 + 1];
	/* LRU ordering state, consumed by lru_pos() / lru_update() */
	uint64_t lru_list;
	/* Next bucket in the chain (extendible-bucket tables only) */
	struct rte_bucket_4_32 *next;
	/* Set to 1 when 'next' points to a chained bucket, 0 otherwise */
	uint64_t next_valid;

	/* Cache lines 1 and 2 */
	uint64_t key[4][4];

	/* Cache line 3 */
	/* C99 flexible array member: entry data follows the fixed header */
	uint8_t data[];
};
62
63 struct rte_table_hash {
64         /* Input parameters */
65         uint32_t n_buckets;
66         uint32_t n_entries_per_bucket;
67         uint32_t key_size;
68         uint32_t entry_size;
69         uint32_t bucket_size;
70         uint32_t signature_offset;
71         uint32_t key_offset;
72         rte_table_hash_op_hash f_hash;
73         uint64_t seed;
74
75         /* Extendible buckets */
76         uint32_t n_buckets_ext;
77         uint32_t stack_pos;
78         uint32_t *stack;
79
80         /* Lookup table */
81         uint8_t memory[0] __rte_cache_aligned;
82 };
83
84 static int
85 check_params_create_lru(struct rte_table_hash_key32_lru_params *params) {
86         /* n_entries */
87         if (params->n_entries == 0) {
88                 RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
89                 return -EINVAL;
90         }
91
92         /* signature offset */
93         if ((params->signature_offset & 0x3) != 0) {
94                 RTE_LOG(ERR, TABLE, "%s: invalid signature offset\n", __func__);
95                 return -EINVAL;
96         }
97
98         /* key offset */
99         if ((params->key_offset & 0x7) != 0) {
100                 RTE_LOG(ERR, TABLE, "%s: invalid key offset\n", __func__);
101                 return -EINVAL;
102         }
103
104         /* f_hash */
105         if (params->f_hash == NULL) {
106                 RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
107                         __func__);
108                 return -EINVAL;
109         }
110
111         return 0;
112 }
113
114 static void *
115 rte_table_hash_create_key32_lru(void *params,
116                 int socket_id,
117                 uint32_t entry_size)
118 {
119         struct rte_table_hash_key32_lru_params *p =
120                 (struct rte_table_hash_key32_lru_params *) params;
121         struct rte_table_hash *f;
122         uint32_t n_buckets, n_entries_per_bucket, key_size, bucket_size_cl;
123         uint32_t total_size, i;
124
125         /* Check input parameters */
126         if ((check_params_create_lru(p) != 0) ||
127                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
128                 ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) {
129                 return NULL;
130         }
131         n_entries_per_bucket = 4;
132         key_size = 32;
133
134         /* Memory allocation */
135         n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
136                 n_entries_per_bucket);
137         bucket_size_cl = (sizeof(struct rte_bucket_4_32) + n_entries_per_bucket
138                 * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
139         total_size = sizeof(struct rte_table_hash) + n_buckets *
140                 bucket_size_cl * RTE_CACHE_LINE_SIZE;
141
142         f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
143         if (f == NULL) {
144                 RTE_LOG(ERR, TABLE,
145                         "%s: Cannot allocate %u bytes for hash table\n",
146                         __func__, total_size);
147                 return NULL;
148         }
149         RTE_LOG(INFO, TABLE,
150                 "%s: Hash table memory footprint is %u bytes\n", __func__,
151                 total_size);
152
153         /* Memory initialization */
154         f->n_buckets = n_buckets;
155         f->n_entries_per_bucket = n_entries_per_bucket;
156         f->key_size = key_size;
157         f->entry_size = entry_size;
158         f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
159         f->signature_offset = p->signature_offset;
160         f->key_offset = p->key_offset;
161         f->f_hash = p->f_hash;
162         f->seed = p->seed;
163
164         for (i = 0; i < n_buckets; i++) {
165                 struct rte_bucket_4_32 *bucket;
166
167                 bucket = (struct rte_bucket_4_32 *) &f->memory[i *
168                         f->bucket_size];
169                 bucket->lru_list = 0x0000000100020003LLU;
170         }
171
172         return f;
173 }
174
175 static int
176 rte_table_hash_free_key32_lru(void *table)
177 {
178         struct rte_table_hash *f = (struct rte_table_hash *) table;
179
180         /* Check input parameters */
181         if (f == NULL) {
182                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
183                 return -EINVAL;
184         }
185
186         rte_free(f);
187         return 0;
188 }
189
190 static int
191 rte_table_hash_entry_add_key32_lru(
192         void *table,
193         void *key,
194         void *entry,
195         int *key_found,
196         void **entry_ptr)
197 {
198         struct rte_table_hash *f = (struct rte_table_hash *) table;
199         struct rte_bucket_4_32 *bucket;
200         uint64_t signature, pos;
201         uint32_t bucket_index, i;
202
203         signature = f->f_hash(key, f->key_size, f->seed);
204         bucket_index = signature & (f->n_buckets - 1);
205         bucket = (struct rte_bucket_4_32 *)
206                 &f->memory[bucket_index * f->bucket_size];
207         signature |= RTE_BUCKET_ENTRY_VALID;
208
209         /* Key is present in the bucket */
210         for (i = 0; i < 4; i++) {
211                 uint64_t bucket_signature = bucket->signature[i];
212                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
213
214                 if ((bucket_signature == signature) &&
215                         (memcmp(key, bucket_key, f->key_size) == 0)) {
216                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
217
218                         memcpy(bucket_data, entry, f->entry_size);
219                         lru_update(bucket, i);
220                         *key_found = 1;
221                         *entry_ptr = (void *) bucket_data;
222                         return 0;
223                 }
224         }
225
226         /* Key is not present in the bucket */
227         for (i = 0; i < 4; i++) {
228                 uint64_t bucket_signature = bucket->signature[i];
229                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
230
231                 if (bucket_signature == 0) {
232                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
233
234                         bucket->signature[i] = signature;
235                         memcpy(bucket_key, key, f->key_size);
236                         memcpy(bucket_data, entry, f->entry_size);
237                         lru_update(bucket, i);
238                         *key_found = 0;
239                         *entry_ptr = (void *) bucket_data;
240
241                         return 0;
242                 }
243         }
244
245         /* Bucket full: replace LRU entry */
246         pos = lru_pos(bucket);
247         bucket->signature[pos] = signature;
248         memcpy(bucket->key[pos], key, f->key_size);
249         memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
250         lru_update(bucket, pos);
251         *key_found      = 0;
252         *entry_ptr = (void *) &bucket->data[pos * f->entry_size];
253
254         return 0;
255 }
256
257 static int
258 rte_table_hash_entry_delete_key32_lru(
259         void *table,
260         void *key,
261         int *key_found,
262         void *entry)
263 {
264         struct rte_table_hash *f = (struct rte_table_hash *) table;
265         struct rte_bucket_4_32 *bucket;
266         uint64_t signature;
267         uint32_t bucket_index, i;
268
269         signature = f->f_hash(key, f->key_size, f->seed);
270         bucket_index = signature & (f->n_buckets - 1);
271         bucket = (struct rte_bucket_4_32 *)
272                 &f->memory[bucket_index * f->bucket_size];
273         signature |= RTE_BUCKET_ENTRY_VALID;
274
275         /* Key is present in the bucket */
276         for (i = 0; i < 4; i++) {
277                 uint64_t bucket_signature = bucket->signature[i];
278                 uint8_t *bucket_key = (uint8_t *) bucket->key[i];
279
280                 if ((bucket_signature == signature) &&
281                         (memcmp(key, bucket_key, f->key_size) == 0)) {
282                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
283
284                         bucket->signature[i] = 0;
285                         *key_found = 1;
286                         if (entry)
287                                 memcpy(entry, bucket_data, f->entry_size);
288
289                         return 0;
290                 }
291         }
292
293         /* Key is not present in the bucket */
294         *key_found = 0;
295         return 0;
296 }
297
298 static int
299 check_params_create_ext(struct rte_table_hash_key32_ext_params *params) {
300         /* n_entries */
301         if (params->n_entries == 0) {
302                 RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
303                 return -EINVAL;
304         }
305
306         /* n_entries_ext */
307         if (params->n_entries_ext == 0) {
308                 RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
309                 return -EINVAL;
310         }
311
312         /* signature offset */
313         if ((params->signature_offset & 0x3) != 0) {
314                 RTE_LOG(ERR, TABLE, "%s: invalid signature offset\n", __func__);
315                 return -EINVAL;
316         }
317
318         /* key offset */
319         if ((params->key_offset & 0x7) != 0) {
320                 RTE_LOG(ERR, TABLE, "%s: invalid key offset\n", __func__);
321                 return -EINVAL;
322         }
323
324         /* f_hash */
325         if (params->f_hash == NULL) {
326                 RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
327                         __func__);
328                 return -EINVAL;
329         }
330
331         return 0;
332 }
333
334 static void *
335 rte_table_hash_create_key32_ext(void *params,
336         int socket_id,
337         uint32_t entry_size)
338 {
339         struct rte_table_hash_key32_ext_params *p =
340                         (struct rte_table_hash_key32_ext_params *) params;
341         struct rte_table_hash *f;
342         uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket;
343         uint32_t key_size, bucket_size_cl, stack_size_cl, total_size, i;
344
345         /* Check input parameters */
346         if ((check_params_create_ext(p) != 0) ||
347                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
348                 ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0))
349                 return NULL;
350
351         n_entries_per_bucket = 4;
352         key_size = 32;
353
354         /* Memory allocation */
355         n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
356                 n_entries_per_bucket);
357         n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) /
358                 n_entries_per_bucket;
359         bucket_size_cl = (sizeof(struct rte_bucket_4_32) + n_entries_per_bucket
360                 * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
361         stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1)
362                 / RTE_CACHE_LINE_SIZE;
363         total_size = sizeof(struct rte_table_hash) +
364                 ((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) *
365                 RTE_CACHE_LINE_SIZE;
366
367         f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
368         if (f == NULL) {
369                 RTE_LOG(ERR, TABLE,
370                         "%s: Cannot allocate %u bytes for hash table\n",
371                         __func__, total_size);
372                 return NULL;
373         }
374         RTE_LOG(INFO, TABLE,
375                 "%s: Hash table memory footprint is %u bytes\n", __func__,
376                 total_size);
377
378         /* Memory initialization */
379         f->n_buckets = n_buckets;
380         f->n_entries_per_bucket = n_entries_per_bucket;
381         f->key_size = key_size;
382         f->entry_size = entry_size;
383         f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
384         f->signature_offset = p->signature_offset;
385         f->key_offset = p->key_offset;
386         f->f_hash = p->f_hash;
387         f->seed = p->seed;
388
389         f->n_buckets_ext = n_buckets_ext;
390         f->stack_pos = n_buckets_ext;
391         f->stack = (uint32_t *)
392                 &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];
393
394         for (i = 0; i < n_buckets_ext; i++)
395                 f->stack[i] = i;
396
397         return f;
398 }
399
400 static int
401 rte_table_hash_free_key32_ext(void *table)
402 {
403         struct rte_table_hash *f = (struct rte_table_hash *) table;
404
405         /* Check input parameters */
406         if (f == NULL) {
407                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
408                 return -EINVAL;
409         }
410
411         rte_free(f);
412         return 0;
413 }
414
415 static int
416 rte_table_hash_entry_add_key32_ext(
417         void *table,
418         void *key,
419         void *entry,
420         int *key_found,
421         void **entry_ptr)
422 {
423         struct rte_table_hash *f = (struct rte_table_hash *) table;
424         struct rte_bucket_4_32 *bucket0, *bucket, *bucket_prev;
425         uint64_t signature;
426         uint32_t bucket_index, i;
427
428         signature = f->f_hash(key, f->key_size, f->seed);
429         bucket_index = signature & (f->n_buckets - 1);
430         bucket0 = (struct rte_bucket_4_32 *)
431                         &f->memory[bucket_index * f->bucket_size];
432         signature |= RTE_BUCKET_ENTRY_VALID;
433
434         /* Key is present in the bucket */
435         for (bucket = bucket0; bucket != NULL; bucket = bucket->next) {
436                 for (i = 0; i < 4; i++) {
437                         uint64_t bucket_signature = bucket->signature[i];
438                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
439
440                         if ((bucket_signature == signature) &&
441                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
442                                 uint8_t *bucket_data = &bucket->data[i *
443                                         f->entry_size];
444
445                                 memcpy(bucket_data, entry, f->entry_size);
446                                 *key_found = 1;
447                                 *entry_ptr = (void *) bucket_data;
448
449                                 return 0;
450                         }
451                 }
452         }
453
454         /* Key is not present in the bucket */
455         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
456                 bucket_prev = bucket, bucket = bucket->next)
457                 for (i = 0; i < 4; i++) {
458                         uint64_t bucket_signature = bucket->signature[i];
459                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
460
461                         if (bucket_signature == 0) {
462                                 uint8_t *bucket_data = &bucket->data[i *
463                                         f->entry_size];
464
465                                 bucket->signature[i] = signature;
466                                 memcpy(bucket_key, key, f->key_size);
467                                 memcpy(bucket_data, entry, f->entry_size);
468                                 *key_found = 0;
469                                 *entry_ptr = (void *) bucket_data;
470
471                                 return 0;
472                         }
473                 }
474
475         /* Bucket full: extend bucket */
476         if (f->stack_pos > 0) {
477                 bucket_index = f->stack[--f->stack_pos];
478
479                 bucket = (struct rte_bucket_4_32 *)
480                         &f->memory[(f->n_buckets + bucket_index) *
481                         f->bucket_size];
482                 bucket_prev->next = bucket;
483                 bucket_prev->next_valid = 1;
484
485                 bucket->signature[0] = signature;
486                 memcpy(bucket->key[0], key, f->key_size);
487                 memcpy(&bucket->data[0], entry, f->entry_size);
488                 *key_found = 0;
489                 *entry_ptr = (void *) &bucket->data[0];
490                 return 0;
491         }
492
493         return -ENOSPC;
494 }
495
496 static int
497 rte_table_hash_entry_delete_key32_ext(
498         void *table,
499         void *key,
500         int *key_found,
501         void *entry)
502 {
503         struct rte_table_hash *f = (struct rte_table_hash *) table;
504         struct rte_bucket_4_32 *bucket0, *bucket, *bucket_prev;
505         uint64_t signature;
506         uint32_t bucket_index, i;
507
508         signature = f->f_hash(key, f->key_size, f->seed);
509         bucket_index = signature & (f->n_buckets - 1);
510         bucket0 = (struct rte_bucket_4_32 *)
511                 &f->memory[bucket_index * f->bucket_size];
512         signature |= RTE_BUCKET_ENTRY_VALID;
513
514         /* Key is present in the bucket */
515         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
516                 bucket_prev = bucket, bucket = bucket->next)
517                 for (i = 0; i < 4; i++) {
518                         uint64_t bucket_signature = bucket->signature[i];
519                         uint8_t *bucket_key = (uint8_t *) bucket->key[i];
520
521                         if ((bucket_signature == signature) &&
522                                 (memcmp(key, bucket_key, f->key_size) == 0)) {
523                                 uint8_t *bucket_data = &bucket->data[i *
524                                         f->entry_size];
525
526                                 bucket->signature[i] = 0;
527                                 *key_found = 1;
528                                 if (entry)
529                                         memcpy(entry, bucket_data,
530                                                 f->entry_size);
531
532                                 if ((bucket->signature[0] == 0) &&
533                                                 (bucket->signature[1] == 0) &&
534                                                 (bucket->signature[2] == 0) &&
535                                                 (bucket->signature[3] == 0) &&
536                                                 (bucket_prev != NULL)) {
537                                         bucket_prev->next = bucket->next;
538                                         bucket_prev->next_valid =
539                                                 bucket->next_valid;
540
541                                         memset(bucket, 0,
542                                                 sizeof(struct rte_bucket_4_32));
543                                         bucket_index = (bucket -
544                                                 ((struct rte_bucket_4_32 *)
545                                                 f->memory)) - f->n_buckets;
546                                         f->stack[f->stack_pos++] = bucket_index;
547                                 }
548
549                                 return 0;
550                         }
551                 }
552
553         /* Key is not present in the bucket */
554         *key_found = 0;
555         return 0;
556 }
557
/*
 * Compare the four 64-bit words of key_in with each of the four keys stored
 * in bucket. pos is set to the index of the matching slot whose signature has
 * bit 0 set (the highest index wins if several match), or to 4 when no slot
 * matches.
 */
#define lookup_key32_cmp(key_in, bucket, pos) \
{ \
	uint64_t miss0, miss1, miss2, miss3; \
 \
	/* Non-zero when the slot signature has bit 0 clear */ \
	miss0 = ((~bucket->signature[0]) & 1); \
	miss1 = ((~bucket->signature[1]) & 1); \
	miss2 = ((~bucket->signature[2]) & 1); \
	miss3 = ((~bucket->signature[3]) & 1); \
 \
	/* Accumulate every bit that differs between the two keys */ \
	miss0 |= key_in[0] ^ bucket->key[0][0]; \
	miss0 |= key_in[1] ^ bucket->key[0][1]; \
	miss0 |= key_in[2] ^ bucket->key[0][2]; \
	miss0 |= key_in[3] ^ bucket->key[0][3]; \
 \
	miss1 |= key_in[0] ^ bucket->key[1][0]; \
	miss1 |= key_in[1] ^ bucket->key[1][1]; \
	miss1 |= key_in[2] ^ bucket->key[1][2]; \
	miss1 |= key_in[3] ^ bucket->key[1][3]; \
 \
	miss2 |= key_in[0] ^ bucket->key[2][0]; \
	miss2 |= key_in[1] ^ bucket->key[2][1]; \
	miss2 |= key_in[2] ^ bucket->key[2][2]; \
	miss2 |= key_in[3] ^ bucket->key[2][3]; \
 \
	miss3 |= key_in[0] ^ bucket->key[3][0]; \
	miss3 |= key_in[1] ^ bucket->key[3][1]; \
	miss3 |= key_in[2] ^ bucket->key[3][2]; \
	miss3 |= key_in[3] ^ bucket->key[3][3]; \
 \
	/* Later slots override earlier ones; 4 means "no match" */ \
	pos = 4; \
	if (miss0 == 0) \
		pos = 0; \
	if (miss1 == 0) \
		pos = 1; \
	if (miss2 == 0) \
		pos = 2; \
	if (miss3 == 0) \
		pos = 3; \
}
602
/*
 * Pipeline stage 0 (one packet): take the lowest set bit of pkts_mask as the
 * packet index, clear it in pkts_mask, fetch the matching mbuf from pkts and
 * prefetch its metadata.
 */
#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask) \
{ \
	pkt0_index = __builtin_ctzll(pkts_mask); \
	pkts_mask &= ~(1LLU << pkt0_index); \
 \
	mbuf0 = pkts[pkt0_index]; \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, 0)); \
}
614
/*
 * Pipeline stage 1 (one packet): read the 32-bit signature from the mbuf
 * metadata at f->signature_offset, select the bucket it maps to and prefetch
 * the bucket's first three cache lines.
 */
#define lookup1_stage1(mbuf1, bucket1, f) \
{ \
	uint64_t signature; \
	uint32_t bucket_index; \
	uintptr_t bucket_addr; \
 \
	signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset); \
	bucket_index = signature & (f->n_buckets - 1); \
	bucket1 = (struct rte_bucket_4_32 *) \
		&f->memory[bucket_index * f->bucket_size]; \
 \
	bucket_addr = (uintptr_t) bucket1; \
	rte_prefetch0(bucket1); \
	rte_prefetch0((void *)(bucket_addr + RTE_CACHE_LINE_SIZE)); \
	rte_prefetch0((void *)(bucket_addr + 2 * RTE_CACHE_LINE_SIZE)); \
}
628
/*
 * Pipeline stage 2, LRU flavor (one packet): compare the packet key (mbuf
 * metadata at f->key_offset) with the keys in bucket2, set the packet's bit
 * in pkts_mask_out when the selected slot's signature has bit 0 set, store
 * and prefetch the slot's entry data pointer, then update the bucket's LRU
 * state for the selected position.
 */
#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2, \
	pkts_mask_out, entries, f) \
{ \
	uint64_t *key; \
	uint32_t pos; \
	void *a; \
 \
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset); \
	lookup_key32_cmp(key, bucket2, pos); \
 \
	/* Bit 0 of the selected signature is the hit indication */ \
	pkts_mask_out |= (bucket2->signature[pos] & 1LLU) << pkt2_index; \
 \
	a = (void *) &bucket2->data[pos * f->entry_size]; \
	rte_prefetch0(a); \
	entries[pkt2_index] = a; \
	lru_update(bucket2, pos); \
}
649
/*
 * Pipeline stage 2, extendible-bucket flavor (one packet): compare the packet
 * key (mbuf metadata at f->key_offset) with the keys in bucket2, set the
 * packet's bit in pkts_mask_out when the selected slot's signature has bit 0
 * set, and store and prefetch the slot's entry data pointer. When there was
 * no hit and bucket2 has a valid next bucket, the packet's bit is set in
 * buckets_mask. The next bucket and the key pointer are always recorded in
 * buckets[] and keys[].
 */
#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out, \
	entries, buckets_mask, buckets, keys, f) \
{ \
	uint64_t *key; \
	uint64_t pkt_mask; \
	uint32_t pos; \
	void *a; \
 \
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset); \
	lookup_key32_cmp(key, bucket2, pos); \
 \
	/* Bit 0 of the selected signature is the hit indication */ \
	pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index; \
	pkts_mask_out |= pkt_mask; \
 \
	a = (void *) &bucket2->data[pos * f->entry_size]; \
	rte_prefetch0(a); \
	entries[pkt2_index] = a; \
 \
	/* Miss with a chained bucket: flag the packet for further search */ \
	buckets_mask |= (~pkt_mask) & (bucket2->next_valid << pkt2_index); \
	buckets[pkt2_index] = bucket2->next; \
	keys[pkt2_index] = key; \
}
676
/*
 * Continue the lookup of one packet in the bucket recorded in
 * buckets[pkt_index], using the key recorded in keys[pkt_index].
 *
 * On a hit the packet's bit is set in pkts_mask_out; the selected slot's
 * entry data pointer is stored in entries[] and prefetched either way. On a
 * miss with a valid next bucket the packet's bit is set in buckets_mask. The
 * next bucket's first three cache lines are prefetched and it replaces the
 * current one in buckets[].
 */
#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, \
	entries, buckets_mask, f) \
{ \
	struct rte_bucket_4_32 *bucket, *bucket_next; \
	uint64_t *key; \
	uint64_t pkt_mask; \
	uint32_t pos; \
	void *a; \
 \
	bucket = buckets[pkt_index]; \
	key = keys[pkt_index]; \
	lookup_key32_cmp(key, bucket, pos); \
 \
	/* Bit 0 of the selected signature is the hit indication */ \
	pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index; \
	pkts_mask_out |= pkt_mask; \
 \
	a = (void *) &bucket->data[pos * f->entry_size]; \
	rte_prefetch0(a); \
	entries[pkt_index] = a; \
 \
	/* Miss with a chained bucket: keep the packet in the search set */ \
	buckets_mask |= (~pkt_mask) & (bucket->next_valid << pkt_index); \
 \
	bucket_next = bucket->next; \
	rte_prefetch0(bucket_next); \
	rte_prefetch0((void *)(((uintptr_t) bucket_next) + \
		RTE_CACHE_LINE_SIZE)); \
	rte_prefetch0((void *)(((uintptr_t) bucket_next) + \
		2 * RTE_CACHE_LINE_SIZE)); \
	buckets[pkt_index] = bucket_next; \
	keys[pkt_index] = key; \
}
708
/*
 * Pipeline stage 0 for a pair of packets.
 *
 * Removes the two lowest set bits from pkts_mask, returns their positions in
 * pkt00_index / pkt01_index, loads the matching pkts[] elements into
 * mbuf00 / mbuf01 and prefetches the metadata of each at offset 0.
 *
 * pkts_mask must have at least two bits set on entry: __builtin_ctzll() is
 * applied to it twice without a zero check.
 */
#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
        pkts, pkts_mask) \
{ \
        /* First packet of the pair */ \
        pkt00_index = __builtin_ctzll(pkts_mask); \
        pkts_mask &= ~(1LLU << pkt00_index); \
        mbuf00 = pkts[pkt00_index]; \
        rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0)); \
 \
        /* Second packet of the pair */ \
        pkt01_index = __builtin_ctzll(pkts_mask); \
        pkts_mask &= ~(1LLU << pkt01_index); \
        mbuf01 = pkts[pkt01_index]; \
        rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0)); \
}
728
/*
 * Pipeline stage 0 for a pair of packets, tolerating a single remaining
 * packet.
 *
 * Removes the lowest set bit from pkts_mask and returns its position in
 * pkt00_index. If another bit is still set it becomes the second packet;
 * otherwise the first packet is used for both slots (pkt01_index equals
 * pkt00_index and mbuf01 equals mbuf00). The metadata of each selected mbuf
 * is prefetched at offset 0.
 *
 * pkts_mask must be non-zero on entry.
 */
#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
        mbuf00, mbuf01, pkts, pkts_mask) \
{ \
        uint64_t pkt00_mask, pkt01_mask; \
 \
        pkt00_index = __builtin_ctzll(pkts_mask); \
        pkt00_mask = 1LLU << pkt00_index; \
        pkts_mask &= ~pkt00_mask; \
 \
        mbuf00 = pkts[pkt00_index]; \
        rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0)); \
 \
        /* \
         * The result of __builtin_ctzll(0) is undefined, so test for the \
         * empty mask before counting trailing zeros. \
         */ \
        if (pkts_mask == 0) { \
                pkt01_index = pkt00_index; \
        } else { \
                pkt01_index = __builtin_ctzll(pkts_mask); \
        } \
 \
        pkt01_mask = 1LLU << pkt01_index; \
        pkts_mask &= ~pkt01_mask; \
 \
        mbuf01 = pkts[pkt01_index]; \
        rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0)); \
}
751
/*
 * Pipeline stage 1 for a pair of packets.
 *
 * For each mbuf, reads the 32-bit value stored in its metadata at
 * f->signature_offset, masks it with (f->n_buckets - 1) to obtain a bucket
 * index, and points bucket10 / bucket11 at
 * f->memory[index * f->bucket_size]. Three consecutive cache lines of each
 * selected bucket are prefetched.
 *
 * NOTE(review): the mask presumably relies on n_buckets being a power of
 * two - confirm against the table create code.
 */
#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f) \
{ \
        uint64_t sig10, sig11; \
        uint32_t idx10, idx11; \
        uintptr_t addr10, addr11; \
 \
        /* First packet of the pair */ \
        sig10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset); \
        idx10 = sig10 & (f->n_buckets - 1); \
        bucket10 = (struct rte_bucket_4_32 *) \
                &f->memory[idx10 * f->bucket_size]; \
        addr10 = (uintptr_t) bucket10; \
        rte_prefetch0(bucket10); \
        rte_prefetch0((void *)(addr10 + RTE_CACHE_LINE_SIZE)); \
        rte_prefetch0((void *)(addr10 + 2 * RTE_CACHE_LINE_SIZE)); \
 \
        /* Second packet of the pair */ \
        sig11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset); \
        idx11 = sig11 & (f->n_buckets - 1); \
        bucket11 = (struct rte_bucket_4_32 *) \
                &f->memory[idx11 * f->bucket_size]; \
        addr11 = (uintptr_t) bucket11; \
        rte_prefetch0(bucket11); \
        rte_prefetch0((void *)(addr11 + RTE_CACHE_LINE_SIZE)); \
        rte_prefetch0((void *)(addr11 + 2 * RTE_CACHE_LINE_SIZE)); \
}
773
/*
 * Pipeline stage 2 for a pair of packets, LRU flavour.
 *
 * For each packet: fetches the key pointer from the mbuf metadata at
 * f->key_offset, runs lookup_key32_cmp() against the bucket chosen in
 * stage 1, ORs bit 0 of bucket->signature[pos] (shifted to the packet
 * index) into pkts_mask_out, prefetches the entry data at
 * bucket->data[pos * f->entry_size] and stores its address in entries[],
 * then calls lru_update() for the bucket with the resulting position.
 */
#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
        bucket20, bucket21, pkts_mask_out, entries, f) \
{ \
        uint64_t *key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
        uint64_t *key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
        uint64_t hit20, hit21; \
        void *entry20, *entry21; \
        uint32_t pos20, pos21; \
 \
        /* pos20 / pos21 are produced by the key comparison */ \
        lookup_key32_cmp(key20, bucket20, pos20); \
        lookup_key32_cmp(key21, bucket21, pos21); \
 \
        hit20 = (bucket20->signature[pos20] & 1LLU) << pkt20_index; \
        hit21 = (bucket21->signature[pos21] & 1LLU) << pkt21_index; \
        pkts_mask_out |= hit20 | hit21; \
 \
        entry20 = (void *) &bucket20->data[pos20 * f->entry_size]; \
        entry21 = (void *) &bucket21->data[pos21 * f->entry_size]; \
        rte_prefetch0(entry20); \
        rte_prefetch0(entry21); \
        entries[pkt20_index] = entry20; \
        entries[pkt21_index] = entry21; \
 \
        lru_update(bucket20, pos20); \
        lru_update(bucket21, pos21); \
}
801
/*
 * Pipeline stage 2 for a pair of packets, chained-bucket (ext) flavour.
 *
 * For each packet: fetches the key pointer from the mbuf metadata at
 * f->key_offset, runs lookup_key32_cmp() against the bucket chosen in
 * stage 1, ORs bit 0 of bucket->signature[pos] (shifted to the packet
 * index) into pkts_mask_out, prefetches the entry data at
 * bucket->data[pos * f->entry_size] and stores its address in entries[].
 *
 * Unless the packet bit was just set, bucket->next_valid (shifted to the
 * packet index) is ORed into buckets_mask. bucket->next and the key pointer
 * are saved in buckets[] / keys[] at the packet index so that
 * lookup_grinder() can continue from there.
 */
#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
        bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f)\
{ \
        uint64_t *key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
        uint64_t *key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
        struct rte_bucket_4_32 *next20, *next21; \
        uint64_t hit20, hit21; \
        uint64_t chain20, chain21; \
        void *entry20, *entry21; \
        uint32_t pos20, pos21; \
 \
        /* pos20 / pos21 are produced by the key comparison */ \
        lookup_key32_cmp(key20, bucket20, pos20); \
        lookup_key32_cmp(key21, bucket21, pos21); \
 \
        hit20 = (bucket20->signature[pos20] & 1LLU) << pkt20_index; \
        hit21 = (bucket21->signature[pos21] & 1LLU) << pkt21_index; \
        pkts_mask_out |= hit20 | hit21; \
 \
        entry20 = (void *) &bucket20->data[pos20 * f->entry_size]; \
        entry21 = (void *) &bucket21->data[pos21 * f->entry_size]; \
        rte_prefetch0(entry20); \
        rte_prefetch0(entry21); \
        entries[pkt20_index] = entry20; \
        entries[pkt21_index] = entry21; \
 \
        /* Packets whose bit was not set may continue in the next bucket */ \
        chain20 = (~hit20) & (bucket20->next_valid << pkt20_index); \
        chain21 = (~hit21) & (bucket21->next_valid << pkt21_index); \
        buckets_mask |= chain20 | chain21; \
 \
        next20 = bucket20->next; \
        next21 = bucket21->next; \
        buckets[pkt20_index] = next20; \
        buckets[pkt21_index] = next21; \
        keys[pkt20_index] = key20; \
        keys[pkt21_index] = key21; \
}
838
/*
 * Bulk lookup for the key32 LRU hash table.
 *
 * table:           table handle, used as a struct rte_table_hash
 * pkts:            packet array, indexed by bit position in pkts_mask
 * pkts_mask:       one bit per packet to look up
 * lookup_hit_mask: output, receives the mask accumulated by the stage 2
 *                  macros
 * entries:         output array, filled in per packet index by the stage 2
 *                  macros
 *
 * Bursts of fewer than 5 packets are handled one packet at a time. Larger
 * bursts go through a three-stage software pipeline that works on two
 * packets per stage: stage 0 selects packets and prefetches mbuf metadata,
 * stage 1 locates and prefetches the bucket, stage 2 compares keys.
 *
 * Always returns 0.
 */
static int
rte_table_hash_lookup_key32_lru(
        void *table,
        struct rte_mbuf **pkts,
        uint64_t pkts_mask,
        uint64_t *lookup_hit_mask,
        void **entries)
{
        struct rte_table_hash *f = (struct rte_table_hash *) table;
        struct rte_bucket_4_32 *bucket10, *bucket11, *bucket20, *bucket21;
        struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
        uint32_t pkt00_index, pkt01_index;
        uint32_t pkt10_index, pkt11_index;
        uint32_t pkt20_index, pkt21_index;
        uint64_t pkts_mask_out = 0;

        /* The pipeline cannot be run with fewer than 5 packets */
        if (__builtin_popcountll(pkts_mask) < 5) {
                while (pkts_mask) {
                        struct rte_bucket_4_32 *bucket;
                        struct rte_mbuf *mbuf;
                        uint32_t pkt_index;

                        lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
                        lookup1_stage1(mbuf, bucket, f);
                        lookup1_stage2_lru(pkt_index, mbuf, bucket,
                                        pkts_mask_out, entries, f);
                }

                *lookup_hit_mask = pkts_mask_out;
                return 0;
        }

        /*
         * Pipeline fill
         */

        /* Stage 0 for the first pair */
        lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
                pkts_mask);

        /* First pair moves from stage 0 to stage 1 */
        mbuf10 = mbuf00;
        pkt10_index = pkt00_index;
        mbuf11 = mbuf01;
        pkt11_index = pkt01_index;

        /* Stage 0 for the second pair, stage 1 for the first pair */
        lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
                pkts_mask);
        lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

        /*
         * Pipeline run
         */
        while (pkts_mask) {
                /* Stage 1 -> stage 2 */
                bucket20 = bucket10;
                mbuf20 = mbuf10;
                pkt20_index = pkt10_index;
                bucket21 = bucket11;
                mbuf21 = mbuf11;
                pkt21_index = pkt11_index;

                /* Stage 0 -> stage 1 */
                mbuf10 = mbuf00;
                pkt10_index = pkt00_index;
                mbuf11 = mbuf01;
                pkt11_index = pkt01_index;

                /* Stage 0: may be left with a single packet */
                lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
                        mbuf00, mbuf01, pkts, pkts_mask);

                /* Stage 1 */
                lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

                /* Stage 2 */
                lookup2_stage2_lru(pkt20_index, pkt21_index,
                        mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out,
                        entries, f);
        }

        /*
         * Pipeline flush
         */

        /* Stage 1 -> stage 2 */
        bucket20 = bucket10;
        mbuf20 = mbuf10;
        pkt20_index = pkt10_index;
        bucket21 = bucket11;
        mbuf21 = mbuf11;
        pkt21_index = pkt11_index;

        /* Stage 0 -> stage 1 */
        mbuf10 = mbuf00;
        pkt10_index = pkt00_index;
        mbuf11 = mbuf01;
        pkt11_index = pkt01_index;

        /* Stage 1 for the last pair, stage 2 for the pair before it */
        lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
        lookup2_stage2_lru(pkt20_index, pkt21_index,
                mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out, entries, f);

        /* Stage 1 -> stage 2 */
        bucket20 = bucket10;
        mbuf20 = mbuf10;
        pkt20_index = pkt10_index;
        bucket21 = bucket11;
        mbuf21 = mbuf11;
        pkt21_index = pkt11_index;

        /* Stage 2 for the last pair */
        lookup2_stage2_lru(pkt20_index, pkt21_index,
                mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out, entries, f);

        *lookup_hit_mask = pkts_mask_out;
        return 0;
} /* rte_table_hash_lookup_key32_lru() */
960
961 static int
962 rte_table_hash_lookup_key32_ext(
963         void *table,
964         struct rte_mbuf **pkts,
965         uint64_t pkts_mask,
966         uint64_t *lookup_hit_mask,
967         void **entries)
968 {
969         struct rte_table_hash *f = (struct rte_table_hash *) table;
970         struct rte_bucket_4_32 *bucket10, *bucket11, *bucket20, *bucket21;
971         struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
972         uint32_t pkt00_index, pkt01_index, pkt10_index;
973         uint32_t pkt11_index, pkt20_index, pkt21_index;
974         uint64_t pkts_mask_out = 0, buckets_mask = 0;
975         struct rte_bucket_4_32 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
976         uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
977
978         /* Cannot run the pipeline with less than 5 packets */
979         if (__builtin_popcountll(pkts_mask) < 5) {
980                 for ( ; pkts_mask; ) {
981                         struct rte_bucket_4_32 *bucket;
982                         struct rte_mbuf *mbuf;
983                         uint32_t pkt_index;
984
985                         lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
986                         lookup1_stage1(mbuf, bucket, f);
987                         lookup1_stage2_ext(pkt_index, mbuf, bucket,
988                                 pkts_mask_out, entries, buckets_mask, buckets,
989                                 keys, f);
990                 }
991
992                 goto grind_next_buckets;
993         }
994
995         /*
996          * Pipeline fill
997          *
998          */
999         /* Pipeline stage 0 */
1000         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
1001                 pkts_mask);
1002
1003         /* Pipeline feed */
1004         mbuf10 = mbuf00;
1005         mbuf11 = mbuf01;
1006         pkt10_index = pkt00_index;
1007         pkt11_index = pkt01_index;
1008
1009         /* Pipeline stage 0 */
1010         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
1011                 pkts_mask);
1012
1013         /* Pipeline stage 1 */
1014         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1015
1016         /*
1017          * Pipeline run
1018          *
1019          */
1020         for ( ; pkts_mask; ) {
1021                 /* Pipeline feed */
1022                 bucket20 = bucket10;
1023                 bucket21 = bucket11;
1024                 mbuf20 = mbuf10;
1025                 mbuf21 = mbuf11;
1026                 mbuf10 = mbuf00;
1027                 mbuf11 = mbuf01;
1028                 pkt20_index = pkt10_index;
1029                 pkt21_index = pkt11_index;
1030                 pkt10_index = pkt00_index;
1031                 pkt11_index = pkt01_index;
1032
1033                 /* Pipeline stage 0 */
1034                 lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
1035                         mbuf00, mbuf01, pkts, pkts_mask);
1036
1037                 /* Pipeline stage 1 */
1038                 lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1039
1040                 /* Pipeline stage 2 */
1041                 lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1042                         bucket20, bucket21, pkts_mask_out, entries,
1043                         buckets_mask, buckets, keys, f);
1044         }
1045
1046         /*
1047          * Pipeline flush
1048          *
1049          */
1050         /* Pipeline feed */
1051         bucket20 = bucket10;
1052         bucket21 = bucket11;
1053         mbuf20 = mbuf10;
1054         mbuf21 = mbuf11;
1055         mbuf10 = mbuf00;
1056         mbuf11 = mbuf01;
1057         pkt20_index = pkt10_index;
1058         pkt21_index = pkt11_index;
1059         pkt10_index = pkt00_index;
1060         pkt11_index = pkt01_index;
1061
1062         /* Pipeline stage 1 */
1063         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1064
1065         /* Pipeline stage 2 */
1066         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1067                 bucket20, bucket21, pkts_mask_out, entries,
1068                 buckets_mask, buckets, keys, f);
1069
1070         /* Pipeline feed */
1071         bucket20 = bucket10;
1072         bucket21 = bucket11;
1073         mbuf20 = mbuf10;
1074         mbuf21 = mbuf11;
1075         pkt20_index = pkt10_index;
1076         pkt21_index = pkt11_index;
1077
1078         /* Pipeline stage 2 */
1079         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1080                 bucket20, bucket21, pkts_mask_out, entries,
1081                 buckets_mask, buckets, keys, f);
1082
1083 grind_next_buckets:
1084         /* Grind next buckets */
1085         for ( ; buckets_mask; ) {
1086                 uint64_t buckets_mask_next = 0;
1087
1088                 for ( ; buckets_mask; ) {
1089                         uint64_t pkt_mask;
1090                         uint32_t pkt_index;
1091
1092                         pkt_index = __builtin_ctzll(buckets_mask);
1093                         pkt_mask = 1LLU << pkt_index;
1094                         buckets_mask &= ~pkt_mask;
1095
1096                         lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
1097                                 entries, buckets_mask_next, f);
1098                 }
1099
1100                 buckets_mask = buckets_mask_next;
1101         }
1102
1103         *lookup_hit_mask = pkts_mask_out;
1104         return 0;
1105 } /* rte_table_hash_lookup_key32_ext() */
1106
1107 struct rte_table_ops rte_table_hash_key32_lru_ops = {
1108         .f_create = rte_table_hash_create_key32_lru,
1109         .f_free = rte_table_hash_free_key32_lru,
1110         .f_add = rte_table_hash_entry_add_key32_lru,
1111         .f_delete = rte_table_hash_entry_delete_key32_lru,
1112         .f_lookup = rte_table_hash_lookup_key32_lru,
1113 };
1114
1115 struct rte_table_ops rte_table_hash_key32_ext_ops = {
1116         .f_create = rte_table_hash_create_key32_ext,
1117         .f_free = rte_table_hash_free_key32_ext,
1118         .f_add = rte_table_hash_entry_add_key32_ext,
1119         .f_delete = rte_table_hash_entry_delete_key32_ext,
1120         .f_lookup = rte_table_hash_lookup_key32_ext,
1121 };