lib/table/rte_table_hash_key8.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <string.h>
5 #include <stdio.h>
6
7 #include <rte_common.h>
8 #include <rte_mbuf.h>
9 #include <rte_memory.h>
10 #include <rte_malloc.h>
11 #include <rte_log.h>
12
13 #include "rte_table_hash.h"
14 #include "rte_lru.h"
15
16 #define KEY_SIZE                                                8
17
18 #define KEYS_PER_BUCKET                                 4
19
20 #ifdef RTE_TABLE_STATS_COLLECT
21
22 #define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val) \
23         table->stats.n_pkts_in += val
24 #define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val) \
25         table->stats.n_pkts_lookup_miss += val
26
27 #else
28
29 #define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val)
30 #define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val)
31
32 #endif
33
34 #ifdef RTE_ARCH_64
35 struct rte_bucket_4_8 {
36         /* Cache line 0 */
37         uint64_t signature;
38         uint64_t lru_list;
39         struct rte_bucket_4_8 *next;
40         uint64_t next_valid;
41
42         uint64_t key[4];
43
44         /* Cache line 1 */
45         uint8_t data[0];
46 };
47 #else
48 struct rte_bucket_4_8 {
49         /* Cache line 0 */
50         uint64_t signature;
51         uint64_t lru_list;
52         struct rte_bucket_4_8 *next;
53         uint32_t pad;
54         uint64_t next_valid;
55
56         uint64_t key[4];
57
58         /* Cache line 1 */
59         uint8_t data[0];
60 };
61 #endif
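/*
 * Bucket layout notes (a reading aid, derived from the code below): bit i of
 * "signature" (i = 0..3) is set when slot i holds a valid key, "lru_list"
 * tracks the LRU order of the four slots, and the entry attached to slot i
 * lives at &bucket->data[i * entry_size]. The create functions refuse to
 * build a table unless sizeof(struct rte_bucket_4_8) is a multiple of 64, so
 * the fixed part of the bucket fills cache line 0 exactly and data[] starts
 * on cache line 1. As an illustrative sketch:
 *
 *   _Static_assert(sizeof(struct rte_bucket_4_8) == 64,
 *           "bucket header expected to fill exactly one cache line");
 */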
62
63 struct rte_table_hash {
64         struct rte_table_stats stats;
65
66         /* Input parameters */
67         uint32_t n_buckets;
68         uint32_t key_size;
69         uint32_t entry_size;
70         uint32_t bucket_size;
71         uint32_t key_offset;
72         uint64_t key_mask;
73         rte_table_hash_op_hash f_hash;
74         uint64_t seed;
75
76         /* Extendible buckets */
77         uint32_t n_buckets_ext;
78         uint32_t stack_pos;
79         uint32_t *stack;
80
81         /* Lookup table */
82         uint8_t memory[0] __rte_cache_aligned;
83 };
84
85 static int
86 keycmp(void *a, void *b, void *b_mask)
87 {
88         uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;
89
90         return a64[0] != (b64[0] & b_mask64[0]);
91 }
92
93 static void
94 keycpy(void *dst, void *src, void *src_mask)
95 {
96         uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask;
97
98         dst64[0] = src64[0] & src_mask64[0];
99 }
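/*
 * Worked example of the masked compare/copy above (illustrative values;
 * "stored" plays the role of a bucket key): with an 8-byte key mask that
 * keeps only the low 32 bits, keycpy() stores the masked key and keycmp()
 * reports a match for any probe key whose low 32 bits agree:
 *
 *   uint64_t mask = 0x00000000FFFFFFFFLLU;
 *   uint64_t stored, in_key = 0x1111222233334444LLU;
 *   keycpy(&stored, &in_key, &mask);            // stored = 0x0000000033334444
 *   uint64_t probe = 0x9999888833334444LLU;
 *   int miss = keycmp(&stored, &probe, &mask);  // 0, i.e. the keys match
 */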
100
101 static int
102 check_params_create(struct rte_table_hash_params *params)
103 {
104         /* name */
105         if (params->name == NULL) {
106                 RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__);
107                 return -EINVAL;
108         }
109
110         /* key_size */
111         if (params->key_size != KEY_SIZE) {
112                 RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
113                 return -EINVAL;
114         }
115
116         /* n_keys */
117         if (params->n_keys == 0) {
118                 RTE_LOG(ERR, TABLE, "%s: n_keys is zero\n", __func__);
119                 return -EINVAL;
120         }
121
122         /* n_buckets */
123         if ((params->n_buckets == 0) ||
124                 (!rte_is_power_of_2(params->n_buckets))) {
125                 RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
126                 return -EINVAL;
127         }
128
129         /* f_hash */
130         if (params->f_hash == NULL) {
131                 RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
132                         __func__);
133                 return -EINVAL;
134         }
135
136         return 0;
137 }
138
139 static void *
140 rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size)
141 {
142         struct rte_table_hash_params *p = params;
143         struct rte_table_hash *f;
144         uint64_t bucket_size, total_size;
145         uint32_t n_buckets, i;
146
147         /* Check input parameters */
148         if ((check_params_create(p) != 0) ||
149                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
150                 ((sizeof(struct rte_bucket_4_8) % 64) != 0))
151                 return NULL;
152
153         /*
154          * Table dimensioning
155          *
156          * Objective: Pick the number of buckets (n_buckets) so that there is a
157          * chance to store n_keys keys in the table.
158          *
159          * Note: Since the buckets do not get extended, it is not possible to
160          * guarantee that n_keys keys can be stored in the table at any time. In the
161          * worst case scenario, when all n_keys keys fall into the same bucket, only
162          * a maximum of KEYS_PER_BUCKET keys will be stored in the table. This case
163          * defeats the purpose of the hash table. It indicates an unsuitable f_hash
164          * or an n_keys to n_buckets ratio that is too high.
165          *
166          * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET
167          */
168         n_buckets = rte_align32pow2(
169                 (p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET);
170         n_buckets = RTE_MAX(n_buckets, p->n_buckets);
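        /*
         * Worked example (hypothetical numbers): for n_keys = 1000 and
         * KEYS_PER_BUCKET = 4, the minimum is (1000 + 3) / 4 = 250 buckets,
         * which rte_align32pow2() rounds up to 256. If the caller asked for
         * p->n_buckets = 1024, the larger value is kept. Either way n_buckets
         * stays a power of two, which the lookup code relies on when it
         * computes "signature & (n_buckets - 1)" as the bucket index.
         */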
171
172         /* Memory allocation */
173         bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) +
174                 KEYS_PER_BUCKET * entry_size);
175         total_size = sizeof(struct rte_table_hash) + n_buckets * bucket_size;
176
177         if (total_size > SIZE_MAX) {
178                 RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
179                         " for hash table %s\n",
180                         __func__, total_size, p->name);
181                 return NULL;
182         }
183
184         f = rte_zmalloc_socket(p->name,
185                 (size_t)total_size,
186                 RTE_CACHE_LINE_SIZE,
187                 socket_id);
188         if (f == NULL) {
189                 RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
190                         " for hash table %s\n",
191                         __func__, total_size, p->name);
192                 return NULL;
193         }
194
195         RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
196                 "is %" PRIu64 " bytes\n",
197                 __func__, p->name, total_size);
198
199         /* Memory initialization */
200         f->n_buckets = n_buckets;
201         f->key_size = KEY_SIZE;
202         f->entry_size = entry_size;
203         f->bucket_size = bucket_size;
204         f->key_offset = p->key_offset;
205         f->f_hash = p->f_hash;
206         f->seed = p->seed;
207
208         if (p->key_mask != NULL)
209                 f->key_mask = ((uint64_t *)p->key_mask)[0];
210         else
211                 f->key_mask = 0xFFFFFFFFFFFFFFFFLLU;
212
213         for (i = 0; i < n_buckets; i++) {
214                 struct rte_bucket_4_8 *bucket;
215
216                 bucket = (struct rte_bucket_4_8 *) &f->memory[i *
217                         f->bucket_size];
218                 bucket->lru_list = 0x0000000100020003LLU;
219         }
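        /*
         * Note on the initializer above (the encoding itself is defined in
         * rte_lru.h): lru_list packs the four slot indices as 16-bit fields,
         * from most recently used in the top bits to least recently used in
         * the bottom bits, so 0x0000000100020003 ranks the empty slots
         * 0, 1, 2, 3 and slot 3 is the first one handed out by lru_pos()
         * when a full bucket needs a replacement.
         */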
220
221         return f;
222 }
223
224 static int
225 rte_table_hash_free_key8_lru(void *table)
226 {
227         struct rte_table_hash *f = table;
228
229         /* Check input parameters */
230         if (f == NULL) {
231                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
232                 return -EINVAL;
233         }
234
235         rte_free(f);
236         return 0;
237 }
238
239 static int
240 rte_table_hash_entry_add_key8_lru(
241         void *table,
242         void *key,
243         void *entry,
244         int *key_found,
245         void **entry_ptr)
246 {
247         struct rte_table_hash *f = table;
248         struct rte_bucket_4_8 *bucket;
249         uint64_t signature, mask, pos;
250         uint32_t bucket_index, i;
251
252         signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
253         bucket_index = signature & (f->n_buckets - 1);
254         bucket = (struct rte_bucket_4_8 *)
255                 &f->memory[bucket_index * f->bucket_size];
256
257         /* Key is present in the bucket */
258         for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
259                 uint64_t bucket_signature = bucket->signature;
260                 uint64_t *bucket_key = &bucket->key[i];
261
262                 if ((bucket_signature & mask) &&
263                         (keycmp(bucket_key, key, &f->key_mask) == 0)) {
264                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
265
266                         memcpy(bucket_data, entry, f->entry_size);
267                         lru_update(bucket, i);
268                         *key_found = 1;
269                         *entry_ptr = (void *) bucket_data;
270                         return 0;
271                 }
272         }
273
274         /* Key is not present in the bucket */
275         for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
276                 uint64_t bucket_signature = bucket->signature;
277
278                 if ((bucket_signature & mask) == 0) {
279                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
280
281                         bucket->signature |= mask;
282                         keycpy(&bucket->key[i], key, &f->key_mask);
283                         memcpy(bucket_data, entry, f->entry_size);
284                         lru_update(bucket, i);
285                         *key_found = 0;
286                         *entry_ptr = (void *) bucket_data;
287
288                         return 0;
289                 }
290         }
291
292         /* Bucket full: replace LRU entry */
293         pos = lru_pos(bucket);
294         keycpy(&bucket->key[pos], key, &f->key_mask);
295         memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
296         lru_update(bucket, pos);
297         *key_found = 0;
298         *entry_ptr = (void *) &bucket->data[pos * f->entry_size];
299
300         return 0;
301 }
302
303 static int
304 rte_table_hash_entry_delete_key8_lru(
305         void *table,
306         void *key,
307         int *key_found,
308         void *entry)
309 {
310         struct rte_table_hash *f = table;
311         struct rte_bucket_4_8 *bucket;
312         uint64_t signature, mask;
313         uint32_t bucket_index, i;
314
315         signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
316         bucket_index = signature & (f->n_buckets - 1);
317         bucket = (struct rte_bucket_4_8 *)
318                 &f->memory[bucket_index * f->bucket_size];
319
320         /* Key is present in the bucket */
321         for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
322                 uint64_t bucket_signature = bucket->signature;
323                 uint64_t *bucket_key = &bucket->key[i];
324
325                 if ((bucket_signature & mask) &&
326                         (keycmp(bucket_key, key, &f->key_mask) == 0)) {
327                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
328
329                         bucket->signature &= ~mask;
330                         *key_found = 1;
331                         if (entry)
332                                 memcpy(entry, bucket_data, f->entry_size);
333
334                         return 0;
335                 }
336         }
337
338         /* Key is not present in the bucket */
339         *key_found = 0;
340         return 0;
341 }
342
343 static void *
344 rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size)
345 {
346         struct rte_table_hash_params *p = params;
347         struct rte_table_hash *f;
348         uint64_t bucket_size, stack_size, total_size;
349         uint32_t n_buckets_ext, i;
350
351         /* Check input parameters */
352         if ((check_params_create(p) != 0) ||
353                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
354                 ((sizeof(struct rte_bucket_4_8) % 64) != 0))
355                 return NULL;
356
357         /*
358          * Table dimensioning
359          *
360          * Objective: Pick the number of bucket extensions (n_buckets_ext) so that
361          * it is guaranteed that n_keys keys can be stored in the table at any time.
362          *
363          * The worst case scenario takes place when all the n_keys keys fall into
364          * the same bucket. Actually, due to the KEYS_PER_BUCKET scheme, the worst
365          * case takes place when (n_keys - KEYS_PER_BUCKET + 1) keys fall into the
366          * same bucket, while the remaining (KEYS_PER_BUCKET - 1) keys each fall
367          * into a different bucket. This case defeats the purpose of the hash table.
368          * It indicates an unsuitable f_hash or too high an n_keys to n_buckets ratio.
369          *
370          * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1
371          */
372         n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1;
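        /*
         * Worked example (hypothetical numbers): for n_keys = 1000 and
         * KEYS_PER_BUCKET = 4, n_buckets_ext = 1000 / 4 + 3 = 253 extension
         * buckets are reserved. In the worst case described above (997 keys
         * hashing to one bucket, the other 3 keys each alone in their own
         * buckets), the overloaded bucket chains (997 - 4 + 3) / 4 = 249
         * extensions, so the 253 reserved extensions are enough.
         */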
373
374         /* Memory allocation */
375         bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) +
376                 KEYS_PER_BUCKET * entry_size);
377         stack_size = RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t));
378         total_size = sizeof(struct rte_table_hash) +
379                 (p->n_buckets + n_buckets_ext) * bucket_size + stack_size;
380
381         if (total_size > SIZE_MAX) {
382                 RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
383                         "for hash table %s\n",
384                         __func__, total_size, p->name);
385                 return NULL;
386         }
387
388         f = rte_zmalloc_socket(p->name,
389                 (size_t)total_size,
390                 RTE_CACHE_LINE_SIZE,
391                 socket_id);
392         if (f == NULL) {
393                 RTE_LOG(ERR, TABLE,
394                         "%s: Cannot allocate %" PRIu64 " bytes "
395                         "for hash table %s\n",
396                         __func__, total_size, p->name);
397                 return NULL;
398         }
399         RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
400                 "is %" PRIu64 " bytes\n",
401                 __func__, p->name, total_size);
402
403         /* Memory initialization */
404         f->n_buckets = p->n_buckets;
405         f->key_size = KEY_SIZE;
406         f->entry_size = entry_size;
407         f->bucket_size = bucket_size;
408         f->key_offset = p->key_offset;
409         f->f_hash = p->f_hash;
410         f->seed = p->seed;
411
412         f->n_buckets_ext = n_buckets_ext;
413         f->stack_pos = n_buckets_ext;
414         f->stack = (uint32_t *)
415                 &f->memory[(p->n_buckets + n_buckets_ext) * f->bucket_size];
416
417         if (p->key_mask != NULL)
418                 f->key_mask = ((uint64_t *)p->key_mask)[0];
419         else
420                 f->key_mask = 0xFFFFFFFFFFFFFFFFLLU;
421
422         for (i = 0; i < n_buckets_ext; i++)
423                 f->stack[i] = i;
424
425         return f;
426 }
427
428 static int
429 rte_table_hash_free_key8_ext(void *table)
430 {
431         struct rte_table_hash *f = table;
432
433         /* Check input parameters */
434         if (f == NULL) {
435                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
436                 return -EINVAL;
437         }
438
439         rte_free(f);
440         return 0;
441 }
442
443 static int
444 rte_table_hash_entry_add_key8_ext(
445         void *table,
446         void *key,
447         void *entry,
448         int *key_found,
449         void **entry_ptr)
450 {
451         struct rte_table_hash *f = table;
452         struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
453         uint64_t signature;
454         uint32_t bucket_index, i;
455
456         signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
457         bucket_index = signature & (f->n_buckets - 1);
458         bucket0 = (struct rte_bucket_4_8 *)
459                 &f->memory[bucket_index * f->bucket_size];
460
461         /* Key is present in the bucket */
462         for (bucket = bucket0; bucket != NULL; bucket = bucket->next) {
463                 uint64_t mask;
464
465                 for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
466                         uint64_t bucket_signature = bucket->signature;
467                         uint64_t *bucket_key = &bucket->key[i];
468
469                         if ((bucket_signature & mask) &&
470                                 (keycmp(bucket_key, key, &f->key_mask) == 0)) {
471                                 uint8_t *bucket_data = &bucket->data[i *
472                                         f->entry_size];
473
474                                 memcpy(bucket_data, entry, f->entry_size);
475                                 *key_found = 1;
476                                 *entry_ptr = (void *) bucket_data;
477                                 return 0;
478                         }
479                 }
480         }
481
482         /* Key is not present in the bucket */
483         for (bucket_prev = NULL, bucket = bucket0;
484                 bucket != NULL; bucket_prev = bucket, bucket = bucket->next) {
485                 uint64_t mask;
486
487                 for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
488                         uint64_t bucket_signature = bucket->signature;
489
490                         if ((bucket_signature & mask) == 0) {
491                                 uint8_t *bucket_data = &bucket->data[i *
492                                         f->entry_size];
493
494                                 bucket->signature |= mask;
495                                 keycpy(&bucket->key[i], key, &f->key_mask);
496                                 memcpy(bucket_data, entry, f->entry_size);
497                                 *key_found = 0;
498                                 *entry_ptr = (void *) bucket_data;
499
500                                 return 0;
501                         }
502                 }
503         }
504
505         /* Bucket full: extend bucket */
506         if (f->stack_pos > 0) {
507                 bucket_index = f->stack[--f->stack_pos];
508
509                 bucket = (struct rte_bucket_4_8 *) &f->memory[(f->n_buckets +
510                         bucket_index) * f->bucket_size];
511                 bucket_prev->next = bucket;
512                 bucket_prev->next_valid = 1;
513
514                 bucket->signature = 1;
515                 keycpy(&bucket->key[0], key, &f->key_mask);
516                 memcpy(&bucket->data[0], entry, f->entry_size);
517                 *key_found = 0;
518                 *entry_ptr = (void *) &bucket->data[0];
519                 return 0;
520         }
521
522         return -ENOSPC;
523 }
524
525 static int
526 rte_table_hash_entry_delete_key8_ext(
527         void *table,
528         void *key,
529         int *key_found,
530         void *entry)
531 {
532         struct rte_table_hash *f = table;
533         struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
534         uint64_t signature;
535         uint32_t bucket_index, i;
536
537         signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
538         bucket_index = signature & (f->n_buckets - 1);
539         bucket0 = (struct rte_bucket_4_8 *)
540                 &f->memory[bucket_index * f->bucket_size];
541
542         /* Key is present in the bucket */
543         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
544                 bucket_prev = bucket, bucket = bucket->next) {
545                 uint64_t mask;
546
547                 for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
548                         uint64_t bucket_signature = bucket->signature;
549                         uint64_t *bucket_key = &bucket->key[i];
550
551                         if ((bucket_signature & mask) &&
552                                 (keycmp(bucket_key, key, &f->key_mask) == 0)) {
553                                 uint8_t *bucket_data = &bucket->data[i *
554                                         f->entry_size];
555
556                                 bucket->signature &= ~mask;
557                                 *key_found = 1;
558                                 if (entry)
559                                         memcpy(entry, bucket_data,
560                                                 f->entry_size);
561
562                                 if ((bucket->signature == 0) &&
563                                     (bucket_prev != NULL)) {
564                                         bucket_prev->next = bucket->next;
565                                         bucket_prev->next_valid =
566                                                 bucket->next_valid;
567
568                                         memset(bucket, 0,
569                                                 sizeof(struct rte_bucket_4_8));
570                                         bucket_index = (((uint8_t *)bucket -
571                                                 (uint8_t *)f->memory)/f->bucket_size) - f->n_buckets;
572                                         f->stack[f->stack_pos++] = bucket_index;
573                                 }
574
575                                 return 0;
576                         }
577                 }
578         }
579
580         /* Key is not present in the bucket */
581         *key_found = 0;
582         return 0;
583 }
584
585 #define lookup_key8_cmp(key_in, bucket, pos, f)                 \
586 {                                                               \
587         uint64_t xor[4], signature, k;                          \
588                                                                 \
589         signature = ~bucket->signature;                         \
590                                                                 \
591         k = key_in[0] & f->key_mask;                            \
592         xor[0] = (k ^ bucket->key[0]) | (signature & 1);                \
593         xor[1] = (k ^ bucket->key[1]) | (signature & 2);                \
594         xor[2] = (k ^ bucket->key[2]) | (signature & 4);                \
595         xor[3] = (k ^ bucket->key[3]) | (signature & 8);                \
596                                                                 \
597         pos = 4;                                                \
598         if (xor[0] == 0)                                        \
599                 pos = 0;                                        \
600         if (xor[1] == 0)                                        \
601                 pos = 1;                                        \
602         if (xor[2] == 0)                                        \
603                 pos = 2;                                        \
604         if (xor[3] == 0)                                        \
605                 pos = 3;                                        \
606 }
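/*
 * Worked example of the branchless slot match above (illustrative values):
 * suppose only slots 0 and 2 of a bucket are valid (signature = 0x5) and
 * key[2] equals the masked input key while key[0] does not. ~signature has
 * bits 1 and 3 set, so xor[1] and xor[3] are forced non-zero no matter what
 * stale data the empty slots hold, xor[0] is non-zero because the keys
 * differ, and only xor[2] is zero, giving pos = 2. When nothing matches,
 * pos stays 4, which the callers turn into a lookup miss because bit 4 of
 * bucket->signature is never set.
 */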
607
608 #define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f)   \
609 {                                                               \
610         uint64_t pkt_mask;                                      \
611         uint32_t key_offset = f->key_offset;                    \
612                                                                 \
613         pkt0_index = __builtin_ctzll(pkts_mask);                \
614         pkt_mask = 1LLU << pkt0_index;                          \
615         pkts_mask &= ~pkt_mask;                                 \
616                                                                 \
617         mbuf0 = pkts[pkt0_index];                               \
618         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));  \
619 }
620
621 #define lookup1_stage1(mbuf1, bucket1, f)                       \
622 {                                                               \
623         uint64_t *key;                                          \
624         uint64_t signature;                                     \
625         uint32_t bucket_index;                                  \
626                                                                 \
627         key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\
628         signature = f->f_hash(key, &f->key_mask, KEY_SIZE, f->seed);    \
629         bucket_index = signature & (f->n_buckets - 1);          \
630         bucket1 = (struct rte_bucket_4_8 *)                     \
631                 &f->memory[bucket_index * f->bucket_size];      \
632         rte_prefetch0(bucket1);                                 \
633 }
634
635 #define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,          \
636         pkts_mask_out, entries, f)                              \
637 {                                                               \
638         void *a;                                                \
639         uint64_t pkt_mask;                                      \
640         uint64_t *key;                                          \
641         uint32_t pos;                                           \
642                                                                 \
643         key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
644         lookup_key8_cmp(key, bucket2, pos, f);  \
645                                                                 \
646         pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
647         pkts_mask_out |= pkt_mask;                              \
648                                                                 \
649         a = (void *) &bucket2->data[pos * f->entry_size];       \
650         rte_prefetch0(a);                                       \
651         entries[pkt2_index] = a;                                \
652         lru_update(bucket2, pos);                               \
653 }
654
655 #define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out,\
656         entries, buckets_mask, buckets, keys, f)                \
657 {                                                               \
658         struct rte_bucket_4_8 *bucket_next;                     \
659         void *a;                                                \
660         uint64_t pkt_mask, bucket_mask;                         \
661         uint64_t *key;                                          \
662         uint32_t pos;                                           \
663                                                                 \
664         key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
665         lookup_key8_cmp(key, bucket2, pos, f);  \
666                                                                 \
667         pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
668         pkts_mask_out |= pkt_mask;                              \
669                                                                 \
670         a = (void *) &bucket2->data[pos * f->entry_size];       \
671         rte_prefetch0(a);                                       \
672         entries[pkt2_index] = a;                                \
673                                                                 \
674         bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\
675         buckets_mask |= bucket_mask;                            \
676         bucket_next = bucket2->next;                            \
677         buckets[pkt2_index] = bucket_next;                      \
678         keys[pkt2_index] = key;                                 \
679 }
680
681 #define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,\
682         buckets_mask, f)                                        \
683 {                                                               \
684         struct rte_bucket_4_8 *bucket, *bucket_next;            \
685         void *a;                                                \
686         uint64_t pkt_mask, bucket_mask;                         \
687         uint64_t *key;                                          \
688         uint32_t pos;                                           \
689                                                                 \
690         bucket = buckets[pkt_index];                            \
691         key = keys[pkt_index];                                  \
692         lookup_key8_cmp(key, bucket, pos, f);                   \
693                                                                 \
694         pkt_mask = ((bucket->signature >> pos) & 1LLU) << pkt_index;\
695         pkts_mask_out |= pkt_mask;                              \
696                                                                 \
697         a = (void *) &bucket->data[pos * f->entry_size];        \
698         rte_prefetch0(a);                                       \
699         entries[pkt_index] = a;                                 \
700                                                                 \
701         bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\
702         buckets_mask |= bucket_mask;                            \
703         bucket_next = bucket->next;                             \
704         rte_prefetch0(bucket_next);                             \
705         buckets[pkt_index] = bucket_next;                       \
706         keys[pkt_index] = key;                                  \
707 }
708
709 #define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
710         pkts, pkts_mask, f)                                     \
711 {                                                               \
712         uint64_t pkt00_mask, pkt01_mask;                        \
713         uint32_t key_offset = f->key_offset;            \
714                                                                 \
715         pkt00_index = __builtin_ctzll(pkts_mask);               \
716         pkt00_mask = 1LLU << pkt00_index;                       \
717         pkts_mask &= ~pkt00_mask;                               \
718                                                                 \
719         mbuf00 = pkts[pkt00_index];                             \
720         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
721                                                                 \
722         pkt01_index = __builtin_ctzll(pkts_mask);               \
723         pkt01_mask = 1LLU << pkt01_index;                       \
724         pkts_mask &= ~pkt01_mask;                               \
725                                                                 \
726         mbuf01 = pkts[pkt01_index];                             \
727         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
728 }
729
730 #define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
731         mbuf00, mbuf01, pkts, pkts_mask, f)                     \
732 {                                                               \
733         uint64_t pkt00_mask, pkt01_mask;                        \
734         uint32_t key_offset = f->key_offset;            \
735                                                                 \
736         pkt00_index = __builtin_ctzll(pkts_mask);               \
737         pkt00_mask = 1LLU << pkt00_index;                       \
738         pkts_mask &= ~pkt00_mask;                               \
739                                                                 \
740         mbuf00 = pkts[pkt00_index];                             \
741         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
742                                                                 \
743         pkt01_index = __builtin_ctzll(pkts_mask);               \
744         if (pkts_mask == 0)                                     \
745                 pkt01_index = pkt00_index;                      \
746                                                                 \
747         pkt01_mask = 1LLU << pkt01_index;                       \
748         pkts_mask &= ~pkt01_mask;                               \
749                                                                 \
750         mbuf01 = pkts[pkt01_index];                             \
751         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
752 }
753
754 #define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)\
755 {                                                               \
756         uint64_t *key10, *key11;                                \
757         uint64_t signature10, signature11;                      \
758         uint32_t bucket10_index, bucket11_index;                \
759         rte_table_hash_op_hash f_hash = f->f_hash;              \
760         uint64_t seed = f->seed;                                \
761         uint32_t key_offset = f->key_offset;                    \
762                                                                 \
763         key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, key_offset);\
764         key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, key_offset);\
765                                                                 \
766         signature10 = f_hash(key10, &f->key_mask, KEY_SIZE, seed);      \
767         bucket10_index = signature10 & (f->n_buckets - 1);      \
768         bucket10 = (struct rte_bucket_4_8 *)                    \
769                 &f->memory[bucket10_index * f->bucket_size];    \
770         rte_prefetch0(bucket10);                                \
771                                                                 \
772         signature11 = f_hash(key11, &f->key_mask, KEY_SIZE, seed);      \
773         bucket11_index = signature11 & (f->n_buckets - 1);      \
774         bucket11 = (struct rte_bucket_4_8 *)                    \
775                 &f->memory[bucket11_index * f->bucket_size];    \
776         rte_prefetch0(bucket11);                                \
777 }
778
779 #define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
780         bucket20, bucket21, pkts_mask_out, entries, f)          \
781 {                                                               \
782         void *a20, *a21;                                        \
783         uint64_t pkt20_mask, pkt21_mask;                        \
784         uint64_t *key20, *key21;                                \
785         uint32_t pos20, pos21;                                  \
786                                                                 \
787         key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
788         key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
789                                                                 \
790         lookup_key8_cmp(key20, bucket20, pos20, f);                     \
791         lookup_key8_cmp(key21, bucket21, pos21, f);                     \
792                                                                 \
793         pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
794         pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
795         pkts_mask_out |= pkt20_mask | pkt21_mask;               \
796                                                                 \
797         a20 = (void *) &bucket20->data[pos20 * f->entry_size];  \
798         a21 = (void *) &bucket21->data[pos21 * f->entry_size];  \
799         rte_prefetch0(a20);                                     \
800         rte_prefetch0(a21);                                     \
801         entries[pkt20_index] = a20;                             \
802         entries[pkt21_index] = a21;                             \
803         lru_update(bucket20, pos20);                            \
804         lru_update(bucket21, pos21);                            \
805 }
806
807 #define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
808         bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f)\
809 {                                                               \
810         struct rte_bucket_4_8 *bucket20_next, *bucket21_next;   \
811         void *a20, *a21;                                        \
812         uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\
813         uint64_t *key20, *key21;                                \
814         uint32_t pos20, pos21;                                  \
815                                                                 \
816         key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
817         key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
818                                                                 \
819         lookup_key8_cmp(key20, bucket20, pos20, f);                     \
820         lookup_key8_cmp(key21, bucket21, pos21, f);                     \
821                                                                 \
822         pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
823         pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
824         pkts_mask_out |= pkt20_mask | pkt21_mask;               \
825                                                                 \
826         a20 = (void *) &bucket20->data[pos20 * f->entry_size];  \
827         a21 = (void *) &bucket21->data[pos21 * f->entry_size];  \
828         rte_prefetch0(a20);                                     \
829         rte_prefetch0(a21);                                     \
830         entries[pkt20_index] = a20;                             \
831         entries[pkt21_index] = a21;                             \
832                                                                 \
833         bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);\
834         bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);\
835         buckets_mask |= bucket20_mask | bucket21_mask;          \
836         bucket20_next = bucket20->next;                         \
837         bucket21_next = bucket21->next;                         \
838         buckets[pkt20_index] = bucket20_next;                   \
839         buckets[pkt21_index] = bucket21_next;                   \
840         keys[pkt20_index] = key20;                              \
841         keys[pkt21_index] = key21;                              \
842 }
843
844 static int
845 rte_table_hash_lookup_key8_lru(
846         void *table,
847         struct rte_mbuf **pkts,
848         uint64_t pkts_mask,
849         uint64_t *lookup_hit_mask,
850         void **entries)
851 {
852         struct rte_table_hash *f = (struct rte_table_hash *) table;
853         struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
854         struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
855         uint32_t pkt00_index, pkt01_index, pkt10_index;
856         uint32_t pkt11_index, pkt20_index, pkt21_index;
857         uint64_t pkts_mask_out = 0;
858
859         __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
860         RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);
861
862         /* Cannot run the pipeline with fewer than 5 packets */
863         if (__builtin_popcountll(pkts_mask) < 5) {
864                 for ( ; pkts_mask; ) {
865                         struct rte_bucket_4_8 *bucket;
866                         struct rte_mbuf *mbuf;
867                         uint32_t pkt_index;
868
869                         lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
870                         lookup1_stage1(mbuf, bucket, f);
871                         lookup1_stage2_lru(pkt_index, mbuf, bucket,
872                                 pkts_mask_out, entries, f);
873                 }
874
875                 *lookup_hit_mask = pkts_mask_out;
876                 RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
877                 return 0;
878         }
879
880         /*
881          * Pipeline fill
882          *
883          */
884         /* Pipeline stage 0 */
885         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
886                 pkts_mask, f);
887
888         /* Pipeline feed */
889         mbuf10 = mbuf00;
890         mbuf11 = mbuf01;
891         pkt10_index = pkt00_index;
892         pkt11_index = pkt01_index;
893
894         /* Pipeline stage 0 */
895         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
896                 pkts_mask, f);
897
898         /* Pipeline stage 1 */
899         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
900
901         /*
902          * Pipeline run
903          *
904          */
905         for ( ; pkts_mask; ) {
906                 /* Pipeline feed */
907                 bucket20 = bucket10;
908                 bucket21 = bucket11;
909                 mbuf20 = mbuf10;
910                 mbuf21 = mbuf11;
911                 mbuf10 = mbuf00;
912                 mbuf11 = mbuf01;
913                 pkt20_index = pkt10_index;
914                 pkt21_index = pkt11_index;
915                 pkt10_index = pkt00_index;
916                 pkt11_index = pkt01_index;
917
918                 /* Pipeline stage 0 */
919                 lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
920                         mbuf00, mbuf01, pkts, pkts_mask, f);
921
922                 /* Pipeline stage 1 */
923                 lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
924
925                 /* Pipeline stage 2 */
926                 lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
927                         bucket20, bucket21, pkts_mask_out, entries, f);
928         }
929
930         /*
931          * Pipeline flush
932          *
933          */
934         /* Pipeline feed */
935         bucket20 = bucket10;
936         bucket21 = bucket11;
937         mbuf20 = mbuf10;
938         mbuf21 = mbuf11;
939         mbuf10 = mbuf00;
940         mbuf11 = mbuf01;
941         pkt20_index = pkt10_index;
942         pkt21_index = pkt11_index;
943         pkt10_index = pkt00_index;
944         pkt11_index = pkt01_index;
945
946         /* Pipeline stage 1 */
947         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
948
949         /* Pipeline stage 2 */
950         lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
951                 bucket20, bucket21, pkts_mask_out, entries, f);
952
953         /* Pipeline feed */
954         bucket20 = bucket10;
955         bucket21 = bucket11;
956         mbuf20 = mbuf10;
957         mbuf21 = mbuf11;
958         pkt20_index = pkt10_index;
959         pkt21_index = pkt11_index;
960
961         /* Pipeline stage 2 */
962         lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
963                 bucket20, bucket21, pkts_mask_out, entries, f);
964
965         *lookup_hit_mask = pkts_mask_out;
966         RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
967         return 0;
968 } /* lookup LRU */
969
970 static int
971 rte_table_hash_lookup_key8_ext(
972         void *table,
973         struct rte_mbuf **pkts,
974         uint64_t pkts_mask,
975         uint64_t *lookup_hit_mask,
976         void **entries)
977 {
978         struct rte_table_hash *f = (struct rte_table_hash *) table;
979         struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
980         struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
981         uint32_t pkt00_index, pkt01_index, pkt10_index;
982         uint32_t pkt11_index, pkt20_index, pkt21_index;
983         uint64_t pkts_mask_out = 0, buckets_mask = 0;
984         struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
985         uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
986
987         __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
988         RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);
989
990         /* Cannot run the pipeline with fewer than 5 packets */
991         if (__builtin_popcountll(pkts_mask) < 5) {
992                 for ( ; pkts_mask; ) {
993                         struct rte_bucket_4_8 *bucket;
994                         struct rte_mbuf *mbuf;
995                         uint32_t pkt_index;
996
997                         lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
998                         lookup1_stage1(mbuf, bucket, f);
999                         lookup1_stage2_ext(pkt_index, mbuf, bucket,
1000                                 pkts_mask_out, entries, buckets_mask,
1001                                 buckets, keys, f);
1002                 }
1003
1004                 goto grind_next_buckets;
1005         }
1006
1007         /*
1008          * Pipeline fill
1009          *
1010          */
1011         /* Pipeline stage 0 */
1012         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
1013                 pkts_mask, f);
1014
1015         /* Pipeline feed */
1016         mbuf10 = mbuf00;
1017         mbuf11 = mbuf01;
1018         pkt10_index = pkt00_index;
1019         pkt11_index = pkt01_index;
1020
1021         /* Pipeline stage 0 */
1022         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
1023                 pkts_mask, f);
1024
1025         /* Pipeline stage 1 */
1026         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1027
1028         /*
1029          * Pipeline run
1030          *
1031          */
1032         for ( ; pkts_mask; ) {
1033                 /* Pipeline feed */
1034                 bucket20 = bucket10;
1035                 bucket21 = bucket11;
1036                 mbuf20 = mbuf10;
1037                 mbuf21 = mbuf11;
1038                 mbuf10 = mbuf00;
1039                 mbuf11 = mbuf01;
1040                 pkt20_index = pkt10_index;
1041                 pkt21_index = pkt11_index;
1042                 pkt10_index = pkt00_index;
1043                 pkt11_index = pkt01_index;
1044
1045                 /* Pipeline stage 0 */
1046                 lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
1047                         mbuf00, mbuf01, pkts, pkts_mask, f);
1048
1049                 /* Pipeline stage 1 */
1050                 lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1051
1052                 /* Pipeline stage 2 */
1053                 lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1054                         bucket20, bucket21, pkts_mask_out, entries,
1055                         buckets_mask, buckets, keys, f);
1056         }
1057
1058         /*
1059          * Pipeline flush
1060          *
1061          */
1062         /* Pipeline feed */
1063         bucket20 = bucket10;
1064         bucket21 = bucket11;
1065         mbuf20 = mbuf10;
1066         mbuf21 = mbuf11;
1067         mbuf10 = mbuf00;
1068         mbuf11 = mbuf01;
1069         pkt20_index = pkt10_index;
1070         pkt21_index = pkt11_index;
1071         pkt10_index = pkt00_index;
1072         pkt11_index = pkt01_index;
1073
1074         /* Pipeline stage 1 */
1075         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1076
1077         /* Pipeline stage 2 */
1078         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1079                 bucket20, bucket21, pkts_mask_out, entries,
1080                 buckets_mask, buckets, keys, f);
1081
1082         /* Pipeline feed */
1083         bucket20 = bucket10;
1084         bucket21 = bucket11;
1085         mbuf20 = mbuf10;
1086         mbuf21 = mbuf11;
1087         pkt20_index = pkt10_index;
1088         pkt21_index = pkt11_index;
1089
1090         /* Pipeline stage 2 */
1091         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1092                 bucket20, bucket21, pkts_mask_out, entries,
1093                 buckets_mask, buckets, keys, f);
1094
1095 grind_next_buckets:
1096         /* Grind next buckets */
1097         for ( ; buckets_mask; ) {
1098                 uint64_t buckets_mask_next = 0;
1099
1100                 for ( ; buckets_mask; ) {
1101                         uint64_t pkt_mask;
1102                         uint32_t pkt_index;
1103
1104                         pkt_index = __builtin_ctzll(buckets_mask);
1105                         pkt_mask = 1LLU << pkt_index;
1106                         buckets_mask &= ~pkt_mask;
1107
1108                         lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
1109                                 entries, buckets_mask_next, f);
1110                 }
1111
1112                 buckets_mask = buckets_mask_next;
1113         }
1114
1115         *lookup_hit_mask = pkts_mask_out;
1116         RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
1117         return 0;
1118 } /* lookup EXT */
1119
1120 static int
1121 rte_table_hash_key8_stats_read(void *table, struct rte_table_stats *stats, int clear)
1122 {
1123         struct rte_table_hash *t = table;
1124
1125         if (stats != NULL)
1126                 memcpy(stats, &t->stats, sizeof(t->stats));
1127
1128         if (clear)
1129                 memset(&t->stats, 0, sizeof(t->stats));
1130
1131         return 0;
1132 }
1133
1134 struct rte_table_ops rte_table_hash_key8_lru_ops = {
1135         .f_create = rte_table_hash_create_key8_lru,
1136         .f_free = rte_table_hash_free_key8_lru,
1137         .f_add = rte_table_hash_entry_add_key8_lru,
1138         .f_delete = rte_table_hash_entry_delete_key8_lru,
1139         .f_add_bulk = NULL,
1140         .f_delete_bulk = NULL,
1141         .f_lookup = rte_table_hash_lookup_key8_lru,
1142         .f_stats = rte_table_hash_key8_stats_read,
1143 };
1144
1145 struct rte_table_ops rte_table_hash_key8_ext_ops = {
1146         .f_create = rte_table_hash_create_key8_ext,
1147         .f_free = rte_table_hash_free_key8_ext,
1148         .f_add = rte_table_hash_entry_add_key8_ext,
1149         .f_delete = rte_table_hash_entry_delete_key8_ext,
1150         .f_add_bulk = NULL,
1151         .f_delete_bulk = NULL,
1152         .f_lookup = rte_table_hash_lookup_key8_ext,
1153         .f_stats = rte_table_hash_key8_stats_read,
1154 };
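/*
 * Usage sketch (illustrative only, not part of this file). Applications
 * normally reach these tables through struct rte_table_ops, either directly
 * as below or via the librte_pipeline table API. my_hash, my_flow_entry,
 * APP_METADATA_OFFSET and the undeclared local variables (key, flow_entry,
 * pkts, pkts_mask, hit_mask, entries) are placeholders:
 *
 *   static uint64_t
 *   my_hash(void *key, void *key_mask, uint32_t key_size, uint64_t seed)
 *   {
 *           (void)key_mask; (void)key_size;
 *           return *(uint64_t *)key ^ seed;    // toy hash, use a real one
 *   }
 *
 *   struct rte_table_hash_params params = {
 *           .name = "flow_table",
 *           .key_size = 8,
 *           .key_offset = APP_METADATA_OFFSET, // key location in mbuf metadata
 *           .key_mask = NULL,                  // NULL: match all 8 key bytes
 *           .n_keys = 1 << 16,
 *           .n_buckets = 1 << 14,
 *           .f_hash = my_hash,
 *           .seed = 0,
 *   };
 *
 *   void *t = rte_table_hash_key8_lru_ops.f_create(&params, SOCKET_ID_ANY,
 *           sizeof(struct my_flow_entry));
 *   int key_found;
 *   void *entry_ptr;
 *   rte_table_hash_key8_lru_ops.f_add(t, &key, &flow_entry, &key_found,
 *           &entry_ptr);
 *   rte_table_hash_key8_lru_ops.f_lookup(t, pkts, pkts_mask, &hit_mask,
 *           entries);
 *   rte_table_hash_key8_lru_ops.f_free(t);
 */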