lib/table/rte_table_hash_key8.c — DPDK table library: hash table implementation
specialized for 8-byte keys (LRU and extendable-bucket variants).
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <string.h>
5 #include <stdio.h>
6
7 #include <rte_common.h>
8 #include <rte_malloc.h>
9 #include <rte_log.h>
10
11 #include "rte_table_hash.h"
12 #include "rte_lru.h"
13
/* Fixed key size handled by this implementation, in bytes */
#define KEY_SIZE                                                8

/* Number of key slots per bucket */
#define KEYS_PER_BUCKET                                 4

#ifdef RTE_TABLE_STATS_COLLECT

/* Statistics accounting: enabled only when RTE_TABLE_STATS_COLLECT is set,
 * so the fast path carries no cost in default builds.
 */
#define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val) \
	table->stats.n_pkts_in += val
#define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val) \
	table->stats.n_pkts_lookup_miss += val

#else

/* Stats disabled: macros expand to nothing */
#define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val)
#define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val)

#endif
31
#ifdef RTE_ARCH_64
/*
 * Hash bucket holding up to 4 keys of 8 bytes each.
 *
 * The two arch variants differ only by explicit padding: on 32-bit builds
 * the "next" pointer is 4 bytes, so a pad field keeps the layout (and the
 * 64-byte size the create functions check for) identical to 64-bit builds.
 */
struct rte_bucket_4_8 {
	/* Cache line 0 */
	uint64_t signature;	/* bit i set => key slot i is valid */
	uint64_t lru_list;	/* LRU ordering of the 4 slots (see rte_lru.h) */
	struct rte_bucket_4_8 *next;	/* next bucket in extension chain (ext tables) */
	uint64_t next_valid;	/* non-zero when "next" points to a valid bucket */

	uint64_t key[4];	/* one 8-byte key per slot */

	/* Cache line 1 */
	uint8_t data[0];	/* 4 * entry_size bytes of per-key data */
};
#else
struct rte_bucket_4_8 {
	/* Cache line 0 */
	uint64_t signature;
	uint64_t lru_list;
	struct rte_bucket_4_8 *next;
	uint32_t pad;	/* keeps layout identical to the 64-bit variant */
	uint64_t next_valid;

	uint64_t key[4];

	/* Cache line 1 */
	uint8_t data[0];
};
#endif
60
/*
 * Run-time state of an 8-byte-key hash table. A single header is shared by
 * the LRU and the extendable ("ext") variants; the "Extendible buckets"
 * fields are only used by the ext variant.
 */
struct rte_table_hash {
	struct rte_table_stats stats;	/* per-table packet counters */

	/* Input parameters */
	uint32_t n_buckets;	/* number of primary buckets (power of 2) */
	uint32_t key_size;	/* always KEY_SIZE (8) for this implementation */
	uint32_t entry_size;	/* per-key user data size, in bytes */
	uint32_t bucket_size;	/* cache-line-rounded size of one bucket */
	uint32_t key_offset;	/* offset of the key within packet metadata */
	uint64_t key_mask;	/* AND-mask applied to every input key */
	rte_table_hash_op_hash f_hash;	/* user-provided hash function */
	uint64_t seed;	/* seed passed to f_hash */

	/* Extendible buckets */
	uint32_t n_buckets_ext;	/* number of extension buckets */
	uint32_t stack_pos;	/* number of free entries on the stack */
	uint32_t *stack;	/* LIFO free list of extension bucket indices */

	/* Lookup table: primary buckets, then ext buckets, then the stack */
	uint8_t memory[0] __rte_cache_aligned;
};
82
/*
 * Compare a stored 8-byte key (a) against an input key (b) under the table
 * key mask. Returns 0 on match, non-zero on mismatch (memcmp-style).
 * Only the input key is masked; stored keys are masked at insertion time.
 */
static int
keycmp(void *a, void *b, void *b_mask)
{
	uint64_t key_stored = *(uint64_t *)a;
	uint64_t key_in = *(uint64_t *)b;
	uint64_t mask = *(uint64_t *)b_mask;

	return key_stored != (key_in & mask);
}
90
/*
 * Store the masked 8-byte input key (src AND src_mask) into the bucket key
 * slot pointed to by dst. Masking here is what lets keycmp() skip masking
 * the stored side.
 */
static void
keycpy(void *dst, void *src, void *src_mask)
{
	uint64_t *slot = dst;

	slot[0] = *(uint64_t *)src & *(uint64_t *)src_mask;
}
98
99 static int
100 check_params_create(struct rte_table_hash_params *params)
101 {
102         /* name */
103         if (params->name == NULL) {
104                 RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__);
105                 return -EINVAL;
106         }
107
108         /* key_size */
109         if (params->key_size != KEY_SIZE) {
110                 RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
111                 return -EINVAL;
112         }
113
114         /* n_keys */
115         if (params->n_keys == 0) {
116                 RTE_LOG(ERR, TABLE, "%s: n_keys is zero\n", __func__);
117                 return -EINVAL;
118         }
119
120         /* n_buckets */
121         if ((params->n_buckets == 0) ||
122                 (!rte_is_power_of_2(params->n_buckets))) {
123                 RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
124                 return -EINVAL;
125         }
126
127         /* f_hash */
128         if (params->f_hash == NULL) {
129                 RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
130                         __func__);
131                 return -EINVAL;
132         }
133
134         return 0;
135 }
136
/*
 * Create an 8-byte-key hash table with fixed-size (non-extendable) buckets
 * and LRU replacement within each bucket.
 *
 * @param params     struct rte_table_hash_params (name, n_keys, n_buckets,
 *                   key_offset, key_mask, f_hash, seed)
 * @param socket_id  NUMA node for the allocation
 * @param entry_size per-key user data size, in bytes
 * @return table handle, or NULL on invalid parameters / allocation failure
 */
static void *
rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size)
{
	struct rte_table_hash_params *p = params;
	struct rte_table_hash *f;
	uint64_t bucket_size, total_size;
	uint32_t n_buckets, i;

	/* Check input parameters. The sizeof checks guard the layout
	 * assumptions: the header must be cache-line aligned and the bucket
	 * struct must be a whole number of 64-byte lines.
	 */
	if ((check_params_create(p) != 0) ||
		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
		((sizeof(struct rte_bucket_4_8) % 64) != 0))
		return NULL;

	/*
	 * Table dimensioning
	 *
	 * Objective: Pick the number of buckets (n_buckets) so that there a chance
	 * to store n_keys keys in the table.
	 *
	 * Note: Since the buckets do not get extended, it is not possible to
	 * guarantee that n_keys keys can be stored in the table at any time. In the
	 * worst case scenario when all the n_keys fall into the same bucket, only
	 * a maximum of KEYS_PER_BUCKET keys will be stored in the table. This case
	 * defeats the purpose of the hash table. It indicates unsuitable f_hash or
	 * n_keys to n_buckets ratio.
	 *
	 * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET
	 */
	n_buckets = rte_align32pow2(
		(p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET);
	n_buckets = RTE_MAX(n_buckets, p->n_buckets);

	/* Memory allocation: header plus n_buckets fixed-size buckets */
	bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) +
		KEYS_PER_BUCKET * entry_size);
	total_size = sizeof(struct rte_table_hash) + n_buckets * bucket_size;

	/* total_size is 64-bit; this check matters on 32-bit targets where
	 * SIZE_MAX < UINT64_MAX (it is always false on 64-bit)
	 */
	if (total_size > SIZE_MAX) {
		RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
			" for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}

	f = rte_zmalloc_socket(p->name,
		(size_t)total_size,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (f == NULL) {
		RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
			" for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}

	RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
		"is %" PRIu64 " bytes\n",
		__func__, p->name, total_size);

	/* Memory initialization */
	f->n_buckets = n_buckets;
	f->key_size = KEY_SIZE;
	f->entry_size = entry_size;
	f->bucket_size = bucket_size;
	f->key_offset = p->key_offset;
	f->f_hash = p->f_hash;
	f->seed = p->seed;

	/* NULL key_mask means "match all 64 key bits" */
	if (p->key_mask != NULL)
		f->key_mask = ((uint64_t *)p->key_mask)[0];
	else
		f->key_mask = 0xFFFFFFFFFFFFFFFFLLU;

	for (i = 0; i < n_buckets; i++) {
		struct rte_bucket_4_8 *bucket;

		bucket = (struct rte_bucket_4_8 *) &f->memory[i *
			f->bucket_size];
		/* initial LRU list encoding for 4 empty slots (see rte_lru.h) */
		bucket->lru_list = 0x0000000100020003LLU;
	}

	return f;
}
221
222 static int
223 rte_table_hash_free_key8_lru(void *table)
224 {
225         struct rte_table_hash *f = table;
226
227         /* Check input parameters */
228         if (f == NULL) {
229                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
230                 return -EINVAL;
231         }
232
233         rte_free(f);
234         return 0;
235 }
236
/*
 * Add (or update) a key in an LRU table.
 *
 * Three cases, tried in order on the key's single bucket:
 *  1. key already present      -> overwrite its data, refresh LRU position;
 *  2. bucket has a free slot   -> install key there;
 *  3. bucket full              -> evict the least recently used slot.
 *
 * Always returns 0; *key_found reports whether the key pre-existed and
 * *entry_ptr receives the in-table data location.
 */
static int
rte_table_hash_entry_add_key8_lru(
	void *table,
	void *key,
	void *entry,
	int *key_found,
	void **entry_ptr)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket;
	uint64_t signature, mask, pos;
	uint32_t bucket_index, i;

	/* n_buckets is a power of 2, so the AND below is "mod n_buckets" */
	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket */
	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
		uint64_t bucket_signature = bucket->signature;
		uint64_t *bucket_key = &bucket->key[i];

		/* slot valid (signature bit set) AND keys equal under mask */
		if ((bucket_signature & mask) &&
			(keycmp(bucket_key, key, &f->key_mask) == 0)) {
			uint8_t *bucket_data = &bucket->data[i * f->entry_size];

			memcpy(bucket_data, entry, f->entry_size);
			lru_update(bucket, i);
			*key_found = 1;
			*entry_ptr = (void *) bucket_data;
			return 0;
		}
	}

	/* Key is not present in the bucket: take the first free slot */
	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
		uint64_t bucket_signature = bucket->signature;

		if ((bucket_signature & mask) == 0) {
			uint8_t *bucket_data = &bucket->data[i * f->entry_size];

			bucket->signature |= mask;
			keycpy(&bucket->key[i], key, &f->key_mask);
			memcpy(bucket_data, entry, f->entry_size);
			lru_update(bucket, i);
			*key_found = 0;
			*entry_ptr = (void *) bucket_data;

			return 0;
		}
	}

	/* Bucket full: replace LRU entry */
	pos = lru_pos(bucket);
	keycpy(&bucket->key[pos], key, &f->key_mask);
	memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
	lru_update(bucket, pos);
	*key_found = 0;
	*entry_ptr = (void *) &bucket->data[pos * f->entry_size];

	return 0;
}
300
/*
 * Delete a key from an LRU table.
 *
 * Clears the matching slot's signature bit (the slot data itself is left in
 * place and becomes reusable). If entry is non-NULL, the deleted entry's
 * data is copied out to it. Always returns 0; *key_found reports whether
 * the key was present.
 */
static int
rte_table_hash_entry_delete_key8_lru(
	void *table,
	void *key,
	int *key_found,
	void *entry)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket;
	uint64_t signature, mask;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket */
	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
		uint64_t bucket_signature = bucket->signature;
		uint64_t *bucket_key = &bucket->key[i];

		if ((bucket_signature & mask) &&
			(keycmp(bucket_key, key, &f->key_mask) == 0)) {
			uint8_t *bucket_data = &bucket->data[i * f->entry_size];

			/* invalidate the slot; no LRU update needed */
			bucket->signature &= ~mask;
			*key_found = 1;
			if (entry)
				memcpy(entry, bucket_data, f->entry_size);

			return 0;
		}
	}

	/* Key is not present in the bucket */
	*key_found = 0;
	return 0;
}
340
/*
 * Create an 8-byte-key hash table with extendable buckets: when a primary
 * bucket fills up, a bucket from a pre-allocated extension pool is chained
 * to it, so storing n_keys keys is always possible (unlike the LRU variant).
 *
 * Memory layout inside f->memory: n_buckets primary buckets, then
 * n_buckets_ext extension buckets, then the free-index stack.
 *
 * @param params     struct rte_table_hash_params
 * @param socket_id  NUMA node for the allocation
 * @param entry_size per-key user data size, in bytes
 * @return table handle, or NULL on invalid parameters / allocation failure
 */
static void *
rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size)
{
	struct rte_table_hash_params *p = params;
	struct rte_table_hash *f;
	uint64_t bucket_size, stack_size, total_size;
	uint32_t n_buckets_ext, i;

	/* Check input parameters (layout checks as in the LRU variant) */
	if ((check_params_create(p) != 0) ||
		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
		((sizeof(struct rte_bucket_4_8) % 64) != 0))
		return NULL;

	/*
	 * Table dimensioning
	 *
	 * Objective: Pick the number of bucket extensions (n_buckets_ext) so that
	 * it is guaranteed that n_keys keys can be stored in the table at any time.
	 *
	 * The worst case scenario takes place when all the n_keys keys fall into
	 * the same bucket. Actually, due to the KEYS_PER_BUCKET scheme, the worst
	 * case takes place when (n_keys - KEYS_PER_BUCKET + 1) keys fall into the
	 * same bucket, while the remaining (KEYS_PER_BUCKET - 1) keys each fall
	 * into a different bucket. This case defeats the purpose of the hash table.
	 * It indicates unsuitable f_hash or n_keys to n_buckets ratio.
	 *
	 * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1
	 */
	n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1;

	/* Memory allocation */
	bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) +
		KEYS_PER_BUCKET * entry_size);
	stack_size = RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t));
	total_size = sizeof(struct rte_table_hash) +
		(p->n_buckets + n_buckets_ext) * bucket_size + stack_size;

	/* total_size is 64-bit; meaningful on 32-bit targets only */
	if (total_size > SIZE_MAX) {
		RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
			"for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}

	f = rte_zmalloc_socket(p->name,
		(size_t)total_size,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (f == NULL) {
		RTE_LOG(ERR, TABLE,
			"%s: Cannot allocate %" PRIu64 " bytes "
			"for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}
	RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
		"is %" PRIu64 " bytes\n",
		__func__, p->name, total_size);

	/* Memory initialization */
	f->n_buckets = p->n_buckets;
	f->key_size = KEY_SIZE;
	f->entry_size = entry_size;
	f->bucket_size = bucket_size;
	f->key_offset = p->key_offset;
	f->f_hash = p->f_hash;
	f->seed = p->seed;

	/* the free-index stack lives right after all the buckets */
	f->n_buckets_ext = n_buckets_ext;
	f->stack_pos = n_buckets_ext;
	f->stack = (uint32_t *)
		&f->memory[(p->n_buckets + n_buckets_ext) * f->bucket_size];

	/* NULL key_mask means "match all 64 key bits" */
	if (p->key_mask != NULL)
		f->key_mask = ((uint64_t *)p->key_mask)[0];
	else
		f->key_mask = 0xFFFFFFFFFFFFFFFFLLU;

	/* initially every extension bucket is free */
	for (i = 0; i < n_buckets_ext; i++)
		f->stack[i] = i;

	return f;
}
425
426 static int
427 rte_table_hash_free_key8_ext(void *table)
428 {
429         struct rte_table_hash *f = table;
430
431         /* Check input parameters */
432         if (f == NULL) {
433                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
434                 return -EINVAL;
435         }
436
437         rte_free(f);
438         return 0;
439 }
440
/*
 * Add (or update) a key in an extendable table.
 *
 * Walks the bucket chain twice: first looking for the key (update in
 * place), then looking for a free slot. If the whole chain is full, a new
 * bucket is popped from the free stack and linked at the end of the chain.
 *
 * @return 0 on success, -ENOSPC when the chain is full and no extension
 *         bucket is available. *key_found / *entry_ptr as in the LRU add.
 */
static int
rte_table_hash_entry_add_key8_ext(
	void *table,
	void *key,
	void *entry,
	int *key_found,
	void **entry_ptr)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
	uint64_t signature;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket0 = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket (scan the whole chain) */
	for (bucket = bucket0; bucket != NULL; bucket = bucket->next) {
		uint64_t mask;

		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
			uint64_t bucket_signature = bucket->signature;
			uint64_t *bucket_key = &bucket->key[i];

			if ((bucket_signature & mask) &&
				(keycmp(bucket_key, key, &f->key_mask) == 0)) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				memcpy(bucket_data, entry, f->entry_size);
				*key_found = 1;
				*entry_ptr = (void *) bucket_data;
				return 0;
			}
		}
	}

	/* Key is not present in the bucket: find a free slot anywhere in the
	 * chain; bucket_prev tracks the tail for the extension case below
	 */
	for (bucket_prev = NULL, bucket = bucket0;
		bucket != NULL; bucket_prev = bucket, bucket = bucket->next) {
		uint64_t mask;

		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
			uint64_t bucket_signature = bucket->signature;

			if ((bucket_signature & mask) == 0) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				bucket->signature |= mask;
				keycpy(&bucket->key[i], key, &f->key_mask);
				memcpy(bucket_data, entry, f->entry_size);
				*key_found = 0;
				*entry_ptr = (void *) bucket_data;

				return 0;
			}
		}
	}

	/* Bucket full: extend bucket (pop a free extension bucket and link
	 * it after the current chain tail, placing the key in its slot 0)
	 */
	if (f->stack_pos > 0) {
		bucket_index = f->stack[--f->stack_pos];

		/* extension buckets start right after the primary ones */
		bucket = (struct rte_bucket_4_8 *) &f->memory[(f->n_buckets +
			bucket_index) * f->bucket_size];
		bucket_prev->next = bucket;
		bucket_prev->next_valid = 1;

		bucket->signature = 1;	/* only slot 0 valid */
		keycpy(&bucket->key[0], key, &f->key_mask);
		memcpy(&bucket->data[0], entry, f->entry_size);
		*key_found = 0;
		*entry_ptr = (void *) &bucket->data[0];
		return 0;
	}

	return -ENOSPC;
}
522
/*
 * Delete a key from an extendable table.
 *
 * Clears the matching slot's signature bit; if that empties an extension
 * bucket (not the primary one, hence the bucket_prev != NULL check), the
 * bucket is unlinked from the chain and its index is pushed back onto the
 * free stack. If entry is non-NULL, the deleted data is copied out first.
 * Always returns 0; *key_found reports whether the key was present.
 */
static int
rte_table_hash_entry_delete_key8_ext(
	void *table,
	void *key,
	int *key_found,
	void *entry)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
	uint64_t signature;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket0 = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket (scan the whole chain) */
	for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
		bucket_prev = bucket, bucket = bucket->next) {
		uint64_t mask;

		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
			uint64_t bucket_signature = bucket->signature;
			uint64_t *bucket_key = &bucket->key[i];

			if ((bucket_signature & mask) &&
				(keycmp(bucket_key, key, &f->key_mask) == 0)) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				bucket->signature &= ~mask;
				*key_found = 1;
				if (entry)
					memcpy(entry, bucket_data,
						f->entry_size);

				/* recycle a now-empty extension bucket */
				if ((bucket->signature == 0) &&
				    (bucket_prev != NULL)) {
					bucket_prev->next = bucket->next;
					bucket_prev->next_valid =
						bucket->next_valid;

					memset(bucket, 0,
						sizeof(struct rte_bucket_4_8));
					/* index relative to the ext pool */
					bucket_index = (((uint8_t *)bucket -
						(uint8_t *)f->memory)/f->bucket_size) - f->n_buckets;
					f->stack[f->stack_pos++] = bucket_index;
				}

				return 0;
			}
		}
	}

	/* Key is not present in the bucket */
	*key_found = 0;
	return 0;
}
582
/*
 * Branch-light comparison of the (masked) input key against all 4 slots of
 * a bucket. For each slot i, xor[i] is zero only when the keys match AND
 * the slot's signature bit is set (invalid slots are forced non-zero by
 * OR-ing in the complemented signature bit). On exit, pos is the index of
 * the matching slot, or 4 when there is no match (callers then read
 * signature bit 4, which is never set, and record a lookup miss).
 */
#define lookup_key8_cmp(key_in, bucket, pos, f)                 \
{                                                               \
	uint64_t xor[4], signature, k;                          \
								\
	signature = ~bucket->signature;                         \
								\
	k = key_in[0] & f->key_mask;                            \
	xor[0] = (k ^ bucket->key[0]) | (signature & 1);                \
	xor[1] = (k ^ bucket->key[1]) | (signature & 2);                \
	xor[2] = (k ^ bucket->key[2]) | (signature & 4);                \
	xor[3] = (k ^ bucket->key[3]) | (signature & 8);                \
								\
	pos = 4;                                                \
	if (xor[0] == 0)                                        \
		pos = 0;                                        \
	if (xor[1] == 0)                                        \
		pos = 1;                                        \
	if (xor[2] == 0)                                        \
		pos = 2;                                        \
	if (xor[3] == 0)                                        \
		pos = 3;                                        \
}
605
/*
 * Pipeline stage 0 (single packet): pick the lowest-indexed pending packet
 * from pkts_mask, clear its bit, and prefetch its key area in the mbuf
 * metadata so stage 1 can hash it without a cache miss.
 */
#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f)   \
{                                                               \
	uint64_t pkt_mask;                                      \
	uint32_t key_offset = f->key_offset;\
								\
	pkt0_index = __builtin_ctzll(pkts_mask);                \
	pkt_mask = 1LLU << pkt0_index;                          \
	pkts_mask &= ~pkt_mask;                                 \
								\
	mbuf0 = pkts[pkt0_index];                               \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));  \
}
618
/*
 * Pipeline stage 1 (single packet): hash the packet's key, locate its
 * primary bucket, and prefetch the bucket for stage 2.
 */
#define lookup1_stage1(mbuf1, bucket1, f)                       \
{                                                               \
	uint64_t *key;                                          \
	uint64_t signature;                                     \
	uint32_t bucket_index;                                  \
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\
	signature = f->f_hash(key, &f->key_mask, KEY_SIZE, f->seed);    \
	bucket_index = signature & (f->n_buckets - 1);          \
	bucket1 = (struct rte_bucket_4_8 *)                     \
		&f->memory[bucket_index * f->bucket_size];      \
	rte_prefetch0(bucket1);                                 \
}
632
/*
 * Pipeline stage 2, LRU variant (single packet): compare the key against
 * the bucket, set the packet's bit in pkts_mask_out on a hit, record the
 * entry pointer and refresh the slot's LRU position. On a miss (pos == 4)
 * the signature bit read is always 0, so the hit bit stays clear; the
 * entry/LRU updates then touch the unused slot-4 position harmlessly.
 */
#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,          \
	pkts_mask_out, entries, f)                              \
{                                                               \
	void *a;                                                \
	uint64_t pkt_mask;                                      \
	uint64_t *key;                                          \
	uint32_t pos;                                           \
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
	lookup_key8_cmp(key, bucket2, pos, f);  \
								\
	pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
	pkts_mask_out |= pkt_mask;                              \
								\
	a = (void *) &bucket2->data[pos * f->entry_size];       \
	rte_prefetch0(a);                                       \
	entries[pkt2_index] = a;                                \
	lru_update(bucket2, pos);                               \
}
652
/*
 * Pipeline stage 2, extendable variant (single packet): like the LRU
 * version but, when the key missed AND the bucket has a valid chained
 * bucket, the packet is flagged in buckets_mask and its next bucket / key
 * pointer are stashed so lookup_grinder can continue down the chain.
 */
#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out,\
	entries, buckets_mask, buckets, keys, f)                \
{                                                               \
	struct rte_bucket_4_8 *bucket_next;                     \
	void *a;                                                \
	uint64_t pkt_mask, bucket_mask;                         \
	uint64_t *key;                                          \
	uint32_t pos;                                           \
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
	lookup_key8_cmp(key, bucket2, pos, f);  \
								\
	pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
	pkts_mask_out |= pkt_mask;                              \
								\
	a = (void *) &bucket2->data[pos * f->entry_size];       \
	rte_prefetch0(a);                                       \
	entries[pkt2_index] = a;                                \
								\
	bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\
	buckets_mask |= bucket_mask;                            \
	bucket_next = bucket2->next;                            \
	buckets[pkt2_index] = bucket_next;                      \
	keys[pkt2_index] = key;                                 \
}
678
/*
 * Continue an unresolved lookup one bucket further down a chain: compare
 * the stashed key against the current chained bucket, record a hit or,
 * on another miss with a valid next bucket, re-arm the packet in
 * buckets_mask for the next grinder round (also prefetching that bucket).
 */
#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,\
	buckets_mask, f)                                        \
{                                                               \
	struct rte_bucket_4_8 *bucket, *bucket_next;            \
	void *a;                                                \
	uint64_t pkt_mask, bucket_mask;                         \
	uint64_t *key;                                          \
	uint32_t pos;                                           \
								\
	bucket = buckets[pkt_index];                            \
	key = keys[pkt_index];                                  \
	lookup_key8_cmp(key, bucket, pos, f);                   \
								\
	pkt_mask = ((bucket->signature >> pos) & 1LLU) << pkt_index;\
	pkts_mask_out |= pkt_mask;                              \
								\
	a = (void *) &bucket->data[pos * f->entry_size];        \
	rte_prefetch0(a);                                       \
	entries[pkt_index] = a;                                 \
								\
	bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\
	buckets_mask |= bucket_mask;                            \
	bucket_next = bucket->next;                             \
	rte_prefetch0(bucket_next);                             \
	buckets[pkt_index] = bucket_next;                       \
	keys[pkt_index] = key;                                  \
}
706
/*
 * Pipeline stage 0, dual-packet variant: pull the next two pending packets
 * from pkts_mask and prefetch both key areas. Requires at least two bits
 * set in pkts_mask (see the _with_odd_support variant otherwise).
 */
#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
	pkts, pkts_mask, f)                                     \
{                                                               \
	uint64_t pkt00_mask, pkt01_mask;                        \
	uint32_t key_offset = f->key_offset;            \
								\
	pkt00_index = __builtin_ctzll(pkts_mask);               \
	pkt00_mask = 1LLU << pkt00_index;                       \
	pkts_mask &= ~pkt00_mask;                               \
								\
	mbuf00 = pkts[pkt00_index];                             \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
								\
	pkt01_index = __builtin_ctzll(pkts_mask);               \
	pkt01_mask = 1LLU << pkt01_index;                       \
	pkts_mask &= ~pkt01_mask;                               \
								\
	mbuf01 = pkts[pkt01_index];                             \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
}
727
/*
 * Dual-packet stage 0 that tolerates an odd trailing packet: when only one
 * packet remains, the second lane is filled with a duplicate of the first
 * so the rest of the two-wide pipeline can run unchanged.
 */
#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
	mbuf00, mbuf01, pkts, pkts_mask, f)                     \
{                                                               \
	uint64_t pkt00_mask, pkt01_mask;                        \
	uint32_t key_offset = f->key_offset;            \
								\
	pkt00_index = __builtin_ctzll(pkts_mask);               \
	pkt00_mask = 1LLU << pkt00_index;                       \
	pkts_mask &= ~pkt00_mask;                               \
								\
	mbuf00 = pkts[pkt00_index];                             \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
								\
	pkt01_index = __builtin_ctzll(pkts_mask);               \
	if (pkts_mask == 0)                                     \
		pkt01_index = pkt00_index;                      \
								\
	pkt01_mask = 1LLU << pkt01_index;                       \
	pkts_mask &= ~pkt01_mask;                               \
								\
	mbuf01 = pkts[pkt01_index];                             \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
}
751
/*
 * Pipeline stage 1 for two packets: hashes each packet's 8-byte key
 * (read from the mbuf metadata at key_offset), masks the signature into
 * a bucket index (the f->n_buckets - 1 mask requires n_buckets to be a
 * power of two), and prefetches both target buckets so stage 2 hits
 * warm cache lines.
 */
#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)\
{                                                               \
	uint64_t *key10, *key11;                                \
	uint64_t signature10, signature11;                      \
	uint32_t bucket10_index, bucket11_index;                \
	rte_table_hash_op_hash f_hash = f->f_hash;              \
	uint64_t seed = f->seed;                                \
	uint32_t key_offset = f->key_offset;                    \
								\
	key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, key_offset);\
	key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, key_offset);\
								\
	signature10 = f_hash(key10, &f->key_mask, KEY_SIZE, seed);	\
	bucket10_index = signature10 & (f->n_buckets - 1);      \
	bucket10 = (struct rte_bucket_4_8 *)                    \
		&f->memory[bucket10_index * f->bucket_size];    \
	rte_prefetch0(bucket10);                                \
								\
	signature11 = f_hash(key11, &f->key_mask, KEY_SIZE, seed);	\
	bucket11_index = signature11 & (f->n_buckets - 1);      \
	bucket11 = (struct rte_bucket_4_8 *)                    \
		&f->memory[bucket11_index * f->bucket_size];    \
	rte_prefetch0(bucket11);                                \
}
776
/*
 * Pipeline stage 2 (LRU variant) for two packets: compares each key
 * against the bucket slots via lookup_key8_cmp() (pos = selected slot),
 * derives the per-packet hit bit from the bucket signature bit at pos,
 * and accumulates it into pkts_mask_out. The entry pointer is written
 * into entries[] and prefetched unconditionally (speculatively); only
 * the packets whose bit is set in pkts_mask_out have a valid entry.
 * Finally the bucket's LRU state is refreshed for the touched slot.
 */
#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
	bucket20, bucket21, pkts_mask_out, entries, f)          \
{                                                               \
	void *a20, *a21;                                        \
	uint64_t pkt20_mask, pkt21_mask;                        \
	uint64_t *key20, *key21;                                \
	uint32_t pos20, pos21;                                  \
								\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
								\
	lookup_key8_cmp(key20, bucket20, pos20, f);			\
	lookup_key8_cmp(key21, bucket21, pos21, f);			\
								\
	pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
	pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
	pkts_mask_out |= pkt20_mask | pkt21_mask;               \
								\
	a20 = (void *) &bucket20->data[pos20 * f->entry_size];  \
	a21 = (void *) &bucket21->data[pos21 * f->entry_size];  \
	rte_prefetch0(a20);                                     \
	rte_prefetch0(a21);                                     \
	entries[pkt20_index] = a20;                             \
	entries[pkt21_index] = a21;                             \
	lru_update(bucket20, pos20);                            \
	lru_update(bucket21, pos21);                            \
}
804
/*
 * Pipeline stage 2 (extendible-bucket variant) for two packets: same
 * hit detection and speculative entry write as the LRU variant, but on
 * a miss in the first bucket it arms the "grinder": if the bucket has a
 * valid chained bucket (next_valid) and the packet did not hit, the
 * packet's bit is added to buckets_mask and its next-bucket pointer and
 * key pointer are parked in buckets[]/keys[] for the follow-up chain
 * walk (see the grind_next_buckets loop in the callers).
 */
#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
	bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f)\
{                                                               \
	struct rte_bucket_4_8 *bucket20_next, *bucket21_next;   \
	void *a20, *a21;                                        \
	uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\
	uint64_t *key20, *key21;                                \
	uint32_t pos20, pos21;                                  \
								\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
								\
	lookup_key8_cmp(key20, bucket20, pos20, f);			\
	lookup_key8_cmp(key21, bucket21, pos21, f);			\
								\
	pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
	pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
	pkts_mask_out |= pkt20_mask | pkt21_mask;               \
								\
	a20 = (void *) &bucket20->data[pos20 * f->entry_size];  \
	a21 = (void *) &bucket21->data[pos21 * f->entry_size];  \
	rte_prefetch0(a20);                                     \
	rte_prefetch0(a21);                                     \
	entries[pkt20_index] = a20;                             \
	entries[pkt21_index] = a21;                             \
								\
	bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);\
	bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);\
	buckets_mask |= bucket20_mask | bucket21_mask;          \
	bucket20_next = bucket20->next;                         \
	bucket21_next = bucket21->next;                         \
	buckets[pkt20_index] = bucket20_next;                   \
	buckets[pkt21_index] = bucket21_next;                   \
	keys[pkt20_index] = key20;                              \
	keys[pkt21_index] = key21;                              \
}
841
842 static int
843 rte_table_hash_lookup_key8_lru(
844         void *table,
845         struct rte_mbuf **pkts,
846         uint64_t pkts_mask,
847         uint64_t *lookup_hit_mask,
848         void **entries)
849 {
850         struct rte_table_hash *f = (struct rte_table_hash *) table;
851         struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
852         struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
853         uint32_t pkt00_index, pkt01_index, pkt10_index;
854         uint32_t pkt11_index, pkt20_index, pkt21_index;
855         uint64_t pkts_mask_out = 0;
856
857         __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
858         RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);
859
860         /* Cannot run the pipeline with less than 5 packets */
861         if (__builtin_popcountll(pkts_mask) < 5) {
862                 for ( ; pkts_mask; ) {
863                         struct rte_bucket_4_8 *bucket;
864                         struct rte_mbuf *mbuf;
865                         uint32_t pkt_index;
866
867                         lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
868                         lookup1_stage1(mbuf, bucket, f);
869                         lookup1_stage2_lru(pkt_index, mbuf, bucket,
870                                 pkts_mask_out, entries, f);
871                 }
872
873                 *lookup_hit_mask = pkts_mask_out;
874                 RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
875                 return 0;
876         }
877
878         /*
879          * Pipeline fill
880          *
881          */
882         /* Pipeline stage 0 */
883         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
884                 pkts_mask, f);
885
886         /* Pipeline feed */
887         mbuf10 = mbuf00;
888         mbuf11 = mbuf01;
889         pkt10_index = pkt00_index;
890         pkt11_index = pkt01_index;
891
892         /* Pipeline stage 0 */
893         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
894                 pkts_mask, f);
895
896         /* Pipeline stage 1 */
897         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
898
899         /*
900          * Pipeline run
901          *
902          */
903         for ( ; pkts_mask; ) {
904                 /* Pipeline feed */
905                 bucket20 = bucket10;
906                 bucket21 = bucket11;
907                 mbuf20 = mbuf10;
908                 mbuf21 = mbuf11;
909                 mbuf10 = mbuf00;
910                 mbuf11 = mbuf01;
911                 pkt20_index = pkt10_index;
912                 pkt21_index = pkt11_index;
913                 pkt10_index = pkt00_index;
914                 pkt11_index = pkt01_index;
915
916                 /* Pipeline stage 0 */
917                 lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
918                         mbuf00, mbuf01, pkts, pkts_mask, f);
919
920                 /* Pipeline stage 1 */
921                 lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
922
923                 /* Pipeline stage 2 */
924                 lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
925                         bucket20, bucket21, pkts_mask_out, entries, f);
926         }
927
928         /*
929          * Pipeline flush
930          *
931          */
932         /* Pipeline feed */
933         bucket20 = bucket10;
934         bucket21 = bucket11;
935         mbuf20 = mbuf10;
936         mbuf21 = mbuf11;
937         mbuf10 = mbuf00;
938         mbuf11 = mbuf01;
939         pkt20_index = pkt10_index;
940         pkt21_index = pkt11_index;
941         pkt10_index = pkt00_index;
942         pkt11_index = pkt01_index;
943
944         /* Pipeline stage 1 */
945         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
946
947         /* Pipeline stage 2 */
948         lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
949                 bucket20, bucket21, pkts_mask_out, entries, f);
950
951         /* Pipeline feed */
952         bucket20 = bucket10;
953         bucket21 = bucket11;
954         mbuf20 = mbuf10;
955         mbuf21 = mbuf11;
956         pkt20_index = pkt10_index;
957         pkt21_index = pkt11_index;
958
959         /* Pipeline stage 2 */
960         lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
961                 bucket20, bucket21, pkts_mask_out, entries, f);
962
963         *lookup_hit_mask = pkts_mask_out;
964         RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
965         return 0;
966 } /* lookup LRU */
967
968 static int
969 rte_table_hash_lookup_key8_ext(
970         void *table,
971         struct rte_mbuf **pkts,
972         uint64_t pkts_mask,
973         uint64_t *lookup_hit_mask,
974         void **entries)
975 {
976         struct rte_table_hash *f = (struct rte_table_hash *) table;
977         struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
978         struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
979         uint32_t pkt00_index, pkt01_index, pkt10_index;
980         uint32_t pkt11_index, pkt20_index, pkt21_index;
981         uint64_t pkts_mask_out = 0, buckets_mask = 0;
982         struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
983         uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
984
985         __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
986         RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);
987
988         /* Cannot run the pipeline with less than 5 packets */
989         if (__builtin_popcountll(pkts_mask) < 5) {
990                 for ( ; pkts_mask; ) {
991                         struct rte_bucket_4_8 *bucket;
992                         struct rte_mbuf *mbuf;
993                         uint32_t pkt_index;
994
995                         lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
996                         lookup1_stage1(mbuf, bucket, f);
997                         lookup1_stage2_ext(pkt_index, mbuf, bucket,
998                                 pkts_mask_out, entries, buckets_mask,
999                                 buckets, keys, f);
1000                 }
1001
1002                 goto grind_next_buckets;
1003         }
1004
1005         /*
1006          * Pipeline fill
1007          *
1008          */
1009         /* Pipeline stage 0 */
1010         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
1011                 pkts_mask, f);
1012
1013         /* Pipeline feed */
1014         mbuf10 = mbuf00;
1015         mbuf11 = mbuf01;
1016         pkt10_index = pkt00_index;
1017         pkt11_index = pkt01_index;
1018
1019         /* Pipeline stage 0 */
1020         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
1021                 pkts_mask, f);
1022
1023         /* Pipeline stage 1 */
1024         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1025
1026         /*
1027          * Pipeline run
1028          *
1029          */
1030         for ( ; pkts_mask; ) {
1031                 /* Pipeline feed */
1032                 bucket20 = bucket10;
1033                 bucket21 = bucket11;
1034                 mbuf20 = mbuf10;
1035                 mbuf21 = mbuf11;
1036                 mbuf10 = mbuf00;
1037                 mbuf11 = mbuf01;
1038                 pkt20_index = pkt10_index;
1039                 pkt21_index = pkt11_index;
1040                 pkt10_index = pkt00_index;
1041                 pkt11_index = pkt01_index;
1042
1043                 /* Pipeline stage 0 */
1044                 lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
1045                         mbuf00, mbuf01, pkts, pkts_mask, f);
1046
1047                 /* Pipeline stage 1 */
1048                 lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1049
1050                 /* Pipeline stage 2 */
1051                 lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1052                         bucket20, bucket21, pkts_mask_out, entries,
1053                         buckets_mask, buckets, keys, f);
1054         }
1055
1056         /*
1057          * Pipeline flush
1058          *
1059          */
1060         /* Pipeline feed */
1061         bucket20 = bucket10;
1062         bucket21 = bucket11;
1063         mbuf20 = mbuf10;
1064         mbuf21 = mbuf11;
1065         mbuf10 = mbuf00;
1066         mbuf11 = mbuf01;
1067         pkt20_index = pkt10_index;
1068         pkt21_index = pkt11_index;
1069         pkt10_index = pkt00_index;
1070         pkt11_index = pkt01_index;
1071
1072         /* Pipeline stage 1 */
1073         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1074
1075         /* Pipeline stage 2 */
1076         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1077                 bucket20, bucket21, pkts_mask_out, entries,
1078                 buckets_mask, buckets, keys, f);
1079
1080         /* Pipeline feed */
1081         bucket20 = bucket10;
1082         bucket21 = bucket11;
1083         mbuf20 = mbuf10;
1084         mbuf21 = mbuf11;
1085         pkt20_index = pkt10_index;
1086         pkt21_index = pkt11_index;
1087
1088         /* Pipeline stage 2 */
1089         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1090                 bucket20, bucket21, pkts_mask_out, entries,
1091                 buckets_mask, buckets, keys, f);
1092
1093 grind_next_buckets:
1094         /* Grind next buckets */
1095         for ( ; buckets_mask; ) {
1096                 uint64_t buckets_mask_next = 0;
1097
1098                 for ( ; buckets_mask; ) {
1099                         uint64_t pkt_mask;
1100                         uint32_t pkt_index;
1101
1102                         pkt_index = __builtin_ctzll(buckets_mask);
1103                         pkt_mask = 1LLU << pkt_index;
1104                         buckets_mask &= ~pkt_mask;
1105
1106                         lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
1107                                 entries, buckets_mask_next, f);
1108                 }
1109
1110                 buckets_mask = buckets_mask_next;
1111         }
1112
1113         *lookup_hit_mask = pkts_mask_out;
1114         RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
1115         return 0;
1116 } /* lookup EXT */
1117
1118 static int
1119 rte_table_hash_key8_stats_read(void *table, struct rte_table_stats *stats, int clear)
1120 {
1121         struct rte_table_hash *t = table;
1122
1123         if (stats != NULL)
1124                 memcpy(stats, &t->stats, sizeof(t->stats));
1125
1126         if (clear)
1127                 memset(&t->stats, 0, sizeof(t->stats));
1128
1129         return 0;
1130 }
1131
/* Public ops vtable: 8-byte key hash table, LRU collision policy.
 * Bulk add/delete are not implemented for this table type.
 */
struct rte_table_ops rte_table_hash_key8_lru_ops = {
	.f_create = rte_table_hash_create_key8_lru,
	.f_free = rte_table_hash_free_key8_lru,
	.f_add = rte_table_hash_entry_add_key8_lru,
	.f_delete = rte_table_hash_entry_delete_key8_lru,
	.f_add_bulk = NULL,
	.f_delete_bulk = NULL,
	.f_lookup = rte_table_hash_lookup_key8_lru,
	.f_stats = rte_table_hash_key8_stats_read,
};
1142
/* Public ops vtable: 8-byte key hash table, extendible-bucket policy.
 * Bulk add/delete are not implemented for this table type.
 */
struct rte_table_ops rte_table_hash_key8_ext_ops = {
	.f_create = rte_table_hash_create_key8_ext,
	.f_free = rte_table_hash_free_key8_ext,
	.f_add = rte_table_hash_entry_add_key8_ext,
	.f_delete = rte_table_hash_entry_delete_key8_ext,
	.f_add_bulk = NULL,
	.f_delete_bulk = NULL,
	.f_lookup = rte_table_hash_lookup_key8_ext,
	.f_stats = rte_table_hash_key8_stats_read,
};