port: fix unaligned access to metadata
[dpdk.git] lib/librte_table/rte_table_hash_lru.c
1 /*-
2  *       BSD LICENSE
3  *
4  *       Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *       All rights reserved.
6  *
7  *       Redistribution and use in source and binary forms, with or without
8  *       modification, are permitted provided that the following conditions
9  *       are met:
10  *
11  *      * Redistributions of source code must retain the above copyright
12  *               notice, this list of conditions and the following disclaimer.
13  *      * Redistributions in binary form must reproduce the above copyright
14  *               notice, this list of conditions and the following disclaimer in
15  *               the documentation and/or other materials provided with the
16  *               distribution.
17  *      * Neither the name of Intel Corporation nor the names of its
18  *               contributors may be used to endorse or promote products derived
19  *               from this software without specific prior written permission.
20  *
21  *       THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *       "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *       LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *       A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *       OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *       SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *       LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *       DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *       THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *       (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *       OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <string.h>
35 #include <stdio.h>
36
37 #include <rte_common.h>
38 #include <rte_mbuf.h>
39 #include <rte_memory.h>
40 #include <rte_malloc.h>
41 #include <rte_log.h>
42
43 #include "rte_table_hash.h"
44 #include "rte_lru.h"
45
46 #define KEYS_PER_BUCKET 4
47
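/*
 * Each bucket tracks up to KEYS_PER_BUCKET entries: one 16-bit signature and
 * one key index per slot, with sig[i] == 0 marking a free slot. The lru_list
 * word packs the LRU ordering of the four slots (managed through rte_lru.h);
 * the next pointer sharing the union appears unused by this LRU variant. The
 * structure is sized to exactly half a cache line (checked at creation time).
 */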
48 struct bucket {
49         union {
50                 struct bucket *next;
51                 uint64_t lru_list;
52         };
53         uint16_t sig[KEYS_PER_BUCKET];
54         uint32_t key_pos[KEYS_PER_BUCKET];
55 };
56
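/*
 * Per-packet state carried between the stages of the pipelined lookup: the
 * bucket pointer and signature are filled in by stage 1, the match flags,
 * match position and key index by stage 2, and all are consumed by stage 3.
 */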
57 struct grinder {
58         struct bucket *bkt;
59         uint64_t sig;
60         uint64_t match;
61         uint64_t match_pos;
62         uint32_t key_index;
63 };
64
65 struct rte_table_hash {
66         /* Input parameters */
67         uint32_t key_size;
68         uint32_t entry_size;
69         uint32_t n_keys;
70         uint32_t n_buckets;
71         rte_table_hash_op_hash f_hash;
72         uint64_t seed;
73         uint32_t signature_offset;
74         uint32_t key_offset;
75
76         /* Internal */
77         uint64_t bucket_mask;
78         uint32_t key_size_shl;
79         uint32_t data_size_shl;
80         uint32_t key_stack_tos;
81
82         /* Grinder */
83         struct grinder grinders[RTE_PORT_IN_BURST_SIZE_MAX];
84
85         /* Tables */
86         struct bucket *buckets;
87         uint8_t *key_mem;
88         uint8_t *data_mem;
89         uint32_t *key_stack;
90
91         /* Table memory */
92         uint8_t memory[0] __rte_cache_aligned;
93 };
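/*
 * The zero-length memory[] array above holds a single contiguous allocation
 * laid out as: bucket array, key array, key index stack, entry (data) array,
 * with each region rounded up to a cache line boundary (see
 * rte_table_hash_lru_create() below).
 */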
94
95 static int
96 check_params_create(struct rte_table_hash_lru_params *params)
97 {
98         uint32_t n_buckets_min;
99
100         /* key_size */
101         if ((params->key_size == 0) ||
102                 (!rte_is_power_of_2(params->key_size))) {
103                 RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
104                 return -EINVAL;
105         }
106
107         /* n_keys */
108         if ((params->n_keys == 0) ||
109                 (!rte_is_power_of_2(params->n_keys))) {
110                 RTE_LOG(ERR, TABLE, "%s: n_keys invalid value\n", __func__);
111                 return -EINVAL;
112         }
113
114         /* n_buckets */
115         n_buckets_min = (params->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET;
116         if ((params->n_buckets == 0) ||
117                 (!rte_is_power_of_2(params->n_buckets)) ||
118                 (params->n_buckets < n_buckets_min)) {
119                 RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
120                 return -EINVAL;
121         }
122
123         /* f_hash */
124         if (params->f_hash == NULL) {
125                 RTE_LOG(ERR, TABLE, "%s: f_hash invalid value\n", __func__);
126                 return -EINVAL;
127         }
128
129         return 0;
130 }
131
132 static void *
133 rte_table_hash_lru_create(void *params, int socket_id, uint32_t entry_size)
134 {
135         struct rte_table_hash_lru_params *p =
136                 (struct rte_table_hash_lru_params *) params;
137         struct rte_table_hash *t;
138         uint32_t total_size, table_meta_sz;
139         uint32_t bucket_sz, key_sz, key_stack_sz, data_sz;
140         uint32_t bucket_offset, key_offset, key_stack_offset, data_offset;
141         uint32_t i;
142
143         /* Check input parameters */
144         if ((check_params_create(p) != 0) ||
145                 (!rte_is_power_of_2(entry_size)) ||
146                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
147                 (sizeof(struct bucket) != (RTE_CACHE_LINE_SIZE / 2))) {
148                 return NULL;
149         }
150
151         /* Memory allocation */
152         table_meta_sz = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_table_hash));
153         bucket_sz = RTE_CACHE_LINE_ROUNDUP(p->n_buckets * sizeof(struct bucket));
154         key_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * p->key_size);
155         key_stack_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * sizeof(uint32_t));
156         data_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * entry_size);
157         total_size = table_meta_sz + bucket_sz + key_sz + key_stack_sz +
158                 data_sz;
159
160         t = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
161         if (t == NULL) {
162                 RTE_LOG(ERR, TABLE,
163                         "%s: Cannot allocate %u bytes for hash table\n",
164                         __func__, total_size);
165                 return NULL;
166         }
167         RTE_LOG(INFO, TABLE, "%s (%u-byte key): Hash table memory footprint is "
168                 "%u bytes\n", __func__, p->key_size, total_size);
169
170         /* Memory initialization */
171         t->key_size = p->key_size;
172         t->entry_size = entry_size;
173         t->n_keys = p->n_keys;
174         t->n_buckets = p->n_buckets;
175         t->f_hash = p->f_hash;
176         t->seed = p->seed;
177         t->signature_offset = p->signature_offset;
178         t->key_offset = p->key_offset;
179
180         /* Internal */
181         t->bucket_mask = t->n_buckets - 1;
182         t->key_size_shl = __builtin_ctzl(p->key_size);
183         t->data_size_shl = __builtin_ctzl(entry_size);
184
185         /* Tables */
186         bucket_offset = 0;
187         key_offset = bucket_offset + bucket_sz;
188         key_stack_offset = key_offset + key_sz;
189         data_offset = key_stack_offset + key_stack_sz;
190
191         t->buckets = (struct bucket *) &t->memory[bucket_offset];
192         t->key_mem = &t->memory[key_offset];
193         t->key_stack = (uint32_t *) &t->memory[key_stack_offset];
194         t->data_mem = &t->memory[data_offset];
195
196         /* Key stack */
197         for (i = 0; i < t->n_keys; i++)
198                 t->key_stack[i] = t->n_keys - 1 - i;
199         t->key_stack_tos = t->n_keys;
200
201         /* LRU */
202         for (i = 0; i < t->n_buckets; i++) {
203                 struct bucket *bkt = &t->buckets[i];
204
205                 lru_init(bkt);
206         }
207
208         return t;
209 }
210
211 static int
212 rte_table_hash_lru_free(void *table)
213 {
214         struct rte_table_hash *t = (struct rte_table_hash *) table;
215
216         /* Check input parameters */
217         if (t == NULL)
218                 return -EINVAL;
219
220         rte_free(t);
221         return 0;
222 }
223
224 static int
225 rte_table_hash_lru_entry_add(void *table, void *key, void *entry,
226         int *key_found, void **entry_ptr)
227 {
228         struct rte_table_hash *t = (struct rte_table_hash *) table;
229         struct bucket *bkt;
230         uint64_t sig;
231         uint32_t bkt_index, i;
232
233         sig = t->f_hash(key, t->key_size, t->seed);
234         bkt_index = sig & t->bucket_mask;
235         bkt = &t->buckets[bkt_index];
236         sig = (sig >> 16) | 1LLU; /* 16-bit signature; LSB set so it is never 0 (0 = free slot) */
237
238         /* Key is present in the bucket */
239         for (i = 0; i < KEYS_PER_BUCKET; i++) {
240                 uint64_t bkt_sig = (uint64_t) bkt->sig[i];
241                 uint32_t bkt_key_index = bkt->key_pos[i];
242                 uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
243                         t->key_size_shl];
244
245                 if ((sig == bkt_sig) && (memcmp(key, bkt_key, t->key_size)
246                         == 0)) {
247                         uint8_t *data = &t->data_mem[bkt_key_index <<
248                                 t->data_size_shl];
249
250                         memcpy(data, entry, t->entry_size);
251                         lru_update(bkt, i);
252                         *key_found = 1;
253                         *entry_ptr = (void *) data;
254                         return 0;
255                 }
256         }
257
258         /* Key is not present in the bucket */
259         for (i = 0; i < KEYS_PER_BUCKET; i++) {
260                 uint64_t bkt_sig = (uint64_t) bkt->sig[i];
261
262                 if (bkt_sig == 0) {
263                         uint32_t bkt_key_index;
264                         uint8_t *bkt_key, *data;
265
266                         /* Allocate new key */
267                         if (t->key_stack_tos == 0) {
268                                 /* No keys available */
269                                 return -ENOSPC;
270                         }
271                         bkt_key_index = t->key_stack[--t->key_stack_tos];
272
273                         /* Install new key */
274                         bkt_key = &t->key_mem[bkt_key_index << t->key_size_shl];
275                         data = &t->data_mem[bkt_key_index << t->data_size_shl];
276
277                         bkt->sig[i] = (uint16_t) sig;
278                         bkt->key_pos[i] = bkt_key_index;
279                         memcpy(bkt_key, key, t->key_size);
280                         memcpy(data, entry, t->entry_size);
281                         lru_update(bkt, i);
282
283                         *key_found = 0;
284                         *entry_ptr = (void *) data;
285                         return 0;
286                 }
287         }
288
289         /* Bucket full */
290         {
291                 uint64_t pos = lru_pos(bkt);
292                 uint32_t bkt_key_index = bkt->key_pos[pos];
293                 uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
294                         t->key_size_shl];
295                 uint8_t *data = &t->data_mem[bkt_key_index << t->data_size_shl];
296
297                 bkt->sig[pos] = (uint16_t) sig;
298                 memcpy(bkt_key, key, t->key_size);
299                 memcpy(data, entry, t->entry_size);
300                 lru_update(bkt, pos);
301
302                 *key_found = 0;
303                 *entry_ptr = (void *) data;
304                 return 0;
305         }
306 }
307
308 static int
309 rte_table_hash_lru_entry_delete(void *table, void *key, int *key_found,
310         void *entry)
311 {
312         struct rte_table_hash *t = (struct rte_table_hash *) table;
313         struct bucket *bkt;
314         uint64_t sig;
315         uint32_t bkt_index, i;
316
317         sig = t->f_hash(key, t->key_size, t->seed);
318         bkt_index = sig & t->bucket_mask;
319         bkt = &t->buckets[bkt_index];
320         sig = (sig >> 16) | 1LLU;
321
322         /* Key is present in the bucket */
323         for (i = 0; i < KEYS_PER_BUCKET; i++) {
324                 uint64_t bkt_sig = (uint64_t) bkt->sig[i];
325                 uint32_t bkt_key_index = bkt->key_pos[i];
326                 uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
327                         t->key_size_shl];
328
329                 if ((sig == bkt_sig) &&
330                         (memcmp(key, bkt_key, t->key_size) == 0)) {
331                         uint8_t *data = &t->data_mem[bkt_key_index <<
332                                 t->data_size_shl];
333
334                         bkt->sig[i] = 0;
335                         t->key_stack[t->key_stack_tos++] = bkt_key_index;
336                         *key_found = 1;
337                         memcpy(entry, data, t->entry_size);
338                         return 0;
339                 }
340         }
341
342         /* Key is not present in the bucket */
343         *key_found = 0;
344         return 0;
345 }
346
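/*
 * Scalar fall-back used for bursts of fewer than 7 packets and for the slow
 * path of the pipelined lookups below. With dosig set, the signature is
 * computed here from the packet key; otherwise a pre-computed 32-bit
 * signature is read from the mbuf metadata at signature_offset.
 */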
347 static int rte_table_hash_lru_lookup_unoptimized(
348         void *table,
349         struct rte_mbuf **pkts,
350         uint64_t pkts_mask,
351         uint64_t *lookup_hit_mask,
352         void **entries,
353         int dosig)
354 {
355         struct rte_table_hash *t = (struct rte_table_hash *) table;
356         uint64_t pkts_mask_out = 0;
357
358         for ( ; pkts_mask; ) {
359                 struct bucket *bkt;
360                 struct rte_mbuf *pkt;
361                 uint8_t *key;
362                 uint64_t pkt_mask, sig;
363                 uint32_t pkt_index, bkt_index, i;
364
365                 pkt_index = __builtin_ctzll(pkts_mask);
366                 pkt_mask = 1LLU << pkt_index;
367                 pkts_mask &= ~pkt_mask;
368
369                 pkt = pkts[pkt_index];
370                 key = RTE_MBUF_METADATA_UINT8_PTR(pkt, t->key_offset);
371                 if (dosig)
372                         sig = (uint64_t) t->f_hash(key, t->key_size, t->seed);
373                 else
374                         sig = RTE_MBUF_METADATA_UINT32(pkt,
375                                 t->signature_offset);
376
377                 bkt_index = sig & t->bucket_mask;
378                 bkt = &t->buckets[bkt_index];
379                 sig = (sig >> 16) | 1LLU;
380
381                 /* Key is present in the bucket */
382                 for (i = 0; i < KEYS_PER_BUCKET; i++) {
383                         uint64_t bkt_sig = (uint64_t) bkt->sig[i];
384                         uint32_t bkt_key_index = bkt->key_pos[i];
385                         uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
386                                 t->key_size_shl];
387
388                         if ((sig == bkt_sig) && (memcmp(key, bkt_key,
389                                 t->key_size) == 0)) {
390                                 uint8_t *data = &t->data_mem[bkt_key_index <<
391                                         t->data_size_shl];
392
393                                 lru_update(bkt, i);
394                                 pkts_mask_out |= pkt_mask;
395                                 entries[pkt_index] = (void *) data;
396                                 break;
397                         }
398                 }
399         }
400
401         *lookup_hit_mask = pkts_mask_out;
402         return 0;
403 }
404
405 /***
406 *
407 * mask = match bitmask
408 * match = at least one match
409 * match_many = more than one match
410 * match_pos = position of first match
411 *
412 * ----------------------------------------
413 * mask    match   match_many   match_pos
414 * ----------------------------------------
415 * 0000    0       0            00
416 * 0001    1       0            00
417 * 0010    1       0            01
418 * 0011    1       1            00
419 * ----------------------------------------
420 * 0100    1       0            10
421 * 0101    1       1            00
422 * 0110    1       1            01
423 * 0111    1       1            00
424 * ----------------------------------------
425 * 1000    1       0            11
426 * 1001    1       1            00
427 * 1010    1       1            01
428 * 1011    1       1            00
429 * ----------------------------------------
430 * 1100    1       1            10
431 * 1101    1       1            00
432 * 1110    1       1            01
433 * 1111    1       1            00
434 * ----------------------------------------
435 *
436 * match = 1111_1111_1111_1110
437 * match_many = 1111_1110_1110_1000
438 * match_pos = 0001_0010_0001_0011__0001_0010_0001_0000
439 *
440 * match = 0xFFFELLU
441 * match_many = 0xFEE8LLU
442 * match_pos = 0x12131210LLU
443 *
444 ***/
445
446 #define LUT_MATCH                                               0xFFFELLU
447 #define LUT_MATCH_MANY                                          0xFEE8LLU
448 #define LUT_MATCH_POS                                           0x12131210LLU
449
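/*
 * Illustrative helper (a sketch, not part of the original code): stand-alone
 * decoding of the three look-up tables above, mirroring the tail of the
 * lookup_cmp_sig() macro below. Worked example: if slots 1 and 3 of a bucket
 * match, mask_all = 1010b = 10, so match = (0xFFFE >> 10) & 1 = 1,
 * match_many = (0xFEE8 >> 10) & 1 = 1 and match_pos = (0x12131210 >> 20) & 3
 * = 1 (slot 1 is the first matching position).
 */
static inline void
lut_decode_sketch(uint64_t mask_all, uint64_t *match, uint64_t *match_many,
        uint64_t *match_pos)
{
        *match = (LUT_MATCH >> mask_all) & 1;                 /* any hit? */
        *match_many = (LUT_MATCH_MANY >> mask_all) & 1;       /* >1 hit? */
        *match_pos = (LUT_MATCH_POS >> (mask_all << 1)) & 3;  /* first hit */
}
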
450 #define lookup_cmp_sig(mbuf_sig, bucket, match, match_many, match_pos)\
451 {                                                               \
452         uint64_t bucket_sig[4], mask[4], mask_all;              \
453                                                                 \
454         bucket_sig[0] = bucket->sig[0];                         \
455         bucket_sig[1] = bucket->sig[1];                         \
456         bucket_sig[2] = bucket->sig[2];                         \
457         bucket_sig[3] = bucket->sig[3];                         \
458                                                                 \
459         bucket_sig[0] ^= mbuf_sig;                              \
460         bucket_sig[1] ^= mbuf_sig;                              \
461         bucket_sig[2] ^= mbuf_sig;                              \
462         bucket_sig[3] ^= mbuf_sig;                              \
463                                                                 \
464         mask[0] = 0;                                            \
465         mask[1] = 0;                                            \
466         mask[2] = 0;                                            \
467         mask[3] = 0;                                            \
468                                                                 \
469         if (bucket_sig[0] == 0)                                 \
470                 mask[0] = 1;                                    \
471         if (bucket_sig[1] == 0)                                 \
472                 mask[1] = 2;                                    \
473         if (bucket_sig[2] == 0)                                 \
474                 mask[2] = 4;                                    \
475         if (bucket_sig[3] == 0)                                 \
476                 mask[3] = 8;                                    \
477                                                                 \
478         mask_all = (mask[0] | mask[1]) | (mask[2] | mask[3]);   \
479                                                                 \
480         match = (LUT_MATCH >> mask_all) & 1;                    \
481         match_many = (LUT_MATCH_MANY >> mask_all) & 1;          \
482         match_pos = (LUT_MATCH_POS >> (mask_all << 1)) & 3;     \
483 }
484
485 #define lookup_cmp_key(mbuf, key, match_key, f)                 \
486 {                                                               \
487         uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(mbuf, f->key_offset);\
488         uint64_t *bkt_key = (uint64_t *) key;                   \
489                                                                 \
490         switch (f->key_size) {                                  \
491         case 8:                                                 \
492         {                                                       \
493                 uint64_t xor = pkt_key[0] ^ bkt_key[0];         \
494                 match_key = 0;                                  \
495                 if (xor == 0)                                   \
496                         match_key = 1;                          \
497         }                                                       \
498         break;                                                  \
499                                                                 \
500         case 16:                                                \
501         {                                                       \
502                 uint64_t xor[2], or;                            \
503                                                                 \
504                 xor[0] = pkt_key[0] ^ bkt_key[0];               \
505                 xor[1] = pkt_key[1] ^ bkt_key[1];               \
506                 or = xor[0] | xor[1];                           \
507                 match_key = 0;                                  \
508                 if (or == 0)                                    \
509                         match_key = 1;                          \
510         }                                                       \
511         break;                                                  \
512                                                                 \
513         case 32:                                                \
514         {                                                       \
515                 uint64_t xor[4], or;                            \
516                                                                 \
517                 xor[0] = pkt_key[0] ^ bkt_key[0];               \
518                 xor[1] = pkt_key[1] ^ bkt_key[1];               \
519                 xor[2] = pkt_key[2] ^ bkt_key[2];               \
520                 xor[3] = pkt_key[3] ^ bkt_key[3];               \
521                 or = xor[0] | xor[1] | xor[2] | xor[3];         \
522                 match_key = 0;                                  \
523                 if (or == 0)                                    \
524                         match_key = 1;                          \
525         }                                                       \
526         break;                                                  \
527                                                                 \
528         case 64:                                                \
529         {                                                       \
530                 uint64_t xor[8], or;                            \
531                                                                 \
532                 xor[0] = pkt_key[0] ^ bkt_key[0];               \
533                 xor[1] = pkt_key[1] ^ bkt_key[1];               \
534                 xor[2] = pkt_key[2] ^ bkt_key[2];               \
535                 xor[3] = pkt_key[3] ^ bkt_key[3];               \
536                 xor[4] = pkt_key[4] ^ bkt_key[4];               \
537                 xor[5] = pkt_key[5] ^ bkt_key[5];               \
538                 xor[6] = pkt_key[6] ^ bkt_key[6];               \
539                 xor[7] = pkt_key[7] ^ bkt_key[7];               \
540                 or = xor[0] | xor[1] | xor[2] | xor[3] |        \
541                         xor[4] | xor[5] | xor[6] | xor[7];      \
542                 match_key = 0;                                  \
543                 if (or == 0)                                    \
544                         match_key = 1;                          \
545         }                                                       \
546         break;                                                  \
547                                                                 \
548         default:                                                \
549                 match_key = 0;                                  \
550                 if (memcmp(pkt_key, bkt_key, f->key_size) == 0) \
551                         match_key = 1;                          \
552         }                                                       \
553 }
554
555 #define lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index)\
556 {                                                               \
557         uint64_t pkt00_mask, pkt01_mask;                        \
558         struct rte_mbuf *mbuf00, *mbuf01;                       \
559                                                                 \
560         pkt00_index = __builtin_ctzll(pkts_mask);               \
561         pkt00_mask = 1LLU << pkt00_index;                       \
562         pkts_mask &= ~pkt00_mask;                               \
563         mbuf00 = pkts[pkt00_index];                             \
564                                                                 \
565         pkt01_index = __builtin_ctzll(pkts_mask);               \
566         pkt01_mask = 1LLU << pkt01_index;                       \
567         pkts_mask &= ~pkt01_mask;                               \
568         mbuf01 = pkts[pkt01_index];                             \
569                                                                 \
570         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));  \
571         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));  \
572 }
573
574 #define lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, pkt00_index, \
575         pkt01_index)                                            \
576 {                                                               \
577         uint64_t pkt00_mask, pkt01_mask;                        \
578         struct rte_mbuf *mbuf00, *mbuf01;                       \
579                                                                 \
580         pkt00_index = __builtin_ctzll(pkts_mask);               \
581         pkt00_mask = 1LLU << pkt00_index;                       \
582         pkts_mask &= ~pkt00_mask;                               \
583         mbuf00 = pkts[pkt00_index];                             \
584                                                                 \
585         pkt01_index = __builtin_ctzll(pkts_mask);               \
586         if (pkts_mask == 0)                                     \
587                 pkt01_index = pkt00_index;                      \
588                                                                 \
589         pkt01_mask = 1LLU << pkt01_index;                       \
590         pkts_mask &= ~pkt01_mask;                               \
591         mbuf01 = pkts[pkt01_index];                             \
592                                                                 \
593         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));  \
594         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));  \
595 }
596
597 #define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index)    \
598 {                                                               \
599         struct grinder *g10, *g11;                              \
600         uint64_t sig10, sig11, bkt10_index, bkt11_index;        \
601         struct rte_mbuf *mbuf10, *mbuf11;                       \
602         struct bucket *bkt10, *bkt11, *buckets = t->buckets;    \
603         uint64_t bucket_mask = t->bucket_mask;                  \
604         uint32_t signature_offset = t->signature_offset;        \
605                                                                 \
606         mbuf10 = pkts[pkt10_index];                             \
607         sig10 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf10, signature_offset);\
608         bkt10_index = sig10 & bucket_mask;                      \
609         bkt10 = &buckets[bkt10_index];                          \
610                                                                 \
611         mbuf11 = pkts[pkt11_index];                             \
612         sig11 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf11, signature_offset);\
613         bkt11_index = sig11 & bucket_mask;                      \
614         bkt11 = &buckets[bkt11_index];                          \
615                                                                 \
616         rte_prefetch0(bkt10);                                   \
617         rte_prefetch0(bkt11);                                   \
618                                                                 \
619         g10 = &g[pkt10_index];                                  \
620         g10->sig = sig10;                                       \
621         g10->bkt = bkt10;                                       \
622                                                                 \
623         g11 = &g[pkt11_index];                                  \
624         g11->sig = sig11;                                       \
625         g11->bkt = bkt11;                                       \
626 }
627
628 #define lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index)\
629 {                                                               \
630         struct grinder *g10, *g11;                              \
631         uint64_t sig10, sig11, bkt10_index, bkt11_index;        \
632         struct rte_mbuf *mbuf10, *mbuf11;                       \
633         struct bucket *bkt10, *bkt11, *buckets = t->buckets;    \
634         uint8_t *key10, *key11;                                 \
635         uint64_t bucket_mask = t->bucket_mask;                  \
636         rte_table_hash_op_hash f_hash = t->f_hash;              \
637         uint64_t seed = t->seed;                                \
638         uint32_t key_size = t->key_size;                        \
639         uint32_t key_offset = t->key_offset;                    \
640                                                                 \
641         mbuf10 = pkts[pkt10_index];                             \
642         key10 = RTE_MBUF_METADATA_UINT8_PTR(mbuf10, key_offset);\
643         sig10 = (uint64_t) f_hash(key10, key_size, seed);       \
644         bkt10_index = sig10 & bucket_mask;                      \
645         bkt10 = &buckets[bkt10_index];                          \
646                                                                 \
647         mbuf11 = pkts[pkt11_index];                             \
648         key11 = RTE_MBUF_METADATA_UINT8_PTR(mbuf11, key_offset);\
649         sig11 = (uint64_t) f_hash(key11, key_size, seed);       \
650         bkt11_index = sig11 & bucket_mask;                      \
651         bkt11 = &buckets[bkt11_index];                          \
652                                                                 \
653         rte_prefetch0(bkt10);                                   \
654         rte_prefetch0(bkt11);                                   \
655                                                                 \
656         g10 = &g[pkt10_index];                                  \
657         g10->sig = sig10;                                       \
658         g10->bkt = bkt10;                                       \
659                                                                 \
660         g11 = &g[pkt11_index];                                  \
661         g11->sig = sig11;                                       \
662         g11->bkt = bkt11;                                       \
663 }
664
665 #define lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many)\
666 {                                                               \
667         struct grinder *g20, *g21;                              \
668         uint64_t sig20, sig21;                                  \
669         struct bucket *bkt20, *bkt21;                           \
670         uint8_t *key20, *key21, *key_mem = t->key_mem;          \
671         uint64_t match20, match21, match_many20, match_many21;  \
672         uint64_t match_pos20, match_pos21;                      \
673         uint32_t key20_index, key21_index, key_size_shl = t->key_size_shl;\
674                                                                 \
675         g20 = &g[pkt20_index];                                  \
676         sig20 = g20->sig;                                       \
677         bkt20 = g20->bkt;                                       \
678         sig20 = (sig20 >> 16) | 1LLU;                           \
679         lookup_cmp_sig(sig20, bkt20, match20, match_many20, match_pos20);\
680         match20 <<= pkt20_index;                                \
681         match_many20 <<= pkt20_index;                           \
682         key20_index = bkt20->key_pos[match_pos20];              \
683         key20 = &key_mem[key20_index << key_size_shl];          \
684                                                                 \
685         g21 = &g[pkt21_index];                                  \
686         sig21 = g21->sig;                                       \
687         bkt21 = g21->bkt;                                       \
688         sig21 = (sig21 >> 16) | 1LLU;                           \
689         lookup_cmp_sig(sig21, bkt21, match21, match_many21, match_pos21);\
690         match21 <<= pkt21_index;                                \
691         match_many21 <<= pkt21_index;                           \
692         key21_index = bkt21->key_pos[match_pos21];              \
693         key21 = &key_mem[key21_index << key_size_shl];          \
694                                                                 \
695         rte_prefetch0(key20);                                   \
696         rte_prefetch0(key21);                                   \
697                                                                 \
698         pkts_mask_match_many |= match_many20 | match_many21;    \
699                                                                 \
700         g20->match = match20;                                   \
701         g20->match_pos = match_pos20;                           \
702         g20->key_index = key20_index;                           \
703                                                                 \
704         g21->match = match21;                                   \
705         g21->match_pos = match_pos21;                           \
706         g21->key_index = key21_index;                           \
707 }
708
709 #define lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, \
710         entries)                                                \
711 {                                                               \
712         struct grinder *g30, *g31;                              \
713         struct rte_mbuf *mbuf30, *mbuf31;                       \
714         struct bucket *bkt30, *bkt31;                           \
715         uint8_t *key30, *key31, *key_mem = t->key_mem;          \
716         uint8_t *data30, *data31, *data_mem = t->data_mem;      \
717         uint64_t match30, match31, match_pos30, match_pos31;    \
718         uint64_t match_key30, match_key31, match_keys;          \
719         uint32_t key30_index, key31_index;                      \
720         uint32_t key_size_shl = t->key_size_shl;                \
721         uint32_t data_size_shl = t->data_size_shl;              \
722                                                                 \
723         mbuf30 = pkts[pkt30_index];                             \
724         g30 = &g[pkt30_index];                                  \
725         bkt30 = g30->bkt;                                       \
726         match30 = g30->match;                                   \
727         match_pos30 = g30->match_pos;                           \
728         key30_index = g30->key_index;                           \
729         key30 = &key_mem[key30_index << key_size_shl];          \
730         lookup_cmp_key(mbuf30, key30, match_key30, t);          \
731         match_key30 <<= pkt30_index;                            \
732         match_key30 &= match30;                                 \
733         data30 = &data_mem[key30_index << data_size_shl];       \
734         entries[pkt30_index] = data30;                          \
735                                                                 \
736         mbuf31 = pkts[pkt31_index];                             \
737         g31 = &g[pkt31_index];                                  \
738         bkt31 = g31->bkt;                                       \
739         match31 = g31->match;                                   \
740         match_pos31 = g31->match_pos;                           \
741         key31_index = g31->key_index;                           \
742         key31 = &key_mem[key31_index << key_size_shl];          \
743         lookup_cmp_key(mbuf31, key31, match_key31, t);          \
744         match_key31 <<= pkt31_index;                            \
745         match_key31 &= match31;                                 \
746         data31 = &data_mem[key31_index << data_size_shl];       \
747         entries[pkt31_index] = data31;                          \
748                                                                 \
749         rte_prefetch0(data30);                                  \
750         rte_prefetch0(data31);                                  \
751                                                                 \
752         match_keys = match_key30 | match_key31;                 \
753         pkts_mask_out |= match_keys;                            \
754                                                                 \
755         if (match_key30 == 0)                                   \
756                 match_pos30 = 4;                                \
757         lru_update(bkt30, match_pos30);                         \
758                                                                 \
759         if (match_key31 == 0)                                   \
760                 match_pos31 = 4;                                \
761         lru_update(bkt31, match_pos31);                         \
762 }
763
764 /***
765 * The lookup function implements a 4-stage pipeline, with each stage processing
766 * two different packets. The purpose of the pipelined implementation is to
767 * hide the latency of prefetching the data structures and to loosen the data
768 * dependencies between instructions.
769 *
770 *   p00  _______   p10  _______   p20  _______   p30  _______
771 * ----->|       |----->|       |----->|       |----->|       |----->
772 *       |   0   |      |   1   |      |   2   |      |   3   |
773 * ----->|_______|----->|_______|----->|_______|----->|_______|----->
774 *   p01            p11            p21            p31
775 *
776 * The naming convention is:
777 *         pXY = packet Y of stage X, X = 0 .. 3, Y = 0 .. 1
778 *
779 ***/
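/*
 * For a full burst, the resulting schedule is (an illustration; packet pairs
 * advance by one stage per loop iteration):
 *
 *   prologue:  stage0(p0,p1)
 *              stage0(p2,p3)  stage1(p0,p1)
 *              stage0(p4,p5)  stage1(p2,p3)  stage2(p0,p1)
 *   loop:      stage0(next)   stage1(...)    stage2(...)    stage3(...)
 *   epilogue:  the three tail sections drain stages 1-3, 2-3 and 3.
 */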
780 static int rte_table_hash_lru_lookup(
781         void *table,
782         struct rte_mbuf **pkts,
783         uint64_t pkts_mask,
784         uint64_t *lookup_hit_mask,
785         void **entries)
786 {
787         struct rte_table_hash *t = (struct rte_table_hash *) table;
788         struct grinder *g = t->grinders;
789         uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index;
790         uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index;
791         uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
792         int status = 0;
793
794         /* Cannot run the pipeline with less than 7 packets */
795         if (__builtin_popcountll(pkts_mask) < 7)
796                 return rte_table_hash_lru_lookup_unoptimized(table, pkts,
797                         pkts_mask, lookup_hit_mask, entries, 0);
798
799         /* Pipeline stage 0 */
800         lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
801
802         /* Pipeline feed */
803         pkt10_index = pkt00_index;
804         pkt11_index = pkt01_index;
805
806         /* Pipeline stage 0 */
807         lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
808
809         /* Pipeline stage 1 */
810         lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
811
812         /* Pipeline feed */
813         pkt20_index = pkt10_index;
814         pkt21_index = pkt11_index;
815         pkt10_index = pkt00_index;
816         pkt11_index = pkt01_index;
817
818         /* Pipeline stage 0 */
819         lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
820
821         /* Pipeline stage 1 */
822         lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
823
824         /* Pipeline stage 2 */
825         lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
826
827         /*
828          * Pipeline run
829          *
830          */
831         for ( ; pkts_mask; ) {
832                 /* Pipeline feed */
833                 pkt30_index = pkt20_index;
834                 pkt31_index = pkt21_index;
835                 pkt20_index = pkt10_index;
836                 pkt21_index = pkt11_index;
837                 pkt10_index = pkt00_index;
838                 pkt11_index = pkt01_index;
839
840                 /* Pipeline stage 0 */
841                 lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask,
842                         pkt00_index, pkt01_index);
843
844                 /* Pipeline stage 1 */
845                 lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
846
847                 /* Pipeline stage 2 */
848                 lookup2_stage2(t, g, pkt20_index, pkt21_index,
849                         pkts_mask_match_many);
850
851                 /* Pipeline stage 3 */
852                 lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index,
853                         pkts_mask_out, entries);
854         }
855
856         /* Pipeline feed */
857         pkt30_index = pkt20_index;
858         pkt31_index = pkt21_index;
859         pkt20_index = pkt10_index;
860         pkt21_index = pkt11_index;
861         pkt10_index = pkt00_index;
862         pkt11_index = pkt01_index;
863
864         /* Pipeline stage 1 */
865         lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
866
867         /* Pipeline stage 2 */
868         lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
869
870         /* Pipeline stage 3 */
871         lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
872                 entries);
873
874         /* Pipeline feed */
875         pkt30_index = pkt20_index;
876         pkt31_index = pkt21_index;
877         pkt20_index = pkt10_index;
878         pkt21_index = pkt11_index;
879
880         /* Pipeline stage 2 */
881         lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
882
883         /* Pipeline stage 3 */
884         lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
885                 entries);
886
887         /* Pipeline feed */
888         pkt30_index = pkt20_index;
889         pkt31_index = pkt21_index;
890
891         /* Pipeline stage 3 */
892         lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
893                 entries);
894
895         /* Slow path */
896         pkts_mask_match_many &= ~pkts_mask_out;
897         if (pkts_mask_match_many) {
898                 uint64_t pkts_mask_out_slow = 0;
899
900                 status = rte_table_hash_lru_lookup_unoptimized(table, pkts,
901                         pkts_mask_match_many, &pkts_mask_out_slow, entries, 0);
902                 pkts_mask_out |= pkts_mask_out_slow;
903         }
904
905         *lookup_hit_mask = pkts_mask_out;
906         return status;
907 }
908
909 static int rte_table_hash_lru_lookup_dosig(
910         void *table,
911         struct rte_mbuf **pkts,
912         uint64_t pkts_mask,
913         uint64_t *lookup_hit_mask,
914         void **entries)
915 {
916         struct rte_table_hash *t = (struct rte_table_hash *) table;
917         struct grinder *g = t->grinders;
918         uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index;
919         uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index;
920         uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
921         int status = 0;
922
923         /* Cannot run the pipeline with less than 7 packets */
924         if (__builtin_popcountll(pkts_mask) < 7)
925                 return rte_table_hash_lru_lookup_unoptimized(table, pkts,
926                         pkts_mask, lookup_hit_mask, entries, 1);
927
928         /* Pipeline stage 0 */
929         lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
930
931         /* Pipeline feed */
932         pkt10_index = pkt00_index;
933         pkt11_index = pkt01_index;
934
935         /* Pipeline stage 0 */
936         lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
937
938         /* Pipeline stage 1 */
939         lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
940
941         /* Pipeline feed */
942         pkt20_index = pkt10_index;
943         pkt21_index = pkt11_index;
944         pkt10_index = pkt00_index;
945         pkt11_index = pkt01_index;
946
947         /* Pipeline stage 0 */
948         lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
949
950         /* Pipeline stage 1 */
951         lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
952
953         /* Pipeline stage 2 */
954         lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
955
956         /*
957          * Pipeline run
958          *
959          */
960         for ( ; pkts_mask; ) {
961                 /* Pipeline feed */
962                 pkt30_index = pkt20_index;
963                 pkt31_index = pkt21_index;
964                 pkt20_index = pkt10_index;
965                 pkt21_index = pkt11_index;
966                 pkt10_index = pkt00_index;
967                 pkt11_index = pkt01_index;
968
969                 /* Pipeline stage 0 */
970                 lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask,
971                         pkt00_index, pkt01_index);
972
973                 /* Pipeline stage 1 */
974                 lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
975
976                 /* Pipeline stage 2 */
977                 lookup2_stage2(t, g, pkt20_index, pkt21_index,
978                         pkts_mask_match_many);
979
980                 /* Pipeline stage 3 */
981                 lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index,
982                         pkts_mask_out, entries);
983         }
984
985         /* Pipeline feed */
986         pkt30_index = pkt20_index;
987         pkt31_index = pkt21_index;
988         pkt20_index = pkt10_index;
989         pkt21_index = pkt11_index;
990         pkt10_index = pkt00_index;
991         pkt11_index = pkt01_index;
992
993         /* Pipeline stage 1 */
994         lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
995
996         /* Pipeline stage 2 */
997         lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
998
999         /* Pipeline stage 3 */
1000         lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
1001                 entries);
1002
1003         /* Pipeline feed */
1004         pkt30_index = pkt20_index;
1005         pkt31_index = pkt21_index;
1006         pkt20_index = pkt10_index;
1007         pkt21_index = pkt11_index;
1008
1009         /* Pipeline stage 2 */
1010         lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
1011
1012         /* Pipeline stage 3 */
1013         lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
1014                 entries);
1015
1016         /* Pipeline feed */
1017         pkt30_index = pkt20_index;
1018         pkt31_index = pkt21_index;
1019
1020         /* Pipeline stage 3 */
1021         lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
1022                 entries);
1023
1024         /* Slow path */
1025         pkts_mask_match_many &= ~pkts_mask_out;
1026         if (pkts_mask_match_many) {
1027                 uint64_t pkts_mask_out_slow = 0;
1028
1029                 status = rte_table_hash_lru_lookup_unoptimized(table, pkts,
1030                         pkts_mask_match_many, &pkts_mask_out_slow, entries, 1);
1031                 pkts_mask_out |= pkts_mask_out_slow;
1032         }
1033
1034         *lookup_hit_mask = pkts_mask_out;
1035         return status;
1036 }
1037
1038 struct rte_table_ops rte_table_hash_lru_ops = {
1039         .f_create = rte_table_hash_lru_create,
1040         .f_free = rte_table_hash_lru_free,
1041         .f_add = rte_table_hash_lru_entry_add,
1042         .f_delete = rte_table_hash_lru_entry_delete,
1043         .f_lookup = rte_table_hash_lru_lookup,
1044 };
1045
1046 struct rte_table_ops rte_table_hash_lru_dosig_ops = {
1047         .f_create = rte_table_hash_lru_create,
1048         .f_free = rte_table_hash_lru_free,
1049         .f_add = rte_table_hash_lru_entry_add,
1050         .f_delete = rte_table_hash_lru_entry_delete,
1051         .f_lookup = rte_table_hash_lru_lookup_dosig,
1052 };
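
/*
 * Usage sketch (illustrative only; the guard macro, function names and
 * parameter values below are hypothetical, not part of the library): create
 * an LRU hash table through the ops interface, add one entry and free the
 * table. The hash callback is a toy stand-in for a real hash function.
 */
#ifdef RTE_TABLE_HASH_LRU_USAGE_SKETCH

static uint64_t
toy_hash(void *key, uint32_t key_size, uint64_t seed)
{
        uint8_t *p = key;
        uint64_t h = seed;
        uint32_t i;

        for (i = 0; i < key_size; i++)
                h = h * 31 + p[i];
        return h;
}

static int
hash_lru_usage_sketch(void)
{
        struct rte_table_hash_lru_params params = {
                .key_size = 16,           /* must be a power of 2 */
                .n_keys = 1 << 16,        /* must be a power of 2 */
                .n_buckets = 1 << 14,     /* power of 2, >= n_keys / KEYS_PER_BUCKET */
                .f_hash = toy_hash,
                .seed = 0,
                .signature_offset = 0,    /* only used by the non-dosig lookup */
                .key_offset = 32,         /* key location within the mbuf metadata */
        };
        uint8_t key[16] = {0}, entry[8] = {1};
        void *table, *entry_ptr;
        int key_found;

        /* entry_size (last argument) must also be a power of 2 */
        table = rte_table_hash_lru_dosig_ops.f_create(&params, 0, sizeof(entry));
        if (table == NULL)
                return -1;

        if (rte_table_hash_lru_dosig_ops.f_add(table, key, entry, &key_found,
                &entry_ptr) != 0)
                return -1;

        return rte_table_hash_lru_dosig_ops.f_free(table);
}

#endif /* RTE_TABLE_HASH_LRU_USAGE_SKETCH */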