1 /*-
2  *       BSD LICENSE
3  *
4  *       Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *       All rights reserved.
6  *
7  *       Redistribution and use in source and binary forms, with or without
8  *       modification, are permitted provided that the following conditions
9  *       are met:
10  *
11  *      * Redistributions of source code must retain the above copyright
12  *               notice, this list of conditions and the following disclaimer.
13  *      * Redistributions in binary form must reproduce the above copyright
14  *               notice, this list of conditions and the following disclaimer in
15  *               the documentation and/or other materials provided with the
16  *               distribution.
17  *      * Neither the name of Intel Corporation nor the names of its
18  *               contributors may be used to endorse or promote products derived
19  *               from this software without specific prior written permission.
20  *
21  *       THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *       "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *       LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *       A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *       OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *       SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *       LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *       DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *       THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *       (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *       OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <string.h>
35 #include <stdio.h>
36
37 #include <rte_common.h>
38 #include <rte_mbuf.h>
39 #include <rte_malloc.h>
40 #include <rte_log.h>
41
42 #include "rte_table_hash.h"
43 #include "rte_lru.h"
44
45 #define KEYS_PER_BUCKET 4
46
47 struct bucket {
48         union {
49                 struct bucket *next;
50                 uint64_t lru_list;
51         };
52         uint16_t sig[KEYS_PER_BUCKET];
53         uint32_t key_pos[KEYS_PER_BUCKET];
54 };
55
56 struct grinder {
57         struct bucket *bkt;
58         uint64_t sig;
59         uint64_t match;
60         uint64_t match_pos;
61         uint32_t key_index;
62 };
63
64 struct rte_table_hash {
65         /* Input parameters */
66         uint32_t key_size;
67         uint32_t entry_size;
68         uint32_t n_keys;
69         uint32_t n_buckets;
70         rte_table_hash_op_hash f_hash;
71         uint64_t seed;
72         uint32_t signature_offset;
73         uint32_t key_offset;
74
75         /* Internal */
76         uint64_t bucket_mask;
77         uint32_t key_size_shl;
78         uint32_t data_size_shl;
79         uint32_t key_stack_tos;
80
81         /* Grinder */
82         struct grinder grinders[RTE_PORT_IN_BURST_SIZE_MAX];
83
84         /* Tables */
85         struct bucket *buckets;
86         uint8_t *key_mem;
87         uint8_t *data_mem;
88         uint32_t *key_stack;
89
90         /* Table memory */
91         uint8_t memory[0] __rte_cache_aligned;
92 };
93
94 static int
95 check_params_create(struct rte_table_hash_lru_params *params)
96 {
97         uint32_t n_buckets_min;
98
99         /* key_size */
100         if ((params->key_size == 0) ||
101                 (!rte_is_power_of_2(params->key_size))) {
102                 RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
103                 return -EINVAL;
104         }
105
106         /* n_keys */
107         if ((params->n_keys == 0) ||
108                 (!rte_is_power_of_2(params->n_keys))) {
109                 RTE_LOG(ERR, TABLE, "%s: n_keys invalid value\n", __func__);
110                 return -EINVAL;
111         }
112
113         /* n_buckets */
114         n_buckets_min = (params->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET;
115         if ((params->n_buckets == 0) ||
116                 (!rte_is_power_of_2(params->n_buckets)) ||
117                 (params->n_buckets < n_buckets_min)) {
118                 RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
119                 return -EINVAL;
120         }
121
122         /* f_hash */
123         if (params->f_hash == NULL) {
124                 RTE_LOG(ERR, TABLE, "%s: f_hash invalid value\n", __func__);
125                 return -EINVAL;
126         }
127
128         /* signature offset */
129         if ((params->signature_offset & 0x3) != 0) {
130                 RTE_LOG(ERR, TABLE, "%s: signature_offset invalid value\n",
131                         __func__);
132                 return -EINVAL;
133         }
134
135         /* key offset */
136         if ((params->key_offset & 0x7) != 0) {
137                 RTE_LOG(ERR, TABLE, "%s: key_offset invalid value\n", __func__);
138                 return -EINVAL;
139         }
140
141         return 0;
142 }
143
144 static void *
145 rte_table_hash_lru_create(void *params, int socket_id, uint32_t entry_size)
146 {
147         struct rte_table_hash_lru_params *p =
148                 (struct rte_table_hash_lru_params *) params;
149         struct rte_table_hash *t;
150         uint32_t total_size, table_meta_sz;
151         uint32_t bucket_sz, key_sz, key_stack_sz, data_sz;
152         uint32_t bucket_offset, key_offset, key_stack_offset, data_offset;
153         uint32_t i;
154
155         /* Check input parameters */
156         if ((check_params_create(p) != 0) ||
157                 (!rte_is_power_of_2(entry_size)) ||
158                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
159                 (sizeof(struct bucket) != (RTE_CACHE_LINE_SIZE / 2))) {
160                 return NULL;
161         }
162
163         /* Memory allocation */
164         table_meta_sz = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_table_hash));
165         bucket_sz = RTE_CACHE_LINE_ROUNDUP(p->n_buckets * sizeof(struct bucket));
166         key_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * p->key_size);
167         key_stack_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * sizeof(uint32_t));
168         data_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * entry_size);
169         total_size = table_meta_sz + bucket_sz + key_sz + key_stack_sz +
170                 data_sz;
171
172         t = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
173         if (t == NULL) {
174                 RTE_LOG(ERR, TABLE,
175                         "%s: Cannot allocate %u bytes for hash table\n",
176                         __func__, total_size);
177                 return NULL;
178         }
179         RTE_LOG(INFO, TABLE, "%s (%u-byte key): Hash table memory footprint is "
180                 "%u bytes\n", __func__, p->key_size, total_size);
181
182         /* Memory initialization */
183         t->key_size = p->key_size;
184         t->entry_size = entry_size;
185         t->n_keys = p->n_keys;
186         t->n_buckets = p->n_buckets;
187         t->f_hash = p->f_hash;
188         t->seed = p->seed;
189         t->signature_offset = p->signature_offset;
190         t->key_offset = p->key_offset;
191
192         /* Internal */
193         t->bucket_mask = t->n_buckets - 1;
194         t->key_size_shl = __builtin_ctzl(p->key_size);
195         t->data_size_shl = __builtin_ctzl(entry_size);
196
197         /* Tables */
198         bucket_offset = 0;
199         key_offset = bucket_offset + bucket_sz;
200         key_stack_offset = key_offset + key_sz;
201         data_offset = key_stack_offset + key_stack_sz;
202
203         t->buckets = (struct bucket *) &t->memory[bucket_offset];
204         t->key_mem = &t->memory[key_offset];
205         t->key_stack = (uint32_t *) &t->memory[key_stack_offset];
206         t->data_mem = &t->memory[data_offset];
207
208         /* Key stack */
209         for (i = 0; i < t->n_keys; i++)
210                 t->key_stack[i] = t->n_keys - 1 - i;
211         t->key_stack_tos = t->n_keys;
212
213         /* LRU */
214         for (i = 0; i < t->n_buckets; i++) {
215                 struct bucket *bkt = &t->buckets[i];
216
217                 lru_init(bkt);
218         }
219
220         return t;
221 }
222
223 static int
224 rte_table_hash_lru_free(void *table)
225 {
226         struct rte_table_hash *t = (struct rte_table_hash *) table;
227
228         /* Check input parameters */
229         if (t == NULL)
230                 return -EINVAL;
231
232         rte_free(t);
233         return 0;
234 }
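
/*
 * Usage sketch (illustrative only, compiled out under a hypothetical guard
 * macro): how an application might create this table through the public ops.
 * The hash callback and all "example_" names below are hypothetical
 * stand-ins; any function matching rte_table_hash_op_hash can be supplied as
 * f_hash, and the parameter values are only one valid choice.
 */
#ifdef RTE_TABLE_HASH_LRU_USAGE_SKETCH
static uint64_t
example_hash(void *key, uint32_t key_size, uint64_t seed)
{
	uint64_t *k = key;
	uint64_t h = seed;
	uint32_t i;

	/* Trivial XOR fold, for illustration only */
	for (i = 0; i < key_size / sizeof(uint64_t); i++)
		h ^= k[i];

	return h;
}

static void *
example_create(int socket_id)
{
	struct rte_table_hash_lru_params params = {
		.key_size = 16,           /* must be a power of 2 */
		.n_keys = 1 << 16,        /* must be a power of 2 */
		.n_buckets = 1 << 14,     /* power of 2, >= n_keys / KEYS_PER_BUCKET */
		.f_hash = example_hash,
		.seed = 0,
		.signature_offset = 0,    /* 4-byte aligned mbuf metadata offset */
		.key_offset = 32,         /* 8-byte aligned mbuf metadata offset */
	};

	/* entry_size (last argument) must also be a power of 2 */
	return rte_table_hash_lru_ops.f_create(&params, socket_id, 8);
}
#endif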
235
236 static int
237 rte_table_hash_lru_entry_add(void *table, void *key, void *entry,
238         int *key_found, void **entry_ptr)
239 {
240         struct rte_table_hash *t = (struct rte_table_hash *) table;
241         struct bucket *bkt;
242         uint64_t sig;
243         uint32_t bkt_index, i;
244
245         sig = t->f_hash(key, t->key_size, t->seed);
246         bkt_index = sig & t->bucket_mask;
247         bkt = &t->buckets[bkt_index];
248         sig = (sig >> 16) | 1LLU;
249
250         /* Key is present in the bucket */
251         for (i = 0; i < KEYS_PER_BUCKET; i++) {
252                 uint64_t bkt_sig = (uint64_t) bkt->sig[i];
253                 uint32_t bkt_key_index = bkt->key_pos[i];
254                 uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
255                         t->key_size_shl];
256
257                 if ((sig == bkt_sig) && (memcmp(key, bkt_key, t->key_size)
258                         == 0)) {
259                         uint8_t *data = &t->data_mem[bkt_key_index <<
260                                 t->data_size_shl];
261
262                         memcpy(data, entry, t->entry_size);
263                         lru_update(bkt, i);
264                         *key_found = 1;
265                         *entry_ptr = (void *) data;
266                         return 0;
267                 }
268         }
269
270         /* Key is not present in the bucket */
271         for (i = 0; i < KEYS_PER_BUCKET; i++) {
272                 uint64_t bkt_sig = (uint64_t) bkt->sig[i];
273
274                 if (bkt_sig == 0) {
275                         uint32_t bkt_key_index;
276                         uint8_t *bkt_key, *data;
277
278                         /* Allocate new key */
279                         if (t->key_stack_tos == 0) {
280                                 /* No keys available */
281                                 return -ENOSPC;
282                         }
283                         bkt_key_index = t->key_stack[--t->key_stack_tos];
284
285                         /* Install new key */
286                         bkt_key = &t->key_mem[bkt_key_index << t->key_size_shl];
287                         data = &t->data_mem[bkt_key_index << t->data_size_shl];
288
289                         bkt->sig[i] = (uint16_t) sig;
290                         bkt->key_pos[i] = bkt_key_index;
291                         memcpy(bkt_key, key, t->key_size);
292                         memcpy(data, entry, t->entry_size);
293                         lru_update(bkt, i);
294
295                         *key_found = 0;
296                         *entry_ptr = (void *) data;
297                         return 0;
298                 }
299         }
300
301         /* Bucket full */
302         {
303                 uint64_t pos = lru_pos(bkt);
304                 uint32_t bkt_key_index = bkt->key_pos[pos];
305                 uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
306                         t->key_size_shl];
307                 uint8_t *data = &t->data_mem[bkt_key_index << t->data_size_shl];
308
309                 bkt->sig[pos] = (uint16_t) sig;
310                 memcpy(bkt_key, key, t->key_size);
311                 memcpy(data, entry, t->entry_size);
312                 lru_update(bkt, pos);
313
314                 *key_found = 0;
315                 *entry_ptr = (void *) data;
316                 return 0;
317         }
318 }
319
320 static int
321 rte_table_hash_lru_entry_delete(void *table, void *key, int *key_found,
322         void *entry)
323 {
324         struct rte_table_hash *t = (struct rte_table_hash *) table;
325         struct bucket *bkt;
326         uint64_t sig;
327         uint32_t bkt_index, i;
328
329         sig = t->f_hash(key, t->key_size, t->seed);
330         bkt_index = sig & t->bucket_mask;
331         bkt = &t->buckets[bkt_index];
332         sig = (sig >> 16) | 1LLU;
333
334         /* Key is present in the bucket */
335         for (i = 0; i < KEYS_PER_BUCKET; i++) {
336                 uint64_t bkt_sig = (uint64_t) bkt->sig[i];
337                 uint32_t bkt_key_index = bkt->key_pos[i];
338                 uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
339                         t->key_size_shl];
340
341                 if ((sig == bkt_sig) &&
342                         (memcmp(key, bkt_key, t->key_size) == 0)) {
343                         uint8_t *data = &t->data_mem[bkt_key_index <<
344                                 t->data_size_shl];
345
346                         bkt->sig[i] = 0;
347                         t->key_stack[t->key_stack_tos++] = bkt_key_index;
348                         *key_found = 1;
349                         memcpy(entry, data, t->entry_size);
350                         return 0;
351                 }
352         }
353
354         /* Key is not present in the bucket */
355         *key_found = 0;
356         return 0;
357 }
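
/*
 * Sketch of the add/delete contract (illustrative only, compiled out under
 * the same hypothetical guard macro as above): on add, *key_found reports
 * whether the key was already present and *entry_ptr points to the
 * table-internal copy of the entry; on delete, the entry data is copied back
 * to the caller. Assumes a table created with key_size 16 and entry_size
 * sizeof(uint64_t).
 */
#ifdef RTE_TABLE_HASH_LRU_USAGE_SKETCH
static int
example_add_then_delete(void *table)
{
	uint8_t key[16] = {0};          /* key_size bytes, zero-padded */
	uint64_t entry_in = 0x1234, entry_out = 0;
	int key_found, status;
	void *entry_ptr;

	status = rte_table_hash_lru_ops.f_add(table, key, &entry_in,
		&key_found, &entry_ptr);
	if (status != 0)
		return status;  /* e.g. -ENOSPC when the key store is exhausted */

	/* entry_ptr may be read or written in place until the key is deleted */

	return rte_table_hash_lru_ops.f_delete(table, key, &key_found,
		&entry_out);
}
#endif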
358
359 static int rte_table_hash_lru_lookup_unoptimized(
360         void *table,
361         struct rte_mbuf **pkts,
362         uint64_t pkts_mask,
363         uint64_t *lookup_hit_mask,
364         void **entries,
365         int dosig)
366 {
367         struct rte_table_hash *t = (struct rte_table_hash *) table;
368         uint64_t pkts_mask_out = 0;
369
370         for ( ; pkts_mask; ) {
371                 struct bucket *bkt;
372                 struct rte_mbuf *pkt;
373                 uint8_t *key;
374                 uint64_t pkt_mask, sig;
375                 uint32_t pkt_index, bkt_index, i;
376
377                 pkt_index = __builtin_ctzll(pkts_mask);
378                 pkt_mask = 1LLU << pkt_index;
379                 pkts_mask &= ~pkt_mask;
380
381                 pkt = pkts[pkt_index];
382                 key = RTE_MBUF_METADATA_UINT8_PTR(pkt, t->key_offset);
383                 if (dosig)
384                         sig = (uint64_t) t->f_hash(key, t->key_size, t->seed);
385                 else
386                         sig = RTE_MBUF_METADATA_UINT32(pkt,
387                                 t->signature_offset);
388
389                 bkt_index = sig & t->bucket_mask;
390                 bkt = &t->buckets[bkt_index];
391                 sig = (sig >> 16) | 1LLU;
392
393                 /* Key is present in the bucket */
394                 for (i = 0; i < KEYS_PER_BUCKET; i++) {
395                         uint64_t bkt_sig = (uint64_t) bkt->sig[i];
396                         uint32_t bkt_key_index = bkt->key_pos[i];
397                         uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
398                                 t->key_size_shl];
399
400                         if ((sig == bkt_sig) && (memcmp(key, bkt_key,
401                                 t->key_size) == 0)) {
402                                 uint8_t *data = &t->data_mem[bkt_key_index <<
403                                         t->data_size_shl];
404
405                                 lru_update(bkt, i);
406                                 pkts_mask_out |= pkt_mask;
407                                 entries[pkt_index] = (void *) data;
408                                 break;
409                         }
410                 }
411         }
412
413         *lookup_hit_mask = pkts_mask_out;
414         return 0;
415 }
416
417 /***
418 *
419 * mask = match bitmask
420 * match = at least one match
421 * match_many = more than one match
422 * match_pos = position of first match
423 *
424 * ----------------------------------------
425 * mask   match   match_many   match_pos
426 * ----------------------------------------
427 * 0000   0       0            00
428 * 0001   1       0            00
429 * 0010   1       0            01
430 * 0011   1       1            00
431 * ----------------------------------------
432 * 0100   1       0            10
433 * 0101   1       1            00
434 * 0110   1       1            01
435 * 0111   1       1            00
436 * ----------------------------------------
437 * 1000   1       0            11
438 * 1001   1       1            00
439 * 1010   1       1            01
440 * 1011   1       1            00
441 * ----------------------------------------
442 * 1100   1       1            10
443 * 1101   1       1            00
444 * 1110   1       1            01
445 * 1111   1       1            00
446 * ----------------------------------------
447 *
448 * match = 1111_1111_1111_1110
449 * match_many = 1111_1110_1110_1000
450 * match_pos = 0001_0010_0001_0011__0001_0010_0001_0000
451 *
452 * match = 0xFFFELLU
453 * match_many = 0xFEE8LLU
454 * match_pos = 0x12131210LLU
455 *
456 ***/
457
458 #define LUT_MATCH                                               0xFFFELLU
459 #define LUT_MATCH_MANY                                          0xFEE8LLU
460 #define LUT_MATCH_POS                                           0x12131210LLU
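
/*
 * Worked example (illustrative only, compiled out): for mask_all = 0xC, i.e.
 * bucket slots 2 and 3 both match, the LUTs above yield match = 1,
 * match_many = 1 and match_pos = 2 (the first matching slot). The self-test
 * below is a hypothetical check, not part of the library.
 */
#ifdef RTE_TABLE_HASH_LRU_LUT_SELFTEST
#include <assert.h>

static void
lut_selftest(void)
{
	uint64_t mask_all = 0xC;        /* bucket slots 2 and 3 match */

	assert(((LUT_MATCH >> mask_all) & 1) == 1);
	assert(((LUT_MATCH_MANY >> mask_all) & 1) == 1);
	assert(((LUT_MATCH_POS >> (mask_all << 1)) & 3) == 2);
}
#endif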
461
462 #define lookup_cmp_sig(mbuf_sig, bucket, match, match_many, match_pos)\
463 {                                                               \
464         uint64_t bucket_sig[4], mask[4], mask_all;              \
465                                                                 \
466         bucket_sig[0] = bucket->sig[0];                         \
467         bucket_sig[1] = bucket->sig[1];                         \
468         bucket_sig[2] = bucket->sig[2];                         \
469         bucket_sig[3] = bucket->sig[3];                         \
470                                                                 \
471         bucket_sig[0] ^= mbuf_sig;                              \
472         bucket_sig[1] ^= mbuf_sig;                              \
473         bucket_sig[2] ^= mbuf_sig;                              \
474         bucket_sig[3] ^= mbuf_sig;                              \
475                                                                 \
476         mask[0] = 0;                                            \
477         mask[1] = 0;                                            \
478         mask[2] = 0;                                            \
479         mask[3] = 0;                                            \
480                                                                 \
481         if (bucket_sig[0] == 0)                                 \
482                 mask[0] = 1;                                    \
483         if (bucket_sig[1] == 0)                                 \
484                 mask[1] = 2;                                    \
485         if (bucket_sig[2] == 0)                                 \
486                 mask[2] = 4;                                    \
487         if (bucket_sig[3] == 0)                                 \
488                 mask[3] = 8;                                    \
489                                                                 \
490         mask_all = (mask[0] | mask[1]) | (mask[2] | mask[3]);   \
491                                                                 \
492         match = (LUT_MATCH >> mask_all) & 1;                    \
493         match_many = (LUT_MATCH_MANY >> mask_all) & 1;          \
494         match_pos = (LUT_MATCH_POS >> (mask_all << 1)) & 3;     \
495 }
496
497 #define lookup_cmp_key(mbuf, key, match_key, f)                 \
498 {                                                               \
499         uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(mbuf, f->key_offset);\
500         uint64_t *bkt_key = (uint64_t *) key;                   \
501                                                                 \
502         switch (f->key_size) {                                  \
503         case 8:                                                 \
504         {                                                       \
505                 uint64_t xor = pkt_key[0] ^ bkt_key[0];         \
506                 match_key = 0;                                  \
507                 if (xor == 0)                                   \
508                         match_key = 1;                          \
509         }                                                       \
510         break;                                                  \
511                                                                 \
512         case 16:                                                \
513         {                                                       \
514                 uint64_t xor[2], or;                            \
515                                                                 \
516                 xor[0] = pkt_key[0] ^ bkt_key[0];               \
517                 xor[1] = pkt_key[1] ^ bkt_key[1];               \
518                 or = xor[0] | xor[1];                           \
519                 match_key = 0;                                  \
520                 if (or == 0)                                    \
521                         match_key = 1;                          \
522         }                                                       \
523         break;                                                  \
524                                                                 \
525         case 32:                                                \
526         {                                                       \
527                 uint64_t xor[4], or;                            \
528                                                                 \
529                 xor[0] = pkt_key[0] ^ bkt_key[0];               \
530                 xor[1] = pkt_key[1] ^ bkt_key[1];               \
531                 xor[2] = pkt_key[2] ^ bkt_key[2];               \
532                 xor[3] = pkt_key[3] ^ bkt_key[3];               \
533                 or = xor[0] | xor[1] | xor[2] | xor[3];         \
534                 match_key = 0;                                  \
535                 if (or == 0)                                    \
536                         match_key = 1;                          \
537         }                                                       \
538         break;                                                  \
539                                                                 \
540         case 64:                                                \
541         {                                                       \
542                 uint64_t xor[8], or;                            \
543                                                                 \
544                 xor[0] = pkt_key[0] ^ bkt_key[0];               \
545                 xor[1] = pkt_key[1] ^ bkt_key[1];               \
546                 xor[2] = pkt_key[2] ^ bkt_key[2];               \
547                 xor[3] = pkt_key[3] ^ bkt_key[3];               \
548                 xor[4] = pkt_key[4] ^ bkt_key[4];               \
549                 xor[5] = pkt_key[5] ^ bkt_key[5];               \
550                 xor[6] = pkt_key[6] ^ bkt_key[6];               \
551                 xor[7] = pkt_key[7] ^ bkt_key[7];               \
552                 or = xor[0] | xor[1] | xor[2] | xor[3] |        \
553                         xor[4] | xor[5] | xor[6] | xor[7];      \
554                 match_key = 0;                                  \
555                 if (or == 0)                                    \
556                         match_key = 1;                          \
557         }                                                       \
558         break;                                                  \
559                                                                 \
560         default:                                                \
561                 match_key = 0;                                  \
562                 if (memcmp(pkt_key, bkt_key, f->key_size) == 0) \
563                         match_key = 1;                          \
564         }                                                       \
565 }
566
567 #define lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index)\
568 {                                                               \
569         uint64_t pkt00_mask, pkt01_mask;                        \
570         struct rte_mbuf *mbuf00, *mbuf01;                       \
571                                                                 \
572         pkt00_index = __builtin_ctzll(pkts_mask);               \
573         pkt00_mask = 1LLU << pkt00_index;                       \
574         pkts_mask &= ~pkt00_mask;                               \
575         mbuf00 = pkts[pkt00_index];                             \
576                                                                 \
577         pkt01_index = __builtin_ctzll(pkts_mask);               \
578         pkt01_mask = 1LLU << pkt01_index;                       \
579         pkts_mask &= ~pkt01_mask;                               \
580         mbuf01 = pkts[pkt01_index];                             \
581                                                                 \
582         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));  \
583         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));  \
584 }
585
586 #define lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, pkt00_index, \
587         pkt01_index)                                            \
588 {                                                               \
589         uint64_t pkt00_mask, pkt01_mask;                        \
590         struct rte_mbuf *mbuf00, *mbuf01;                       \
591                                                                 \
592         pkt00_index = __builtin_ctzll(pkts_mask);               \
593         pkt00_mask = 1LLU << pkt00_index;                       \
594         pkts_mask &= ~pkt00_mask;                               \
595         mbuf00 = pkts[pkt00_index];                             \
596                                                                 \
597         pkt01_index = __builtin_ctzll(pkts_mask);               \
598         if (pkts_mask == 0)                                     \
599                 pkt01_index = pkt00_index;                      \
600                                                                 \
601         pkt01_mask = 1LLU << pkt01_index;                       \
602         pkts_mask &= ~pkt01_mask;                               \
603         mbuf01 = pkts[pkt01_index];                             \
604                                                                 \
605         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));  \
606         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));  \
607 }
608
609 #define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index)    \
610 {                                                               \
611         struct grinder *g10, *g11;                              \
612         uint64_t sig10, sig11, bkt10_index, bkt11_index;        \
613         struct rte_mbuf *mbuf10, *mbuf11;                       \
614         struct bucket *bkt10, *bkt11, *buckets = t->buckets;    \
615         uint64_t bucket_mask = t->bucket_mask;                  \
616         uint32_t signature_offset = t->signature_offset;        \
617                                                                 \
618         mbuf10 = pkts[pkt10_index];                             \
619         sig10 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf10, signature_offset);\
620         bkt10_index = sig10 & bucket_mask;                      \
621         bkt10 = &buckets[bkt10_index];                          \
622                                                                 \
623         mbuf11 = pkts[pkt11_index];                             \
624         sig11 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf11, signature_offset);\
625         bkt11_index = sig11 & bucket_mask;                      \
626         bkt11 = &buckets[bkt11_index];                          \
627                                                                 \
628         rte_prefetch0(bkt10);                                   \
629         rte_prefetch0(bkt11);                                   \
630                                                                 \
631         g10 = &g[pkt10_index];                                  \
632         g10->sig = sig10;                                       \
633         g10->bkt = bkt10;                                       \
634                                                                 \
635         g11 = &g[pkt11_index];                                  \
636         g11->sig = sig11;                                       \
637         g11->bkt = bkt11;                                       \
638 }
639
640 #define lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index)\
641 {                                                               \
642         struct grinder *g10, *g11;                              \
643         uint64_t sig10, sig11, bkt10_index, bkt11_index;        \
644         struct rte_mbuf *mbuf10, *mbuf11;                       \
645         struct bucket *bkt10, *bkt11, *buckets = t->buckets;    \
646         uint8_t *key10, *key11;                                 \
647         uint64_t bucket_mask = t->bucket_mask;                  \
648         rte_table_hash_op_hash f_hash = t->f_hash;              \
649         uint64_t seed = t->seed;                                \
650         uint32_t key_size = t->key_size;                        \
651         uint32_t key_offset = t->key_offset;                    \
652                                                                 \
653         mbuf10 = pkts[pkt10_index];                             \
654         key10 = RTE_MBUF_METADATA_UINT8_PTR(mbuf10, key_offset);\
655         sig10 = (uint64_t) f_hash(key10, key_size, seed);       \
656         bkt10_index = sig10 & bucket_mask;                      \
657         bkt10 = &buckets[bkt10_index];                          \
658                                                                 \
659         mbuf11 = pkts[pkt11_index];                             \
660         key11 = RTE_MBUF_METADATA_UINT8_PTR(mbuf11, key_offset);\
661         sig11 = (uint64_t) f_hash(key11, key_size, seed);       \
662         bkt11_index = sig11 & bucket_mask;                      \
663         bkt11 = &buckets[bkt11_index];                          \
664                                                                 \
665         rte_prefetch0(bkt10);                                   \
666         rte_prefetch0(bkt11);                                   \
667                                                                 \
668         g10 = &g[pkt10_index];                                  \
669         g10->sig = sig10;                                       \
670         g10->bkt = bkt10;                                       \
671                                                                 \
672         g11 = &g[pkt11_index];                                  \
673         g11->sig = sig11;                                       \
674         g11->bkt = bkt11;                                       \
675 }
676
677 #define lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many)\
678 {                                                               \
679         struct grinder *g20, *g21;                              \
680         uint64_t sig20, sig21;                                  \
681         struct bucket *bkt20, *bkt21;                           \
682         uint8_t *key20, *key21, *key_mem = t->key_mem;          \
683         uint64_t match20, match21, match_many20, match_many21;  \
684         uint64_t match_pos20, match_pos21;                      \
685         uint32_t key20_index, key21_index, key_size_shl = t->key_size_shl;\
686                                                                 \
687         g20 = &g[pkt20_index];                                  \
688         sig20 = g20->sig;                                       \
689         bkt20 = g20->bkt;                                       \
690         sig20 = (sig20 >> 16) | 1LLU;                           \
691         lookup_cmp_sig(sig20, bkt20, match20, match_many20, match_pos20);\
692         match20 <<= pkt20_index;                                \
693         match_many20 <<= pkt20_index;                           \
694         key20_index = bkt20->key_pos[match_pos20];              \
695         key20 = &key_mem[key20_index << key_size_shl];          \
696                                                                 \
697         g21 = &g[pkt21_index];                                  \
698         sig21 = g21->sig;                                       \
699         bkt21 = g21->bkt;                                       \
700         sig21 = (sig21 >> 16) | 1LLU;                           \
701         lookup_cmp_sig(sig21, bkt21, match21, match_many21, match_pos21);\
702         match21 <<= pkt21_index;                                \
703         match_many21 <<= pkt21_index;                           \
704         key21_index = bkt21->key_pos[match_pos21];              \
705         key21 = &key_mem[key21_index << key_size_shl];          \
706                                                                 \
707         rte_prefetch0(key20);                                   \
708         rte_prefetch0(key21);                                   \
709                                                                 \
710         pkts_mask_match_many |= match_many20 | match_many21;    \
711                                                                 \
712         g20->match = match20;                                   \
713         g20->match_pos = match_pos20;                           \
714         g20->key_index = key20_index;                           \
715                                                                 \
716         g21->match = match21;                                   \
717         g21->match_pos = match_pos21;                           \
718         g21->key_index = key21_index;                           \
719 }
720
721 #define lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, \
722         entries)                                                \
723 {                                                               \
724         struct grinder *g30, *g31;                              \
725         struct rte_mbuf *mbuf30, *mbuf31;                       \
726         struct bucket *bkt30, *bkt31;                           \
727         uint8_t *key30, *key31, *key_mem = t->key_mem;          \
728         uint8_t *data30, *data31, *data_mem = t->data_mem;      \
729         uint64_t match30, match31, match_pos30, match_pos31;    \
730         uint64_t match_key30, match_key31, match_keys;          \
731         uint32_t key30_index, key31_index;                      \
732         uint32_t key_size_shl = t->key_size_shl;                \
733         uint32_t data_size_shl = t->data_size_shl;              \
734                                                                 \
735         mbuf30 = pkts[pkt30_index];                             \
736         g30 = &g[pkt30_index];                                  \
737         bkt30 = g30->bkt;                                       \
738         match30 = g30->match;                                   \
739         match_pos30 = g30->match_pos;                           \
740         key30_index = g30->key_index;                           \
741         key30 = &key_mem[key30_index << key_size_shl];          \
742         lookup_cmp_key(mbuf30, key30, match_key30, t);          \
743         match_key30 <<= pkt30_index;                            \
744         match_key30 &= match30;                                 \
745         data30 = &data_mem[key30_index << data_size_shl];       \
746         entries[pkt30_index] = data30;                          \
747                                                                 \
748         mbuf31 = pkts[pkt31_index];                             \
749         g31 = &g[pkt31_index];                                  \
750         bkt31 = g31->bkt;                                       \
751         match31 = g31->match;                                   \
752         match_pos31 = g31->match_pos;                           \
753         key31_index = g31->key_index;                           \
754         key31 = &key_mem[key31_index << key_size_shl];          \
755         lookup_cmp_key(mbuf31, key31, match_key31, t);          \
756         match_key31 <<= pkt31_index;                            \
757         match_key31 &= match31;                                 \
758         data31 = &data_mem[key31_index << data_size_shl];       \
759         entries[pkt31_index] = data31;                          \
760                                                                 \
761         rte_prefetch0(data30);                                  \
762         rte_prefetch0(data31);                                  \
763                                                                 \
764         match_keys = match_key30 | match_key31;                 \
765         pkts_mask_out |= match_keys;                            \
766                                                                 \
767         if (match_key30 == 0)                                   \
768                 match_pos30 = 4;                                \
769         lru_update(bkt30, match_pos30);                         \
770                                                                 \
771         if (match_key31 == 0)                                   \
772                 match_pos31 = 4;                                \
773         lru_update(bkt31, match_pos31);                         \
774 }
775
776 /***
777 * The lookup function implements a 4-stage pipeline, with each stage processing
778 * two different packets. The purpose of the pipelined implementation is to hide
779 * the latency of prefetching the data structures and to loosen the data
780 * dependencies between instructions.
781 *
782 *   p00  _______   p10  _______   p20  _______   p30  _______
783 * ----->|       |----->|       |----->|       |----->|       |----->
784 *       |   0   |      |   1   |      |   2   |      |   3   |
785 * ----->|_______|----->|_______|----->|_______|----->|_______|----->
786 *   p01            p11            p21            p31
787 *
788 * The naming convention is:
789 *         pXY = packet Y of stage X, X = 0 .. 3, Y = 0 .. 1
790 *
791 ***/
792 static int rte_table_hash_lru_lookup(
793         void *table,
794         struct rte_mbuf **pkts,
795         uint64_t pkts_mask,
796         uint64_t *lookup_hit_mask,
797         void **entries)
798 {
799         struct rte_table_hash *t = (struct rte_table_hash *) table;
800         struct grinder *g = t->grinders;
801         uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index;
802         uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index;
803         uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
804         int status = 0;
805
806         /* Cannot run the pipeline with less than 7 packets */
807         if (__builtin_popcountll(pkts_mask) < 7)
808                 return rte_table_hash_lru_lookup_unoptimized(table, pkts,
809                         pkts_mask, lookup_hit_mask, entries, 0);
810
811         /* Pipeline stage 0 */
812         lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
813
814         /* Pipeline feed */
815         pkt10_index = pkt00_index;
816         pkt11_index = pkt01_index;
817
818         /* Pipeline stage 0 */
819         lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
820
821         /* Pipeline stage 1 */
822         lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
823
824         /* Pipeline feed */
825         pkt20_index = pkt10_index;
826         pkt21_index = pkt11_index;
827         pkt10_index = pkt00_index;
828         pkt11_index = pkt01_index;
829
830         /* Pipeline stage 0 */
831         lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
832
833         /* Pipeline stage 1 */
834         lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
835
836         /* Pipeline stage 2 */
837         lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
838
839         /*
840         * Pipeline run
841         *
842         */
843         for ( ; pkts_mask; ) {
844                 /* Pipeline feed */
845                 pkt30_index = pkt20_index;
846                 pkt31_index = pkt21_index;
847                 pkt20_index = pkt10_index;
848                 pkt21_index = pkt11_index;
849                 pkt10_index = pkt00_index;
850                 pkt11_index = pkt01_index;
851
852                 /* Pipeline stage 0 */
853                 lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask,
854                         pkt00_index, pkt01_index);
855
856                 /* Pipeline stage 1 */
857                 lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
858
859                 /* Pipeline stage 2 */
860                 lookup2_stage2(t, g, pkt20_index, pkt21_index,
861                         pkts_mask_match_many);
862
863                 /* Pipeline stage 3 */
864                 lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index,
865                         pkts_mask_out, entries);
866         }
867
868         /* Pipeline feed */
869         pkt30_index = pkt20_index;
870         pkt31_index = pkt21_index;
871         pkt20_index = pkt10_index;
872         pkt21_index = pkt11_index;
873         pkt10_index = pkt00_index;
874         pkt11_index = pkt01_index;
875
876         /* Pipeline stage 1 */
877         lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
878
879         /* Pipeline stage 2 */
880         lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
881
882         /* Pipeline stage 3 */
883         lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
884                 entries);
885
886         /* Pipeline feed */
887         pkt30_index = pkt20_index;
888         pkt31_index = pkt21_index;
889         pkt20_index = pkt10_index;
890         pkt21_index = pkt11_index;
891
892         /* Pipeline stage 2 */
893         lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
894
895         /* Pipeline stage 3 */
896         lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
897                 entries);
898
899         /* Pipeline feed */
900         pkt30_index = pkt20_index;
901         pkt31_index = pkt21_index;
902
903         /* Pipeline stage 3 */
904         lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
905                 entries);
906
907         /* Slow path */
908         pkts_mask_match_many &= ~pkts_mask_out;
909         if (pkts_mask_match_many) {
910                 uint64_t pkts_mask_out_slow = 0;
911
912                 status = rte_table_hash_lru_lookup_unoptimized(table, pkts,
913                         pkts_mask_match_many, &pkts_mask_out_slow, entries, 0);
914                 pkts_mask_out |= pkts_mask_out_slow;
915         }
916
917         *lookup_hit_mask = pkts_mask_out;
918         return status;
919 }
920
921 static int rte_table_hash_lru_lookup_dosig(
922         void *table,
923         struct rte_mbuf **pkts,
924         uint64_t pkts_mask,
925         uint64_t *lookup_hit_mask,
926         void **entries)
927 {
928         struct rte_table_hash *t = (struct rte_table_hash *) table;
929         struct grinder *g = t->grinders;
930         uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index;
931         uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index;
932         uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
933         int status = 0;
934
935         /* Cannot run the pipeline with less than 7 packets */
936         if (__builtin_popcountll(pkts_mask) < 7)
937                 return rte_table_hash_lru_lookup_unoptimized(table, pkts,
938                         pkts_mask, lookup_hit_mask, entries, 1);
939
940         /* Pipeline stage 0 */
941         lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
942
943         /* Pipeline feed */
944         pkt10_index = pkt00_index;
945         pkt11_index = pkt01_index;
946
947         /* Pipeline stage 0 */
948         lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
949
950         /* Pipeline stage 1 */
951         lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
952
953         /* Pipeline feed */
954         pkt20_index = pkt10_index;
955         pkt21_index = pkt11_index;
956         pkt10_index = pkt00_index;
957         pkt11_index = pkt01_index;
958
959         /* Pipeline stage 0 */
960         lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
961
962         /* Pipeline stage 1 */
963         lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
964
965         /* Pipeline stage 2 */
966         lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
967
968         /*
969         * Pipeline run
970         *
971         */
972         for ( ; pkts_mask; ) {
973                 /* Pipeline feed */
974                 pkt30_index = pkt20_index;
975                 pkt31_index = pkt21_index;
976                 pkt20_index = pkt10_index;
977                 pkt21_index = pkt11_index;
978                 pkt10_index = pkt00_index;
979                 pkt11_index = pkt01_index;
980
981                 /* Pipeline stage 0 */
982                 lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask,
983                         pkt00_index, pkt01_index);
984
985                 /* Pipeline stage 1 */
986                 lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
987
988                 /* Pipeline stage 2 */
989                 lookup2_stage2(t, g, pkt20_index, pkt21_index,
990                         pkts_mask_match_many);
991
992                 /* Pipeline stage 3 */
993                 lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index,
994                         pkts_mask_out, entries);
995         }
996
997         /* Pipeline feed */
998         pkt30_index = pkt20_index;
999         pkt31_index = pkt21_index;
1000         pkt20_index = pkt10_index;
1001         pkt21_index = pkt11_index;
1002         pkt10_index = pkt00_index;
1003         pkt11_index = pkt01_index;
1004
1005         /* Pipeline stage 1 */
1006         lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
1007
1008         /* Pipeline stage 2 */
1009         lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
1010
1011         /* Pipeline stage 3 */
1012         lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
1013                 entries);
1014
1015         /* Pipeline feed */
1016         pkt30_index = pkt20_index;
1017         pkt31_index = pkt21_index;
1018         pkt20_index = pkt10_index;
1019         pkt21_index = pkt11_index;
1020
1021         /* Pipeline stage 2 */
1022         lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
1023
1024         /* Pipeline stage 3 */
1025         lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
1026                 entries);
1027
1028         /* Pipeline feed */
1029         pkt30_index = pkt20_index;
1030         pkt31_index = pkt21_index;
1031
1032         /* Pipeline stage 3 */
1033         lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
1034                 entries);
1035
1036         /* Slow path */
1037         pkts_mask_match_many &= ~pkts_mask_out;
1038         if (pkts_mask_match_many) {
1039                 uint64_t pkts_mask_out_slow = 0;
1040
1041                 status = rte_table_hash_lru_lookup_unoptimized(table, pkts,
1042                         pkts_mask_match_many, &pkts_mask_out_slow, entries, 1);
1043                 pkts_mask_out |= pkts_mask_out_slow;
1044         }
1045
1046         *lookup_hit_mask = pkts_mask_out;
1047         return status;
1048 }
1049
1050 struct rte_table_ops rte_table_hash_lru_ops = {
1051         .f_create = rte_table_hash_lru_create,
1052         .f_free = rte_table_hash_lru_free,
1053         .f_add = rte_table_hash_lru_entry_add,
1054         .f_delete = rte_table_hash_lru_entry_delete,
1055         .f_lookup = rte_table_hash_lru_lookup,
1056 };
1057
1058 struct rte_table_ops rte_table_hash_lru_dosig_ops = {
1059         .f_create = rte_table_hash_lru_create,
1060         .f_free = rte_table_hash_lru_free,
1061         .f_add = rte_table_hash_lru_entry_add,
1062         .f_delete = rte_table_hash_lru_entry_delete,
1063         .f_lookup = rte_table_hash_lru_lookup_dosig,
1064 };