1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020 Intel Corporation
9 #include <rte_common.h>
10 #include <rte_cycles.h>
11 #include <rte_prefetch.h>
13 #include "rte_swx_table_learner.h"
15 #ifndef RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES
16 #define RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES 1
19 #ifndef RTE_SWX_TABLE_SELECTOR_HUGE_PAGES_DISABLE
21 #include <rte_malloc.h>
24 env_calloc(size_t size, size_t alignment, int numa_node)
26 return rte_zmalloc_socket(NULL, size, alignment, numa_node);
30 env_free(void *start, size_t size __rte_unused)
40 env_calloc(size_t size, size_t alignment __rte_unused, int numa_node)
44 if (numa_available() == -1)
47 start = numa_alloc_onnode(size, numa_node);
51 memset(start, 0, size);
56 env_free(void *start, size_t size)
58 if ((numa_available() == -1) || !start)
61 numa_free(start, size);
#if defined(RTE_ARCH_X86_64)

#include <x86intrin.h>

/* Hardware CRC32-C instruction (SSE4.2). */
#define crc32_u64(crc, v) _mm_crc32_u64(crc, v)

#else

/* Software fallback: bit-by-bit CRC32-C (Castagnoli), reflected polynomial
 * 0x82F63B78, matching the semantics of _mm_crc32_u64: the lower 32 bits of
 * *crc* are folded with the 64-bit *value*, then all 64 bits are processed
 * LSB first.
 */
static inline uint64_t
crc32_u64_generic(uint64_t crc, uint64_t value)
{
	int i;

	/* Fold the running 32-bit CRC into the input value. */
	crc = (crc & 0xFFFFFFFFLLU) ^ value;

	/* Process one bit per iteration, least significant bit first. */
	for (i = 63; i >= 0; i--) {
		uint64_t mask;

		/* mask is all-ones when the LSB is set, all-zeros otherwise. */
		mask = -(crc & 1LLU);
		crc = (crc >> 1LLU) ^ (0x82F63B78LLU & mask);
	}

	return crc;
}

#define crc32_u64(crc, v) crc32_u64_generic(crc, v)

#endif
/* Key size needs to be one of: 8, 16, 32 or 64.
 *
 * Compute a 32-bit signature of (key & key_mask) using chained CRC32-C
 * operations, seeded with *seed*. Wider keys mix the per-lane CRCs together
 * before the final fold.
 */
static inline uint32_t
hash(void *key, void *key_mask, uint32_t key_size, uint32_t seed)
{
	uint64_t *k = key;
	uint64_t *m = key_mask;
	uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5;

	switch (key_size) {
	case 8:
		crc0 = crc32_u64(seed, k[0] & m[0]);
		return (uint32_t)crc0;

	case 16:
		k0 = k[0] & m[0];

		crc0 = crc32_u64(k0, seed);
		crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);

		crc0 ^= crc1;

		return (uint32_t)crc0;

	case 32:
		k0 = k[0] & m[0];
		k2 = k[2] & m[2];

		crc0 = crc32_u64(k0, seed);
		crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);

		crc2 = crc32_u64(k2, k[3] & m[3]);
		crc3 = k2 >> 32;

		crc0 = crc32_u64(crc0, crc1);
		crc1 = crc32_u64(crc2, crc3);

		crc0 ^= crc1;

		return (uint32_t)crc0;

	case 64:
		k0 = k[0] & m[0];
		k2 = k[2] & m[2];
		k5 = k[5] & m[5];

		crc0 = crc32_u64(k0, seed);
		crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);

		crc2 = crc32_u64(k2, k[3] & m[3]);
		crc3 = crc32_u64(k2 >> 32, k[4] & m[4]);

		crc4 = crc32_u64(k5, k[6] & m[6]);
		crc5 = crc32_u64(k5 >> 32, k[7] & m[7]);

		crc0 = crc32_u64(crc0, (crc1 << 32) ^ crc2);
		crc1 = crc32_u64(crc3, (crc4 << 32) ^ crc5);

		crc0 ^= crc1;

		return (uint32_t)crc0;

	default:
		/* Unreachable for valid (power-of-2, <= 64) key sizes. */
		return 0;
	}
}
/* Compare the plain key *a* against the masked key *b*, i.e. test whether
 * a == (b & b_mask) over *n_bytes* bytes. *n_bytes* must be a non-zero
 * multiple of 8; the 8/16/32/64 byte cases are unrolled, any other multiple
 * of 8 takes the generic loop.
 *
 * Return: 0 = Keys are NOT equal; 1 = Keys are equal.
 */
static inline uint32_t
table_keycmp(void *a, void *b, void *b_mask, uint32_t n_bytes)
{
	uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;

	switch (n_bytes) {
	case 8: {
		uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
		uint32_t result = 1;

		if (xor0)
			result = 0;

		return result;
	}

	case 16: {
		uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
		uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]);
		uint64_t diff = xor0 | xor1;
		uint32_t result = 1;

		if (diff)
			result = 0;

		return result;
	}

	case 32: {
		uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
		uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]);
		uint64_t xor2 = a64[2] ^ (b64[2] & b_mask64[2]);
		uint64_t xor3 = a64[3] ^ (b64[3] & b_mask64[3]);
		uint64_t diff = (xor0 | xor1) | (xor2 | xor3);
		uint32_t result = 1;

		if (diff)
			result = 0;

		return result;
	}

	case 64: {
		uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
		uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]);
		uint64_t xor2 = a64[2] ^ (b64[2] & b_mask64[2]);
		uint64_t xor3 = a64[3] ^ (b64[3] & b_mask64[3]);
		uint64_t xor4 = a64[4] ^ (b64[4] & b_mask64[4]);
		uint64_t xor5 = a64[5] ^ (b64[5] & b_mask64[5]);
		uint64_t xor6 = a64[6] ^ (b64[6] & b_mask64[6]);
		uint64_t xor7 = a64[7] ^ (b64[7] & b_mask64[7]);
		uint64_t diff = ((xor0 | xor1) | (xor2 | xor3)) |
				((xor4 | xor5) | (xor6 | xor7));
		uint32_t result = 1;

		if (diff)
			result = 0;

		return result;
	}

	default: {
		uint32_t i;

		for (i = 0; i < n_bytes / sizeof(uint64_t); i++)
			if (a64[i] != (b64[i] & b_mask64[i]))
				return 0;

		return 1;
	}
	}
}
232 #define TABLE_KEYS_PER_BUCKET 4
234 #define TABLE_BUCKET_PAD_SIZE \
235 (RTE_CACHE_LINE_SIZE - TABLE_KEYS_PER_BUCKET * (sizeof(uint32_t) + sizeof(uint32_t)))
237 struct table_bucket {
238 uint32_t time[TABLE_KEYS_PER_BUCKET];
239 uint32_t sig[TABLE_KEYS_PER_BUCKET];
240 uint8_t pad[TABLE_BUCKET_PAD_SIZE];
/* Table parameters, pre-computed once at table creation time. */
struct table_params {
	/* The real key size. Must be non-zero. */
	size_t key_size;

	/* The key size upgraded to the next power of 2. This is used for hash generation (in
	 * increments of 8 bytes, from 8 to 64 bytes) and for run-time key comparison. This is why
	 * key sizes bigger than 64 bytes are not allowed.
	 */
	size_t key_size_pow2;

	/* log2(key_size_pow2). Purpose: avoid multiplication with non-power-of-2 numbers. */
	size_t key_size_log2;

	/* The key offset within the key buffer. */
	size_t key_offset;

	/* The real action data size. */
	size_t action_data_size;

	/* The data size, i.e. the 8-byte action_id field plus the action data size, upgraded to the
	 * next power of 2.
	 */
	size_t data_size_pow2;

	/* log2(data_size_pow2). Purpose: avoid multiplication with non-power-of-2 numbers. */
	size_t data_size_log2;

	/* Number of buckets. Must be a power of 2 to avoid modulo with non-power-of-2 numbers. */
	size_t n_buckets;

	/* Bucket mask. Purpose: replace modulo with bitmask and operation. */
	size_t bucket_mask;

	/* Total number of key bytes in the bucket, including the key padding bytes. There are
	 * (key_size_pow2 - key_size) padding bytes for each key in the bucket.
	 */
	size_t bucket_key_all_size;

	/* Bucket size. Must be a power of 2 to avoid multiplication with non-power-of-2 number. */
	size_t bucket_size;

	/* log2(bucket_size). Purpose: avoid multiplication with non-power-of-2 numbers. */
	size_t bucket_size_log2;

	/* Timeout in CPU clock cycles. */
	uint64_t key_timeout;

	/* Total memory size. */
	size_t total_size;
};
296 /* Table parameters. */
297 struct table_params params;
299 /* Key mask. Array of *key_size* bytes. */
300 uint8_t key_mask0[RTE_CACHE_LINE_SIZE];
304 } __rte_cache_aligned;
307 table_params_get(struct table_params *p, struct rte_swx_table_learner_params *params)
309 /* Check input parameters. */
312 (params->key_size > 64) ||
313 !params->n_keys_max ||
314 (params->n_keys_max > 1U << 31) ||
315 !params->key_timeout)
319 p->key_size = params->key_size;
321 p->key_size_pow2 = rte_align64pow2(p->key_size);
322 if (p->key_size_pow2 < 8)
323 p->key_size_pow2 = 8;
325 p->key_size_log2 = __builtin_ctzll(p->key_size_pow2);
327 p->key_offset = params->key_offset;
330 p->action_data_size = params->action_data_size;
332 p->data_size_pow2 = rte_align64pow2(sizeof(uint64_t) + p->action_data_size);
334 p->data_size_log2 = __builtin_ctzll(p->data_size_pow2);
337 p->n_buckets = rte_align32pow2(params->n_keys_max);
339 p->bucket_mask = p->n_buckets - 1;
341 p->bucket_key_all_size = TABLE_KEYS_PER_BUCKET * p->key_size_pow2;
343 p->bucket_size = rte_align64pow2(sizeof(struct table_bucket) +
344 p->bucket_key_all_size +
345 TABLE_KEYS_PER_BUCKET * p->data_size_pow2);
347 p->bucket_size_log2 = __builtin_ctzll(p->bucket_size);
350 p->key_timeout = params->key_timeout * rte_get_tsc_hz();
353 p->total_size = sizeof(struct table) + p->n_buckets * p->bucket_size;
358 static inline struct table_bucket *
359 table_bucket_get(struct table *t, size_t bucket_id)
361 return (struct table_bucket *)&t->buckets[bucket_id << t->params.bucket_size_log2];
364 static inline uint8_t *
365 table_bucket_key_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
367 return &b->key[bucket_key_pos << t->params.key_size_log2];
370 static inline uint64_t *
371 table_bucket_data_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
373 return (uint64_t *)&b->key[t->params.bucket_key_all_size +
374 (bucket_key_pos << t->params.data_size_log2)];
378 rte_swx_table_learner_footprint_get(struct rte_swx_table_learner_params *params)
380 struct table_params p;
383 status = table_params_get(&p, params);
385 return status ? 0 : p.total_size;
389 rte_swx_table_learner_create(struct rte_swx_table_learner_params *params, int numa_node)
391 struct table_params p;
395 /* Check and process the input parameters. */
396 status = table_params_get(&p, params);
400 /* Memory allocation. */
401 t = env_calloc(p.total_size, RTE_CACHE_LINE_SIZE, numa_node);
405 /* Memory initialization. */
406 memcpy(&t->params, &p, sizeof(struct table_params));
408 if (params->key_mask0)
409 memcpy(t->key_mask0, params->key_mask0, params->key_size);
411 memset(t->key_mask0, 0xFF, params->key_size);
417 rte_swx_table_learner_free(void *table)
419 struct table *t = table;
424 env_free(t, t->params.total_size);
/* Per-thread lookup state machine context carried between the two lookup
 * states and into add()/delete().
 */
struct mailbox {
	/* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
	struct table_bucket *bucket;

	/* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
	uint32_t input_sig;

	/* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
	uint8_t *input_key;

	/* Writer: lookup state 1. Reader(s): add(). Values: 0 = miss; 1 = hit. */
	uint32_t hit;

	/* Writer: lookup state 1. Reader(s): add(). Valid only when hit is non-zero. */
	size_t bucket_key_pos;

	/* State: 0 = hash and prefetch the bucket; 1 = search the bucket. */
	int state;
};
448 rte_swx_table_learner_mailbox_size_get(void)
450 return sizeof(struct mailbox);
454 rte_swx_table_learner_lookup(void *table,
459 uint8_t **action_data,
462 struct table *t = table;
463 struct mailbox *m = mailbox;
468 struct table_bucket *b;
472 input_key = &(*key)[t->params.key_offset];
473 input_sig = hash(input_key, t->key_mask0, t->params.key_size_pow2, 0);
474 bucket_id = input_sig & t->params.bucket_mask;
475 b = table_bucket_get(t, bucket_id);
478 rte_prefetch0(&b->key[0]);
479 rte_prefetch0(&b->key[RTE_CACHE_LINE_SIZE]);
482 m->input_key = input_key;
483 m->input_sig = input_sig | 1;
489 struct table_bucket *b = m->bucket;
492 /* Search the input key through the bucket keys. */
493 for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) {
494 uint64_t time = b->time[i];
495 uint32_t sig = b->sig[i];
496 uint8_t *key = table_bucket_key_get(t, b, i);
497 uint32_t key_size_pow2 = t->params.key_size_pow2;
501 if ((time > input_time) &&
502 (sig == m->input_sig) &&
503 table_keycmp(key, m->input_key, t->key_mask0, key_size_pow2)) {
504 uint64_t *data = table_bucket_data_get(t, b, i);
509 b->time[i] = (input_time + t->params.key_timeout) >> 32;
512 m->bucket_key_pos = i;
515 *action_id = data[0];
516 *action_data = (uint8_t *)&data[1];
531 /* This state should never be reached. Miss. */
541 rte_swx_table_learner_add(void *table,
545 uint8_t *action_data)
547 struct table *t = table;
548 struct mailbox *m = mailbox;
549 struct table_bucket *b = m->bucket;
552 /* Lookup hit: The key, key signature and key time are already properly configured (the key
553 * time was bumped by lookup), only the key data need to be updated.
556 uint64_t *data = table_bucket_data_get(t, b, m->bucket_key_pos);
558 /* Install the key data. */
560 if (t->params.action_data_size && action_data)
561 memcpy(&data[1], action_data, t->params.action_data_size);
566 /* Lookup miss: Search for a free position in the current bucket and install the key. */
567 for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) {
568 uint64_t time = b->time[i];
572 /* Free position: Either there was never a key installed here, so the key time is
573 * set to zero (the init value), which is always less than the current time, or this
574 * position was used before, but the key expired (the key time is in the past).
576 if (time < input_time) {
577 uint8_t *key = table_bucket_key_get(t, b, i);
578 uint64_t *data = table_bucket_data_get(t, b, i);
580 /* Install the key. */
581 b->time[i] = (input_time + t->params.key_timeout) >> 32;
582 b->sig[i] = m->input_sig;
583 memcpy(key, m->input_key, t->params.key_size);
585 /* Install the key data. */
587 if (t->params.action_data_size && action_data)
588 memcpy(&data[1], action_data, t->params.action_data_size);
592 m->bucket_key_pos = i;
603 rte_swx_table_learner_delete(void *table __rte_unused,
606 struct mailbox *m = mailbox;
609 struct table_bucket *b = m->bucket;
611 /* Expire the key. */
612 b->time[m->bucket_key_pos] = 0;