1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020 Intel Corporation
#include <stdint.h>
#include <string.h>
#include <errno.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>

#include "rte_swx_table_learner.h"
14 #ifndef RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES
15 #define RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES 1
18 #ifndef RTE_SWX_TABLE_SELECTOR_HUGE_PAGES_DISABLE
20 #include <rte_malloc.h>
23 env_calloc(size_t size, size_t alignment, int numa_node)
25 return rte_zmalloc_socket(NULL, size, alignment, numa_node);
29 env_free(void *start, size_t size __rte_unused)
39 env_calloc(size_t size, size_t alignment __rte_unused, int numa_node)
43 if (numa_available() == -1)
46 start = numa_alloc_onnode(size, numa_node);
50 memset(start, 0, size);
55 env_free(void *start, size_t size)
57 if ((numa_available() == -1) || !start)
60 numa_free(start, size);
65 #if defined(RTE_ARCH_X86_64)
67 #include <x86intrin.h>
69 #define crc32_u64(crc, v) _mm_crc32_u64(crc, v)
73 static inline uint64_t
74 crc32_u64_generic(uint64_t crc, uint64_t value)
78 crc = (crc & 0xFFFFFFFFLLU) ^ value;
79 for (i = 63; i >= 0; i--) {
83 crc = (crc >> 1LLU) ^ (0x82F63B78LLU & mask);
89 #define crc32_u64(crc, v) crc32_u64_generic(crc, v)
93 /* Key size needs to be one of: 8, 16, 32 or 64. */
94 static inline uint32_t
95 hash(void *key, void *key_mask, uint32_t key_size, uint32_t seed)
98 uint64_t *m = key_mask;
99 uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5;
103 crc0 = crc32_u64(seed, k[0] & m[0]);
109 crc0 = crc32_u64(k0, seed);
110 crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);
120 crc0 = crc32_u64(k0, seed);
121 crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);
123 crc2 = crc32_u64(k2, k[3] & m[3]);
126 crc0 = crc32_u64(crc0, crc1);
127 crc1 = crc32_u64(crc2, crc3);
138 crc0 = crc32_u64(k0, seed);
139 crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);
141 crc2 = crc32_u64(k2, k[3] & m[3]);
142 crc3 = crc32_u64(k2 >> 32, k[4] & m[4]);
144 crc4 = crc32_u64(k5, k[6] & m[6]);
145 crc5 = crc32_u64(k5 >> 32, k[7] & m[7]);
147 crc0 = crc32_u64(crc0, (crc1 << 32) ^ crc2);
148 crc1 = crc32_u64(crc3, (crc4 << 32) ^ crc5);
161 * Return: 0 = Keys are NOT equal; 1 = Keys are equal.
163 static inline uint32_t
164 table_keycmp(void *a, void *b, void *b_mask, uint32_t n_bytes)
166 uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;
170 uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
179 uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
180 uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]);
181 uint64_t or = xor0 | xor1;
190 uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
191 uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]);
192 uint64_t xor2 = a64[2] ^ (b64[2] & b_mask64[2]);
193 uint64_t xor3 = a64[3] ^ (b64[3] & b_mask64[3]);
194 uint64_t or = (xor0 | xor1) | (xor2 | xor3);
203 uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
204 uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]);
205 uint64_t xor2 = a64[2] ^ (b64[2] & b_mask64[2]);
206 uint64_t xor3 = a64[3] ^ (b64[3] & b_mask64[3]);
207 uint64_t xor4 = a64[4] ^ (b64[4] & b_mask64[4]);
208 uint64_t xor5 = a64[5] ^ (b64[5] & b_mask64[5]);
209 uint64_t xor6 = a64[6] ^ (b64[6] & b_mask64[6]);
210 uint64_t xor7 = a64[7] ^ (b64[7] & b_mask64[7]);
211 uint64_t or = ((xor0 | xor1) | (xor2 | xor3)) |
212 ((xor4 | xor5) | (xor6 | xor7));
223 for (i = 0; i < n_bytes / sizeof(uint64_t); i++)
224 if (a64[i] != (b64[i] & b_mask64[i]))
231 #define TABLE_KEYS_PER_BUCKET 4
233 #define TABLE_BUCKET_PAD_SIZE \
234 (RTE_CACHE_LINE_SIZE - TABLE_KEYS_PER_BUCKET * (sizeof(uint32_t) + sizeof(uint32_t)))
236 struct table_bucket {
237 uint32_t time[TABLE_KEYS_PER_BUCKET];
238 uint32_t sig[TABLE_KEYS_PER_BUCKET];
239 uint8_t pad[TABLE_BUCKET_PAD_SIZE];
243 struct table_params {
244 /* The real key size. Must be non-zero. */
247 /* They key size upgrated to the next power of 2. This used for hash generation (in
248 * increments of 8 bytes, from 8 to 64 bytes) and for run-time key comparison. This is why
249 * key sizes bigger than 64 bytes are not allowed.
251 size_t key_size_pow2;
253 /* log2(key_size_pow2). Purpose: avoid multiplication with non-power-of-2 numbers. */
254 size_t key_size_log2;
256 /* The key offset within the key buffer. */
259 /* The real action data size. */
260 size_t action_data_size;
262 /* The data size, i.e. the 8-byte action_id field plus the action data size, upgraded to the
265 size_t data_size_pow2;
267 /* log2(data_size_pow2). Purpose: avoid multiplication with non-power of 2 numbers. */
268 size_t data_size_log2;
270 /* Number of buckets. Must be a power of 2 to avoid modulo with non-power-of-2 numbers. */
273 /* Bucket mask. Purpose: replace modulo with bitmask and operation. */
276 /* Total number of key bytes in the bucket, including the key padding bytes. There are
277 * (key_size_pow2 - key_size) padding bytes for each key in the bucket.
279 size_t bucket_key_all_size;
281 /* Bucket size. Must be a power of 2 to avoid multiplication with non-power-of-2 number. */
284 /* log2(bucket_size). Purpose: avoid multiplication with non-power of 2 numbers. */
285 size_t bucket_size_log2;
287 /* Timeout in CPU clock cycles. */
288 uint64_t key_timeout;
290 /* Total memory size. */
295 /* Table parameters. */
296 struct table_params params;
298 /* Key mask. Array of *key_size* bytes. */
299 uint8_t key_mask0[RTE_CACHE_LINE_SIZE];
303 } __rte_cache_aligned;
306 table_params_get(struct table_params *p, struct rte_swx_table_learner_params *params)
308 /* Check input parameters. */
311 (params->key_size > 64) ||
312 !params->n_keys_max ||
313 (params->n_keys_max > 1U << 31) ||
314 !params->key_timeout)
318 p->key_size = params->key_size;
320 p->key_size_pow2 = rte_align64pow2(p->key_size);
321 if (p->key_size_pow2 < 8)
322 p->key_size_pow2 = 8;
324 p->key_size_log2 = __builtin_ctzll(p->key_size_pow2);
326 p->key_offset = params->key_offset;
329 p->action_data_size = params->action_data_size;
331 p->data_size_pow2 = rte_align64pow2(sizeof(uint64_t) + p->action_data_size);
333 p->data_size_log2 = __builtin_ctzll(p->data_size_pow2);
336 p->n_buckets = rte_align32pow2(params->n_keys_max);
338 p->bucket_mask = p->n_buckets - 1;
340 p->bucket_key_all_size = TABLE_KEYS_PER_BUCKET * p->key_size_pow2;
342 p->bucket_size = rte_align64pow2(sizeof(struct table_bucket) +
343 p->bucket_key_all_size +
344 TABLE_KEYS_PER_BUCKET * p->data_size_pow2);
346 p->bucket_size_log2 = __builtin_ctzll(p->bucket_size);
349 p->key_timeout = params->key_timeout * rte_get_tsc_hz();
352 p->total_size = sizeof(struct table) + p->n_buckets * p->bucket_size;
357 static inline struct table_bucket *
358 table_bucket_get(struct table *t, size_t bucket_id)
360 return (struct table_bucket *)&t->buckets[bucket_id << t->params.bucket_size_log2];
363 static inline uint8_t *
364 table_bucket_key_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
366 return &b->key[bucket_key_pos << t->params.key_size_log2];
369 static inline uint64_t *
370 table_bucket_data_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
372 return (uint64_t *)&b->key[t->params.bucket_key_all_size +
373 (bucket_key_pos << t->params.data_size_log2)];
377 rte_swx_table_learner_footprint_get(struct rte_swx_table_learner_params *params)
379 struct table_params p;
382 status = table_params_get(&p, params);
384 return status ? 0 : p.total_size;
388 rte_swx_table_learner_create(struct rte_swx_table_learner_params *params, int numa_node)
390 struct table_params p;
394 /* Check and process the input parameters. */
395 status = table_params_get(&p, params);
399 /* Memory allocation. */
400 t = env_calloc(p.total_size, RTE_CACHE_LINE_SIZE, numa_node);
404 /* Memory initialization. */
405 memcpy(&t->params, &p, sizeof(struct table_params));
407 if (params->key_mask0)
408 memcpy(t->key_mask0, params->key_mask0, params->key_size);
410 memset(t->key_mask0, 0xFF, params->key_size);
416 rte_swx_table_learner_free(void *table)
418 struct table *t = table;
423 env_free(t, t->params.total_size);
/* Per-lookup-thread state machine context. The two-state lookup (state 0:
 * hash + bucket prefetch; state 1: bucket search) passes its intermediate
 * results to the next state and to add()/delete() through this mailbox.
 * NOTE(review): this listing is missing interior lines — the struct opener,
 * the field declarations matching several of the comments below (the
 * input_key, input_sig, hit and state fields referenced by lookup/add) and
 * the closing brace; restore them from the complete source before building.
 */
427 /* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
428 struct table_bucket *bucket;
430 /* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
433 /* Writer: lookup state 1. Reader(s): add(). */
436 /* Writer: lookup state 1. Reader(s): add(). Values: 0 = miss; 1 = hit. */
439 /* Writer: lookup state 1. Reader(s): add(). Valid only when hit is non-zero. */
440 size_t bucket_key_pos;
447 rte_swx_table_learner_mailbox_size_get(void)
449 return sizeof(struct mailbox);
/* Two-state lookup state machine: state 0 hashes the input key and prefetches
 * its bucket; state 1 searches the bucket keys and reports hit/miss. The
 * intermediate results are saved in the mailbox for add()/delete().
 * NOTE(review): this listing is missing interior lines — the full parameter
 * list, local declarations, the state dispatch, return statements and closing
 * braces; restore them from the complete source before building.
 */
453 rte_swx_table_learner_lookup(void *table,
458 uint8_t **action_data,
461 struct table *t = table;
462 struct mailbox *m = mailbox;
467 struct table_bucket *b;
/* State 0: hash the masked input key and identify its bucket. */
471 input_key = &(*key)[t->params.key_offset];
472 input_sig = hash(input_key, t->key_mask0, t->params.key_size_pow2, 0);
473 bucket_id = input_sig & t->params.bucket_mask;
474 b = table_bucket_get(t, bucket_id);
/* Prefetch the first two cache lines of the bucket key area. */
477 rte_prefetch0(&b->key[0]);
478 rte_prefetch0(&b->key[RTE_CACHE_LINE_SIZE]);
/* Save the state-0 results; the signature LSB is forced to 1, so a stored
 * signature never matches the all-zero signature of a free position.
 */
481 m->input_key = input_key;
482 m->input_sig = input_sig | 1;
/* State 1: search the previously prefetched bucket. */
488 struct table_bucket *b = m->bucket;
491 /* Search the input key through the bucket keys. */
492 for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) {
493 uint64_t time = b->time[i];
494 uint32_t sig = b->sig[i];
495 uint8_t *key = table_bucket_key_get(t, b, i);
496 uint32_t key_size_pow2 = t->params.key_size_pow2;
/* Hit: key not expired, signature match, masked key match.
 * NOTE(review): b->time[i] holds the expiry deadline shifted right by 32 (see
 * the store below); a missing line presumably shifts *time* back left by 32
 * before this comparison against the full input_time — confirm.
 */
500 if ((time > input_time) &&
501 (sig == m->input_sig) &&
502 table_keycmp(key, m->input_key, t->key_mask0, key_size_pow2)) {
503 uint64_t *data = table_bucket_data_get(t, b, i);
/* Rearm the key expiration deadline on every hit. */
508 b->time[i] = (input_time + t->params.key_timeout) >> 32;
511 m->bucket_key_pos = i;
/* Report the action associated with the matched key. */
514 *action_id = data[0];
515 *action_data = (uint8_t *)&data[1];
530 /* This state should never be reached. Miss. */
/* Learn (install) the current input key — the one processed by the latest
 * lookup() call on this mailbox — into the table, together with its action.
 * NOTE(review): this listing is missing interior lines — the full parameter
 * list, the hit/miss dispatch on the mailbox state, the action_id store into
 * data[0], return statements and closing braces; restore them from the
 * complete source before building.
 */
540 rte_swx_table_learner_add(void *table,
544 uint8_t *action_data)
546 struct table *t = table;
547 struct mailbox *m = mailbox;
548 struct table_bucket *b = m->bucket;
551 /* Lookup hit: The key, key signature and key time are already properly configured (the key
552 * time was bumped by lookup), only the key data need to be updated.
555 uint64_t *data = table_bucket_data_get(t, b, m->bucket_key_pos);
557 /* Install the key data. */
559 if (t->params.action_data_size && action_data)
560 memcpy(&data[1], action_data, t->params.action_data_size);
565 /* Lookup miss: Search for a free position in the current bucket and install the key. */
566 for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) {
567 uint64_t time = b->time[i];
/* NOTE(review): b->time[i] holds (expiry >> 32); a missing line presumably
 * shifts *time* back left by 32 before the comparison against the full
 * input_time below — confirm against the complete source.
 */
571 /* Free position: Either there was never a key installed here, so the key time is
572 * set to zero (the init value), which is always less than the current time, or this
573 * position was used before, but the key expired (the key time is in the past).
575 if (time < input_time) {
576 uint8_t *key = table_bucket_key_get(t, b, i);
577 uint64_t *data = table_bucket_data_get(t, b, i);
579 /* Install the key. */
580 b->time[i] = (input_time + t->params.key_timeout) >> 32;
581 b->sig[i] = m->input_sig;
582 memcpy(key, m->input_key, t->params.key_size);
584 /* Install the key data. */
586 if (t->params.action_data_size && action_data)
587 memcpy(&data[1], action_data, t->params.action_data_size);
/* Record the claimed position so a subsequent delete() can find it. */
591 m->bucket_key_pos = i;
/* Delete the current input key — the one processed by the latest lookup() call
 * on this mailbox — by forcing its expiration time to 0, i.e. the "free /
 * never used" marker, which makes the position reusable by add().
 * NOTE(review): this listing is missing interior lines — presumably a mailbox
 * hit check and the closing braces; restore them from the complete source.
 */
602 rte_swx_table_learner_delete(void *table __rte_unused,
605 struct mailbox *m = mailbox;
608 struct table_bucket *b = m->bucket;
610 /* Expire the key. */
611 b->time[m->bucket_key_pos] = 0;