1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright (c) 2018 Arm Limited
9 #include <rte_rcu_qsbr.h>
11 #include <rte_hash_crc.h>
12 #include <rte_malloc.h>
13 #include <rte_cycles.h>
18 /* Check condition and return an error if true. */
/* Worker lcore ids collected in test_rcu_qsbr_main(); valid in [0, num_cores) */
19 static uint16_t enabled_core_ids[RTE_MAX_LCORE];
20 static unsigned int num_cores;
/* Keys inserted into the hash table; TOTAL_ENTRY entries, allocated in init_hash() */
22 static uint32_t *keys;
23 #define TOTAL_ENTRY (1024 * 8)
/* Value each hash reader spins its per-thread counter up to before moving on */
24 #define COUNTER_VALUE 4096
/* Per-key payload: array of one uint32_t counter per possible lcore */
25 static uint32_t *hash_data[TOTAL_ENTRY];
/* Flags shared between the writer and the reader threads.
 * NOTE(review): the stores to writer_done/all_registered are on lines not
 * visible in this view — confirm set-sites against the full file.
 */
26 static volatile uint8_t writer_done;
27 static volatile uint8_t all_registered;
/* Monotonically increasing counter used by alloc_thread_id(); reset by each test */
28 static volatile uint32_t thr_id;
/* QS variables; only t[0] is used by the tests visible here */
30 static struct rte_rcu_qsbr *t[RTE_MAX_LCORE];
31 static struct rte_hash *h;
32 static char hash_name[8];
/* Aggregate operation counts and cycle totals, updated atomically by threads */
33 static uint64_t updates, checks;
34 static uint64_t update_cycles, check_cycles;
36 /* Scale down results to 1000 operations to support lower
39 #define RCU_SCALE_DOWN 1000
41 /* Simple way to allocate thread ids in 0 to RTE_MAX_LCORE space */
/* Atomically hands out the next id from the shared thr_id counter.
 * Ids >= RTE_MAX_LCORE are only reported via printf, not rejected here.
 * NOTE(review): the function-name line and the return statement fall on
 * lines not visible in this view.
 */
42 static inline uint32_t
47 	tmp_thr_id = __atomic_fetch_add(&thr_id, 1, __ATOMIC_RELAXED);
48 	if (tmp_thr_id >= RTE_MAX_LCORE)
49 		printf("Invalid thread id %u\n", tmp_thr_id);
/* Reader worker launched on a remote lcore.
 * Registers its thread id on QS variable t[0], goes online, and reports
 * quiescent state in a loop: until writer_done when a writer is present
 * (arg != NULL), otherwise for a fixed 100000000 iterations. Cycle and
 * iteration totals are folded into the global counters before the thread
 * offlines and unregisters.
 * NOTE(review): the loop_cnt increments, closing braces and return are on
 * lines not visible in this view.
 */
55 test_rcu_qsbr_reader_perf(void *arg)
57 	bool writer_present = (bool)arg;
58 	uint32_t thread_id = alloc_thread_id();
59 	uint64_t loop_cnt = 0;
60 	uint64_t begin, cycles;
62 	/* Register for report QS */
63 	rte_rcu_qsbr_thread_register(t[0], thread_id);
64 	/* Make the thread online */
65 	rte_rcu_qsbr_thread_online(t[0], thread_id);
67 	begin = rte_rdtsc_precise();
70 	while (!writer_done) {
71 		/* Update quiescent state counter */
72 		rte_rcu_qsbr_quiescent(t[0], thread_id);
76 	while (loop_cnt < 100000000) {
77 		/* Update quiescent state counter */
78 		rte_rcu_qsbr_quiescent(t[0], thread_id);
	/* Fold this thread's measurements into the global totals */
83 	cycles = rte_rdtsc_precise() - begin;
84 	__atomic_fetch_add(&update_cycles, cycles, __ATOMIC_RELAXED);
85 	__atomic_fetch_add(&updates, loop_cnt, __ATOMIC_RELAXED);
87 	/* Make the thread offline */
88 	rte_rcu_qsbr_thread_offline(t[0], thread_id);
89 	/* Unregister before exiting to avoid writer from waiting */
90 	rte_rcu_qsbr_thread_unregister(t[0], thread_id);
/* Writer worker launched on a remote lcore.
 * Runs 20000000 start/check rounds against QS variable t[0]; arg selects
 * whether rte_rcu_qsbr_check() blocks ('wait' flag). Cycle and check
 * totals are folded into the global counters.
 * NOTE(review): loop_cnt increment and the return are on lines not visible
 * in this view.
 */
96 test_rcu_qsbr_writer_perf(void *arg)
98 	bool wait = (bool)arg;
100 	uint64_t loop_cnt = 0;
101 	uint64_t begin, cycles;
103 	begin = rte_rdtsc_precise();
106 		/* Start the quiescent state query process */
108 		token = rte_rcu_qsbr_start(t[0]);
110 		/* Check quiescent state status */
111 		rte_rcu_qsbr_check(t[0], token, wait);
113 	} while (loop_cnt < 20000000);
	/* Fold this thread's measurements into the global totals */
115 	cycles = rte_rdtsc_precise() - begin;
116 	__atomic_fetch_add(&check_cycles, cycles, __ATOMIC_RELAXED);
117 	__atomic_fetch_add(&checks, loop_cnt, __ATOMIC_RELAXED);
122  * Perf test: Reader/writer
123  * Single writer, Multiple Readers, Single QS var, Non-Blocking rcu_qsbr_check
/* Resets the global counters, sizes/initializes t[0] (for all worker cores
 * when every reader registers, else for RTE_MAX_LCORE), launches
 * num_cores-1 readers plus one blocking writer, waits for completion and
 * prints totals scaled by RCU_SCALE_DOWN.
 * NOTE(review): the writer_done store, rte_free of t[0] and the return are
 * on lines not visible in this view; rte_zmalloc's result is used without a
 * visible NULL check.
 */
126 test_rcu_qsbr_perf(void)
129 	unsigned int i, tmp_num_cores;
	/* Reset aggregate counters from any previous test */
133 	__atomic_store_n(&updates, 0, __ATOMIC_RELAXED);
134 	__atomic_store_n(&update_cycles, 0, __ATOMIC_RELAXED);
135 	__atomic_store_n(&checks, 0, __ATOMIC_RELAXED);
136 	__atomic_store_n(&check_cycles, 0, __ATOMIC_RELAXED);
138 	printf("\nPerf Test: %d Readers/1 Writer('wait' in qsbr_check == true)\n",
	/* Restart thread-id allocation for this test */
141 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
143 	if (all_registered == 1)
144 		tmp_num_cores = num_cores - 1;
146 		tmp_num_cores = RTE_MAX_LCORE;
148 	sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
149 	t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
150 			RTE_CACHE_LINE_SIZE);
151 	/* QS variable is initialized */
152 	rte_rcu_qsbr_init(t[0], tmp_num_cores);
154 	/* Reader threads are launched */
155 	for (i = 0; i < num_cores - 1; i++)
156 		rte_eal_remote_launch(test_rcu_qsbr_reader_perf, (void *)1,
157 					enabled_core_ids[i]);
159 	/* Writer thread is launched */
160 	rte_eal_remote_launch(test_rcu_qsbr_writer_perf,
161 			      (void *)1, enabled_core_ids[i]);
163 	/* Wait for the writer thread */
164 	rte_eal_wait_lcore(enabled_core_ids[i]);
167 	/* Wait until all readers have exited */
168 	rte_eal_mp_wait_lcore();
170 	printf("Total quiescent state updates = %"PRIi64"\n",
171 		__atomic_load_n(&updates, __ATOMIC_RELAXED));
172 	printf("Cycles per %d quiescent state updates: %"PRIi64"\n",
174 		__atomic_load_n(&update_cycles, __ATOMIC_RELAXED) /
175 		(__atomic_load_n(&updates, __ATOMIC_RELAXED) / RCU_SCALE_DOWN));
176 	printf("Total RCU checks = %"PRIi64"\n", __atomic_load_n(&checks, __ATOMIC_RELAXED));
177 	printf("Cycles per %d checks: %"PRIi64"\n", RCU_SCALE_DOWN,
178 		__atomic_load_n(&check_cycles, __ATOMIC_RELAXED) /
179 		(__atomic_load_n(&checks, __ATOMIC_RELAXED) / RCU_SCALE_DOWN));
188  * Single writer, Multiple readers, Single QS variable
/* Reader-only perf test: resets update counters, initializes t[0] and
 * launches all worker cores as readers (arg NULL => bounded-iteration mode
 * in test_rcu_qsbr_reader_perf), waits for them, and prints scaled results.
 * NOTE(review): rte_free of t[0] and the return are on lines not visible in
 * this view.
 */
191 test_rcu_qsbr_rperf(void)
194 	unsigned int i, tmp_num_cores;
196 	__atomic_store_n(&updates, 0, __ATOMIC_RELAXED);
197 	__atomic_store_n(&update_cycles, 0, __ATOMIC_RELAXED);
	/* Restart thread-id allocation for this test */
199 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
201 	printf("\nPerf Test: %d Readers\n", num_cores);
203 	if (all_registered == 1)
204 		tmp_num_cores = num_cores;
206 		tmp_num_cores = RTE_MAX_LCORE;
208 	sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
209 	t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
210 						RTE_CACHE_LINE_SIZE);
211 	/* QS variable is initialized */
212 	rte_rcu_qsbr_init(t[0], tmp_num_cores);
214 	/* Reader threads are launched */
215 	for (i = 0; i < num_cores; i++)
216 		rte_eal_remote_launch(test_rcu_qsbr_reader_perf, NULL,
217 					enabled_core_ids[i]);
219 	/* Wait until all readers have exited */
220 	rte_eal_mp_wait_lcore();
222 	printf("Total quiescent state updates = %"PRIi64"\n",
223 		__atomic_load_n(&updates, __ATOMIC_RELAXED));
224 	printf("Cycles per %d quiescent state updates: %"PRIi64"\n",
226 		__atomic_load_n(&update_cycles, __ATOMIC_RELAXED) /
227 		(__atomic_load_n(&updates, __ATOMIC_RELAXED) / RCU_SCALE_DOWN));
236  * Multiple writer, Single QS variable, Non-blocking rcu_qsbr_check
/* Writer-only perf test: initializes t[0] for RTE_MAX_LCORE (reader count
 * is irrelevant — none register), launches every worker core as a
 * non-blocking writer (arg 0 => wait == false), waits and prints scaled
 * check results.
 * NOTE(review): rte_free of t[0] and the return are on lines not visible in
 * this view.
 */
239 test_rcu_qsbr_wperf(void)
244 	__atomic_store_n(&checks, 0, __ATOMIC_RELAXED);
245 	__atomic_store_n(&check_cycles, 0, __ATOMIC_RELAXED);
	/* Restart thread-id allocation for this test */
247 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
249 	printf("\nPerf test: %d Writers ('wait' in qsbr_check == false)\n",
252 	/* Number of readers does not matter for QS variable in this test
253 	 * case as no reader will be registered.
255 	sz = rte_rcu_qsbr_get_memsize(RTE_MAX_LCORE);
256 	t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
257 						RTE_CACHE_LINE_SIZE);
258 	/* QS variable is initialized */
259 	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);
261 	/* Writer threads are launched */
262 	for (i = 0; i < num_cores; i++)
263 		rte_eal_remote_launch(test_rcu_qsbr_writer_perf,
264 				(void *)0, enabled_core_ids[i]);
266 	/* Wait until all readers have exited */
267 	rte_eal_mp_wait_lcore();
269 	printf("Total RCU checks = %"PRIi64"\n", __atomic_load_n(&checks, __ATOMIC_RELAXED));
270 	printf("Cycles per %d checks: %"PRIi64"\n", RCU_SCALE_DOWN,
271 		__atomic_load_n(&check_cycles, __ATOMIC_RELAXED) /
272 		(__atomic_load_n(&checks, __ATOMIC_RELAXED) / RCU_SCALE_DOWN));
280  * RCU test cases using rte_hash data structure.
/* Hash-reader worker: registers on a QS variable, then until writer_done it
 * goes online, walks all TOTAL_ENTRY keys under rcu lock, and for each key
 * still present spins its per-thread counter in the entry's payload up to
 * COUNTER_VALUE before unlocking; it reports quiescent state and offlines
 * each pass. Totals are folded into the global counters before unregister.
 * NOTE(review): the assignments to temp/hash, the counter-increment line,
 * loop_cnt updates and the return are on lines not visible in this view.
 */
283 test_rcu_qsbr_hash_reader(void *arg)
285 	struct rte_rcu_qsbr *temp;
286 	struct rte_hash *hash = NULL;
288 	uint64_t loop_cnt = 0;
289 	uint64_t begin, cycles;
290 	uint32_t thread_id = alloc_thread_id();
291 	uint8_t read_type = (uint8_t)((uintptr_t)arg);
297 	rte_rcu_qsbr_thread_register(temp, thread_id);
299 	begin = rte_rdtsc_precise();
302 		rte_rcu_qsbr_thread_online(temp, thread_id);
303 		for (i = 0; i < TOTAL_ENTRY; i++) {
304 			rte_rcu_qsbr_lock(temp, thread_id);
305 			if (rte_hash_lookup_data(hash, keys + i,
306 					(void **)&pdata) != -ENOENT) {
307 				pdata[thread_id] = 0;
308 				while (pdata[thread_id] < COUNTER_VALUE)
311 			rte_rcu_qsbr_unlock(temp, thread_id);
313 		/* Update quiescent state counter */
314 		rte_rcu_qsbr_quiescent(temp, thread_id);
315 		rte_rcu_qsbr_thread_offline(temp, thread_id);
317 	} while (!writer_done);
	/* Fold this thread's measurements into the global totals */
319 	cycles = rte_rdtsc_precise() - begin;
320 	__atomic_fetch_add(&update_cycles, cycles, __ATOMIC_RELAXED);
321 	__atomic_fetch_add(&updates, loop_cnt, __ATOMIC_RELAXED);
323 	rte_rcu_qsbr_thread_unregister(temp, thread_id);
/* Creates the shared rte_hash (lock-free RW concurrency) and populates it:
 * allocates a zeroed per-lcore counter array for every entry (hash_data[i]),
 * allocates and fills the keys array, then inserts all TOTAL_ENTRY keys with
 * hash_data[i] as the value.
 * NOTE(review): the error-path cleanup/returns, key initialization line and
 * the final return of the hash handle are on lines not visible in this view.
 */
328 static struct rte_hash *init_hash(void)
331 	struct rte_hash *hash = NULL;
333 	snprintf(hash_name, 8, "hash");
334 	struct rte_hash_parameters hash_params = {
335 		.entries = TOTAL_ENTRY,
336 		.key_len = sizeof(uint32_t),
337 		.hash_func_init_val = 0,
338 		.socket_id = rte_socket_id(),
339 		.hash_func = rte_hash_crc,
	/* Lock-free read/write concurrency lets readers run while the writer deletes */
341 			RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF,
345 	hash = rte_hash_create(&hash_params);
347 		printf("Hash create Failed\n");
351 	for (i = 0; i < TOTAL_ENTRY; i++) {
352 		hash_data[i] = rte_zmalloc(NULL,
353 				sizeof(uint32_t) * RTE_MAX_LCORE, 0);
354 		if (hash_data[i] == NULL) {
355 			printf("No memory\n");
359 	keys = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0);
361 		printf("No memory\n");
365 	for (i = 0; i < TOTAL_ENTRY; i++)
368 	for (i = 0; i < TOTAL_ENTRY; i++) {
369 		if (rte_hash_add_key_data(hash, keys + i,
370 				(void *)((uintptr_t)hash_data[i])) < 0) {
371 			printf("Hash key add Failed #%d\n", i);
380  * Single writer, Single QS variable Single QSBR query, Blocking rcu_qsbr_check
/* Writer deletes every key from the shared hash; after each delete it does
 * one blocking start/check round on t[0], verifies each reader's per-entry
 * counter is either 0 (reader never saw the key) or COUNTER_VALUE (reader
 * finished), frees the key slot and the entry payload. Prints per-operation
 * cycle costs at the end.
 * NOTE(review): the writer_done store, error gotos/returns, and cleanup of
 * t[0]/keys are on lines not visible in this view; L256-L259 appear to be
 * part of the error-exit path.
 */
383 test_rcu_qsbr_sw_sv_1qs(void)
385 	uint64_t token, begin, cycles;
387 	unsigned int i, j, tmp_num_cores;
392 	__atomic_store_n(&updates, 0, __ATOMIC_RELAXED);
393 	__atomic_store_n(&update_cycles, 0, __ATOMIC_RELAXED);
394 	__atomic_store_n(&checks, 0, __ATOMIC_RELAXED);
395 	__atomic_store_n(&check_cycles, 0, __ATOMIC_RELAXED);
	/* Restart thread-id allocation for this test */
397 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
399 	printf("\nPerf test: 1 writer, %d readers, 1 QSBR variable, 1 QSBR Query, Blocking QSBR Check\n", num_cores);
401 	if (all_registered == 1)
402 		tmp_num_cores = num_cores;
404 		tmp_num_cores = RTE_MAX_LCORE;
406 	sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
407 	t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
408 						RTE_CACHE_LINE_SIZE);
409 	/* QS variable is initialized */
410 	rte_rcu_qsbr_init(t[0], tmp_num_cores);
412 	/* Shared data structure created */
415 		printf("Hash init failed\n");
419 	/* Reader threads are launched */
420 	for (i = 0; i < num_cores; i++)
421 		rte_eal_remote_launch(test_rcu_qsbr_hash_reader, NULL,
422 					enabled_core_ids[i]);
424 	begin = rte_rdtsc_precise();
426 	for (i = 0; i < TOTAL_ENTRY; i++) {
427 		/* Delete elements from the shared data structure */
428 		pos = rte_hash_del_key(h, keys + i);
430 			printf("Delete key failed #%d\n", keys[i]);
433 		/* Start the quiescent state query process */
434 		token = rte_rcu_qsbr_start(t[0]);
436 		/* Check the quiescent state status */
437 		rte_rcu_qsbr_check(t[0], token, true);
		/* After a successful blocking check no reader can still hold the
		 * entry: each counter must have reached COUNTER_VALUE or never moved */
438 		for (j = 0; j < tmp_num_cores; j++) {
439 			if (hash_data[i][j] != COUNTER_VALUE &&
440 				hash_data[i][j] != 0) {
441 				printf("Reader thread ID %u did not complete #%d = %d\n",
442 					j, i, hash_data[i][j]);
447 		if (rte_hash_free_key_with_position(h, pos) < 0) {
448 			printf("Failed to free the key #%d\n", keys[i]);
451 		rte_free(hash_data[i]);
455 	cycles = rte_rdtsc_precise() - begin;
456 	__atomic_fetch_add(&check_cycles, cycles, __ATOMIC_RELAXED);
457 	__atomic_fetch_add(&checks, i, __ATOMIC_RELAXED);
461 	/* Wait and check return value from reader threads */
462 	for (i = 0; i < num_cores; i++)
463 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
468 	printf("Following numbers include calls to rte_hash functions\n");
469 	printf("Cycles per 1 quiescent state update(online/update/offline): %"PRIi64"\n",
470 		__atomic_load_n(&update_cycles, __ATOMIC_RELAXED) /
471 		__atomic_load_n(&updates, __ATOMIC_RELAXED));
473 	printf("Cycles per 1 check(start, check): %"PRIi64"\n\n",
474 		__atomic_load_n(&check_cycles, __ATOMIC_RELAXED) /
475 		__atomic_load_n(&checks, __ATOMIC_RELAXED));
483 	/* Wait until all readers have exited */
484 	rte_eal_mp_wait_lcore();
488 	for (i = 0; i < TOTAL_ENTRY; i++)
489 		rte_free(hash_data[i]);
498  * Single writer, Single QS variable, Single QSBR query,
499  * Non-blocking rcu_qsbr_check
/* Same writer/reader scenario as test_rcu_qsbr_sw_sv_1qs but the writer
 * polls rte_rcu_qsbr_check() with wait == false (presumably in a retry loop
 * whose closer is not visible here). After the grace period each reader's
 * per-entry counter must be 0 or COUNTER_VALUE before the key slot and
 * payload are freed.
 * NOTE(review): counter resets, the writer_done store, error gotos/returns
 * and cleanup of t[0]/keys are on lines not visible in this view.
 */
502 test_rcu_qsbr_sw_sv_1qs_non_blocking(void)
504 	uint64_t token, begin, cycles;
507 	unsigned int i, j, tmp_num_cores;
512 	printf("Perf test: 1 writer, %d readers, 1 QSBR variable, 1 QSBR Query, Non-Blocking QSBR check\n", num_cores);
	/* Restart thread-id allocation for this test */
514 	__atomic_store_n(&thr_id, 0, __ATOMIC_RELAXED);
516 	if (all_registered == 1)
517 		tmp_num_cores = num_cores;
519 		tmp_num_cores = RTE_MAX_LCORE;
521 	sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
522 	t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
523 						RTE_CACHE_LINE_SIZE);
524 	/* QS variable is initialized */
525 	rte_rcu_qsbr_init(t[0], tmp_num_cores);
527 	/* Shared data structure created */
530 		printf("Hash init failed\n");
534 	/* Reader threads are launched */
535 	for (i = 0; i < num_cores; i++)
536 		rte_eal_remote_launch(test_rcu_qsbr_hash_reader, NULL,
537 					enabled_core_ids[i]);
539 	begin = rte_rdtsc_precise();
541 	for (i = 0; i < TOTAL_ENTRY; i++) {
542 		/* Delete elements from the shared data structure */
543 		pos = rte_hash_del_key(h, keys + i);
545 			printf("Delete key failed #%d\n", keys[i]);
548 		/* Start the quiescent state query process */
549 		token = rte_rcu_qsbr_start(t[0]);
551 		/* Check the quiescent state status */
553 			ret = rte_rcu_qsbr_check(t[0], token, false);
		/* Grace period over: each reader's counter is all-or-nothing */
555 		for (j = 0; j < tmp_num_cores; j++) {
556 			if (hash_data[i][j] != COUNTER_VALUE &&
557 				hash_data[i][j] != 0) {
558 				printf("Reader thread ID %u did not complete #%d = %d\n",
559 					j, i, hash_data[i][j]);
564 		if (rte_hash_free_key_with_position(h, pos) < 0) {
565 			printf("Failed to free the key #%d\n", keys[i]);
568 		rte_free(hash_data[i]);
572 	cycles = rte_rdtsc_precise() - begin;
573 	__atomic_fetch_add(&check_cycles, cycles, __ATOMIC_RELAXED);
574 	__atomic_fetch_add(&checks, i, __ATOMIC_RELAXED);
577 	/* Wait and check return value from reader threads */
578 	for (i = 0; i < num_cores; i++)
579 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
584 	printf("Following numbers include calls to rte_hash functions\n");
585 	printf("Cycles per 1 quiescent state update(online/update/offline): %"PRIi64"\n",
586 		__atomic_load_n(&update_cycles, __ATOMIC_RELAXED) /
587 		__atomic_load_n(&updates, __ATOMIC_RELAXED));
589 	printf("Cycles per 1 check(start, check): %"PRIi64"\n\n",
590 		__atomic_load_n(&check_cycles, __ATOMIC_RELAXED) /
591 		__atomic_load_n(&checks, __ATOMIC_RELAXED));
599 	/* Wait until all readers have exited */
600 	rte_eal_mp_wait_lcore();
604 	for (i = 0; i < TOTAL_ENTRY; i++)
605 		rte_free(hash_data[i]);
/* Test-suite entry point: requires at least 3 lcores, collects the worker
 * lcore ids, then runs the five perf tests twice — once with all reader
 * threads registered on the QS variable and once with only some registered
 * (the all_registered toggle between the passes is on a line not visible in
 * this view). The second pass requires num_cores < RTE_MAX_LCORE so some
 * thread-id slots remain unregistered.
 */
613 test_rcu_qsbr_main(void)
617 	if (rte_lcore_count() < 3) {
618 		printf("Not enough cores for rcu_qsbr_perf_autotest, expecting at least 3\n");
622 	__atomic_store_n(&updates, 0, __ATOMIC_RELAXED);
623 	__atomic_store_n(&update_cycles, 0, __ATOMIC_RELAXED);
624 	__atomic_store_n(&checks, 0, __ATOMIC_RELAXED);
625 	__atomic_store_n(&check_cycles, 0, __ATOMIC_RELAXED);
	/* Record every worker lcore id (num_cores increment not visible here) */
628 	RTE_LCORE_FOREACH_WORKER(core_id) {
629 		enabled_core_ids[num_cores] = core_id;
633 	printf("Number of cores provided = %d\n", num_cores);
634 	printf("Perf test with all reader threads registered\n");
635 	printf("--------------------------------------------\n");
638 	if (test_rcu_qsbr_perf() < 0)
641 	if (test_rcu_qsbr_rperf() < 0)
644 	if (test_rcu_qsbr_wperf() < 0)
647 	if (test_rcu_qsbr_sw_sv_1qs() < 0)
650 	if (test_rcu_qsbr_sw_sv_1qs_non_blocking() < 0)
653 	/* Make sure the actual number of cores provided is less than
654 	 * RTE_MAX_LCORE. This will allow for some threads not
655 	 * to be registered on the QS variable.
657 	if (num_cores >= RTE_MAX_LCORE) {
658 		printf("Test failed! number of cores provided should be less than %d\n",
663 	printf("Perf test with some of reader threads registered\n");
664 	printf("------------------------------------------------\n");
667 	if (test_rcu_qsbr_perf() < 0)
670 	if (test_rcu_qsbr_rperf() < 0)
673 	if (test_rcu_qsbr_wperf() < 0)
676 	if (test_rcu_qsbr_sw_sv_1qs() < 0)
679 	if (test_rcu_qsbr_sw_sv_1qs_non_blocking() < 0)
/* Register the suite with the DPDK test framework under this command name */
690 REGISTER_TEST_COMMAND(rcu_qsbr_perf_autotest, test_rcu_qsbr_main);