test/rcu: fix memory size integer truncate
[dpdk.git] / app / test / test_rcu_qsbr_perf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2018 Arm Limited
3  */
4
5 #include <stdio.h>
6 #include <stdbool.h>
7 #include <inttypes.h>
8 #include <rte_pause.h>
9 #include <rte_rcu_qsbr.h>
10 #include <rte_hash.h>
11 #include <rte_hash_crc.h>
12 #include <rte_malloc.h>
13 #include <rte_cycles.h>
14 #include <unistd.h>
15
16 #include "test.h"
17
18 /* Lcore ids collected by test_rcu_qsbr_main() and how many of them there are. */
19 static uint16_t enabled_core_ids[RTE_MAX_LCORE];
20 static unsigned int num_cores;
21
22 static uint32_t *keys; /* keys stored in the shared hash; init_hash() sets keys[i] = i */
23 #define TOTAL_ENTRY (1024 * 8) /* number of keys added to the shared hash */
24 #define COUNTER_VALUE 4096 /* value a hash reader counts its per-key slot up to */
25 static uint32_t *hash_data[TOTAL_ENTRY]; /* per-key data: RTE_MAX_LCORE counters each */
26 static volatile uint8_t writer_done; /* set to 1 to make looping readers stop */
27 static volatile uint8_t all_registered; /* 1: QS variable sized for the launched readers; 0: for RTE_MAX_LCORE */
28 static volatile uint32_t thr_id; /* next id handed out by alloc_thread_id() */
29
30 static struct rte_rcu_qsbr *t[RTE_MAX_LCORE]; /* QS variables; the tests in this file allocate t[0] */
31 static struct rte_hash *h; /* hash shared by the hash readers and the writer */
32 static char hash_name[8]; /* name passed to rte_hash_create() */
33 static rte_atomic64_t updates, checks; /* totals of reader iterations / writer checks */
34 static rte_atomic64_t update_cycles, check_cycles; /* TSC cycles accumulated for the above */
35
36 /* Scale down results to 1000 operations to support lower
37  * granularity clocks.
38  */
39 #define RCU_SCALE_DOWN 1000
40
41 /* Simple way to allocate thread ids in 0 to RTE_MAX_LCORE space */
42 static inline uint32_t
43 alloc_thread_id(void)
44 {
45         uint32_t tmp_thr_id;
46
47         tmp_thr_id = __atomic_fetch_add(&thr_id, 1, __ATOMIC_RELAXED);
48         if (tmp_thr_id >= RTE_MAX_LCORE)
49                 printf("Invalid thread id %u\n", tmp_thr_id);
50
51         return tmp_thr_id;
52 }
53
54 static int
55 test_rcu_qsbr_reader_perf(void *arg)
56 {
57         bool writer_present = (bool)arg;
58         uint32_t thread_id = alloc_thread_id();
59         uint64_t loop_cnt = 0;
60         uint64_t begin, cycles;
61
62         /* Register for report QS */
63         rte_rcu_qsbr_thread_register(t[0], thread_id);
64         /* Make the thread online */
65         rte_rcu_qsbr_thread_online(t[0], thread_id);
66
67         begin = rte_rdtsc_precise();
68
69         if (writer_present) {
70                 while (!writer_done) {
71                         /* Update quiescent state counter */
72                         rte_rcu_qsbr_quiescent(t[0], thread_id);
73                         loop_cnt++;
74                 }
75         } else {
76                 while (loop_cnt < 100000000) {
77                         /* Update quiescent state counter */
78                         rte_rcu_qsbr_quiescent(t[0], thread_id);
79                         loop_cnt++;
80                 }
81         }
82
83         cycles = rte_rdtsc_precise() - begin;
84         rte_atomic64_add(&update_cycles, cycles);
85         rte_atomic64_add(&updates, loop_cnt);
86
87         /* Make the thread offline */
88         rte_rcu_qsbr_thread_offline(t[0], thread_id);
89         /* Unregister before exiting to avoid writer from waiting */
90         rte_rcu_qsbr_thread_unregister(t[0], thread_id);
91
92         return 0;
93 }
94
95 static int
96 test_rcu_qsbr_writer_perf(void *arg)
97 {
98         bool wait = (bool)arg;
99         uint64_t token = 0;
100         uint64_t loop_cnt = 0;
101         uint64_t begin, cycles;
102
103         begin = rte_rdtsc_precise();
104
105         do {
106                 /* Start the quiescent state query process */
107                 if (wait)
108                         token = rte_rcu_qsbr_start(t[0]);
109
110                 /* Check quiescent state status */
111                 rte_rcu_qsbr_check(t[0], token, wait);
112                 loop_cnt++;
113         } while (loop_cnt < 20000000);
114
115         cycles = rte_rdtsc_precise() - begin;
116         rte_atomic64_add(&check_cycles, cycles);
117         rte_atomic64_add(&checks, loop_cnt);
118         return 0;
119 }
120
121 /*
122  * Perf test: Reader/writer
123  * Single writer, Multiple Readers, Single QS var, Non-Blocking rcu_qsbr_check
124  */
125 static int
126 test_rcu_qsbr_perf(void)
127 {
128         size_t sz;
129         unsigned int i, tmp_num_cores;
130
131         writer_done = 0;
132
133         rte_atomic64_clear(&updates);
134         rte_atomic64_clear(&update_cycles);
135         rte_atomic64_clear(&checks);
136         rte_atomic64_clear(&check_cycles);
137
138         printf("\nPerf Test: %d Readers/1 Writer('wait' in qsbr_check == true)\n",
139                 num_cores - 1);
140
141         __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
142
143         if (all_registered == 1)
144                 tmp_num_cores = num_cores - 1;
145         else
146                 tmp_num_cores = RTE_MAX_LCORE;
147
148         sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
149         t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
150                                                 RTE_CACHE_LINE_SIZE);
151         /* QS variable is initialized */
152         rte_rcu_qsbr_init(t[0], tmp_num_cores);
153
154         /* Reader threads are launched */
155         for (i = 0; i < num_cores - 1; i++)
156                 rte_eal_remote_launch(test_rcu_qsbr_reader_perf, (void *)1,
157                                         enabled_core_ids[i]);
158
159         /* Writer thread is launched */
160         rte_eal_remote_launch(test_rcu_qsbr_writer_perf,
161                               (void *)1, enabled_core_ids[i]);
162
163         /* Wait for the writer thread */
164         rte_eal_wait_lcore(enabled_core_ids[i]);
165         writer_done = 1;
166
167         /* Wait until all readers have exited */
168         rte_eal_mp_wait_lcore();
169
170         printf("Total RCU updates = %"PRIi64"\n", rte_atomic64_read(&updates));
171         printf("Cycles per %d updates: %"PRIi64"\n", RCU_SCALE_DOWN,
172                 rte_atomic64_read(&update_cycles) /
173                 (rte_atomic64_read(&updates) / RCU_SCALE_DOWN));
174         printf("Total RCU checks = %"PRIi64"\n", rte_atomic64_read(&checks));
175         printf("Cycles per %d checks: %"PRIi64"\n", RCU_SCALE_DOWN,
176                 rte_atomic64_read(&check_cycles) /
177                 (rte_atomic64_read(&checks) / RCU_SCALE_DOWN));
178
179         rte_free(t[0]);
180
181         return 0;
182 }
183
184 /*
185  * Perf test: Readers
186  * Single writer, Multiple readers, Single QS variable
187  */
188 static int
189 test_rcu_qsbr_rperf(void)
190 {
191         size_t sz;
192         unsigned int i, tmp_num_cores;
193
194         rte_atomic64_clear(&updates);
195         rte_atomic64_clear(&update_cycles);
196
197         __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
198
199         printf("\nPerf Test: %d Readers\n", num_cores);
200
201         if (all_registered == 1)
202                 tmp_num_cores = num_cores;
203         else
204                 tmp_num_cores = RTE_MAX_LCORE;
205
206         sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
207         t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
208                                                 RTE_CACHE_LINE_SIZE);
209         /* QS variable is initialized */
210         rte_rcu_qsbr_init(t[0], tmp_num_cores);
211
212         /* Reader threads are launched */
213         for (i = 0; i < num_cores; i++)
214                 rte_eal_remote_launch(test_rcu_qsbr_reader_perf, NULL,
215                                         enabled_core_ids[i]);
216
217         /* Wait until all readers have exited */
218         rte_eal_mp_wait_lcore();
219
220         printf("Total RCU updates = %"PRIi64"\n", rte_atomic64_read(&updates));
221         printf("Cycles per %d updates: %"PRIi64"\n", RCU_SCALE_DOWN,
222                 rte_atomic64_read(&update_cycles) /
223                 (rte_atomic64_read(&updates) / RCU_SCALE_DOWN));
224
225         rte_free(t[0]);
226
227         return 0;
228 }
229
230 /*
231  * Perf test:
232  * Multiple writer, Single QS variable, Non-blocking rcu_qsbr_check
233  */
234 static int
235 test_rcu_qsbr_wperf(void)
236 {
237         size_t sz;
238         unsigned int i;
239
240         rte_atomic64_clear(&checks);
241         rte_atomic64_clear(&check_cycles);
242
243         __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
244
245         printf("\nPerf test: %d Writers ('wait' in qsbr_check == false)\n",
246                 num_cores);
247
248         /* Number of readers does not matter for QS variable in this test
249          * case as no reader will be registered.
250          */
251         sz = rte_rcu_qsbr_get_memsize(RTE_MAX_LCORE);
252         t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
253                                                 RTE_CACHE_LINE_SIZE);
254         /* QS variable is initialized */
255         rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);
256
257         /* Writer threads are launched */
258         for (i = 0; i < num_cores; i++)
259                 rte_eal_remote_launch(test_rcu_qsbr_writer_perf,
260                                 (void *)0, enabled_core_ids[i]);
261
262         /* Wait until all readers have exited */
263         rte_eal_mp_wait_lcore();
264
265         printf("Total RCU checks = %"PRIi64"\n", rte_atomic64_read(&checks));
266         printf("Cycles per %d checks: %"PRIi64"\n", RCU_SCALE_DOWN,
267                 rte_atomic64_read(&check_cycles) /
268                 (rte_atomic64_read(&checks) / RCU_SCALE_DOWN));
269
270         rte_free(t[0]);
271
272         return 0;
273 }
274
275 /*
276  * RCU test cases using rte_hash data structure.
277  */
278 static int
279 test_rcu_qsbr_hash_reader(void *arg)
280 {
281         struct rte_rcu_qsbr *temp;
282         struct rte_hash *hash = NULL;
283         int i;
284         uint64_t loop_cnt = 0;
285         uint64_t begin, cycles;
286         uint32_t thread_id = alloc_thread_id();
287         uint8_t read_type = (uint8_t)((uintptr_t)arg);
288         uint32_t *pdata;
289
290         temp = t[read_type];
291         hash = h;
292
293         rte_rcu_qsbr_thread_register(temp, thread_id);
294
295         begin = rte_rdtsc_precise();
296
297         do {
298                 rte_rcu_qsbr_thread_online(temp, thread_id);
299                 for (i = 0; i < TOTAL_ENTRY; i++) {
300                         rte_rcu_qsbr_lock(temp, thread_id);
301                         if (rte_hash_lookup_data(hash, keys + i,
302                                         (void **)&pdata) != -ENOENT) {
303                                 pdata[thread_id] = 0;
304                                 while (pdata[thread_id] < COUNTER_VALUE)
305                                         pdata[thread_id]++;
306                         }
307                         rte_rcu_qsbr_unlock(temp, thread_id);
308                 }
309                 /* Update quiescent state counter */
310                 rte_rcu_qsbr_quiescent(temp, thread_id);
311                 rte_rcu_qsbr_thread_offline(temp, thread_id);
312                 loop_cnt++;
313         } while (!writer_done);
314
315         cycles = rte_rdtsc_precise() - begin;
316         rte_atomic64_add(&update_cycles, cycles);
317         rte_atomic64_add(&updates, loop_cnt);
318
319         rte_rcu_qsbr_thread_unregister(temp, thread_id);
320
321         return 0;
322 }
323
324 static struct rte_hash *init_hash(void)
325 {
326         int i;
327         struct rte_hash *hash = NULL;
328
329         snprintf(hash_name, 8, "hash");
330         struct rte_hash_parameters hash_params = {
331                 .entries = TOTAL_ENTRY,
332                 .key_len = sizeof(uint32_t),
333                 .hash_func_init_val = 0,
334                 .socket_id = rte_socket_id(),
335                 .hash_func = rte_hash_crc,
336                 .extra_flag =
337                         RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF,
338                 .name = hash_name,
339         };
340
341         hash = rte_hash_create(&hash_params);
342         if (hash == NULL) {
343                 printf("Hash create Failed\n");
344                 return NULL;
345         }
346
347         for (i = 0; i < TOTAL_ENTRY; i++) {
348                 hash_data[i] = rte_zmalloc(NULL,
349                                 sizeof(uint32_t) * RTE_MAX_LCORE, 0);
350                 if (hash_data[i] == NULL) {
351                         printf("No memory\n");
352                         return NULL;
353                 }
354         }
355         keys = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0);
356         if (keys == NULL) {
357                 printf("No memory\n");
358                 return NULL;
359         }
360
361         for (i = 0; i < TOTAL_ENTRY; i++)
362                 keys[i] = i;
363
364         for (i = 0; i < TOTAL_ENTRY; i++) {
365                 if (rte_hash_add_key_data(hash, keys + i,
366                                 (void *)((uintptr_t)hash_data[i])) < 0) {
367                         printf("Hash key add Failed #%d\n", i);
368                         return NULL;
369                 }
370         }
371         return hash;
372 }
373
374 /*
375  * Functional test:
376  * Single writer, Single QS variable Single QSBR query, Blocking rcu_qsbr_check
377  */
378 static int
379 test_rcu_qsbr_sw_sv_1qs(void)
380 {
381         uint64_t token, begin, cycles;
382         size_t sz;
383         unsigned int i, j, tmp_num_cores;
384         int32_t pos;
385
386         writer_done = 0;
387
388         rte_atomic64_clear(&updates);
389         rte_atomic64_clear(&update_cycles);
390         rte_atomic64_clear(&checks);
391         rte_atomic64_clear(&check_cycles);
392
393         __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
394
395         printf("\nPerf test: 1 writer, %d readers, 1 QSBR variable, 1 QSBR Query, Blocking QSBR Check\n", num_cores);
396
397         if (all_registered == 1)
398                 tmp_num_cores = num_cores;
399         else
400                 tmp_num_cores = RTE_MAX_LCORE;
401
402         sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
403         t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
404                                                 RTE_CACHE_LINE_SIZE);
405         /* QS variable is initialized */
406         rte_rcu_qsbr_init(t[0], tmp_num_cores);
407
408         /* Shared data structure created */
409         h = init_hash();
410         if (h == NULL) {
411                 printf("Hash init failed\n");
412                 goto error;
413         }
414
415         /* Reader threads are launched */
416         for (i = 0; i < num_cores; i++)
417                 rte_eal_remote_launch(test_rcu_qsbr_hash_reader, NULL,
418                                         enabled_core_ids[i]);
419
420         begin = rte_rdtsc_precise();
421
422         for (i = 0; i < TOTAL_ENTRY; i++) {
423                 /* Delete elements from the shared data structure */
424                 pos = rte_hash_del_key(h, keys + i);
425                 if (pos < 0) {
426                         printf("Delete key failed #%d\n", keys[i]);
427                         goto error;
428                 }
429                 /* Start the quiescent state query process */
430                 token = rte_rcu_qsbr_start(t[0]);
431
432                 /* Check the quiescent state status */
433                 rte_rcu_qsbr_check(t[0], token, true);
434                 for (j = 0; j < tmp_num_cores; j++) {
435                         if (hash_data[i][j] != COUNTER_VALUE &&
436                                 hash_data[i][j] != 0) {
437                                 printf("Reader thread ID %u did not complete #%d =  %d\n",
438                                         j, i, hash_data[i][j]);
439                                 goto error;
440                         }
441                 }
442
443                 if (rte_hash_free_key_with_position(h, pos) < 0) {
444                         printf("Failed to free the key #%d\n", keys[i]);
445                         goto error;
446                 }
447                 rte_free(hash_data[i]);
448                 hash_data[i] = NULL;
449         }
450
451         cycles = rte_rdtsc_precise() - begin;
452         rte_atomic64_add(&check_cycles, cycles);
453         rte_atomic64_add(&checks, i);
454
455         writer_done = 1;
456
457         /* Wait and check return value from reader threads */
458         for (i = 0; i < num_cores; i++)
459                 if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
460                         goto error;
461         rte_hash_free(h);
462         rte_free(keys);
463
464         printf("Following numbers include calls to rte_hash functions\n");
465         printf("Cycles per 1 update(online/update/offline): %"PRIi64"\n",
466                 rte_atomic64_read(&update_cycles) /
467                 rte_atomic64_read(&updates));
468
469         printf("Cycles per 1 check(start, check): %"PRIi64"\n\n",
470                 rte_atomic64_read(&check_cycles) /
471                 rte_atomic64_read(&checks));
472
473         rte_free(t[0]);
474
475         return 0;
476
477 error:
478         writer_done = 1;
479         /* Wait until all readers have exited */
480         rte_eal_mp_wait_lcore();
481
482         rte_hash_free(h);
483         rte_free(keys);
484         for (i = 0; i < TOTAL_ENTRY; i++)
485                 rte_free(hash_data[i]);
486
487         rte_free(t[0]);
488
489         return -1;
490 }
491
492 /*
493  * Functional test:
494  * Single writer, Single QS variable, Single QSBR query,
495  * Non-blocking rcu_qsbr_check
496  */
497 static int
498 test_rcu_qsbr_sw_sv_1qs_non_blocking(void)
499 {
500         uint64_t token, begin, cycles;
501         int ret;
502         size_t sz;
503         unsigned int i, j, tmp_num_cores;
504         int32_t pos;
505
506         writer_done = 0;
507
508         printf("Perf test: 1 writer, %d readers, 1 QSBR variable, 1 QSBR Query, Non-Blocking QSBR check\n", num_cores);
509
510         __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
511
512         if (all_registered == 1)
513                 tmp_num_cores = num_cores;
514         else
515                 tmp_num_cores = RTE_MAX_LCORE;
516
517         sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
518         t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
519                                                 RTE_CACHE_LINE_SIZE);
520         /* QS variable is initialized */
521         rte_rcu_qsbr_init(t[0], tmp_num_cores);
522
523         /* Shared data structure created */
524         h = init_hash();
525         if (h == NULL) {
526                 printf("Hash init failed\n");
527                 goto error;
528         }
529
530         /* Reader threads are launched */
531         for (i = 0; i < num_cores; i++)
532                 rte_eal_remote_launch(test_rcu_qsbr_hash_reader, NULL,
533                                         enabled_core_ids[i]);
534
535         begin = rte_rdtsc_precise();
536
537         for (i = 0; i < TOTAL_ENTRY; i++) {
538                 /* Delete elements from the shared data structure */
539                 pos = rte_hash_del_key(h, keys + i);
540                 if (pos < 0) {
541                         printf("Delete key failed #%d\n", keys[i]);
542                         goto error;
543                 }
544                 /* Start the quiescent state query process */
545                 token = rte_rcu_qsbr_start(t[0]);
546
547                 /* Check the quiescent state status */
548                 do {
549                         ret = rte_rcu_qsbr_check(t[0], token, false);
550                 } while (ret == 0);
551                 for (j = 0; j < tmp_num_cores; j++) {
552                         if (hash_data[i][j] != COUNTER_VALUE &&
553                                 hash_data[i][j] != 0) {
554                                 printf("Reader thread ID %u did not complete #%d =  %d\n",
555                                         j, i, hash_data[i][j]);
556                                 goto error;
557                         }
558                 }
559
560                 if (rte_hash_free_key_with_position(h, pos) < 0) {
561                         printf("Failed to free the key #%d\n", keys[i]);
562                         goto error;
563                 }
564                 rte_free(hash_data[i]);
565                 hash_data[i] = NULL;
566         }
567
568         cycles = rte_rdtsc_precise() - begin;
569         rte_atomic64_add(&check_cycles, cycles);
570         rte_atomic64_add(&checks, i);
571
572         writer_done = 1;
573         /* Wait and check return value from reader threads */
574         for (i = 0; i < num_cores; i++)
575                 if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
576                         goto error;
577         rte_hash_free(h);
578         rte_free(keys);
579
580         printf("Following numbers include calls to rte_hash functions\n");
581         printf("Cycles per 1 update(online/update/offline): %"PRIi64"\n",
582                 rte_atomic64_read(&update_cycles) /
583                 rte_atomic64_read(&updates));
584
585         printf("Cycles per 1 check(start, check): %"PRIi64"\n\n",
586                 rte_atomic64_read(&check_cycles) /
587                 rte_atomic64_read(&checks));
588
589         rte_free(t[0]);
590
591         return 0;
592
593 error:
594         writer_done = 1;
595         /* Wait until all readers have exited */
596         rte_eal_mp_wait_lcore();
597
598         rte_hash_free(h);
599         rte_free(keys);
600         for (i = 0; i < TOTAL_ENTRY; i++)
601                 rte_free(hash_data[i]);
602
603         rte_free(t[0]);
604
605         return -1;
606 }
607
608 static int
609 test_rcu_qsbr_main(void)
610 {
611         uint16_t core_id;
612
613         if (rte_lcore_count() < 3) {
614                 printf("Not enough cores for rcu_qsbr_perf_autotest, expecting at least 3\n");
615                 return TEST_SKIPPED;
616         }
617
618         rte_atomic64_init(&updates);
619         rte_atomic64_init(&update_cycles);
620         rte_atomic64_init(&checks);
621         rte_atomic64_init(&check_cycles);
622
623         num_cores = 0;
624         RTE_LCORE_FOREACH_SLAVE(core_id) {
625                 enabled_core_ids[num_cores] = core_id;
626                 num_cores++;
627         }
628
629         printf("Number of cores provided = %d\n", num_cores);
630         printf("Perf test with all reader threads registered\n");
631         printf("--------------------------------------------\n");
632         all_registered = 1;
633
634         if (test_rcu_qsbr_perf() < 0)
635                 goto test_fail;
636
637         if (test_rcu_qsbr_rperf() < 0)
638                 goto test_fail;
639
640         if (test_rcu_qsbr_wperf() < 0)
641                 goto test_fail;
642
643         if (test_rcu_qsbr_sw_sv_1qs() < 0)
644                 goto test_fail;
645
646         if (test_rcu_qsbr_sw_sv_1qs_non_blocking() < 0)
647                 goto test_fail;
648
649         /* Make sure the actual number of cores provided is less than
650          * RTE_MAX_LCORE. This will allow for some threads not
651          * to be registered on the QS variable.
652          */
653         if (num_cores >= RTE_MAX_LCORE) {
654                 printf("Test failed! number of cores provided should be less than %d\n",
655                         RTE_MAX_LCORE);
656                 goto test_fail;
657         }
658
659         printf("Perf test with some of reader threads registered\n");
660         printf("------------------------------------------------\n");
661         all_registered = 0;
662
663         if (test_rcu_qsbr_perf() < 0)
664                 goto test_fail;
665
666         if (test_rcu_qsbr_rperf() < 0)
667                 goto test_fail;
668
669         if (test_rcu_qsbr_wperf() < 0)
670                 goto test_fail;
671
672         if (test_rcu_qsbr_sw_sv_1qs() < 0)
673                 goto test_fail;
674
675         if (test_rcu_qsbr_sw_sv_1qs_non_blocking() < 0)
676                 goto test_fail;
677
678         printf("\n");
679
680         return 0;
681
682 test_fail:
683         return -1;
684 }
685
686 REGISTER_TEST_COMMAND(rcu_qsbr_perf_autotest, test_rcu_qsbr_main);