test/mbuf: add unit test cases
[dpdk.git] / app / test / test_rcu_qsbr_perf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2018 Arm Limited
3  */
4
5 #include <stdio.h>
6 #include <stdbool.h>
7 #include <inttypes.h>
8 #include <rte_pause.h>
9 #include <rte_rcu_qsbr.h>
10 #include <rte_hash.h>
11 #include <rte_hash_crc.h>
12 #include <rte_malloc.h>
13 #include <rte_cycles.h>
14 #include <unistd.h>
15
16 #include "test.h"
17
18 /* Check condition and return an error if true. */
19 static uint16_t enabled_core_ids[RTE_MAX_LCORE];
20 static unsigned int num_cores;
21
22 static uint32_t *keys;
23 #define TOTAL_ENTRY (1024 * 8)
24 #define COUNTER_VALUE 4096
25 static uint32_t *hash_data[TOTAL_ENTRY];
26 static volatile uint8_t writer_done;
27 static volatile uint8_t all_registered;
28 static volatile uint32_t thr_id;
29
30 static struct rte_rcu_qsbr *t[RTE_MAX_LCORE];
31 static struct rte_hash *h;
32 static char hash_name[8];
33 static rte_atomic64_t updates, checks;
34 static rte_atomic64_t update_cycles, check_cycles;
35
36 /* Scale down results to 1000 operations to support lower
37  * granularity clocks.
38  */
39 #define RCU_SCALE_DOWN 1000
40
41 /* Simple way to allocate thread ids in 0 to RTE_MAX_LCORE space */
42 static inline uint32_t
43 alloc_thread_id(void)
44 {
45         uint32_t tmp_thr_id;
46
47         tmp_thr_id = __atomic_fetch_add(&thr_id, 1, __ATOMIC_RELAXED);
48         if (tmp_thr_id >= RTE_MAX_LCORE)
49                 printf("Invalid thread id %u\n", tmp_thr_id);
50
51         return tmp_thr_id;
52 }
53
54 static int
55 test_rcu_qsbr_reader_perf(void *arg)
56 {
57         bool writer_present = (bool)arg;
58         uint32_t thread_id = alloc_thread_id();
59         uint64_t loop_cnt = 0;
60         uint64_t begin, cycles;
61
62         /* Register for report QS */
63         rte_rcu_qsbr_thread_register(t[0], thread_id);
64         /* Make the thread online */
65         rte_rcu_qsbr_thread_online(t[0], thread_id);
66
67         begin = rte_rdtsc_precise();
68
69         if (writer_present) {
70                 while (!writer_done) {
71                         /* Update quiescent state counter */
72                         rte_rcu_qsbr_quiescent(t[0], thread_id);
73                         loop_cnt++;
74                 }
75         } else {
76                 while (loop_cnt < 100000000) {
77                         /* Update quiescent state counter */
78                         rte_rcu_qsbr_quiescent(t[0], thread_id);
79                         loop_cnt++;
80                 }
81         }
82
83         cycles = rte_rdtsc_precise() - begin;
84         rte_atomic64_add(&update_cycles, cycles);
85         rte_atomic64_add(&updates, loop_cnt);
86
87         /* Make the thread offline */
88         rte_rcu_qsbr_thread_offline(t[0], thread_id);
89         /* Unregister before exiting to avoid writer from waiting */
90         rte_rcu_qsbr_thread_unregister(t[0], thread_id);
91
92         return 0;
93 }
94
95 static int
96 test_rcu_qsbr_writer_perf(void *arg)
97 {
98         bool wait = (bool)arg;
99         uint64_t token = 0;
100         uint64_t loop_cnt = 0;
101         uint64_t begin, cycles;
102
103         begin = rte_rdtsc_precise();
104
105         do {
106                 /* Start the quiescent state query process */
107                 if (wait)
108                         token = rte_rcu_qsbr_start(t[0]);
109
110                 /* Check quiescent state status */
111                 rte_rcu_qsbr_check(t[0], token, wait);
112                 loop_cnt++;
113         } while (loop_cnt < 20000000);
114
115         cycles = rte_rdtsc_precise() - begin;
116         rte_atomic64_add(&check_cycles, cycles);
117         rte_atomic64_add(&checks, loop_cnt);
118         return 0;
119 }
120
121 /*
122  * Perf test: Reader/writer
123  * Single writer, Multiple Readers, Single QS var, Non-Blocking rcu_qsbr_check
124  */
125 static int
126 test_rcu_qsbr_perf(void)
127 {
128         size_t sz;
129         unsigned int i, tmp_num_cores;
130
131         writer_done = 0;
132
133         rte_atomic64_clear(&updates);
134         rte_atomic64_clear(&update_cycles);
135         rte_atomic64_clear(&checks);
136         rte_atomic64_clear(&check_cycles);
137
138         printf("\nPerf Test: %d Readers/1 Writer('wait' in qsbr_check == true)\n",
139                 num_cores - 1);
140
141         __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
142
143         if (all_registered == 1)
144                 tmp_num_cores = num_cores - 1;
145         else
146                 tmp_num_cores = RTE_MAX_LCORE;
147
148         sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
149         t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
150                                                 RTE_CACHE_LINE_SIZE);
151         /* QS variable is initialized */
152         rte_rcu_qsbr_init(t[0], tmp_num_cores);
153
154         /* Reader threads are launched */
155         for (i = 0; i < num_cores - 1; i++)
156                 rte_eal_remote_launch(test_rcu_qsbr_reader_perf, (void *)1,
157                                         enabled_core_ids[i]);
158
159         /* Writer thread is launched */
160         rte_eal_remote_launch(test_rcu_qsbr_writer_perf,
161                               (void *)1, enabled_core_ids[i]);
162
163         /* Wait for the writer thread */
164         rte_eal_wait_lcore(enabled_core_ids[i]);
165         writer_done = 1;
166
167         /* Wait until all readers have exited */
168         rte_eal_mp_wait_lcore();
169
170         printf("Total quiescent state updates = %"PRIi64"\n",
171                 rte_atomic64_read(&updates));
172         printf("Cycles per %d quiescent state updates: %"PRIi64"\n",
173                 RCU_SCALE_DOWN,
174                 rte_atomic64_read(&update_cycles) /
175                 (rte_atomic64_read(&updates) / RCU_SCALE_DOWN));
176         printf("Total RCU checks = %"PRIi64"\n", rte_atomic64_read(&checks));
177         printf("Cycles per %d checks: %"PRIi64"\n", RCU_SCALE_DOWN,
178                 rte_atomic64_read(&check_cycles) /
179                 (rte_atomic64_read(&checks) / RCU_SCALE_DOWN));
180
181         rte_free(t[0]);
182
183         return 0;
184 }
185
186 /*
187  * Perf test: Readers
188  * Single writer, Multiple readers, Single QS variable
189  */
190 static int
191 test_rcu_qsbr_rperf(void)
192 {
193         size_t sz;
194         unsigned int i, tmp_num_cores;
195
196         rte_atomic64_clear(&updates);
197         rte_atomic64_clear(&update_cycles);
198
199         __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
200
201         printf("\nPerf Test: %d Readers\n", num_cores);
202
203         if (all_registered == 1)
204                 tmp_num_cores = num_cores;
205         else
206                 tmp_num_cores = RTE_MAX_LCORE;
207
208         sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
209         t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
210                                                 RTE_CACHE_LINE_SIZE);
211         /* QS variable is initialized */
212         rte_rcu_qsbr_init(t[0], tmp_num_cores);
213
214         /* Reader threads are launched */
215         for (i = 0; i < num_cores; i++)
216                 rte_eal_remote_launch(test_rcu_qsbr_reader_perf, NULL,
217                                         enabled_core_ids[i]);
218
219         /* Wait until all readers have exited */
220         rte_eal_mp_wait_lcore();
221
222         printf("Total quiescent state updates = %"PRIi64"\n",
223                 rte_atomic64_read(&updates));
224         printf("Cycles per %d quiescent state updates: %"PRIi64"\n",
225                 RCU_SCALE_DOWN,
226                 rte_atomic64_read(&update_cycles) /
227                 (rte_atomic64_read(&updates) / RCU_SCALE_DOWN));
228
229         rte_free(t[0]);
230
231         return 0;
232 }
233
234 /*
235  * Perf test:
236  * Multiple writer, Single QS variable, Non-blocking rcu_qsbr_check
237  */
238 static int
239 test_rcu_qsbr_wperf(void)
240 {
241         size_t sz;
242         unsigned int i;
243
244         rte_atomic64_clear(&checks);
245         rte_atomic64_clear(&check_cycles);
246
247         __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
248
249         printf("\nPerf test: %d Writers ('wait' in qsbr_check == false)\n",
250                 num_cores);
251
252         /* Number of readers does not matter for QS variable in this test
253          * case as no reader will be registered.
254          */
255         sz = rte_rcu_qsbr_get_memsize(RTE_MAX_LCORE);
256         t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
257                                                 RTE_CACHE_LINE_SIZE);
258         /* QS variable is initialized */
259         rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);
260
261         /* Writer threads are launched */
262         for (i = 0; i < num_cores; i++)
263                 rte_eal_remote_launch(test_rcu_qsbr_writer_perf,
264                                 (void *)0, enabled_core_ids[i]);
265
266         /* Wait until all readers have exited */
267         rte_eal_mp_wait_lcore();
268
269         printf("Total RCU checks = %"PRIi64"\n", rte_atomic64_read(&checks));
270         printf("Cycles per %d checks: %"PRIi64"\n", RCU_SCALE_DOWN,
271                 rte_atomic64_read(&check_cycles) /
272                 (rte_atomic64_read(&checks) / RCU_SCALE_DOWN));
273
274         rte_free(t[0]);
275
276         return 0;
277 }
278
279 /*
280  * RCU test cases using rte_hash data structure.
281  */
282 static int
283 test_rcu_qsbr_hash_reader(void *arg)
284 {
285         struct rte_rcu_qsbr *temp;
286         struct rte_hash *hash = NULL;
287         int i;
288         uint64_t loop_cnt = 0;
289         uint64_t begin, cycles;
290         uint32_t thread_id = alloc_thread_id();
291         uint8_t read_type = (uint8_t)((uintptr_t)arg);
292         uint32_t *pdata;
293
294         temp = t[read_type];
295         hash = h;
296
297         rte_rcu_qsbr_thread_register(temp, thread_id);
298
299         begin = rte_rdtsc_precise();
300
301         do {
302                 rte_rcu_qsbr_thread_online(temp, thread_id);
303                 for (i = 0; i < TOTAL_ENTRY; i++) {
304                         rte_rcu_qsbr_lock(temp, thread_id);
305                         if (rte_hash_lookup_data(hash, keys + i,
306                                         (void **)&pdata) != -ENOENT) {
307                                 pdata[thread_id] = 0;
308                                 while (pdata[thread_id] < COUNTER_VALUE)
309                                         pdata[thread_id]++;
310                         }
311                         rte_rcu_qsbr_unlock(temp, thread_id);
312                 }
313                 /* Update quiescent state counter */
314                 rte_rcu_qsbr_quiescent(temp, thread_id);
315                 rte_rcu_qsbr_thread_offline(temp, thread_id);
316                 loop_cnt++;
317         } while (!writer_done);
318
319         cycles = rte_rdtsc_precise() - begin;
320         rte_atomic64_add(&update_cycles, cycles);
321         rte_atomic64_add(&updates, loop_cnt);
322
323         rte_rcu_qsbr_thread_unregister(temp, thread_id);
324
325         return 0;
326 }
327
328 static struct rte_hash *init_hash(void)
329 {
330         int i;
331         struct rte_hash *hash = NULL;
332
333         snprintf(hash_name, 8, "hash");
334         struct rte_hash_parameters hash_params = {
335                 .entries = TOTAL_ENTRY,
336                 .key_len = sizeof(uint32_t),
337                 .hash_func_init_val = 0,
338                 .socket_id = rte_socket_id(),
339                 .hash_func = rte_hash_crc,
340                 .extra_flag =
341                         RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF,
342                 .name = hash_name,
343         };
344
345         hash = rte_hash_create(&hash_params);
346         if (hash == NULL) {
347                 printf("Hash create Failed\n");
348                 return NULL;
349         }
350
351         for (i = 0; i < TOTAL_ENTRY; i++) {
352                 hash_data[i] = rte_zmalloc(NULL,
353                                 sizeof(uint32_t) * RTE_MAX_LCORE, 0);
354                 if (hash_data[i] == NULL) {
355                         printf("No memory\n");
356                         return NULL;
357                 }
358         }
359         keys = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0);
360         if (keys == NULL) {
361                 printf("No memory\n");
362                 return NULL;
363         }
364
365         for (i = 0; i < TOTAL_ENTRY; i++)
366                 keys[i] = i;
367
368         for (i = 0; i < TOTAL_ENTRY; i++) {
369                 if (rte_hash_add_key_data(hash, keys + i,
370                                 (void *)((uintptr_t)hash_data[i])) < 0) {
371                         printf("Hash key add Failed #%d\n", i);
372                         return NULL;
373                 }
374         }
375         return hash;
376 }
377
378 /*
379  * Functional test:
380  * Single writer, Single QS variable Single QSBR query, Blocking rcu_qsbr_check
381  */
382 static int
383 test_rcu_qsbr_sw_sv_1qs(void)
384 {
385         uint64_t token, begin, cycles;
386         size_t sz;
387         unsigned int i, j, tmp_num_cores;
388         int32_t pos;
389
390         writer_done = 0;
391
392         rte_atomic64_clear(&updates);
393         rte_atomic64_clear(&update_cycles);
394         rte_atomic64_clear(&checks);
395         rte_atomic64_clear(&check_cycles);
396
397         __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
398
399         printf("\nPerf test: 1 writer, %d readers, 1 QSBR variable, 1 QSBR Query, Blocking QSBR Check\n", num_cores);
400
401         if (all_registered == 1)
402                 tmp_num_cores = num_cores;
403         else
404                 tmp_num_cores = RTE_MAX_LCORE;
405
406         sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
407         t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
408                                                 RTE_CACHE_LINE_SIZE);
409         /* QS variable is initialized */
410         rte_rcu_qsbr_init(t[0], tmp_num_cores);
411
412         /* Shared data structure created */
413         h = init_hash();
414         if (h == NULL) {
415                 printf("Hash init failed\n");
416                 goto error;
417         }
418
419         /* Reader threads are launched */
420         for (i = 0; i < num_cores; i++)
421                 rte_eal_remote_launch(test_rcu_qsbr_hash_reader, NULL,
422                                         enabled_core_ids[i]);
423
424         begin = rte_rdtsc_precise();
425
426         for (i = 0; i < TOTAL_ENTRY; i++) {
427                 /* Delete elements from the shared data structure */
428                 pos = rte_hash_del_key(h, keys + i);
429                 if (pos < 0) {
430                         printf("Delete key failed #%d\n", keys[i]);
431                         goto error;
432                 }
433                 /* Start the quiescent state query process */
434                 token = rte_rcu_qsbr_start(t[0]);
435
436                 /* Check the quiescent state status */
437                 rte_rcu_qsbr_check(t[0], token, true);
438                 for (j = 0; j < tmp_num_cores; j++) {
439                         if (hash_data[i][j] != COUNTER_VALUE &&
440                                 hash_data[i][j] != 0) {
441                                 printf("Reader thread ID %u did not complete #%d =  %d\n",
442                                         j, i, hash_data[i][j]);
443                                 goto error;
444                         }
445                 }
446
447                 if (rte_hash_free_key_with_position(h, pos) < 0) {
448                         printf("Failed to free the key #%d\n", keys[i]);
449                         goto error;
450                 }
451                 rte_free(hash_data[i]);
452                 hash_data[i] = NULL;
453         }
454
455         cycles = rte_rdtsc_precise() - begin;
456         rte_atomic64_add(&check_cycles, cycles);
457         rte_atomic64_add(&checks, i);
458
459         writer_done = 1;
460
461         /* Wait and check return value from reader threads */
462         for (i = 0; i < num_cores; i++)
463                 if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
464                         goto error;
465         rte_hash_free(h);
466         rte_free(keys);
467
468         printf("Following numbers include calls to rte_hash functions\n");
469         printf("Cycles per 1 quiescent state update(online/update/offline): %"PRIi64"\n",
470                 rte_atomic64_read(&update_cycles) /
471                 rte_atomic64_read(&updates));
472
473         printf("Cycles per 1 check(start, check): %"PRIi64"\n\n",
474                 rte_atomic64_read(&check_cycles) /
475                 rte_atomic64_read(&checks));
476
477         rte_free(t[0]);
478
479         return 0;
480
481 error:
482         writer_done = 1;
483         /* Wait until all readers have exited */
484         rte_eal_mp_wait_lcore();
485
486         rte_hash_free(h);
487         rte_free(keys);
488         for (i = 0; i < TOTAL_ENTRY; i++)
489                 rte_free(hash_data[i]);
490
491         rte_free(t[0]);
492
493         return -1;
494 }
495
496 /*
497  * Functional test:
498  * Single writer, Single QS variable, Single QSBR query,
499  * Non-blocking rcu_qsbr_check
500  */
501 static int
502 test_rcu_qsbr_sw_sv_1qs_non_blocking(void)
503 {
504         uint64_t token, begin, cycles;
505         int ret;
506         size_t sz;
507         unsigned int i, j, tmp_num_cores;
508         int32_t pos;
509
510         writer_done = 0;
511
512         printf("Perf test: 1 writer, %d readers, 1 QSBR variable, 1 QSBR Query, Non-Blocking QSBR check\n", num_cores);
513
514         __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
515
516         if (all_registered == 1)
517                 tmp_num_cores = num_cores;
518         else
519                 tmp_num_cores = RTE_MAX_LCORE;
520
521         sz = rte_rcu_qsbr_get_memsize(tmp_num_cores);
522         t[0] = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
523                                                 RTE_CACHE_LINE_SIZE);
524         /* QS variable is initialized */
525         rte_rcu_qsbr_init(t[0], tmp_num_cores);
526
527         /* Shared data structure created */
528         h = init_hash();
529         if (h == NULL) {
530                 printf("Hash init failed\n");
531                 goto error;
532         }
533
534         /* Reader threads are launched */
535         for (i = 0; i < num_cores; i++)
536                 rte_eal_remote_launch(test_rcu_qsbr_hash_reader, NULL,
537                                         enabled_core_ids[i]);
538
539         begin = rte_rdtsc_precise();
540
541         for (i = 0; i < TOTAL_ENTRY; i++) {
542                 /* Delete elements from the shared data structure */
543                 pos = rte_hash_del_key(h, keys + i);
544                 if (pos < 0) {
545                         printf("Delete key failed #%d\n", keys[i]);
546                         goto error;
547                 }
548                 /* Start the quiescent state query process */
549                 token = rte_rcu_qsbr_start(t[0]);
550
551                 /* Check the quiescent state status */
552                 do {
553                         ret = rte_rcu_qsbr_check(t[0], token, false);
554                 } while (ret == 0);
555                 for (j = 0; j < tmp_num_cores; j++) {
556                         if (hash_data[i][j] != COUNTER_VALUE &&
557                                 hash_data[i][j] != 0) {
558                                 printf("Reader thread ID %u did not complete #%d =  %d\n",
559                                         j, i, hash_data[i][j]);
560                                 goto error;
561                         }
562                 }
563
564                 if (rte_hash_free_key_with_position(h, pos) < 0) {
565                         printf("Failed to free the key #%d\n", keys[i]);
566                         goto error;
567                 }
568                 rte_free(hash_data[i]);
569                 hash_data[i] = NULL;
570         }
571
572         cycles = rte_rdtsc_precise() - begin;
573         rte_atomic64_add(&check_cycles, cycles);
574         rte_atomic64_add(&checks, i);
575
576         writer_done = 1;
577         /* Wait and check return value from reader threads */
578         for (i = 0; i < num_cores; i++)
579                 if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
580                         goto error;
581         rte_hash_free(h);
582         rte_free(keys);
583
584         printf("Following numbers include calls to rte_hash functions\n");
585         printf("Cycles per 1 quiescent state update(online/update/offline): %"PRIi64"\n",
586                 rte_atomic64_read(&update_cycles) /
587                 rte_atomic64_read(&updates));
588
589         printf("Cycles per 1 check(start, check): %"PRIi64"\n\n",
590                 rte_atomic64_read(&check_cycles) /
591                 rte_atomic64_read(&checks));
592
593         rte_free(t[0]);
594
595         return 0;
596
597 error:
598         writer_done = 1;
599         /* Wait until all readers have exited */
600         rte_eal_mp_wait_lcore();
601
602         rte_hash_free(h);
603         rte_free(keys);
604         for (i = 0; i < TOTAL_ENTRY; i++)
605                 rte_free(hash_data[i]);
606
607         rte_free(t[0]);
608
609         return -1;
610 }
611
612 static int
613 test_rcu_qsbr_main(void)
614 {
615         uint16_t core_id;
616
617         if (rte_lcore_count() < 3) {
618                 printf("Not enough cores for rcu_qsbr_perf_autotest, expecting at least 3\n");
619                 return TEST_SKIPPED;
620         }
621
622         rte_atomic64_init(&updates);
623         rte_atomic64_init(&update_cycles);
624         rte_atomic64_init(&checks);
625         rte_atomic64_init(&check_cycles);
626
627         num_cores = 0;
628         RTE_LCORE_FOREACH_SLAVE(core_id) {
629                 enabled_core_ids[num_cores] = core_id;
630                 num_cores++;
631         }
632
633         printf("Number of cores provided = %d\n", num_cores);
634         printf("Perf test with all reader threads registered\n");
635         printf("--------------------------------------------\n");
636         all_registered = 1;
637
638         if (test_rcu_qsbr_perf() < 0)
639                 goto test_fail;
640
641         if (test_rcu_qsbr_rperf() < 0)
642                 goto test_fail;
643
644         if (test_rcu_qsbr_wperf() < 0)
645                 goto test_fail;
646
647         if (test_rcu_qsbr_sw_sv_1qs() < 0)
648                 goto test_fail;
649
650         if (test_rcu_qsbr_sw_sv_1qs_non_blocking() < 0)
651                 goto test_fail;
652
653         /* Make sure the actual number of cores provided is less than
654          * RTE_MAX_LCORE. This will allow for some threads not
655          * to be registered on the QS variable.
656          */
657         if (num_cores >= RTE_MAX_LCORE) {
658                 printf("Test failed! number of cores provided should be less than %d\n",
659                         RTE_MAX_LCORE);
660                 goto test_fail;
661         }
662
663         printf("Perf test with some of reader threads registered\n");
664         printf("------------------------------------------------\n");
665         all_registered = 0;
666
667         if (test_rcu_qsbr_perf() < 0)
668                 goto test_fail;
669
670         if (test_rcu_qsbr_rperf() < 0)
671                 goto test_fail;
672
673         if (test_rcu_qsbr_wperf() < 0)
674                 goto test_fail;
675
676         if (test_rcu_qsbr_sw_sv_1qs() < 0)
677                 goto test_fail;
678
679         if (test_rcu_qsbr_sw_sv_1qs_non_blocking() < 0)
680                 goto test_fail;
681
682         printf("\n");
683
684         return 0;
685
686 test_fail:
687         return -1;
688 }
689
690 REGISTER_TEST_COMMAND(rcu_qsbr_perf_autotest, test_rcu_qsbr_main);