test/rwlock: use compiler atomics for lcores sync
app/test/test_rwlock.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <unistd.h>
#include <sys/queue.h>
#include <string.h>

#include <rte_common.h>
#include <rte_memory.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_rwlock.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_cycles.h>

#include "test.h"

/*
 * rwlock test
 * ===========
 * Provides unit tests for the rte_rwlock API.
 * The main focus is functional testing, but some performance
 * measurements are included as well.
 * Obviously, proper testing requires more than one lcore.
 */

#define ITER_NUM        0x80

#define TEST_SEC        5

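/*
 * Shared state for the functional and performance tests: 'sl' is the
 * global rwlock, 'sl_tab' holds one rwlock per lcore, and 'synchro' is the
 * start flag the main lcore sets (using compiler atomics) to release the
 * workers in the perf test.
 */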
static rte_rwlock_t sl;
static rte_rwlock_t sl_tab[RTE_MAX_LCORE];
static uint32_t synchro;

enum {
        LC_TYPE_RDLOCK,
        LC_TYPE_WRLOCK,
};

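/*
 * Shared data and per-lcore results for the trylock tests: writers fill
 * 'data' with a known byte pattern and then restore it, readers check it
 * is always all zeroes, and each lcore records its outcome in
 * try_lcore_data[].
 */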
static struct {
        rte_rwlock_t lock;
        uint64_t tick;

        volatile union {
                uint8_t u8[RTE_CACHE_LINE_SIZE];
                uint64_t u64[RTE_CACHE_LINE_SIZE / sizeof(uint64_t)];
        } data;
} __rte_cache_aligned try_rwlock_data;

struct try_rwlock_lcore {
        int32_t rc;
        int32_t type;
        struct {
                uint64_t tick;
                uint64_t fail;
                uint64_t success;
        } stat;
} __rte_cache_aligned;

static struct try_rwlock_lcore try_lcore_data[RTE_MAX_LCORE];

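/*
 * Worker routine for rwlock_test1: take and release the global write lock,
 * then the per-lcore write lock, and finally hold the global read lock for
 * 100 ms before releasing it.
 */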
static int
test_rwlock_per_core(__rte_unused void *arg)
{
        rte_rwlock_write_lock(&sl);
        printf("Global write lock taken on core %u\n", rte_lcore_id());
        rte_rwlock_write_unlock(&sl);

        rte_rwlock_write_lock(&sl_tab[rte_lcore_id()]);
        printf("Hello from core %u !\n", rte_lcore_id());
        rte_rwlock_write_unlock(&sl_tab[rte_lcore_id()]);

        rte_rwlock_read_lock(&sl);
        printf("Global read lock taken on core %u\n", rte_lcore_id());
        rte_delay_ms(100);
        printf("Release global read lock on core %u\n", rte_lcore_id());
        rte_rwlock_read_unlock(&sl);

        return 0;
}

static rte_rwlock_t lk = RTE_RWLOCK_INITIALIZER;
static volatile uint64_t rwlock_data;
static uint64_t time_count[RTE_MAX_LCORE] = {0};

#define MAX_LOOP 10000
#define TEST_RWLOCK_DEBUG 0

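/*
 * Perf test worker: wait for the 'synchro' flag, then perform MAX_LOOP
 * write-lock/unlock plus read-lock/unlock iterations on 'lk' and record
 * the elapsed time (in microseconds) in time_count[].
 */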
static int
load_loop_fn(__rte_unused void *arg)
{
        uint64_t time_diff = 0, begin;
        uint64_t hz = rte_get_timer_hz();
        uint64_t lcount = 0;
        const unsigned int lcore = rte_lcore_id();

        /* workers wait for the synchro flag set by the main lcore */
        if (lcore != rte_get_main_lcore())
                rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);

        begin = rte_rdtsc_precise();
        while (lcount < MAX_LOOP) {
                rte_rwlock_write_lock(&lk);
                ++rwlock_data;
                rte_rwlock_write_unlock(&lk);

                rte_rwlock_read_lock(&lk);
                if (TEST_RWLOCK_DEBUG && !(lcount % 100))
                        printf("Core [%u] rwlock_data = %"PRIu64"\n",
                                lcore, rwlock_data);
                rte_rwlock_read_unlock(&lk);

                lcount++;
                /* delay to make the lock duty cycle slightly more realistic */
                rte_pause();
        }

        time_diff = rte_rdtsc_precise() - begin;
        time_count[lcore] = time_diff * 1000000 / hz;
        return 0;
}

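/*
 * Launch load_loop_fn() on every lcore (workers first, then the main
 * lcore) and print the per-core and total times spent in the loop.
 */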
static int
test_rwlock_perf(void)
{
        unsigned int i;
        uint64_t total = 0;

        printf("\nRwlock Perf Test on %u cores...\n", rte_lcore_count());

        /* clear synchro and start workers */
        __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
        if (rte_eal_mp_remote_launch(load_loop_fn, NULL, SKIP_MAIN) < 0)
                return -1;

        /* start synchro and launch test on main */
        __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
        load_loop_fn(NULL);

        rte_eal_mp_wait_lcore();

        RTE_LCORE_FOREACH(i) {
                printf("Core [%u] cost time = %"PRIu64" us\n",
                        i, time_count[i]);
                total += time_count[i];
        }

        printf("Total cost time = %"PRIu64" us\n", total);
        memset(time_count, 0, sizeof(time_count));

        return 0;
}

/*
 * - There is a global rwlock and a table of rwlocks (one per lcore).
 *
 * - The test function takes all of these locks and launches the
 *   ``test_rwlock_per_core()`` function on each core (except the main one).
 *
 *   - The function takes the global write lock, displays a message,
 *     then releases the global lock.
 *   - Then, it takes the per-lcore write lock, displays a message, and
 *     releases the per-lcore lock.
 *   - Finally, a read lock is taken and held for 100 ms, then released.
 *
 * - The main function unlocks the per-lcore locks sequentially and
 *   waits between each unlock. This triggers the display of a message
 *   for each core, in the correct order.
 *
 *   Then, it takes the global write lock and displays the last
 *   message. The autotest script checks that the message order is correct.
 */
static int
rwlock_test1(void)
{
        int i;

        rte_rwlock_init(&sl);
        for (i = 0; i < RTE_MAX_LCORE; i++)
                rte_rwlock_init(&sl_tab[i]);

        rte_rwlock_write_lock(&sl);

        RTE_LCORE_FOREACH_WORKER(i) {
                rte_rwlock_write_lock(&sl_tab[i]);
                rte_eal_remote_launch(test_rwlock_per_core, NULL, i);
        }

        rte_rwlock_write_unlock(&sl);

        RTE_LCORE_FOREACH_WORKER(i) {
                rte_rwlock_write_unlock(&sl_tab[i]);
                rte_delay_ms(100);
        }

        rte_rwlock_write_lock(&sl);
        /* this message should be the last message of the test */
        printf("Global write lock taken on main core %u\n", rte_lcore_id());
        rte_rwlock_write_unlock(&sl);

        rte_eal_mp_wait_lcore();

        if (test_rwlock_perf() < 0)
                return -1;

        return 0;
}

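/*
 * Take the shared lock for reading with trylock and verify that the data
 * area contains only zeroes: a reader must never observe a writer's
 * in-progress update.
 */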
static int
try_read(uint32_t lc)
{
        int32_t rc;
        uint32_t i;

        rc = rte_rwlock_read_trylock(&try_rwlock_data.lock);
        if (rc != 0)
                return rc;

        for (i = 0; i != RTE_DIM(try_rwlock_data.data.u64); i++) {

                /* race condition occurred, lock doesn't work properly */
                if (try_rwlock_data.data.u64[i] != 0) {
                        printf("%s(%u) error: unexpected data pattern\n",
                                __func__, lc);
                        rte_memdump(stdout, NULL,
                                (void *)(uintptr_t)&try_rwlock_data.data,
                                sizeof(try_rwlock_data.data));
                        rc = -EFAULT;
                        break;
                }
        }

        rte_rwlock_read_unlock(&try_rwlock_data.lock);
        return rc;
}

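/*
 * Take the shared lock for writing with trylock, then fill the data area
 * with a per-lcore byte value (checking it was all zeroes beforehand) and
 * restore it to zeroes (checking the pattern was not corrupted meanwhile).
 */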
static int
try_write(uint32_t lc)
{
        int32_t rc;
        uint32_t i, v;

        v = RTE_MAX(lc % UINT8_MAX, 1U);

        rc = rte_rwlock_write_trylock(&try_rwlock_data.lock);
        if (rc != 0)
                return rc;

        /* update by bytes in reverse order */
        for (i = RTE_DIM(try_rwlock_data.data.u8); i-- != 0; ) {

                /* race condition occurred, lock doesn't work properly */
                if (try_rwlock_data.data.u8[i] != 0) {
                        printf("%s:%d(%u) error: unexpected data pattern\n",
                                __func__, __LINE__, lc);
                        rte_memdump(stdout, NULL,
                                (void *)(uintptr_t)&try_rwlock_data.data,
                                sizeof(try_rwlock_data.data));
                        rc = -EFAULT;
                        break;
                }

                try_rwlock_data.data.u8[i] = v;
        }

        /* restore by bytes in reverse order */
        for (i = RTE_DIM(try_rwlock_data.data.u8); i-- != 0; ) {

                /* race condition occurred, lock doesn't work properly */
                if (try_rwlock_data.data.u8[i] != v) {
                        printf("%s:%d(%u) error: unexpected data pattern\n",
                                __func__, __LINE__, lc);
                        rte_memdump(stdout, NULL,
                                (void *)(uintptr_t)&try_rwlock_data.data,
                                sizeof(try_rwlock_data.data));
                        rc = -EFAULT;
                        break;
                }

                try_rwlock_data.data.u8[i] = 0;
        }

        rte_rwlock_write_unlock(&try_rwlock_data.lock);
        return rc;
}

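/*
 * Reader lcore: repeatedly call try_read() in batches of ITER_NUM until
 * the configured test time (try_rwlock_data.tick cycles) elapses,
 * counting successful and failed (-EBUSY) attempts.
 */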
static int
try_read_lcore(__rte_unused void *data)
{
        int32_t rc;
        uint32_t i, lc;
        uint64_t ftm, stm, tm;
        struct try_rwlock_lcore *lcd;

        lc = rte_lcore_id();
        lcd = try_lcore_data + lc;
        lcd->type = LC_TYPE_RDLOCK;

        ftm = try_rwlock_data.tick;
        stm = rte_get_timer_cycles();

        do {
                for (i = 0; i != ITER_NUM; i++) {
                        rc = try_read(lc);
                        if (rc == 0)
                                lcd->stat.success++;
                        else if (rc == -EBUSY)
                                lcd->stat.fail++;
                        else
                                break;
                        rc = 0;
                }
                tm = rte_get_timer_cycles() - stm;
        } while (tm < ftm && rc == 0);

        lcd->rc = rc;
        lcd->stat.tick = tm;
        return rc;
}

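/* Writer lcore: same loop as try_read_lcore(), but using try_write(). */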
static int
try_write_lcore(__rte_unused void *data)
{
        int32_t rc;
        uint32_t i, lc;
        uint64_t ftm, stm, tm;
        struct try_rwlock_lcore *lcd;

        lc = rte_lcore_id();
        lcd = try_lcore_data + lc;
        lcd->type = LC_TYPE_WRLOCK;

        ftm = try_rwlock_data.tick;
        stm = rte_get_timer_cycles();

        do {
                for (i = 0; i != ITER_NUM; i++) {
                        rc = try_write(lc);
                        if (rc == 0)
                                lcd->stat.success++;
                        else if (rc == -EBUSY)
                                lcd->stat.fail++;
                        else
                                break;
                        rc = 0;
                }
                tm = rte_get_timer_cycles() - stm;
        } while (tm < ftm && rc == 0);

        lcd->rc = rc;
        lcd->stat.tick = tm;
        return rc;
}

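/*
 * Print one lcore's (or one aggregated set of) statistics; the fail and
 * success counts are clamped to 1 only for the derived ratios, to avoid
 * division by zero.
 */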
static void
print_try_lcore_stats(const struct try_rwlock_lcore *tlc, uint32_t lc)
{
        uint64_t f, s;

        f = RTE_MAX(tlc->stat.fail, 1ULL);
        s = RTE_MAX(tlc->stat.success, 1ULL);

        printf("try_lcore_data[%u]={\n"
                "\trc=%d,\n"
                "\ttype=%s,\n"
                "\tfail=%" PRIu64 ",\n"
                "\tsuccess=%" PRIu64 ",\n"
                "\tcycles=%" PRIu64 ",\n"
                "\tcycles/op=%#Lf,\n"
                "\tcycles/success=%#Lf,\n"
                "\tsuccess/fail=%#Lf,\n"
                "};\n",
                lc,
                tlc->rc,
                tlc->type == LC_TYPE_RDLOCK ? "RDLOCK" : "WRLOCK",
                tlc->stat.fail,
                tlc->stat.success,
                tlc->stat.tick,
                (long double)tlc->stat.tick /
                (tlc->stat.fail + tlc->stat.success),
                (long double)tlc->stat.tick / s,
                (long double)tlc->stat.success / f);
}

static void
collect_try_lcore_stats(struct try_rwlock_lcore *tlc,
        const struct try_rwlock_lcore *lc)
{
        tlc->stat.tick += lc->stat.tick;
        tlc->stat.fail += lc->stat.fail;
        tlc->stat.success += lc->stat.success;
}

/*
 * Process collected results:
 *  - check status
 *  - collect and print statistics
 */
static int
process_try_lcore_stats(void)
{
        int32_t rc;
        uint32_t lc, rd, wr;
        struct try_rwlock_lcore rlc, wlc;

        memset(&rlc, 0, sizeof(rlc));
        memset(&wlc, 0, sizeof(wlc));

        rlc.type = LC_TYPE_RDLOCK;
        wlc.type = LC_TYPE_WRLOCK;
        rd = 0;
        wr = 0;

        rc = 0;
        RTE_LCORE_FOREACH(lc) {
                rc |= try_lcore_data[lc].rc;
                if (try_lcore_data[lc].type == LC_TYPE_RDLOCK) {
                        collect_try_lcore_stats(&rlc, try_lcore_data + lc);
                        rd++;
                } else {
                        collect_try_lcore_stats(&wlc, try_lcore_data + lc);
                        wr++;
                }
        }

        if (rc == 0) {
                RTE_LCORE_FOREACH(lc)
                        print_try_lcore_stats(try_lcore_data + lc, lc);

                if (rd != 0) {
                        printf("aggregated stats for %u RDLOCK cores:\n", rd);
                        print_try_lcore_stats(&rlc, rd);
                }

                if (wr != 0) {
                        printf("aggregated stats for %u WRLOCK cores:\n", wr);
                        print_try_lcore_stats(&wlc, wr);
                }
        }

        return rc;
}

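/*
 * Reset the per-lcore results and the shared data, and set the test
 * duration to TEST_SEC seconds worth of TSC cycles.
 */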
static void
try_test_reset(void)
{
        memset(&try_lcore_data, 0, sizeof(try_lcore_data));
        memset(&try_rwlock_data, 0, sizeof(try_rwlock_data));
        try_rwlock_data.tick = TEST_SEC * rte_get_tsc_hz();
}

/* all lcores grab RDLOCK */
static int
try_rwlock_test_rda(void)
{
        try_test_reset();

        /* start read test on all available lcores */
        rte_eal_mp_remote_launch(try_read_lcore, NULL, CALL_MAIN);
        rte_eal_mp_wait_lcore();

        return process_try_lcore_stats();
}

/* all worker lcores grab RDLOCK, main one grabs WRLOCK */
static int
try_rwlock_test_rds_wrm(void)
{
        try_test_reset();

        rte_eal_mp_remote_launch(try_read_lcore, NULL, SKIP_MAIN);
        try_write_lcore(NULL);
        rte_eal_mp_wait_lcore();

        return process_try_lcore_stats();
}

/* main and even worker lcores grab RDLOCK, odd lcores grab WRLOCK */
static int
try_rwlock_test_rde_wro(void)
{
        uint32_t lc, mlc;

        try_test_reset();

        mlc = rte_get_main_lcore();

        RTE_LCORE_FOREACH(lc) {
                if (lc != mlc) {
                        if ((lc & 1) == 0)
                                rte_eal_remote_launch(try_read_lcore,
                                                NULL, lc);
                        else
                                rte_eal_remote_launch(try_write_lcore,
                                                NULL, lc);
                }
        }
        try_read_lcore(NULL);
        rte_eal_mp_wait_lcore();

        return process_try_lcore_stats();
}

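/* Run all rwlock test cases in sequence and OR their return codes. */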
static int
test_rwlock(void)
{
        uint32_t i;
        int32_t rc, ret;

        static const struct {
                const char *name;
                int (*ftst)(void);
        } test[] = {
                {
                        .name = "rwlock_test1",
                        .ftst = rwlock_test1,
                },
                {
                        .name = "try_rwlock_test_rda",
                        .ftst = try_rwlock_test_rda,
                },
                {
                        .name = "try_rwlock_test_rds_wrm",
                        .ftst = try_rwlock_test_rds_wrm,
                },
                {
                        .name = "try_rwlock_test_rde_wro",
                        .ftst = try_rwlock_test_rde_wro,
                },
        };

        ret = 0;
        for (i = 0; i != RTE_DIM(test); i++) {
                printf("starting test %s;\n", test[i].name);
                rc = test[i].ftst();
                printf("test %s completed with status %d\n", test[i].name, rc);
                ret |= rc;
        }

        return ret;
}

REGISTER_TEST_COMMAND(rwlock_autotest, test_rwlock);

/* subtests used in meson for CI */
REGISTER_TEST_COMMAND(rwlock_test1_autotest, rwlock_test1);
REGISTER_TEST_COMMAND(rwlock_rda_autotest, try_rwlock_test_rda);
REGISTER_TEST_COMMAND(rwlock_rds_wrm_autotest, try_rwlock_test_rds_wrm);
REGISTER_TEST_COMMAND(rwlock_rde_wro_autotest, try_rwlock_test_rde_wro);
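
/*
 * Note: these commands are typically run through the dpdk-test application,
 * for example by entering "rwlock_autotest" at its interactive prompt; the
 * per-case commands above are the ones referenced by the meson CI runs.
 */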