test/rwlock: benchmark on all available cores
[dpdk.git] / app / test / test_rwlock.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <stdint.h>
7 #include <inttypes.h>
8 #include <unistd.h>
9 #include <sys/queue.h>
10 #include <string.h>
11
12 #include <rte_common.h>
13 #include <rte_memory.h>
14 #include <rte_per_lcore.h>
15 #include <rte_launch.h>
16 #include <rte_atomic.h>
17 #include <rte_rwlock.h>
18 #include <rte_eal.h>
19 #include <rte_lcore.h>
20 #include <rte_cycles.h>
21
22 #include "test.h"
23
24 /*
25  * rwlock test
26  * ===========
27  * Provides UT for rte_rwlock API.
28  * Main concern is on functional testing, but also provides some
29  * performance measurements.
30  * Obviously for proper testing need to be executed with more than one lcore.
31  */
32
/* Number of trylock attempts an lcore makes between two elapsed-time checks. */
#define ITER_NUM	0x80

/* Number of seconds worth of clock cycles each try-lock test is given. */
#define TEST_SEC	5
36
37 static rte_rwlock_t sl;
38 static rte_rwlock_t sl_tab[RTE_MAX_LCORE];
39 static rte_atomic32_t synchro;
40
/*
 * Kind of lock an lcore exercises in the try-lock tests.
 * LC_TYPE_RDLOCK is 0, i.e. the value found in a zeroed per-lcore record.
 */
enum {
	LC_TYPE_RDLOCK = 0,
	LC_TYPE_WRLOCK = 1,
};
45
46 static struct {
47         rte_rwlock_t lock;
48         uint64_t tick;
49         volatile union {
50                 uint8_t u8[RTE_CACHE_LINE_SIZE];
51                 uint64_t u64[RTE_CACHE_LINE_SIZE / sizeof(uint64_t)];
52         } data;
53 } __rte_cache_aligned try_rwlock_data;
54
55 struct try_rwlock_lcore {
56         int32_t rc;
57         int32_t type;
58         struct {
59                 uint64_t tick;
60                 uint64_t fail;
61                 uint64_t success;
62         } stat;
63 } __rte_cache_aligned;
64
65 static struct try_rwlock_lcore try_lcore_data[RTE_MAX_LCORE];
66
67 static int
68 test_rwlock_per_core(__attribute__((unused)) void *arg)
69 {
70         rte_rwlock_write_lock(&sl);
71         printf("Global write lock taken on core %u\n", rte_lcore_id());
72         rte_rwlock_write_unlock(&sl);
73
74         rte_rwlock_write_lock(&sl_tab[rte_lcore_id()]);
75         printf("Hello from core %u !\n", rte_lcore_id());
76         rte_rwlock_write_unlock(&sl_tab[rte_lcore_id()]);
77
78         rte_rwlock_read_lock(&sl);
79         printf("Global read lock taken on core %u\n", rte_lcore_id());
80         rte_delay_ms(100);
81         printf("Release global read lock on core %u\n", rte_lcore_id());
82         rte_rwlock_read_unlock(&sl);
83
84         return 0;
85 }
86
87 static rte_rwlock_t lk = RTE_RWLOCK_INITIALIZER;
88 static volatile uint64_t rwlock_data;
89 static uint64_t lock_count[RTE_MAX_LCORE] = {0};
90
91 #define TIME_MS 100
92 #define TEST_RWLOCK_DEBUG 0
93
94 static int
95 load_loop_fn(__attribute__((unused)) void *arg)
96 {
97         uint64_t time_diff = 0, begin;
98         uint64_t hz = rte_get_timer_hz();
99         uint64_t lcount = 0;
100         const unsigned int lcore = rte_lcore_id();
101
102         /* wait synchro for slaves */
103         if (lcore != rte_get_master_lcore())
104                 while (rte_atomic32_read(&synchro) == 0)
105                         ;
106
107         begin = rte_rdtsc_precise();
108         while (time_diff < hz * TIME_MS / 1000) {
109                 rte_rwlock_write_lock(&lk);
110                 ++rwlock_data;
111                 rte_rwlock_write_unlock(&lk);
112
113                 rte_rwlock_read_lock(&lk);
114                 if (TEST_RWLOCK_DEBUG && !(lcount % 100))
115                         printf("Core [%u] rwlock_data = %"PRIu64"\n",
116                                 lcore, rwlock_data);
117                 rte_rwlock_read_unlock(&lk);
118
119                 lcount++;
120                 /* delay to make lock duty cycle slightly realistic */
121                 rte_pause();
122                 time_diff = rte_rdtsc_precise() - begin;
123         }
124
125         lock_count[lcore] = lcount;
126         return 0;
127 }
128
129 static int
130 test_rwlock_perf(void)
131 {
132         unsigned int i;
133         uint64_t total = 0;
134
135         printf("\nRwlock Perf Test on %u cores...\n", rte_lcore_count());
136
137         /* clear synchro and start slaves */
138         rte_atomic32_set(&synchro, 0);
139         if (rte_eal_mp_remote_launch(load_loop_fn, NULL, SKIP_MASTER) < 0)
140                 return -1;
141
142         /* start synchro and launch test on master */
143         rte_atomic32_set(&synchro, 1);
144         load_loop_fn(NULL);
145
146         rte_eal_mp_wait_lcore();
147
148         RTE_LCORE_FOREACH(i) {
149                 printf("Core [%u] count = %"PRIu64"\n", i, lock_count[i]);
150                 total += lock_count[i];
151         }
152
153         printf("Total count = %"PRIu64"\n", total);
154
155         return 0;
156 }
157
158 /*
159  * - There is a global rwlock and a table of rwlocks (one per lcore).
160  *
161  * - The test function takes all of these locks and launches the
162  *   ``test_rwlock_per_core()`` function on each core (except the master).
163  *
164  *   - The function takes the global write lock, display something,
165  *     then releases the global lock.
166  *   - Then, it takes the per-lcore write lock, display something, and
167  *     releases the per-core lock.
168  *   - Finally, a read lock is taken during 100 ms, then released.
169  *
170  * - The main function unlocks the per-lcore locks sequentially and
171  *   waits between each lock. This triggers the display of a message
172  *   for each core, in the correct order.
173  *
174  *   Then, it tries to take the global write lock and display the last
175  *   message. The autotest script checks that the message order is correct.
176  */
177 static int
178 rwlock_test1(void)
179 {
180         int i;
181
182         rte_rwlock_init(&sl);
183         for (i=0; i<RTE_MAX_LCORE; i++)
184                 rte_rwlock_init(&sl_tab[i]);
185
186         rte_rwlock_write_lock(&sl);
187
188         RTE_LCORE_FOREACH_SLAVE(i) {
189                 rte_rwlock_write_lock(&sl_tab[i]);
190                 rte_eal_remote_launch(test_rwlock_per_core, NULL, i);
191         }
192
193         rte_rwlock_write_unlock(&sl);
194
195         RTE_LCORE_FOREACH_SLAVE(i) {
196                 rte_rwlock_write_unlock(&sl_tab[i]);
197                 rte_delay_ms(100);
198         }
199
200         rte_rwlock_write_lock(&sl);
201         /* this message should be the last message of test */
202         printf("Global write lock taken on master core %u\n", rte_lcore_id());
203         rte_rwlock_write_unlock(&sl);
204
205         rte_eal_mp_wait_lcore();
206
207         if (test_rwlock_perf() < 0)
208                 return -1;
209
210         return 0;
211 }
212
213 static int
214 try_read(uint32_t lc)
215 {
216         int32_t rc;
217         uint32_t i;
218
219         rc = rte_rwlock_read_trylock(&try_rwlock_data.lock);
220         if (rc != 0)
221                 return rc;
222
223         for (i = 0; i != RTE_DIM(try_rwlock_data.data.u64); i++) {
224
225                 /* race condition occurred, lock doesn't work properly */
226                 if (try_rwlock_data.data.u64[i] != 0) {
227                         printf("%s(%u) error: unexpected data pattern\n",
228                                 __func__, lc);
229                         rte_memdump(stdout, NULL,
230                                 (void *)(uintptr_t)&try_rwlock_data.data,
231                                 sizeof(try_rwlock_data.data));
232                         rc = -EFAULT;
233                         break;
234                 }
235         }
236
237         rte_rwlock_read_unlock(&try_rwlock_data.lock);
238         return rc;
239 }
240
241 static int
242 try_write(uint32_t lc)
243 {
244         int32_t rc;
245         uint32_t i, v;
246
247         v = RTE_MAX(lc % UINT8_MAX, 1U);
248
249         rc = rte_rwlock_write_trylock(&try_rwlock_data.lock);
250         if (rc != 0)
251                 return rc;
252
253         /* update by bytes in reverese order */
254         for (i = RTE_DIM(try_rwlock_data.data.u8); i-- != 0; ) {
255
256                 /* race condition occurred, lock doesn't work properly */
257                 if (try_rwlock_data.data.u8[i] != 0) {
258                         printf("%s:%d(%u) error: unexpected data pattern\n",
259                                 __func__, __LINE__, lc);
260                         rte_memdump(stdout, NULL,
261                                 (void *)(uintptr_t)&try_rwlock_data.data,
262                                 sizeof(try_rwlock_data.data));
263                         rc = -EFAULT;
264                         break;
265                 }
266
267                 try_rwlock_data.data.u8[i] = v;
268         }
269
270         /* restore by bytes in reverese order */
271         for (i = RTE_DIM(try_rwlock_data.data.u8); i-- != 0; ) {
272
273                 /* race condition occurred, lock doesn't work properly */
274                 if (try_rwlock_data.data.u8[i] != v) {
275                         printf("%s:%d(%u) error: unexpected data pattern\n",
276                                 __func__, __LINE__, lc);
277                         rte_memdump(stdout, NULL,
278                                 (void *)(uintptr_t)&try_rwlock_data.data,
279                                 sizeof(try_rwlock_data.data));
280                         rc = -EFAULT;
281                         break;
282                 }
283
284                 try_rwlock_data.data.u8[i] = 0;
285         }
286
287         rte_rwlock_write_unlock(&try_rwlock_data.lock);
288         return rc;
289 }
290
291 static int
292 try_read_lcore(__rte_unused void *data)
293 {
294         int32_t rc;
295         uint32_t i, lc;
296         uint64_t ftm, stm, tm;
297         struct try_rwlock_lcore *lcd;
298
299         lc = rte_lcore_id();
300         lcd = try_lcore_data + lc;
301         lcd->type = LC_TYPE_RDLOCK;
302
303         ftm = try_rwlock_data.tick;
304         stm = rte_get_timer_cycles();
305
306         do {
307                 for (i = 0; i != ITER_NUM; i++) {
308                         rc = try_read(lc);
309                         if (rc == 0)
310                                 lcd->stat.success++;
311                         else if (rc == -EBUSY)
312                                 lcd->stat.fail++;
313                         else
314                                 break;
315                         rc = 0;
316                 }
317                 tm = rte_get_timer_cycles() - stm;
318         } while (tm < ftm && rc == 0);
319
320         lcd->rc = rc;
321         lcd->stat.tick = tm;
322         return rc;
323 }
324
325 static int
326 try_write_lcore(__rte_unused void *data)
327 {
328         int32_t rc;
329         uint32_t i, lc;
330         uint64_t ftm, stm, tm;
331         struct try_rwlock_lcore *lcd;
332
333         lc = rte_lcore_id();
334         lcd = try_lcore_data + lc;
335         lcd->type = LC_TYPE_WRLOCK;
336
337         ftm = try_rwlock_data.tick;
338         stm = rte_get_timer_cycles();
339
340         do {
341                 for (i = 0; i != ITER_NUM; i++) {
342                         rc = try_write(lc);
343                         if (rc == 0)
344                                 lcd->stat.success++;
345                         else if (rc == -EBUSY)
346                                 lcd->stat.fail++;
347                         else
348                                 break;
349                         rc = 0;
350                 }
351                 tm = rte_get_timer_cycles() - stm;
352         } while (tm < ftm && rc == 0);
353
354         lcd->rc = rc;
355         lcd->stat.tick = tm;
356         return rc;
357 }
358
359 static void
360 print_try_lcore_stats(const struct try_rwlock_lcore *tlc, uint32_t lc)
361 {
362         uint64_t f, s;
363
364         f = RTE_MAX(tlc->stat.fail, 1ULL);
365         s = RTE_MAX(tlc->stat.success, 1ULL);
366
367         printf("try_lcore_data[%u]={\n"
368                 "\trc=%d,\n"
369                 "\ttype=%s,\n"
370                 "\tfail=%" PRIu64 ",\n"
371                 "\tsuccess=%" PRIu64 ",\n"
372                 "\tcycles=%" PRIu64 ",\n"
373                 "\tcycles/op=%#Lf,\n"
374                 "\tcycles/success=%#Lf,\n"
375                 "\tsuccess/fail=%#Lf,\n"
376                 "};\n",
377                 lc,
378                 tlc->rc,
379                 tlc->type == LC_TYPE_RDLOCK ? "RDLOCK" : "WRLOCK",
380                 tlc->stat.fail,
381                 tlc->stat.success,
382                 tlc->stat.tick,
383                 (long double)tlc->stat.tick /
384                 (tlc->stat.fail + tlc->stat.success),
385                 (long double)tlc->stat.tick / s,
386                 (long double)tlc->stat.success / f);
387 }
388
389 static void
390 collect_try_lcore_stats(struct try_rwlock_lcore *tlc,
391         const struct try_rwlock_lcore *lc)
392 {
393         tlc->stat.tick += lc->stat.tick;
394         tlc->stat.fail += lc->stat.fail;
395         tlc->stat.success += lc->stat.success;
396 }
397
398 /*
399  * Process collected results:
400  *  - check status
401  *  - collect and print statistics
402  */
403 static int
404 process_try_lcore_stats(void)
405 {
406         int32_t rc;
407         uint32_t lc, rd, wr;
408         struct try_rwlock_lcore rlc, wlc;
409
410         memset(&rlc, 0, sizeof(rlc));
411         memset(&wlc, 0, sizeof(wlc));
412
413         rlc.type = LC_TYPE_RDLOCK;
414         wlc.type = LC_TYPE_WRLOCK;
415         rd = 0;
416         wr = 0;
417
418         rc = 0;
419         RTE_LCORE_FOREACH(lc) {
420                 rc |= try_lcore_data[lc].rc;
421                 if (try_lcore_data[lc].type == LC_TYPE_RDLOCK) {
422                         collect_try_lcore_stats(&rlc, try_lcore_data + lc);
423                         rd++;
424                 } else {
425                         collect_try_lcore_stats(&wlc, try_lcore_data + lc);
426                         wr++;
427                 }
428         }
429
430         if (rc == 0) {
431                 RTE_LCORE_FOREACH(lc)
432                         print_try_lcore_stats(try_lcore_data + lc, lc);
433
434                 if (rd != 0) {
435                         printf("aggregated stats for %u RDLOCK cores:\n", rd);
436                         print_try_lcore_stats(&rlc, rd);
437                 }
438
439                 if (wr != 0) {
440                         printf("aggregated stats for %u WRLOCK cores:\n", wr);
441                         print_try_lcore_stats(&wlc, wr);
442                 }
443         }
444
445         return rc;
446 }
447
448 static void
449 try_test_reset(void)
450 {
451         memset(&try_lcore_data, 0, sizeof(try_lcore_data));
452         memset(&try_rwlock_data, 0, sizeof(try_rwlock_data));
453         try_rwlock_data.tick = TEST_SEC * rte_get_tsc_hz();
454 }
455
456 /* all lcores grab RDLOCK */
457 static int
458 try_rwlock_test_rda(void)
459 {
460         try_test_reset();
461
462         /* start read test on all avaialble lcores */
463         rte_eal_mp_remote_launch(try_read_lcore, NULL, CALL_MASTER);
464         rte_eal_mp_wait_lcore();
465
466         return process_try_lcore_stats();
467 }
468
469 /* all slave lcores grab RDLOCK, master one grabs WRLOCK */
470 static int
471 try_rwlock_test_rds_wrm(void)
472 {
473         try_test_reset();
474
475         rte_eal_mp_remote_launch(try_read_lcore, NULL, SKIP_MASTER);
476         try_write_lcore(NULL);
477         rte_eal_mp_wait_lcore();
478
479         return process_try_lcore_stats();
480 }
481
482 /* master and even slave lcores grab RDLOCK, odd lcores grab WRLOCK */
483 static int
484 try_rwlock_test_rde_wro(void)
485 {
486         uint32_t lc, mlc;
487
488         try_test_reset();
489
490         mlc = rte_get_master_lcore();
491
492         RTE_LCORE_FOREACH(lc) {
493                 if (lc != mlc) {
494                         if ((lc & 1) == 0)
495                                 rte_eal_remote_launch(try_read_lcore,
496                                                 NULL, lc);
497                         else
498                                 rte_eal_remote_launch(try_write_lcore,
499                                                 NULL, lc);
500                 }
501         }
502         try_read_lcore(NULL);
503         rte_eal_mp_wait_lcore();
504
505         return process_try_lcore_stats();
506 }
507
508 static int
509 test_rwlock(void)
510 {
511         uint32_t i;
512         int32_t rc, ret;
513
514         static const struct {
515                 const char *name;
516                 int (*ftst)(void);
517         } test[] = {
518                 {
519                         .name = "rwlock_test1",
520                         .ftst = rwlock_test1,
521                 },
522                 {
523                         .name = "try_rwlock_test_rda",
524                         .ftst = try_rwlock_test_rda,
525                 },
526                 {
527                         .name = "try_rwlock_test_rds_wrm",
528                         .ftst = try_rwlock_test_rds_wrm,
529                 },
530                 {
531                         .name = "try_rwlock_test_rde_wro",
532                         .ftst = try_rwlock_test_rde_wro,
533                 },
534         };
535
536         ret = 0;
537         for (i = 0; i != RTE_DIM(test); i++) {
538                 printf("starting test %s;\n", test[i].name);
539                 rc = test[i].ftst();
540                 printf("test %s completed with status %d\n", test[i].name, rc);
541                 ret |= rc;
542         }
543
544         return ret;
545 }
546
547 REGISTER_TEST_COMMAND(rwlock_autotest, test_rwlock);