app/test/test_rwlock.c — functional and performance unit tests for the DPDK rte_rwlock API
[dpdk.git] / app / test / test_rwlock.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <errno.h>
#include <unistd.h>
#include <sys/queue.h>
#include <string.h>

#include <rte_common.h>
#include <rte_memory.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_rwlock.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_cycles.h>

#include "test.h"
23
24 /*
25  * rwlock test
26  * ===========
27  * Provides UT for rte_rwlock API.
28  * Main concern is on functional testing, but also provides some
29  * performance measurements.
30  * Obviously for proper testing need to be executed with more than one lcore.
31  */
32
/* trylock attempts per inner batch before re-checking the deadline */
#define ITER_NUM        0x80

/* duration of each try-lock test, in seconds */
#define TEST_SEC        5

static rte_rwlock_t sl;				/* global lock shared by all lcores */
static rte_rwlock_t sl_tab[RTE_MAX_LCORE];	/* one private lock per lcore */
static rte_atomic32_t synchro;			/* start flag for the perf test */

/* role an lcore plays in the try-lock tests */
enum {
	LC_TYPE_RDLOCK,
	LC_TYPE_WRLOCK,
};

/*
 * Shared state for the try-lock tests: writers fill 'data' with a non-zero
 * pattern and restore it to zero before unlocking; readers verify it is
 * all-zero.  Any other observation means the lock failed to exclude.
 */
static struct {
	rte_rwlock_t lock;	/* protects 'data' */
	uint64_t tick;		/* test duration, in timer cycles */

	volatile union {
		uint8_t u8[RTE_CACHE_LINE_SIZE];
		uint64_t u64[RTE_CACHE_LINE_SIZE / sizeof(uint64_t)];
	} data;
} __rte_cache_aligned try_rwlock_data;

/* per-lcore results for the try-lock tests */
struct try_rwlock_lcore {
	int32_t rc;		/* first fatal error encountered, or 0 */
	int32_t type;		/* LC_TYPE_RDLOCK or LC_TYPE_WRLOCK */
	struct {
		uint64_t tick;		/* cycles spent in the test loop */
		uint64_t fail;		/* trylock attempts that got -EBUSY */
		uint64_t success;	/* trylock attempts that succeeded */
	} stat;
} __rte_cache_aligned;

static struct try_rwlock_lcore try_lcore_data[RTE_MAX_LCORE];
67
68 static int
69 test_rwlock_per_core(__rte_unused void *arg)
70 {
71         rte_rwlock_write_lock(&sl);
72         printf("Global write lock taken on core %u\n", rte_lcore_id());
73         rte_rwlock_write_unlock(&sl);
74
75         rte_rwlock_write_lock(&sl_tab[rte_lcore_id()]);
76         printf("Hello from core %u !\n", rte_lcore_id());
77         rte_rwlock_write_unlock(&sl_tab[rte_lcore_id()]);
78
79         rte_rwlock_read_lock(&sl);
80         printf("Global read lock taken on core %u\n", rte_lcore_id());
81         rte_delay_ms(100);
82         printf("Release global read lock on core %u\n", rte_lcore_id());
83         rte_rwlock_read_unlock(&sl);
84
85         return 0;
86 }
87
/* state for the rwlock perf test */
static rte_rwlock_t lk = RTE_RWLOCK_INITIALIZER;
static volatile uint64_t rwlock_data;			/* incremented under write lock */
static uint64_t time_count[RTE_MAX_LCORE] = {0};	/* per-lcore elapsed time, us */

#define MAX_LOOP 10000		/* perf-loop iterations per lcore */
#define TEST_RWLOCK_DEBUG 0	/* set to 1 for a periodic progress printout */
94
95 static int
96 load_loop_fn(__rte_unused void *arg)
97 {
98         uint64_t time_diff = 0, begin;
99         uint64_t hz = rte_get_timer_hz();
100         uint64_t lcount = 0;
101         const unsigned int lcore = rte_lcore_id();
102
103         /* wait synchro for workers */
104         if (lcore != rte_get_main_lcore())
105                 while (rte_atomic32_read(&synchro) == 0)
106                         ;
107
108         begin = rte_rdtsc_precise();
109         while (lcount < MAX_LOOP) {
110                 rte_rwlock_write_lock(&lk);
111                 ++rwlock_data;
112                 rte_rwlock_write_unlock(&lk);
113
114                 rte_rwlock_read_lock(&lk);
115                 if (TEST_RWLOCK_DEBUG && !(lcount % 100))
116                         printf("Core [%u] rwlock_data = %"PRIu64"\n",
117                                 lcore, rwlock_data);
118                 rte_rwlock_read_unlock(&lk);
119
120                 lcount++;
121                 /* delay to make lock duty cycle slightly realistic */
122                 rte_pause();
123         }
124
125         time_diff = rte_rdtsc_precise() - begin;
126         time_count[lcore] = time_diff * 1000000 / hz;
127         return 0;
128 }
129
130 static int
131 test_rwlock_perf(void)
132 {
133         unsigned int i;
134         uint64_t total = 0;
135
136         printf("\nRwlock Perf Test on %u cores...\n", rte_lcore_count());
137
138         /* clear synchro and start workers */
139         rte_atomic32_set(&synchro, 0);
140         if (rte_eal_mp_remote_launch(load_loop_fn, NULL, SKIP_MAIN) < 0)
141                 return -1;
142
143         /* start synchro and launch test on main */
144         rte_atomic32_set(&synchro, 1);
145         load_loop_fn(NULL);
146
147         rte_eal_mp_wait_lcore();
148
149         RTE_LCORE_FOREACH(i) {
150                 printf("Core [%u] cost time = %"PRIu64" us\n",
151                         i, time_count[i]);
152                 total += time_count[i];
153         }
154
155         printf("Total cost time = %"PRIu64" us\n", total);
156         memset(time_count, 0, sizeof(time_count));
157
158         return 0;
159 }
160
161 /*
162  * - There is a global rwlock and a table of rwlocks (one per lcore).
163  *
164  * - The test function takes all of these locks and launches the
165  *   ``test_rwlock_per_core()`` function on each core (except the main).
166  *
167  *   - The function takes the global write lock, display something,
168  *     then releases the global lock.
169  *   - Then, it takes the per-lcore write lock, display something, and
170  *     releases the per-core lock.
171  *   - Finally, a read lock is taken during 100 ms, then released.
172  *
173  * - The main function unlocks the per-lcore locks sequentially and
174  *   waits between each lock. This triggers the display of a message
175  *   for each core, in the correct order.
176  *
177  *   Then, it tries to take the global write lock and display the last
178  *   message. The autotest script checks that the message order is correct.
179  */
/* Functional test: see the comment block above for the full scenario. */
static int
rwlock_test1(void)
{
	int i;

	/* start from a clean state: global lock plus one lock per lcore */
	rte_rwlock_init(&sl);
	for (i = 0; i < RTE_MAX_LCORE; i++)
		rte_rwlock_init(&sl_tab[i]);

	rte_rwlock_write_lock(&sl);

	/* take each worker's lock before launching it, so the worker stalls
	 * until its lock is released below */
	RTE_LCORE_FOREACH_WORKER(i) {
		rte_rwlock_write_lock(&sl_tab[i]);
		rte_eal_remote_launch(test_rwlock_per_core, NULL, i);
	}

	rte_rwlock_write_unlock(&sl);

	/* release the workers one by one; the delay between releases makes
	 * the per-core messages appear in a deterministic order */
	RTE_LCORE_FOREACH_WORKER(i) {
		rte_rwlock_write_unlock(&sl_tab[i]);
		rte_delay_ms(100);
	}

	rte_rwlock_write_lock(&sl);
	/* this message should be the last message of test */
	printf("Global write lock taken on main core %u\n", rte_lcore_id());
	rte_rwlock_write_unlock(&sl);

	rte_eal_mp_wait_lcore();

	if (test_rwlock_perf() < 0)
		return -1;

	return 0;
}
215
216 static int
217 try_read(uint32_t lc)
218 {
219         int32_t rc;
220         uint32_t i;
221
222         rc = rte_rwlock_read_trylock(&try_rwlock_data.lock);
223         if (rc != 0)
224                 return rc;
225
226         for (i = 0; i != RTE_DIM(try_rwlock_data.data.u64); i++) {
227
228                 /* race condition occurred, lock doesn't work properly */
229                 if (try_rwlock_data.data.u64[i] != 0) {
230                         printf("%s(%u) error: unexpected data pattern\n",
231                                 __func__, lc);
232                         rte_memdump(stdout, NULL,
233                                 (void *)(uintptr_t)&try_rwlock_data.data,
234                                 sizeof(try_rwlock_data.data));
235                         rc = -EFAULT;
236                         break;
237                 }
238         }
239
240         rte_rwlock_read_unlock(&try_rwlock_data.lock);
241         return rc;
242 }
243
/*
 * Attempt a write-side critical section: trylock, fill the shared data
 * with a per-lcore byte pattern (checking it was all-zero first), then
 * restore it to zero (checking the pattern survived), and unlock.
 * Returns 0 on success, -EBUSY if the lock was held, -EFAULT if a race
 * (corrupted data) was observed.
 */
static int
try_write(uint32_t lc)
{
	int32_t rc;
	uint32_t i, v;

	/* non-zero byte pattern derived from the lcore id */
	v = RTE_MAX(lc % UINT8_MAX, 1U);

	rc = rte_rwlock_write_trylock(&try_rwlock_data.lock);
	if (rc != 0)
		return rc;

	/* update by bytes in reverse order */
	for (i = RTE_DIM(try_rwlock_data.data.u8); i-- != 0; ) {

		/* race condition occurred, lock doesn't work properly */
		if (try_rwlock_data.data.u8[i] != 0) {
			printf("%s:%d(%u) error: unexpected data pattern\n",
				__func__, __LINE__, lc);
			rte_memdump(stdout, NULL,
				(void *)(uintptr_t)&try_rwlock_data.data,
				sizeof(try_rwlock_data.data));
			rc = -EFAULT;
			break;
		}

		try_rwlock_data.data.u8[i] = v;
	}

	/* restore by bytes in reverse order */
	for (i = RTE_DIM(try_rwlock_data.data.u8); i-- != 0; ) {

		/* race condition occurred, lock doesn't work properly */
		if (try_rwlock_data.data.u8[i] != v) {
			printf("%s:%d(%u) error: unexpected data pattern\n",
				__func__, __LINE__, lc);
			rte_memdump(stdout, NULL,
				(void *)(uintptr_t)&try_rwlock_data.data,
				sizeof(try_rwlock_data.data));
			rc = -EFAULT;
			break;
		}

		try_rwlock_data.data.u8[i] = 0;
	}

	rte_rwlock_write_unlock(&try_rwlock_data.lock);
	return rc;
}
293
294 static int
295 try_read_lcore(__rte_unused void *data)
296 {
297         int32_t rc;
298         uint32_t i, lc;
299         uint64_t ftm, stm, tm;
300         struct try_rwlock_lcore *lcd;
301
302         lc = rte_lcore_id();
303         lcd = try_lcore_data + lc;
304         lcd->type = LC_TYPE_RDLOCK;
305
306         ftm = try_rwlock_data.tick;
307         stm = rte_get_timer_cycles();
308
309         do {
310                 for (i = 0; i != ITER_NUM; i++) {
311                         rc = try_read(lc);
312                         if (rc == 0)
313                                 lcd->stat.success++;
314                         else if (rc == -EBUSY)
315                                 lcd->stat.fail++;
316                         else
317                                 break;
318                         rc = 0;
319                 }
320                 tm = rte_get_timer_cycles() - stm;
321         } while (tm < ftm && rc == 0);
322
323         lcd->rc = rc;
324         lcd->stat.tick = tm;
325         return rc;
326 }
327
/*
 * Per-lcore driver for the write-side try-lock test: hammer try_write()
 * in batches of ITER_NUM until the configured duration elapses or a
 * fatal error occurs, accumulating success/fail counters.
 */
static int
try_write_lcore(__rte_unused void *data)
{
	int32_t rc;
	uint32_t i, lc;
	uint64_t ftm, stm, tm;
	struct try_rwlock_lcore *lcd;

	lc = rte_lcore_id();
	lcd = try_lcore_data + lc;
	lcd->type = LC_TYPE_WRLOCK;

	/* ftm: test duration in cycles; stm: loop start timestamp */
	ftm = try_rwlock_data.tick;
	stm = rte_get_timer_cycles();

	do {
		for (i = 0; i != ITER_NUM; i++) {
			rc = try_write(lc);
			if (rc == 0)
				lcd->stat.success++;
			else if (rc == -EBUSY)	/* contention, not an error */
				lcd->stat.fail++;
			else
				break;
			rc = 0;
		}
		tm = rte_get_timer_cycles() - stm;
	} while (tm < ftm && rc == 0);

	lcd->rc = rc;
	lcd->stat.tick = tm;
	return rc;
}
361
362 static void
363 print_try_lcore_stats(const struct try_rwlock_lcore *tlc, uint32_t lc)
364 {
365         uint64_t f, s;
366
367         f = RTE_MAX(tlc->stat.fail, 1ULL);
368         s = RTE_MAX(tlc->stat.success, 1ULL);
369
370         printf("try_lcore_data[%u]={\n"
371                 "\trc=%d,\n"
372                 "\ttype=%s,\n"
373                 "\tfail=%" PRIu64 ",\n"
374                 "\tsuccess=%" PRIu64 ",\n"
375                 "\tcycles=%" PRIu64 ",\n"
376                 "\tcycles/op=%#Lf,\n"
377                 "\tcycles/success=%#Lf,\n"
378                 "\tsuccess/fail=%#Lf,\n"
379                 "};\n",
380                 lc,
381                 tlc->rc,
382                 tlc->type == LC_TYPE_RDLOCK ? "RDLOCK" : "WRLOCK",
383                 tlc->stat.fail,
384                 tlc->stat.success,
385                 tlc->stat.tick,
386                 (long double)tlc->stat.tick /
387                 (tlc->stat.fail + tlc->stat.success),
388                 (long double)tlc->stat.tick / s,
389                 (long double)tlc->stat.success / f);
390 }
391
392 static void
393 collect_try_lcore_stats(struct try_rwlock_lcore *tlc,
394         const struct try_rwlock_lcore *lc)
395 {
396         tlc->stat.tick += lc->stat.tick;
397         tlc->stat.fail += lc->stat.fail;
398         tlc->stat.success += lc->stat.success;
399 }
400
401 /*
402  * Process collected results:
403  *  - check status
404  *  - collect and print statistics
405  */
406 static int
407 process_try_lcore_stats(void)
408 {
409         int32_t rc;
410         uint32_t lc, rd, wr;
411         struct try_rwlock_lcore rlc, wlc;
412
413         memset(&rlc, 0, sizeof(rlc));
414         memset(&wlc, 0, sizeof(wlc));
415
416         rlc.type = LC_TYPE_RDLOCK;
417         wlc.type = LC_TYPE_WRLOCK;
418         rd = 0;
419         wr = 0;
420
421         rc = 0;
422         RTE_LCORE_FOREACH(lc) {
423                 rc |= try_lcore_data[lc].rc;
424                 if (try_lcore_data[lc].type == LC_TYPE_RDLOCK) {
425                         collect_try_lcore_stats(&rlc, try_lcore_data + lc);
426                         rd++;
427                 } else {
428                         collect_try_lcore_stats(&wlc, try_lcore_data + lc);
429                         wr++;
430                 }
431         }
432
433         if (rc == 0) {
434                 RTE_LCORE_FOREACH(lc)
435                         print_try_lcore_stats(try_lcore_data + lc, lc);
436
437                 if (rd != 0) {
438                         printf("aggregated stats for %u RDLOCK cores:\n", rd);
439                         print_try_lcore_stats(&rlc, rd);
440                 }
441
442                 if (wr != 0) {
443                         printf("aggregated stats for %u WRLOCK cores:\n", wr);
444                         print_try_lcore_stats(&wlc, wr);
445                 }
446         }
447
448         return rc;
449 }
450
451 static void
452 try_test_reset(void)
453 {
454         memset(&try_lcore_data, 0, sizeof(try_lcore_data));
455         memset(&try_rwlock_data, 0, sizeof(try_rwlock_data));
456         try_rwlock_data.tick = TEST_SEC * rte_get_tsc_hz();
457 }
458
459 /* all lcores grab RDLOCK */
460 static int
461 try_rwlock_test_rda(void)
462 {
463         try_test_reset();
464
465         /* start read test on all available lcores */
466         rte_eal_mp_remote_launch(try_read_lcore, NULL, CALL_MAIN);
467         rte_eal_mp_wait_lcore();
468
469         return process_try_lcore_stats();
470 }
471
472 /* all worker lcores grab RDLOCK, main one grabs WRLOCK */
473 static int
474 try_rwlock_test_rds_wrm(void)
475 {
476         try_test_reset();
477
478         rte_eal_mp_remote_launch(try_read_lcore, NULL, SKIP_MAIN);
479         try_write_lcore(NULL);
480         rte_eal_mp_wait_lcore();
481
482         return process_try_lcore_stats();
483 }
484
485 /* main and even worker lcores grab RDLOCK, odd lcores grab WRLOCK */
486 static int
487 try_rwlock_test_rde_wro(void)
488 {
489         uint32_t lc, mlc;
490
491         try_test_reset();
492
493         mlc = rte_get_main_lcore();
494
495         RTE_LCORE_FOREACH(lc) {
496                 if (lc != mlc) {
497                         if ((lc & 1) == 0)
498                                 rte_eal_remote_launch(try_read_lcore,
499                                                 NULL, lc);
500                         else
501                                 rte_eal_remote_launch(try_write_lcore,
502                                                 NULL, lc);
503                 }
504         }
505         try_read_lcore(NULL);
506         rte_eal_mp_wait_lcore();
507
508         return process_try_lcore_stats();
509 }
510
511 static int
512 test_rwlock(void)
513 {
514         uint32_t i;
515         int32_t rc, ret;
516
517         static const struct {
518                 const char *name;
519                 int (*ftst)(void);
520         } test[] = {
521                 {
522                         .name = "rwlock_test1",
523                         .ftst = rwlock_test1,
524                 },
525                 {
526                         .name = "try_rwlock_test_rda",
527                         .ftst = try_rwlock_test_rda,
528                 },
529                 {
530                         .name = "try_rwlock_test_rds_wrm",
531                         .ftst = try_rwlock_test_rds_wrm,
532                 },
533                 {
534                         .name = "try_rwlock_test_rde_wro",
535                         .ftst = try_rwlock_test_rde_wro,
536                 },
537         };
538
539         ret = 0;
540         for (i = 0; i != RTE_DIM(test); i++) {
541                 printf("starting test %s;\n", test[i].name);
542                 rc = test[i].ftst();
543                 printf("test %s completed with status %d\n", test[i].name, rc);
544                 ret |= rc;
545         }
546
547         return ret;
548 }
549
550 REGISTER_TEST_COMMAND(rwlock_autotest, test_rwlock);
551
552 /* subtests used in meson for CI */
553 REGISTER_TEST_COMMAND(rwlock_test1_autotest, rwlock_test1);
554 REGISTER_TEST_COMMAND(rwlock_rda_autotest, try_rwlock_test_rda);
555 REGISTER_TEST_COMMAND(rwlock_rds_wrm_autotest, try_rwlock_test_rds_wrm);
556 REGISTER_TEST_COMMAND(rwlock_rde_wro_autotest, try_rwlock_test_rde_wro);