1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
3 * Copyright(c) 2019 Arm Limited
10 #include <sys/queue.h>
12 #include <rte_memory.h>
13 #include <rte_per_lcore.h>
14 #include <rte_launch.h>
15 #include <rte_atomic.h>
17 #include <rte_lcore.h>
18 #include <rte_random.h>
19 #include <rte_hash_crc.h>
27 * - The main test function performs several subtests. The first
28 * checks that the usual inc/dec/add/sub functions are working correctly:
31 * - Initialize 16-bit, 32-bit and 64-bit atomic variables to specific
34 * - These variables are incremented and decremented on each core at
35 * the same time in ``test_atomic_usual()``.
37 * - The function checks that once all lcores have finished their loops,
38 * the values of the atomic variables are unchanged.
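 *
 *   As an illustration (this mirrors the loops in ``test_atomic_usual()``),
 *   each lcore performs a balanced sequence of operations, so its net
 *   effect on every variable is zero::
 *
 *     N * (+1)  +  N * (-1)  +  (N/5) * (+5)  +  (N/5) * (-5)  ==  0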
40 * - Test "test and set" functions.
42 * - Initialize 16-bit, 32-bit and 64-bit atomic variables to zero.
44 * - Invoke ``test_atomic_tas()`` on each lcore. Before doing anything
45 * else, the cores wait for a synchronization signal using ``while
46 * (rte_atomic32_read(&synchro) == 0)``, which is released by the main
47 * test function. Then all cores do a
48 * ``rte_atomicXX_test_and_set()`` at the same time. If it is successful,
49 * it increments another atomic counter.
51 * - The main function checks that the atomic counter was incremented
52 * exactly three times (once each for the 16-bit, 32-bit and 64-bit variables).
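 *
 *   For illustration (a sketch, not part of the test code), only the first
 *   test-and-set on a zeroed variable succeeds::
 *
 *     rte_atomic32_set(&v, 0);
 *     ret = rte_atomic32_test_and_set(&v);    first caller: ret == 1, v == 1
 *     ret = rte_atomic32_test_and_set(&v);    later callers: ret == 0, v == 1
 *
 *   so exactly one lcore per variable manages to increment the counter.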
54 * - Test "add/sub and return" functions
56 * - Initialize 16-bit, 32-bit and 64-bit atomic variables to zero.
58 * - Invoke ``test_atomic_addsub_and_return()`` on each lcore. Before doing
59 * anything else, the cores wait for a synchronization signal. Each lcore
60 * then repeats this operation several times::
62 * tmp = rte_atomicXX_add_return(&a, 1);
63 * atomic_add(&count, tmp);
64 * tmp = rte_atomicXX_sub_return(&a, 1);
65 * atomic_sub(&count, tmp+1);
67 * - At the end of the test, the *count* value must be 0.
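 *
 *   For illustration, one possible interleaving of two lcores A and B,
 *   one iteration each, with *a* starting at 0::
 *
 *     A: add_return(&a, 1) == 1    ->  count += 1
 *     B: add_return(&a, 1) == 2    ->  count += 2
 *     A: sub_return(&a, 1) == 1    ->  count -= 1 + 1
 *     B: sub_return(&a, 1) == 0    ->  count -= 0 + 1
 *
 *   leaving *count* at 0, as any interleaving does.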
69 * - Test "128-bit compare and swap" (aarch64 and x86_64 only)
71 * - Initialize 128-bit atomic variables to zero.
73 * - Invoke ``test_atomic128_cmp_exchange()`` on each lcore. Before doing
74 * anything else, the cores wait for a synchronization signal. Each lcore does
75 * these compare and swap (CAS) operations several times::
77 * Acquire CAS: counter.val[0] += 2; counter.val[1] += 1;
78 * Release CAS: counter.val[0] += 2; counter.val[1] += 1;
79 * Acquire-release CAS: counter.val[0] += 2; counter.val[1] += 1;
80 * Relaxed CAS: counter.val[0] += 2; counter.val[1] += 1;
82 * - At the end of the test, the first and second 64-bit halves of
83 * *count128* must differ by the total number of successful CAS operations.
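 *
 *   Each successful CAS increases val[0] by 2 and val[1] by 1, so their
 *   difference grows by exactly 1 per CAS. With 4 CAS operations per
 *   iteration, N iterations and W slave lcores this gives::
 *
 *     count128.val[0] - count128.val[1] == 4 * N * W
 *
 *   which is what the main test function verifies.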
85 * - Test "atomic exchange" functions
87 * - Create 16-bit, 32-bit and 64-bit tokens that can be checked for data integrity.
89 * - Invoke ``test_atomic_exchange`` on each lcore. Before doing
90 * anything else, the cores wait for a synchronization event.
91 * Each core then does the following for N iterations:
93 * Generate a new token with a data integrity check
94 * Exchange the new token for the previously generated token
95 * Increment a counter if a corrupt token was received
97 * - At the end of the test, the number of corrupted tokens must be 0.
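 *
 *   For the 64-bit token, one iteration looks like this (taken from
 *   ``test_atomic_exchange()`` below)::
 *
 *     nt64.u64 = rte_rand();
 *     nt64.u8[7] = get_crc8(&nt64.u8[0], sizeof(nt64) - 1);
 *     ot64.u64 = rte_atomic64_exchange(&token64, nt64.u64);
 *     if (ot64.u8[7] != get_crc8(&ot64.u8[0], sizeof(ot64) - 1))
 *         rte_atomic64_inc(&count);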
100 #define NUM_ATOMIC_TYPES 3
104 static rte_atomic16_t a16;
105 static rte_atomic32_t a32;
106 static rte_atomic64_t a64;
107 static rte_atomic64_t count;
108 static rte_atomic32_t synchro;
111 test_atomic_usual(__rte_unused void *arg)
115 while (rte_atomic32_read(&synchro) == 0)
118 for (i = 0; i < N; i++)
119 rte_atomic16_inc(&a16);
120 for (i = 0; i < N; i++)
121 rte_atomic16_dec(&a16);
122 for (i = 0; i < (N / 5); i++)
123 rte_atomic16_add(&a16, 5);
124 for (i = 0; i < (N / 5); i++)
125 rte_atomic16_sub(&a16, 5);
127 for (i = 0; i < N; i++)
128 rte_atomic32_inc(&a32);
129 for (i = 0; i < N; i++)
130 rte_atomic32_dec(&a32);
131 for (i = 0; i < (N / 5); i++)
132 rte_atomic32_add(&a32, 5);
133 for (i = 0; i < (N / 5); i++)
134 rte_atomic32_sub(&a32, 5);
136 for (i = 0; i < N; i++)
137 rte_atomic64_inc(&a64);
138 for (i = 0; i < N; i++)
139 rte_atomic64_dec(&a64);
140 for (i = 0; i < (N / 5); i++)
141 rte_atomic64_add(&a64, 5);
142 for (i = 0; i < (N / 5); i++)
143 rte_atomic64_sub(&a64, 5);
149 test_atomic_tas(__rte_unused void *arg)
151 while (rte_atomic32_read(&synchro) == 0)
154 if (rte_atomic16_test_and_set(&a16))
155 rte_atomic64_inc(&count);
156 if (rte_atomic32_test_and_set(&a32))
157 rte_atomic64_inc(&count);
158 if (rte_atomic64_test_and_set(&a64))
159 rte_atomic64_inc(&count);
165 test_atomic_addsub_and_return(__rte_unused void *arg)
172 while (rte_atomic32_read(&synchro) == 0)
175 for (i = 0; i < N; i++) {
176 tmp16 = rte_atomic16_add_return(&a16, 1);
177 rte_atomic64_add(&count, tmp16);
179 tmp16 = rte_atomic16_sub_return(&a16, 1);
180 rte_atomic64_sub(&count, tmp16+1);
182 tmp32 = rte_atomic32_add_return(&a32, 1);
183 rte_atomic64_add(&count, tmp32);
185 tmp32 = rte_atomic32_sub_return(&a32, 1);
186 rte_atomic64_sub(&count, tmp32+1);
188 tmp64 = rte_atomic64_add_return(&a64, 1);
189 rte_atomic64_add(&count, tmp64);
191 tmp64 = rte_atomic64_sub_return(&a64, 1);
192 rte_atomic64_sub(&count, tmp64+1);
199 * rte_atomic32_inc_and_test() atomically increments a 32-bit counter by one
200 * and then tests whether the result is 0, returning true if it is and false
201 * otherwise. rte_atomic16_inc_and_test() and rte_atomic64_inc_and_test() do
202 * the same for 16-bit and 64-bit counters.
203 * This function checks whether each counter equals 0 after being atomically
204 * incremented by one. If it does, the shared "count" variable is incremented;
205 * it is checked against the expected result later.
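 *
 * For illustration (a sketch, not part of the test code):
 *
 *   rte_atomic32_set(&v, -1);
 *   rte_atomic32_inc_and_test(&v);    v becomes 0, returns true
 *   rte_atomic32_inc_and_test(&v);    v becomes 1, returns false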
209 test_atomic_inc_and_test(__rte_unused void *arg)
211 while (rte_atomic32_read(&synchro) == 0)
214 if (rte_atomic16_inc_and_test(&a16)) {
215 rte_atomic64_inc(&count);
217 if (rte_atomic32_inc_and_test(&a32)) {
218 rte_atomic64_inc(&count);
220 if (rte_atomic64_inc_and_test(&a64)) {
221 rte_atomic64_inc(&count);
228 * rte_atomicXX_dec_and_test() atomically decrements a counter by one and then
229 * tests whether the result is 0. It returns true if the counter is 0
230 * and false otherwise.
231 * This test checks whether each counter equals 0 after being atomically
232 * decremented by one. If it does, the "count" variable is incremented; it is
233 * checked against the expected result later.
236 test_atomic_dec_and_test(__rte_unused void *arg)
238 while (rte_atomic32_read(&synchro) == 0)
241 if (rte_atomic16_dec_and_test(&a16))
242 rte_atomic64_inc(&count);
244 if (rte_atomic32_dec_and_test(&a32))
245 rte_atomic64_inc(&count);
247 if (rte_atomic64_dec_and_test(&a64))
248 rte_atomic64_inc(&count);
253 #if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)
254 static rte_int128_t count128;
257 * In this test, each successful rte_atomic128_cmp_exchange() adds 2 to the
258 * first 64 bits of a 128-bit counter and 1 to its second 64 bits. The call
259 * returns true if the compare-exchange operation succeeds.
260 * The test repeats the 128-bit compare-and-swap for N rounds; each iteration
261 * performs four such operations, one for each memory ordering.
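 *
 * The retry loops below rely on rte_atomic128_cmp_exchange() writing the
 * current value of the destination back into *expected* when it fails, so
 * each retry recomputes *desired* from fresh data. A condensed sketch of
 * one such loop::
 *
 *   do {
 *       desired.val[0] = expected.val[0] + 2;
 *       desired.val[1] = expected.val[1] + 1;
 *   } while (!rte_atomic128_cmp_exchange(&count128, &expected, &desired,
 *            1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));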
264 test_atomic128_cmp_exchange(__rte_unused void *arg)
266 rte_int128_t expected;
270 while (rte_atomic32_read(&synchro) == 0)
275 for (i = 0; i < N; i++) {
277 rte_int128_t desired;
279 desired.val[0] = expected.val[0] + 2;
280 desired.val[1] = expected.val[1] + 1;
282 success = rte_atomic128_cmp_exchange(&count128,
283 &expected, &desired, 1,
284 __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
285 } while (success == 0);
288 rte_int128_t desired;
290 desired.val[0] = expected.val[0] + 2;
291 desired.val[1] = expected.val[1] + 1;
293 success = rte_atomic128_cmp_exchange(&count128,
294 &expected, &desired, 1,
295 __ATOMIC_RELEASE, __ATOMIC_RELAXED);
296 } while (success == 0);
299 rte_int128_t desired;
301 desired.val[0] = expected.val[0] + 2;
302 desired.val[1] = expected.val[1] + 1;
304 success = rte_atomic128_cmp_exchange(&count128,
305 &expected, &desired, 1,
306 __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
307 } while (success == 0);
310 rte_int128_t desired;
312 desired.val[0] = expected.val[0] + 2;
313 desired.val[1] = expected.val[1] + 1;
315 success = rte_atomic128_cmp_exchange(&count128,
316 &expected, &desired, 1,
317 __ATOMIC_RELAXED, __ATOMIC_RELAXED);
318 } while (success == 0);
326 * Helper definitions/variables/functions for
327 * atomic exchange tests
347 const uint8_t CRC8_POLY = 0x91;
348 uint8_t crc8_table[256];
350 volatile uint16_t token16;
351 volatile uint32_t token32;
352 volatile uint64_t token64;
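/*
 * build_crc8_table() fills crc8_table[] for the CRC8_POLY polynomial, and
 * get_crc8() computes an 8-bit CRC over a message by table lookup. The
 * exchange test appends this CRC to random data so that a corrupted token
 * can be detected after an atomic exchange.
 */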
355 build_crc8_table(void)
360 for (i = 0; i < 256; i++) {
362 for (j = 0; j < 8; j++) {
372 get_crc8(uint8_t *message, int length)
377 for (i = 0; i < length; i++)
378 crc = crc8_table[crc ^ message[i]];
383 * The atomic exchange test sets up a token in memory and
384 * then spins up multiple lcores whose job is to generate
385 * new tokens, exchange that new token for the old one held
386 * in memory, and then verify that the old token is still
387 * valid (i.e. the exchange did not corrupt the token).
389 * A token is made up of random data and 8 bits of crc
390 * covering that random data. The following is an example
391 * of a 64-bit token, with the crc8 held in the most
392 * significant byte:
393 * +------------+------------+
394 * | 63      56 | 55       0 |
395 * +------------+------------+
396 * |    crc8    | random data|
397 * +------------+------------+
400 test_atomic_exchange(__rte_unused void *arg)
403 test16_t nt16, ot16; /* new token, old token */
407 /* Wait until all of the other threads have been dispatched */
408 while (rte_atomic32_read(&synchro) == 0)
412 * Let the battle begin! Every thread attempts to steal the current
413 * token with an atomic exchange operation and install its own newly
414 * generated token. If the old token is valid (i.e. it has the
415 * appropriate crc8 checksum for the data) then the test iteration has
416 * passed. If the token is invalid, increment the counter.
418 for (i = 0; i < N; i++) {
420 /* Test 64bit Atomic Exchange */
421 nt64.u64 = rte_rand();
422 nt64.u8[7] = get_crc8(&nt64.u8[0], sizeof(nt64) - 1);
423 ot64.u64 = rte_atomic64_exchange(&token64, nt64.u64);
424 if (ot64.u8[7] != get_crc8(&ot64.u8[0], sizeof(ot64) - 1))
425 rte_atomic64_inc(&count);
427 /* Test 32bit Atomic Exchange */
428 nt32.u32 = (uint32_t)rte_rand();
429 nt32.u8[3] = get_crc8(&nt32.u8[0], sizeof(nt32) - 1);
430 ot32.u32 = rte_atomic32_exchange(&token32, nt32.u32);
431 if (ot32.u8[3] != get_crc8(&ot32.u8[0], sizeof(ot32) - 1))
432 rte_atomic64_inc(&count);
434 /* Test 16bit Atomic Exchange */
435 nt16.u16 = (uint16_t)rte_rand();
436 nt16.u8[1] = get_crc8(&nt16.u8[0], sizeof(nt16) - 1);
437 ot16.u16 = rte_atomic16_exchange(&token16, nt16.u16);
438 if (ot16.u8[1] != get_crc8(&ot16.u8[0], sizeof(ot16) - 1))
439 rte_atomic64_inc(&count);
447 rte_atomic16_init(&a16);
448 rte_atomic32_init(&a32);
449 rte_atomic64_init(&a64);
450 rte_atomic64_init(&count);
451 rte_atomic32_init(&synchro);
453 rte_atomic16_set(&a16, 1UL << 10);
454 rte_atomic32_set(&a32, 1UL << 10);
455 rte_atomic64_set(&a64, 1ULL << 33);
457 printf("usual inc/dec/add/sub functions\n");
459 rte_eal_mp_remote_launch(test_atomic_usual, NULL, SKIP_MASTER);
460 rte_atomic32_set(&synchro, 1);
461 rte_eal_mp_wait_lcore();
462 rte_atomic32_set(&synchro, 0);
464 if (rte_atomic16_read(&a16) != 1UL << 10) {
465 printf("Atomic16 usual functions failed\n");
469 if (rte_atomic32_read(&a32) != 1UL << 10) {
470 printf("Atomic32 usual functions failed\n");
474 if (rte_atomic64_read(&a64) != 1ULL << 33) {
475 printf("Atomic64 usual functions failed\n");
479 printf("test and set\n");
481 rte_atomic64_set(&a64, 0);
482 rte_atomic32_set(&a32, 0);
483 rte_atomic16_set(&a16, 0);
484 rte_atomic64_set(&count, 0);
485 rte_eal_mp_remote_launch(test_atomic_tas, NULL, SKIP_MASTER);
486 rte_atomic32_set(&synchro, 1);
487 rte_eal_mp_wait_lcore();
488 rte_atomic32_set(&synchro, 0);
490 if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) {
491 printf("Atomic test and set failed\n");
495 printf("add/sub and return\n");
497 rte_atomic64_set(&a64, 0);
498 rte_atomic32_set(&a32, 0);
499 rte_atomic16_set(&a16, 0);
500 rte_atomic64_set(&count, 0);
501 rte_eal_mp_remote_launch(test_atomic_addsub_and_return, NULL,
503 rte_atomic32_set(&synchro, 1);
504 rte_eal_mp_wait_lcore();
505 rte_atomic32_set(&synchro, 0);
507 if (rte_atomic64_read(&count) != 0) {
508 printf("Atomic add/sub+return failed\n");
513 * Set a64, a32 and a16 to the same value: minus the number of slave
514 * lcores. Then launch all slave lcores, each of which atomically
515 * increments each variable by one and tests the result.
516 * Each lcore gets exactly one chance to increment a64 by one and
517 * check whether the result is 0, and only one lcore should find
518 * that it is 0. The same holds for a32 and a16.
519 * The "count" variable, initialized to zero, is incremented by
520 * one whenever a64, a32 or a16 is 0 after being atomically
521 * incremented and tested.
522 * Finally, check that "count" equals 3 to confirm that every slave
523 * lcore performed "atomic inc and test" correctly.
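 *
 * For example (a worked case, not extra test code): with 3 slave lcores,
 * a64 starts at -3; the three atomic increments observe -2, -1 and 0 in
 * some order, so exactly one lcore sees 0 and "count" gains exactly one
 * per variable, i.e. 3 in total.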
525 printf("inc and test\n");
527 rte_atomic64_clear(&a64);
528 rte_atomic32_clear(&a32);
529 rte_atomic16_clear(&a16);
530 rte_atomic32_clear(&synchro);
531 rte_atomic64_clear(&count);
533 rte_atomic64_set(&a64, (int64_t)(1 - (int64_t)rte_lcore_count()));
534 rte_atomic32_set(&a32, (int32_t)(1 - (int32_t)rte_lcore_count()));
535 rte_atomic16_set(&a16, (int16_t)(1 - (int16_t)rte_lcore_count()));
536 rte_eal_mp_remote_launch(test_atomic_inc_and_test, NULL, SKIP_MASTER);
537 rte_atomic32_set(&synchro, 1);
538 rte_eal_mp_wait_lcore();
539 rte_atomic32_clear(&synchro);
541 if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) {
542 printf("Atomic inc and test failed %d\n", (int)count.cnt);
547 * Same as above, but this time the variables are set to the number of
548 * slave lcores, and we decrement instead of increment.
550 printf("dec and test\n");
552 rte_atomic32_clear(&synchro);
553 rte_atomic64_clear(&count);
555 rte_atomic64_set(&a64, (int64_t)(rte_lcore_count() - 1));
556 rte_atomic32_set(&a32, (int32_t)(rte_lcore_count() - 1));
557 rte_atomic16_set(&a16, (int16_t)(rte_lcore_count() - 1));
558 rte_eal_mp_remote_launch(test_atomic_dec_and_test, NULL, SKIP_MASTER);
559 rte_atomic32_set(&synchro, 1);
560 rte_eal_mp_wait_lcore();
561 rte_atomic32_clear(&synchro);
563 if (rte_atomic64_read(&count) != NUM_ATOMIC_TYPES) {
564 printf("Atomic dec and test failed\n");
568 #if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)
570 * This case tests the functionality of the rte_atomic128_cmp_exchange
571 * API. It calls rte_atomic128_cmp_exchange with four kinds of memory
572 * ordering successively on each slave core. Each successful 128-bit
573 * compare-and-swap operation adds 2 to the first 64 bits of the global
574 * 128-bit counter and 1 to its second 64 bits. Each slave core
575 * iterates this test N times.
576 * At the end of the test, verify that the first and second 64-bit halves
577 * of the counter differ by 4 * N * (number of slave lcores). If they
578 * do, the test passes.
580 printf("128-bit compare and swap test\n");
581 uint64_t iterations = 0;
583 rte_atomic32_clear(&synchro);
587 rte_eal_mp_remote_launch(test_atomic128_cmp_exchange, NULL,
589 rte_atomic32_set(&synchro, 1);
590 rte_eal_mp_wait_lcore();
591 rte_atomic32_clear(&synchro);
593 iterations = count128.val[0] - count128.val[1];
594 if (iterations != 4*N*(rte_lcore_count()-1)) {
595 printf("128-bit compare and swap failed\n");
601 * Test 16/32/64bit atomic exchange.
605 printf("exchange test\n");
607 rte_atomic32_clear(&synchro);
608 rte_atomic64_clear(&count);
610 /* Generate the CRC8 lookup table */
613 /* Create the initial tokens used by the test */
615 token16 = (get_crc8(&t.u8[0], sizeof(token16) - 1) << 8)
616 | (t.u16[0] & 0x00ff);
617 token32 = ((uint32_t)get_crc8(&t.u8[0], sizeof(token32) - 1) << 24)
618 | (t.u32[0] & 0x00ffffff);
619 token64 = ((uint64_t)get_crc8(&t.u8[0], sizeof(token64) - 1) << 56)
620 | (t.u64 & 0x00ffffffffffffff);
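/*
 * Note: the initial tokens above place the crc8 of the data bytes into the
 * most significant byte, which is the byte the worker lcores check (u8[1],
 * u8[3] and u8[7]) on little-endian layouts.
 */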
622 rte_eal_mp_remote_launch(test_atomic_exchange, NULL, SKIP_MASTER);
623 rte_atomic32_set(&synchro, 1);
624 rte_eal_mp_wait_lcore();
625 rte_atomic32_clear(&synchro);
627 if (rte_atomic64_read(&count) > 0) {
628 printf("Atomic exchange test failed\n");
634 REGISTER_TEST_COMMAND(atomic_autotest, test_atomic);