1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2018 Intel Corporation
6 * This is a simple functional test for rte_smp_mb() implementation.
7 * I.E. make sure that LOAD and STORE operations that precede the
8 * rte_smp_mb() call are globally visible across the lcores
9 * before the LOAD and STORE operations that follow it.
10 * The test uses simple implementation of Peterson's lock algorithm
11 * (https://en.wikipedia.org/wiki/Peterson%27s_algorithm)
12 * for two execution units to make sure that rte_smp_mb() prevents
13 * store-load reordering to happen.
14 * Also when executed on a single lcore could be used as an approximate
15 * estimation of number of cycles particular implementation of rte_smp_mb()
24 #include <rte_memory.h>
25 #include <rte_per_lcore.h>
26 #include <rte_launch.h>
27 #include <rte_atomic.h>
29 #include <rte_lcore.h>
30 #include <rte_pause.h>
31 #include <rte_random.h>
32 #include <rte_cycles.h>
34 #include <rte_debug.h>
39 #define ITER_MAX 0x1000000
/*
 * NOTE(review): the enclosing declaration is not visible in this chunk;
 * these look like the fields of the Peterson lock (struct plock, used via
 * l->flag/l->victim/l->utype below) -- confirm against the full file.
 */
48 volatile uint32_t flag[2]; /* flag[i] != 0: side i wants the lock */
49 volatile uint32_t victim;  /* side that backs off when both sides want the lock */
50 enum plock_use_type utype; /* barrier flavor passed to store_load_barrier() */
54 * Lock plus two counters protected by it.
63 * Each active lcore shares a plock_test struct with its left and right
/*
 * Per-lcore test context: pointers to the two shared (lock-protected)
 * plock_test structs -- left and right neighbor -- plus local shadow
 * counters that are compared against the shared data at validation time.
 * NOTE(review): the closing brace of this struct is not visible in this chunk.
 */
66 struct lcore_plock_test {
67 struct plock_test *pt[2]; /* shared, lock-protected data */
68 uint64_t sum[2]; /* local copy of the shared data */
69 uint64_t iter; /* number of iterations to perform */
70 uint32_t lc; /* given lcore id */
/*
 * Issue the store-load barrier flavor selected by utype.
 * NOTE(review): fragmentary in this chunk -- the return type, the other
 * utype branches, and the barrier calls themselves are not visible;
 * confirm against the full file.
 */
74 store_load_barrier(uint32_t utype)
78 else if (utype == USE_SMP_MB)
85 * Peterson lock implementation.
88 plock_lock(struct plock *l, uint32_t self)
/*
 * The barrier is the point of the whole test: our store to flag[self]
 * (on a line not visible here) must be globally visible before we load
 * the peer's flag below, otherwise both sides can enter the critical
 * section.
 */
97 store_load_barrier(l->utype);
/* Spin while the peer wants the lock and we are the designated victim. */
99 while (l->flag[other] == 1 && l->victim == self)
104 plock_unlock(struct plock *l, uint32_t self)
/*
 * Re-initialize the lock to the unlocked state for a fresh test run.
 * NOTE(review): utype is presumably stored into l->utype on a line not
 * visible in this chunk -- confirm against the full file.
 */
111 plock_reset(struct plock *l, enum plock_use_type utype)
113 memset(l, 0, sizeof(*l));
118 * grab the lock, update both counters, release the lock.
121 plock_add(struct plock_test *pt, uint32_t self, uint32_t n)
123 plock_lock(&pt->lock, self);
/* counter updates (pt->val, pt->iter) happen on lines not visible here */
126 plock_unlock(&pt->lock, self);
/*
 * Per-lcore worker: locates its lcore_plock_test slot, then repeatedly
 * updates both shared plock_test structs (left neighbor as lock side 0,
 * right neighbor as side 1) while mirroring the additions into the local
 * sum[] counters, and finally reports cycles-per-iteration statistics.
 */
130 plock_test1_lcore(void *data)
135 struct lcore_plock_test *lpt;
140 /* find lcore_plock_test struct for given lcore */
141 for (ln = rte_lcore_count(); ln != 0 && lpt->lc != lc; lpt++, ln--)
145 printf("%s(%u) error at init\n", __func__, lc);
/* randomize the first increment; n stays within [0, ADD_MAX) */
149 n = rte_rand() % ADD_MAX;
150 tm = rte_get_timer_cycles();
153 * for each iteration:
154 * - update shared, lock-protected data in a safe manner
155 * - update local copy of the shared data
157 for (i = 0; i != lpt->iter; i++) {
/* left neighbor uses lock side 0, right neighbor side 1 */
159 plock_add(lpt->pt[0], 0, n);
160 plock_add(lpt->pt[1], 1, n);
165 n = (n + 1) % ADD_MAX;
168 tm = rte_get_timer_cycles() - tm;
170 printf("%s(%u): %" PRIu64 " iterations finished, in %" PRIu64
171 " cycles, %#Lf cycles/iteration, "
172 "local sum={%" PRIu64 ", %" PRIu64 "}\n",
173 __func__, lc, i, tm, (long double)tm / i,
174 lpt->sum[0], lpt->sum[1]);
179 * For N active lcores we allocate N+1 lcore_plock_test structures.
180 * Each active lcore shares one lcore_plock_test structure with its
181 * left lcore neighbor and one lcore_plock_test structure with its
182 * right lcore neighbor.
183 * During the test each lcore updates data in both shared structures and
184 * its local copies. Then at validation phase we check that our shared
185 * and local data are the same.
/*
 * Run the lock test with the given barrier flavor on all lcores, then
 * verify that the lock kept the shared counters consistent with the
 * per-lcore local copies.  Returns the test result (rc is set on lines
 * not visible in this chunk).
 */
188 plock_test(uint64_t iter, enum plock_use_type utype)
193 struct plock_test *pt;
194 struct lcore_plock_test *lpt;
196 /* init phase, allocate and initialize shared data */
198 n = rte_lcore_count();
/* n + 1 shared structs: each of the n lcores shares one with each neighbor */
199 pt = calloc(n + 1, sizeof(*pt));
200 lpt = calloc(n, sizeof(*lpt));
201 sum = calloc(n + 1, sizeof(*sum));
203 printf("%s(iter=%" PRIu64 ", utype=%u) started on %u lcores\n",
204 __func__, iter, utype, n);
206 if (pt == NULL || lpt == NULL || sum == NULL) {
207 printf("%s: failed to allocate memory for %u lcores\n",
/* arm every lock with the barrier flavor under test */
215 for (i = 0; i != n + 1; i++)
216 plock_reset(&pt[i].lock, utype);
/* wire each active lcore to its left (pt[i]) and right (pt[i+1]) struct */
219 RTE_LCORE_FOREACH(lc) {
223 lpt[i].pt[0] = pt + i;
224 lpt[i].pt[1] = pt + i + 1;
/*
 * NOTE(review): the last lcore's right neighbor is redirected back to
 * pt[0], closing the ring -- confirm the surrounding (not visible)
 * condition against the full file.
 */
228 lpt[i - 1].pt[1] = pt;
230 for (i = 0; i != n; i++)
231 printf("lpt[%u]={lc=%u, pt={%p, %p},};\n",
232 i, lpt[i].lc, lpt[i].pt[0], lpt[i].pt[1]);
235 /* test phase - start and wait for completion on each active lcore */
237 rte_eal_mp_remote_launch(plock_test1_lcore, lpt, CALL_MASTER);
238 rte_eal_mp_wait_lcore();
240 /* validation phase - make sure that shared and local data match */
/* pt[i] was updated by lcore i (as side 0) and lcore i-1 (as side 1) */
242 for (i = 0; i != n; i++) {
243 sum[i] += lpt[i].sum[0];
244 sum[i + 1] += lpt[i].sum[1];
250 for (i = 0; i != n; i++) {
251 printf("%s: sum[%u]=%" PRIu64 ", pt[%u].val=%" PRIu64 ", pt[%u].iter=%" PRIu64 ";\n",
252 __func__, i, sum[i], i, pt[i].val, i, pt[i].iter);
254 /* race condition occurred, lock doesn't work properly */
/* two lcores touch each struct, hence the expected count is 2 * iter */
255 if (sum[i] != pt[i].val || 2 * iter != pt[i].iter) {
256 printf("error: local and shared sums don't match\n");
265 printf("%s(utype=%u) returns %d\n", __func__, utype, rc);
/*
 * Test entry point (registered below as barrier_autotest): runs
 * plock_test() once for every barrier flavor and reports per-flavor
 * pass/fail.  NOTE(review): the function signature line is not visible
 * in this chunk.
 */
272 int32_t i, ret, rc[USE_NUM];
274 for (i = 0; i != RTE_DIM(rc); i++)
275 rc[i] = plock_test(ITER_MAX, i);
278 for (i = 0; i != RTE_DIM(rc); i++) {
279 printf("%s for utype=%d %s\n",
280 __func__, i, rc[i] == 0 ? "passed" : "failed");
287 REGISTER_TEST_COMMAND(barrier_autotest, test_barrier);