* This is a simple functional test for rte_smp_mb() implementation.
* I.E. make sure that LOAD and STORE operations that precede the
* rte_smp_mb() call are globally visible across the lcores
- * before the the LOAD and STORE operations that follows it.
+ * before the LOAD and STORE operations that follow it.
* The test uses simple implementation of Peterson's lock algorithm
* (https://en.wikipedia.org/wiki/Peterson%27s_algorithm)
* for two execution units to make sure that rte_smp_mb() prevents
* store-load reordering to happen.
- * Also when executed on a single lcore could be used as a approxiamate
+ * Also when executed on a single lcore could be used as an approximate
* estimation of number of cycles particular implementation of rte_smp_mb()
* will take.
*/
#include <rte_memory.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
-#include <rte_atomic.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_pause.h>
*/
struct plock_test {
struct plock lock;
- uint32_t val;
- uint32_t iter;
+ uint64_t val;
+ uint64_t iter;
};
/*
*/
struct lcore_plock_test {
struct plock_test *pt[2]; /* shared, lock-protected data */
- uint32_t sum[2]; /* local copy of the shared data */
- uint32_t iter; /* number of iterations to perfom */
+ uint64_t sum[2]; /* local copy of the shared data */
+ uint64_t iter; /* number of iterations to perform */
uint32_t lc; /* given lcore id */
};
other = self ^ 1;
l->flag[self] = 1;
+ rte_smp_wmb();
l->victim = self;
store_load_barrier(l->utype);
while (l->flag[other] == 1 && l->victim == self)
rte_pause();
+ rte_smp_rmb();
}
static void
plock_test1_lcore(void *data)
{
uint64_t tm;
- uint32_t i, lc, ln, n;
+ uint32_t lc, ln;
+ uint64_t i, n;
struct lcore_plock_test *lpt;
lpt = data;
tm = rte_get_timer_cycles() - tm;
- printf("%s(%u): %u iterations finished, in %" PRIu64
+ printf("%s(%u): %" PRIu64 " iterations finished, in %" PRIu64
" cycles, %#Lf cycles/iteration, "
- "local sum={%u, %u}\n",
+ "local sum={%" PRIu64 ", %" PRIu64 "}\n",
__func__, lc, i, tm, (long double)tm / i,
lpt->sum[0], lpt->sum[1]);
return 0;
* and local data are the same.
*/
static int
-plock_test(uint32_t iter, enum plock_use_type utype)
+plock_test(uint64_t iter, enum plock_use_type utype)
{
int32_t rc;
uint32_t i, lc, n;
- uint32_t *sum;
+ uint64_t *sum;
struct plock_test *pt;
struct lcore_plock_test *lpt;
lpt = calloc(n, sizeof(*lpt));
sum = calloc(n + 1, sizeof(*sum));
- printf("%s(iter=%u, utype=%u) started on %u lcores\n",
+ printf("%s(iter=%" PRIu64 ", utype=%u) started on %u lcores\n",
__func__, iter, utype, n);
- if (pt == NULL || lpt == NULL) {
+ if (pt == NULL || lpt == NULL || sum == NULL) {
printf("%s: failed to allocate memory for %u lcores\n",
__func__, n);
free(pt);
/* test phase - start and wait for completion on each active lcore */
- rte_eal_mp_remote_launch(plock_test1_lcore, lpt, CALL_MASTER);
+ rte_eal_mp_remote_launch(plock_test1_lcore, lpt, CALL_MAIN);
rte_eal_mp_wait_lcore();
/* validation phase - make sure that shared and local data match */
rc = 0;
for (i = 0; i != n; i++) {
- printf("%s: sum[%u]=%u, pt[%u].val=%u, pt[%u].iter=%u;\n",
+ printf("%s: sum[%u]=%" PRIu64 ", pt[%u].val=%" PRIu64 ", pt[%u].iter=%" PRIu64 ";\n",
__func__, i, sum[i], i, pt[i].val, i, pt[i].iter);
/* race condition occurred, lock doesn't work properly */