1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020 Intel Corporation
5 #include "test_ring_stress.h"
8 * Stress test for ring enqueue/dequeue operations.
9 * Performs the following pattern on each worker:
10 * dequeue/read-write data from the dequeued objects/enqueue.
11 * Serves as both functional and performance test of ring
12 * enqueue/dequeue operations under high contention
13 * (for both over committed and non-over committed scenarios).
/* Name used when initializing the ring under test. */
16 #define RING_NAME "RING_STRESS"
/*
 * Ring capacity: two bursts of BULK_NUM objects per possible lcore, so
 * every worker can always hold a full burst while others are enqueueing.
 * NOTE(review): BULK_NUM is defined on an elided line (orig line 17) —
 * confirm against the full file.
 */
18 #define RING_SIZE (2 * BULK_NUM * RTE_MAX_LCORE)
/*
 * Command word the main lcore uses to start/stop the workers.
 * Workers poll it with relaxed atomic loads; the main lcore stores
 * WRK_CMD_RUN / WRK_CMD_STOP with release ordering (see test_mt1()).
 * Cache-aligned to avoid false sharing with neighbouring globals.
 */
25 static uint32_t wrk_cmd __rte_cache_aligned = WRK_CMD_STOP;
27 /* test run-time in seconds */
28 static const uint32_t run_time = 60;
/* zero-initialized: verbose per-lcore dumps are disabled by default */
29 static const uint32_t verbose;
/*
 * NOTE(review): the opening declarations of these two structs (orig lines
 * ~30-43, presumably `struct lcore_stat`, `struct lcore_arg {` and
 * `struct ring_elem {`) are elided from this chunk — only the trailing
 * members are visible. Restore from upstream before editing.
 */
/* per-worker argument: (elided fields, likely the ring pointer) + stats */
44 struct lcore_stat stats;
45 } __rte_cache_aligned;
/* one ring object: a full cache line of counters used as a data pattern */
48 uint32_t cnt[RTE_CACHE_LINE_SIZE / sizeof(uint32_t)];
49 } __rte_cache_aligned;
/*
52 * redefinable functions
 *
 * These three are only declared here; each including test file provides
 * its own definitions, selecting the ring flavor (MP/MC, RTS, HTS, ...)
 * being stress-tested.
 */
/* dequeue exactly n objects from r into obj; returns number dequeued */
55 _st_ring_dequeue_bulk(struct rte_ring *r, void **obj, uint32_t n,
/* enqueue exactly n objects from obj into r; returns number enqueued */
59 _st_ring_enqueue_bulk(struct rte_ring *r, void * const *obj, uint32_t n,
/* initialize ring r with given name and capacity num */
63 _st_ring_init(struct rte_ring *r, const char *name, uint32_t num);
/*
 * Accumulate one measurement into per-lcore stats: call count, object
 * count and elapsed cycles; when prcs (precise mode) is set, also track
 * min/max cycles per call.
 * NOTE(review): interior lines are elided here (orig 69, 71, 73) —
 * presumably the opening brace, the `nb_obj += obj` update and an
 * `if (prcs)` guard around the min/max updates. Confirm upstream.
 */
67 lcore_stat_update(struct lcore_stat *ls, uint64_t call, uint64_t obj,
68 uint64_t tm, int32_t prcs)
70 ls->op.nb_call += call;
72 ls->op.nb_cycle += tm;
74 ls->op.max_cycle = RTE_MAX(ls->op.max_cycle, tm);
75 ls->op.min_cycle = RTE_MIN(ls->op.min_cycle, tm);
/*
 * Fold one worker's op stats (ls) into the aggregate (ms): sums for
 * call/object/cycle counters, extremes for min/max cycles per call.
 */
80 lcore_op_stat_aggr(struct lcore_stat *ms, const struct lcore_stat *ls)
83 ms->op.nb_call += ls->op.nb_call;
84 ms->op.nb_obj += ls->op.nb_obj;
85 ms->op.nb_cycle += ls->op.nb_cycle;
86 ms->op.max_cycle = RTE_MAX(ms->op.max_cycle, ls->op.max_cycle);
87 ms->op.min_cycle = RTE_MIN(ms->op.min_cycle, ls->op.min_cycle);
/*
 * Merge a worker's full stats into the aggregate. Total run time is the
 * MAX across workers (they run concurrently, not sequentially); the op
 * counters are summed via lcore_op_stat_aggr().
 */
91 lcore_stat_aggr(struct lcore_stat *ms, const struct lcore_stat *ls)
93 ms->nb_cycle = RTE_MAX(ms->nb_cycle, ls->nb_cycle);
94 lcore_op_stat_aggr(ms, ls);
/*
 * Pretty-print one lcore's stats (or the aggregate when lc == UINT32_MAX)
 * to stream f. Cycle counts are also converted to microseconds via the
 * timer frequency. Min/max per-call figures are printed only when they
 * were actually collected (precise mode), detected by min_cycle having
 * moved off its UINT64_MAX initializer.
 */
98 lcore_stat_dump(FILE *f, uint32_t lc, const struct lcore_stat *ls)
/* cycles per microsecond, used to convert all cycle counts below */
102 st = (long double)rte_get_timer_hz() / US_PER_S;
104 if (lc == UINT32_MAX)
105 fprintf(f, "%s(AGGREGATE)={\n", __func__);
107 fprintf(f, "%s(lcore=%u)={\n", __func__, lc);
109 fprintf(f, "\tnb_cycle=%" PRIu64 "(%.2Lf usec),\n",
110 ls->nb_cycle, (long double)ls->nb_cycle / st);
112 fprintf(f, "\tDEQ+ENQ={\n");
114 fprintf(f, "\t\tnb_call=%" PRIu64 ",\n", ls->op.nb_call);
115 fprintf(f, "\t\tnb_obj=%" PRIu64 ",\n", ls->op.nb_obj);
116 fprintf(f, "\t\tnb_cycle=%" PRIu64 ",\n", ls->op.nb_cycle);
117 fprintf(f, "\t\tobj/call(avg): %.2Lf\n",
118 (long double)ls->op.nb_obj / ls->op.nb_call);
119 fprintf(f, "\t\tcycles/obj(avg): %.2Lf\n",
120 (long double)ls->op.nb_cycle / ls->op.nb_obj);
121 fprintf(f, "\t\tcycles/call(avg): %.2Lf\n",
122 (long double)ls->op.nb_cycle / ls->op.nb_call);
124 /* if min/max cycles per call stats was collected */
125 if (ls->op.min_cycle != UINT64_MAX) {
126 fprintf(f, "\t\tmax cycles/call=%" PRIu64 "(%.2Lf usec),\n",
128 (long double)ls->op.max_cycle / st);
129 fprintf(f, "\t\tmin cycles/call=%" PRIu64 "(%.2Lf usec),\n",
131 (long double)ls->op.min_cycle / st);
/* NOTE(review): value arguments at orig lines 127/130 and closing
 * braces/fprintf at orig 132-135 are elided from this chunk. */
134 fprintf(f, "\t},\n");
/*
 * Stamp every counter of a ring element with the value `fill`, so a
 * later memcmp() can verify the element was not corrupted in flight.
 * NOTE(review): the loop body (orig line ~144, presumably
 * `elm->cnt[i] = fill;`) is elided from this chunk.
 */
139 fill_ring_elm(struct ring_elem *elm, uint32_t fill)
143 for (i = 0; i != RTE_DIM(elm->cnt); i++)
/*
 * For each of the num dequeued elements: verify it still matches the
 * expected pattern `check`, then overwrite it with the pattern `fill`
 * (marking it as owned by this worker before re-enqueue). On mismatch,
 * dump both expected and actual contents; the static spinlock only
 * serializes the diagnostic output between workers, not the data path.
 * NOTE(review): the failure return (orig ~164-165) and the function's
 * final return value (orig ~168+) are elided from this chunk.
 */
148 check_updt_elem(struct ring_elem *elm[], uint32_t num,
149 const struct ring_elem *check, const struct ring_elem *fill)
153 static rte_spinlock_t dump_lock;
155 for (i = 0; i != num; i++) {
156 if (memcmp(check, elm[i], sizeof(*check)) != 0) {
157 rte_spinlock_lock(&dump_lock);
158 printf("%s(lc=%u, num=%u) failed at %u-th iter, "
159 "offending object: %p\n",
160 __func__, rte_lcore_id(), num, i, elm[i]);
161 rte_memdump(stdout, "expected", check, sizeof(*check));
162 rte_memdump(stdout, "result", elm[i], sizeof(*elm[i]));
163 rte_spinlock_unlock(&dump_lock);
166 memcpy(elm[i], fill, sizeof(*elm[i]));
/*
 * Verify a ring operation moved the expected number of objects.
 * Prints a diagnostic naming the calling test (fname) and the operation
 * (opname) when res != exp.
 * NOTE(review): the success check / return statements (orig ~175-176 and
 * ~179-180) are elided — presumably `if (exp == res) return 0;` and
 * `return -ENOSPC;` or similar. Confirm upstream.
 */
173 check_ring_op(uint32_t exp, uint32_t res, uint32_t lc,
174 const char *fname, const char *opname)
177 printf("%s(lc=%u) failure: %s expected: %u, returned %u\n",
178 fname, lc, opname, exp, res);
/*
 * Worker body shared by the precise and average test flavors.
 * Each worker: spins until the main lcore publishes WRK_CMD_RUN, then in
 * a loop dequeues a random-sized burst (in [7/8, 11/8] * BULK_NUM),
 * verifies and re-stamps every object, enqueues the burst back, and
 * accumulates per-call stats — until WRK_CMD_STOP is observed.
 * prcs != 0 selects precise mode: each deq/enq pair is individually
 * timed with rte_rdtsc_precise() for min/max per-call stats.
 * NOTE(review): several lines are elided from this chunk (locals at orig
 * ~186-196, the error-break checks after each rc assignment, and the
 * final return). Confirm against the upstream file.
 */
185 test_worker(void *arg, const char *fname, int32_t prcs)
189 uint64_t cl, tm0, tm1;
190 struct lcore_arg *la;
191 struct ring_elem def_elm, loc_elm;
192 struct ring_elem *obj[2 * BULK_NUM];
/* def_elm: pattern for idle objects; loc_elm: this lcore's ownership mark */
197 fill_ring_elm(&def_elm, UINT32_MAX);
198 fill_ring_elm(&loc_elm, lc);
200 /* Acquire ordering is not required as the main is not
201 * really releasing any data through 'wrk_cmd' to
*/
204 while (__atomic_load_n(&wrk_cmd, __ATOMIC_RELAXED) != WRK_CMD_RUN)
/* total wall-clock of the whole run loop, in TSC cycles */
207 cl = rte_rdtsc_precise();
210 /* num in interval [7/8, 11/8] of BULK_NUM */
211 num = 7 * BULK_NUM / 8 + rte_rand() % (BULK_NUM / 2);
213 /* reset all pointer values */
214 memset(obj, 0, sizeof(obj));
216 /* dequeue num elems */
217 tm0 = (prcs != 0) ? rte_rdtsc_precise() : 0;
218 n = _st_ring_dequeue_bulk(la->rng, (void **)obj, num, NULL);
219 tm0 = (prcs != 0) ? rte_rdtsc_precise() - tm0 : 0;
221 /* check return value and objects */
222 rc = check_ring_op(num, n, lc, fname,
223 RTE_STR(_st_ring_dequeue_bulk));
/* dequeued objects must carry the idle pattern; re-stamp as ours */
225 rc = check_updt_elem(obj, num, &def_elm, &loc_elm);
229 /* enqueue num elems */
230 rte_compiler_barrier();
/* restore the idle pattern before handing objects back to the ring */
231 rc = check_updt_elem(obj, num, &loc_elm, &def_elm);
235 tm1 = (prcs != 0) ? rte_rdtsc_precise() : 0;
236 n = _st_ring_enqueue_bulk(la->rng, (void **)obj, num, NULL);
237 tm1 = (prcs != 0) ? rte_rdtsc_precise() - tm1 : 0;
239 /* check return value */
240 rc = check_ring_op(num, n, lc, fname,
241 RTE_STR(_st_ring_enqueue_bulk));
/* one call's worth of stats: 1 call, num objects, deq+enq cycles */
245 lcore_stat_update(&la->stats, 1, num, tm0 + tm1, prcs);
247 } while (__atomic_load_n(&wrk_cmd, __ATOMIC_RELAXED) == WRK_CMD_RUN);
249 cl = rte_rdtsc_precise() - cl;
/* precise=0: the whole-run duration must not pollute min/max per call */
251 lcore_stat_update(&la->stats, 0, 0, cl, 0);
252 la->stats.nb_cycle = cl;
/* Worker entry point: precise mode (per-call min/max cycle stats). */
256 test_worker_prcs(void *arg)
258 return test_worker(arg, __func__, 1);
/* Worker entry point: average mode (no per-call timing overhead). */
262 test_worker_avg(void *arg)
264 return test_worker(arg, __func__, 0);
268 mt1_fini(struct rte_ring *rng, void *data)
/*
 * Set up the test fixture: zero-allocate `num` ring elements and a ring
 * sized (via rte_ring_get_memsize()) to hold them, initialize the ring
 * with the flavor-specific _st_ring_init(), then pre-fill the ring with
 * a pointer to every element, each stamped with the idle pattern
 * (UINT32_MAX). Outputs go to *rng / *data for mt1_fini() to release.
 * NOTE(review): error-path cleanup/returns, the `nr` computation and
 * the enqueue-failure break (orig gaps around 282-315, 318-330) are
 * elided from this chunk — confirm upstream.
 */
275 mt1_init(struct rte_ring **rng, void **data, uint32_t num)
281 struct ring_elem *elm;
287 sz = num * sizeof(*elm);
288 elm = rte_zmalloc(NULL, sz, __alignof__(*elm));
290 printf("%s: alloc(%zu) for %u elems data failed",
299 sz = rte_ring_get_memsize(nr);
300 r = rte_zmalloc(NULL, sz, __alignof__(*r));
302 printf("%s: alloc(%zu) for FIFO with %u elems failed",
309 rc = _st_ring_init(r, RING_NAME, nr);
311 printf("%s: _st_ring_init(%p, %u) failed, error: %d(%s)\n",
312 __func__, r, nr, rc, strerror(-rc));
/* stamp every element idle and seed the ring with its pointer */
316 for (i = 0; i != num; i++) {
317 fill_ring_elm(elm + i, UINT32_MAX);
319 if (_st_ring_enqueue_bulk(r, &p, 1, NULL) != 1)
324 printf("%s: _st_ring_enqueue(%p, %u) returned %u\n",
325 __func__, r, num, i);
/*
 * Orchestrate one multi-threaded run: build the fixture, launch the
 * given worker function on every worker lcore, let it run for
 * `run_time` seconds, signal stop, then collect, aggregate and dump
 * per-lcore stats. Returns ORed worker exit codes (0 on success).
 * NOTE(review): some locals/braces and the mt1_fini() call are elided
 * from this chunk (gaps in orig numbering) — confirm upstream.
 */
333 test_mt1(int (*test)(void *))
339 struct lcore_arg arg[RTE_MAX_LCORE];
341 static const struct lcore_stat init_stat = {
/* UINT64_MAX sentinel lets lcore_stat_dump() detect "no min collected" */
342 .op.min_cycle = UINT64_MAX,
345 rc = mt1_init(&r, &data, RING_SIZE);
351 memset(arg, 0, sizeof(arg));
353 /* launch on all workers */
354 RTE_LCORE_FOREACH_WORKER(lc) {
356 arg[lc].stats = init_stat;
357 rte_eal_remote_launch(test, &arg[lc], lc);
360 /* signal workers to start test */
361 __atomic_store_n(&wrk_cmd, WRK_CMD_RUN, __ATOMIC_RELEASE);
363 rte_delay_us(run_time * US_PER_S);
365 /* signal workers to stop test */
366 __atomic_store_n(&wrk_cmd, WRK_CMD_STOP, __ATOMIC_RELEASE);
368 /* wait for workers and collect stats. */
/* main lcore's slot (mc) accumulates the aggregate */
370 arg[mc].stats = init_stat;
373 RTE_LCORE_FOREACH_WORKER(lc) {
374 rc |= rte_eal_wait_lcore(lc);
375 lcore_stat_aggr(&arg[mc].stats, &arg[lc].stats);
/* per-lcore dump is guarded by `verbose` on an elided line (orig 376) */
377 lcore_stat_dump(stdout, lc, &arg[lc].stats);
380 lcore_stat_dump(stdout, UINT32_MAX, &arg[mc].stats);
/*
 * Registered stress-test cases: workers do enqueue+dequeue, main lcore
 * idle (MST_NONE); one precise-timing variant, one average-only variant.
 * NOTE(review): the `.func` members and closing braces (orig 386, 388,
 * 390-391, 393, 395+) are elided from this chunk.
 */
385 static const struct test_case tests[] = {
387 .name = "MT-WRK_ENQ_DEQ-MST_NONE-PRCS",
389 .wfunc = test_worker_prcs,
392 .name = "MT-WRK_ENQ_DEQ-MST_NONE-AVG",
394 .wfunc = test_worker_avg,