/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#include "test_perf_common.h"
int
perf_test_result(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	return t->result;
}

static inline int
perf_producer(void *arg)
{
	struct prod_data *p = arg;
	struct test_perf *t = p->t;
	struct evt_options *opt = t->opt;
	const uint8_t dev_id = p->dev_id;
	const uint8_t port = p->port_id;
	struct rte_mempool *pool = t->pool;
	const uint64_t nb_pkts = t->nb_pkts;
	const uint32_t nb_flows = t->nb_flows;
	uint32_t flow_counter = 0;
	uint64_t count = 0;
	struct perf_elt *m;
	struct rte_event ev;

	if (opt->verbose_level > 1)
		printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
				rte_lcore_id(), dev_id, port, p->queue_id);

	ev.op = RTE_EVENT_OP_NEW;
	ev.queue_id = p->queue_id;
	ev.sched_type = t->opt->sched_type_list[0];
	ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	ev.event_type = RTE_EVENT_TYPE_CPU;
	ev.sub_event_type = 0; /* stage 0 */

	while (count < nb_pkts && t->done == false) {
		if (rte_mempool_get(pool, (void **)&m) < 0)
			continue;

		ev.flow_id = flow_counter++ % nb_flows;
		ev.event_ptr = m;
		m->timestamp = rte_get_timer_cycles();
		while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
			if (t->done)
				break;
			rte_pause();
			/* Restamp so a stalled enqueue does not inflate
			 * the measured forward latency.
			 */
			m->timestamp = rte_get_timer_cycles();
		}
		count++;
	}

	return 0;
}
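
/*
 * For reference, a minimal worker-side counterpart to the producer above.
 * This is only a sketch (perf_worker_sketch is an illustrative name; the
 * real workers live in the individual perf tests and handle multiple
 * stages): dequeue on the worker's port, fold the forward latency derived
 * from the producer timestamp into the per-worker counters read by
 * total_latency()/processed_pkts(), and return the element to the pool.
 */
static int __rte_unused
perf_worker_sketch(void *arg)
{
	struct worker_data *w = arg;
	struct rte_event ev;

	while (w->t->done == false) {
		if (!rte_event_dequeue_burst(w->dev_id, w->port_id,
					&ev, 1, 0))
			continue;

		struct perf_elt *m = ev.event_ptr;

		w->latency += rte_get_timer_cycles() - m->timestamp;
		w->processed_pkts++;
		rte_mempool_put(w->t->pool, m);
	}

	return 0;
}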

static int
perf_producer_wrapper(void *arg)
{
	struct prod_data *p = arg;
	struct test_perf *t = p->t;

	/* Launch the producer function only in case of synthetic producer. */
	if (t->opt->prod_type == EVT_PROD_TYPE_SYNT)
		return perf_producer(arg);

	return 0;
}
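
/*
 * Note: with EVT_PROD_TYPE_ETH_RX_ADPTR the events are injected by the
 * ethdev Rx adapter rather than by a software producer, so the launched
 * producer lcore has nothing to do and returns immediately.
 */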

static inline uint64_t
processed_pkts(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].processed_pkts;

	return total;
}

static inline uint64_t
total_latency(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].latency;

	return total;
}
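
/*
 * The two aggregates above feed the once-a-second status line in
 * perf_launch_lcores() below: throughput is the per-sample delta of
 * processed_pkts() scaled to mpps, and the average forward latency is
 * total_latency() / processed_pkts() cycles, converted to microseconds
 * by dividing by the timer frequency in MHz.
 */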

int
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
		int (*worker)(void *))
{
	int ret, lcore_id;
	struct test_perf *t = evt_test_priv(test);

	int port_idx = 0;
	/* launch workers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->wlcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(worker,
				&t->worker[port_idx], lcore_id);
		if (ret) {
			evt_err("failed to launch worker %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	/* launch producers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->plcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(perf_producer_wrapper,
				&t->prod[port_idx], lcore_id);
		if (ret) {
			evt_err("failed to launch perf_producer %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	const uint64_t total_pkts = opt->nb_pkts *
			evt_nr_active_lcores(opt->plcores);

	uint64_t dead_lock_cycles = rte_get_timer_cycles();
	int64_t dead_lock_remaining = total_pkts;
	const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;

	uint64_t perf_cycles = rte_get_timer_cycles();
	int64_t perf_remaining = total_pkts;
	const uint64_t perf_sample = rte_get_timer_hz();

	static float total_mpps;
	static uint64_t samples;

	const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
	int64_t remaining = t->outstand_pkts - processed_pkts(t);

	while (t->done == false) {
		const uint64_t new_cycles = rte_get_timer_cycles();

		if ((new_cycles - perf_cycles) > perf_sample) {
			const uint64_t latency = total_latency(t);
			const uint64_t pkts = processed_pkts(t);

			remaining = t->outstand_pkts - pkts;
			float mpps = (float)(perf_remaining-remaining)/1000000;

			perf_remaining = remaining;
			perf_cycles = new_cycles;
			total_mpps += mpps;
			++samples;
			if (opt->fwd_latency && pkts > 0) {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
					mpps, total_mpps/samples,
					(float)(latency/pkts)/freq_mhz);
			} else {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
					mpps, total_mpps/samples);
			}
			fflush(stdout);

			if (remaining <= 0) {
				t->result = EVT_TEST_SUCCESS;
				if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
					t->done = true;
					rte_smp_wmb();
					break;
				}
			}
		}

		if (new_cycles - dead_lock_cycles > dead_lock_sample &&
				opt->prod_type == EVT_PROD_TYPE_SYNT) {
			remaining = t->outstand_pkts - processed_pkts(t);
			/* No forward progress over the whole 5 second
			 * sample window: dump the device state and bail.
			 */
			if (dead_lock_remaining == remaining) {
				rte_event_dev_dump(opt->dev_id, stdout);
				evt_err("No schedules for 5 seconds, deadlock");
				t->done = true;
				rte_smp_wmb();
				break;
			}
			dead_lock_remaining = remaining;
			dead_lock_cycles = new_cycles;
		}
	}
	printf("\n");
	return 0;
}
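
/*
 * Typical call site, for reference (a sketch; perf_queue_worker is an
 * illustrative name, each perf test passes its own worker function):
 *
 *	static int
 *	perf_queue_launch_lcores(struct evt_test *test, struct evt_options *opt)
 *	{
 *		return perf_launch_lcores(test, opt, perf_queue_worker);
 *	}
 */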

int
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
				uint8_t stride, uint8_t nb_queues)
{
	struct test_perf *t = evt_test_priv(test);
	uint8_t port, prod;
	int ret = -1;

	/* port configuration */
	const struct rte_event_port_conf wkr_p_conf = {
			.dequeue_depth = opt->wkr_deq_dep,
			.enqueue_depth = 64,
			.new_event_threshold = 4096,
	};

	/* setup one port per worker, linking to all queues */
	for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
			port++) {
		struct worker_data *w = &t->worker[port];

		w->dev_id = opt->dev_id;
		w->port_id = port;
		w->t = t;
		w->processed_pkts = 0;
		w->latency = 0;

		ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}

		/* Passing NULL queues with zero links requests a link to
		 * every configured queue.
		 */
		ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
		if (ret != nb_queues) {
			evt_err("failed to link all queues to port %d", port);
			return -EINVAL;
		}
	}

	/* port for producers, no links */
	const struct rte_event_port_conf prod_conf = {
			.dequeue_depth = 8,
			.enqueue_depth = 32,
			.new_event_threshold = 1200,
	};

	prod = 0;
	for ( ; port < perf_nb_event_ports(opt); port++) {
		struct prod_data *p = &t->prod[port];

		p->dev_id = opt->dev_id;
		p->port_id = port;
		p->queue_id = prod * stride;
		p->t = t;

		ret = rte_event_port_setup(opt->dev_id, port, &prod_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}
		prod++;
	}

	return ret;
}
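
/*
 * For reference, a sketch of how a test calls this (values illustrative):
 * a queue-based pipeline passes stride = nb_stages so that producer i
 * injects into queue_id = i * nb_stages, i.e. stage 0 of its own queue
 * chain:
 *
 *	ret = perf_event_dev_port_setup(test, opt, nb_stages, nb_queues);
 */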

int
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
{
	unsigned int lcores;

	/* N producer + N worker + 1 master when producer cores are used.
	 * Else N worker + 1 master when the Rx adapter is used.
	 */
	lcores = opt->prod_type == EVT_PROD_TYPE_SYNT ? 3 : 2;

	if (rte_lcore_count() < lcores) {
		evt_err("test needs a minimum of %d lcores", lcores);
		return -1;
	}

	/* Validate worker lcores */
	if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
		evt_err("worker lcores overlap with master lcore");
		return -1;
	}
	if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
		evt_err("worker lcores overlap with producer lcores");
		return -1;
	}
	if (evt_has_disabled_lcore(opt->wlcores)) {
		evt_err("one or more worker lcores are not enabled");
		return -1;
	}
	if (!evt_has_active_lcore(opt->wlcores)) {
		evt_err("at least one worker is required");
		return -1;
	}

	if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
		/* Validate producer lcores */
		if (evt_lcores_has_overlap(opt->plcores,
					rte_get_master_lcore())) {
			evt_err("producer lcores overlap with master lcore");
			return -1;
		}
		if (evt_has_disabled_lcore(opt->plcores)) {
			evt_err("one or more producer lcores are not enabled");
			return -1;
		}
		if (!evt_has_active_lcore(opt->plcores)) {
			evt_err("at least one producer is required");
			return -1;
		}
	}

	if (evt_has_invalid_stage(opt))
		return -1;

	if (evt_has_invalid_sched_type(opt))
		return -1;

	if (nb_queues > EVT_MAX_QUEUES) {
		evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
		return -1;
	}
	if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
		evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
		return -1;
	}

	/* Fixups */
	if (opt->nb_stages == 1 && opt->fwd_latency) {
		evt_info("fwd_latency is valid only when nb_stages > 1, disabling");
		opt->fwd_latency = 0;
	}
	if (opt->fwd_latency && !opt->q_priority) {
		evt_info("enabled queue priority for latency measurement");
		opt->q_priority = 1;
	}
	if (opt->nb_pkts == 0)
		opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);

	return 0;
}

void
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
{
	evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
	evt_dump_producer_lcores(opt);
	evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
	evt_dump_worker_lcores(opt);
	evt_dump_nb_stages(opt);
	evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
	evt_dump("nb_evdev_queues", "%d", nb_queues);
	evt_dump_queue_priority(opt);
	evt_dump_sched_type_list(opt);
	evt_dump_producer_type(opt);
}

void
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(test);

	rte_event_dev_stop(opt->dev_id);
	rte_event_dev_close(opt->dev_id);
}

static inline void
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
		void *obj, unsigned i __rte_unused)
{
	memset(obj, 0, mp->elt_size);
}

int
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
{
	struct test_perf *t = evt_test_priv(test);

	if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
		t->pool = rte_mempool_create(test->name, /* mempool name */
				opt->pool_sz, /* number of elements */
				sizeof(struct perf_elt), /* element size */
				512, /* cache size */
				0, NULL, NULL,
				perf_elt_init, /* obj constructor */
				NULL, opt->socket_id, 0); /* flags */
	} else {
		t->pool = rte_pktmbuf_pool_create(test->name, /* mempool name */
				opt->pool_sz, /* number of elements */
				512, /* cache size */
				0, /* private data size */
				RTE_MBUF_DEFAULT_BUF_SIZE,
				opt->socket_id); /* socket id */
	}

	if (t->pool == NULL) {
		evt_err("failed to create mempool");
		return -ENOMEM;
	}

	return 0;
}
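
/*
 * Note: struct perf_elt lives in test_perf_common.h; it is essentially
 * just the producer timestamp carried through the pipeline:
 *
 *	struct perf_elt {
 *		uint64_t timestamp;
 *	} __rte_cache_aligned;
 */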

void
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	rte_mempool_free(t->pool);
}

int
perf_test_setup(struct evt_test *test, struct evt_options *opt)
{
	void *test_perf;

	test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
				RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_perf == NULL) {
		evt_err("failed to allocate test_perf memory");
		goto nomem;
	}
	test->test_priv = test_perf;

	struct test_perf *t = evt_test_priv(test);

	t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
	t->nb_workers = evt_nr_active_lcores(opt->wlcores);
	t->done = false;
	t->nb_pkts = opt->nb_pkts;
	t->nb_flows = opt->nb_flows;
	t->result = EVT_TEST_FAILED;
	t->opt = opt;
	memcpy(t->sched_type_list, opt->sched_type_list,
			sizeof(opt->sched_type_list));

	return 0;
nomem:
	return -ENOMEM;
}

void
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);

	rte_free(test->test_priv);
}