/*
 *   BSD LICENSE
 *
 *   Copyright (C) Cavium, Inc 2017.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Cavium, Inc nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "test_perf_common.h"

int
perf_test_result(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	return t->result;
}
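
/*
 * Producer loop. Each producer lcore allocates perf_elt objects from the
 * mempool, timestamps them and injects them as OP_NEW events on stage 0 of
 * its mapped queue, spreading them over nb_flows flow ids, until nb_pkts
 * events have been enqueued or the test is marked done.
 */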
static inline int
perf_producer(void *arg)
{
	struct prod_data *p = arg;
	struct test_perf *t = p->t;
	struct evt_options *opt = t->opt;
	const uint8_t dev_id = p->dev_id;
	const uint8_t port = p->port_id;
	struct rte_mempool *pool = t->pool;
	const uint64_t nb_pkts = t->nb_pkts;
	const uint32_t nb_flows = t->nb_flows;
	uint32_t flow_counter = 0;
	uint64_t count = 0;
	struct perf_elt *m;
	struct rte_event ev;

	if (opt->verbose_level > 1)
		printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
				rte_lcore_id(), dev_id, port, p->queue_id);

	ev.op = RTE_EVENT_OP_NEW;
	ev.queue_id = p->queue_id;
	ev.sched_type = t->opt->sched_type_list[0];
	ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	ev.event_type = RTE_EVENT_TYPE_CPU;
	ev.sub_event_type = 0; /* stage 0 */

	while (count < nb_pkts && t->done == false) {
		if (rte_mempool_get(pool, (void **)&m) < 0)
			continue;

		ev.flow_id = flow_counter++ % nb_flows;
		ev.event_ptr = m;
		m->timestamp = rte_get_timer_cycles();
		while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
			if (t->done)
				break;
			rte_pause();
			m->timestamp = rte_get_timer_cycles();
		}
		count++;
	}

	return 0;
}
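
/*
 * Dedicated scheduler lcore, used only when the event device does not
 * provide distributed scheduling; it calls rte_event_schedule() in a tight
 * loop until the test completes.
 */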
static inline int
scheduler(void *arg)
{
	struct test_perf *t = arg;
	const uint8_t dev_id = t->opt->dev_id;

	while (t->done == false)
		rte_event_schedule(dev_id);

	return 0;
}
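
/*
 * Aggregate the per-worker counters: processed_pkts() sums the events
 * consumed and total_latency() sums the accumulated forward latency cycles
 * across all worker lcores.
 */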
static inline uint64_t
processed_pkts(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].processed_pkts;

	return total;
}

static inline uint64_t
total_latency(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].latency;

	return total;
}
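
/*
 * Launch the worker, producer and (if required) scheduler lcores, then poll
 * the per-worker counters from the master lcore: throughput and average
 * forward latency are printed roughly once per second, and a deadlock is
 * declared if no events are scheduled for five consecutive seconds.
 */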
int
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
		int (*worker)(void *))
{
	int ret, lcore_id;
	struct test_perf *t = evt_test_priv(test);

	int port_idx = 0;
	/* launch workers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->wlcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(worker,
				&t->worker[port_idx], lcore_id);
		if (ret) {
			evt_err("failed to launch worker %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	/* launch producers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->plcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(perf_producer, &t->prod[port_idx],
				lcore_id);
		if (ret) {
			evt_err("failed to launch perf_producer %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	/* launch scheduler */
	if (!evt_has_distributed_sched(opt->dev_id)) {
		ret = rte_eal_remote_launch(scheduler, t, opt->slcore);
		if (ret) {
			evt_err("failed to launch sched %d", opt->slcore);
			return ret;
		}
	}

	const uint64_t total_pkts = opt->nb_pkts *
			evt_nr_active_lcores(opt->plcores);

	uint64_t dead_lock_cycles = rte_get_timer_cycles();
	int64_t dead_lock_remaining = total_pkts;
	const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;

	uint64_t perf_cycles = rte_get_timer_cycles();
	int64_t perf_remaining = total_pkts;
	const uint64_t perf_sample = rte_get_timer_hz();

	static float total_mpps;
	static uint64_t samples;

	const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
	int64_t remaining = t->outstand_pkts - processed_pkts(t);

	while (t->done == false) {
		const uint64_t new_cycles = rte_get_timer_cycles();

		if ((new_cycles - perf_cycles) > perf_sample) {
			const uint64_t latency = total_latency(t);
			const uint64_t pkts = processed_pkts(t);

			remaining = t->outstand_pkts - pkts;
			float mpps = (float)(perf_remaining-remaining)/1000000;

			perf_remaining = remaining;
			perf_cycles = new_cycles;
			total_mpps += mpps;
			++samples;
			if (opt->fwd_latency && pkts > 0) {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
					mpps, total_mpps/samples,
					(float)(latency/pkts)/freq_mhz);
			} else {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
					mpps, total_mpps/samples);
			}
			fflush(stdout);

			if (remaining <= 0) {
				t->done = true;
				t->result = EVT_TEST_SUCCESS;
				rte_smp_wmb();
				break;
			}
		}

		if (new_cycles - dead_lock_cycles > dead_lock_sample) {
			remaining = t->outstand_pkts - processed_pkts(t);
			if (dead_lock_remaining == remaining) {
				rte_event_dev_dump(opt->dev_id, stdout);
				evt_err("No schedules for seconds, deadlock");
				t->done = true;
				rte_smp_wmb();
				break;
			}
			dead_lock_remaining = remaining;
			dead_lock_cycles = new_cycles;
		}
	}
	printf("\n");
	return 0;
}
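
/*
 * Configure the event ports: one port per worker lcore, linked to all
 * queues, followed by one port per producer lcore which is left unlinked
 * and enqueues to queue (producer index * stride).
 */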
int
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
				uint8_t stride, uint8_t nb_queues)
{
	struct test_perf *t = evt_test_priv(test);
	uint8_t port, prod;
	int ret = -1;

	/* port configuration */
	const struct rte_event_port_conf wkr_p_conf = {
			.dequeue_depth = opt->wkr_deq_dep,
			.enqueue_depth = 64,
			.new_event_threshold = 4096,
	};

	/* setup one port per worker, linking to all queues */
	for (port = 0; port < evt_nr_active_lcores(opt->wlcores); port++) {
		struct worker_data *w = &t->worker[port];

		w->dev_id = opt->dev_id;
		w->port_id = port;
		w->t = t;
		w->processed_pkts = 0;
		w->latency = 0;

		ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}

		ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
		if (ret != nb_queues) {
			evt_err("failed to link all queues to port %d", port);
			return -EINVAL;
		}
	}

	/* port for producers, no links */
	const struct rte_event_port_conf prod_conf = {
			.dequeue_depth = 8,
			.enqueue_depth = 32,
			.new_event_threshold = 1200,
	};
	prod = 0;
	for ( ; port < perf_nb_event_ports(opt); port++) {
		struct prod_data *p = &t->prod[port];

		p->dev_id = opt->dev_id;
		p->port_id = port;
		p->queue_id = prod * stride;
		p->t = t;

		ret = rte_event_port_setup(opt->dev_id, port, &prod_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}
		prod++;
	}

	return ret;
}
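
/*
 * Validate the command line options: lcore availability, overlap between
 * worker/producer/scheduler/master lcores, stage and sched type lists, and
 * queue/port limits; also applies the fwd_latency related fixups.
 */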
int
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
{
	unsigned int lcores;
	bool need_slcore = !evt_has_distributed_sched(opt->dev_id);

	/* N producer + N worker + 1 scheduler(based on dev capa) + 1 master */
	lcores = need_slcore ? 4 : 3;

	if (rte_lcore_count() < lcores) {
		evt_err("test need minimum %d lcores", lcores);
		return -1;
	}

	/* Validate worker lcores */
	if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
		evt_err("worker lcores overlaps with master lcore");
		return -1;
	}
	if (need_slcore && evt_lcores_has_overlap(opt->wlcores, opt->slcore)) {
		evt_err("worker lcores overlaps with scheduler lcore");
		return -1;
	}
	if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
		evt_err("worker lcores overlaps producer lcores");
		return -1;
	}
	if (evt_has_disabled_lcore(opt->wlcores)) {
		evt_err("one or more workers lcores are not enabled");
		return -1;
	}
	if (!evt_has_active_lcore(opt->wlcores)) {
		evt_err("minimum one worker is required");
		return -1;
	}

	/* Validate producer lcores */
	if (evt_lcores_has_overlap(opt->plcores, rte_get_master_lcore())) {
		evt_err("producer lcores overlaps with master lcore");
		return -1;
	}
	if (need_slcore && evt_lcores_has_overlap(opt->plcores, opt->slcore)) {
		evt_err("producer lcores overlaps with scheduler lcore");
		return -1;
	}
	if (evt_has_disabled_lcore(opt->plcores)) {
		evt_err("one or more producer lcores are not enabled");
		return -1;
	}
	if (!evt_has_active_lcore(opt->plcores)) {
		evt_err("minimum one producer is required");
		return -1;
	}

	/* Validate scheduler lcore */
	if (!evt_has_distributed_sched(opt->dev_id) &&
			opt->slcore == (int)rte_get_master_lcore()) {
		evt_err("scheduler lcore and master lcore should be different");
		return -1;
	}
	if (need_slcore && !rte_lcore_is_enabled(opt->slcore)) {
		evt_err("scheduler lcore is not enabled");
		return -1;
	}

	if (evt_has_invalid_stage(opt))
		return -1;

	if (evt_has_invalid_sched_type(opt))
		return -1;

	if (nb_queues > EVT_MAX_QUEUES) {
		evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
		return -1;
	}
	if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
		evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
		return -1;
	}

	if (opt->nb_stages == 1 && opt->fwd_latency) {
		evt_info("fwd_latency is valid when nb_stages > 1, disabling");
		opt->fwd_latency = 0;
	}
	if (opt->fwd_latency && !opt->q_priority) {
		evt_info("enabled queue priority for latency measurement");
		opt->q_priority = 1;
	}

	if (opt->nb_pkts == 0)
		opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);

	return 0;
}

void
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
{
	evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
	evt_dump_producer_lcores(opt);
	evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
	evt_dump_worker_lcores(opt);
	if (!evt_has_distributed_sched(opt->dev_id))
		evt_dump_scheduler_lcore(opt);
	evt_dump_nb_stages(opt);
	evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
	evt_dump("nb_evdev_queues", "%d", nb_queues);
	evt_dump_queue_priority(opt);
	evt_dump_sched_type_list(opt);
}

void
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(test);

	rte_event_dev_stop(opt->dev_id);
	rte_event_dev_close(opt->dev_id);
}

static void
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
		void *obj, unsigned i __rte_unused)
{
	memset(obj, 0, mp->elt_size);
}
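
/*
 * The mempool holds the perf_elt objects carried as event payload; each
 * element is zeroed at pool creation time by perf_elt_init().
 */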
int
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
{
	struct test_perf *t = evt_test_priv(test);

	t->pool = rte_mempool_create(test->name, /* mempool name */
			opt->pool_sz, /* number of elements*/
			sizeof(struct perf_elt), /* element size*/
			512, /* cache size*/
			0, NULL, NULL,
			perf_elt_init, /* obj constructor */
			NULL, opt->socket_id, 0); /* flags */
	if (t->pool == NULL) {
		evt_err("failed to create mempool");
		return -ENOMEM;
	}

	return 0;
}

void
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	rte_mempool_free(t->pool);
}
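
/*
 * Allocate and initialize the per-test context (struct test_perf) on the
 * requested socket and derive the run time parameters from the options.
 */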
int
perf_test_setup(struct evt_test *test, struct evt_options *opt)
{
	void *test_perf;

	test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
				RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_perf == NULL) {
		evt_err("failed to allocate test_perf memory");
		goto nomem;
	}
	test->test_priv = test_perf;

	struct test_perf *t = evt_test_priv(test);

	t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
	t->nb_workers = evt_nr_active_lcores(opt->wlcores);
	t->done = false;
	t->nb_pkts = opt->nb_pkts;
	t->nb_flows = opt->nb_flows;
	t->result = EVT_TEST_FAILED;
	t->opt = opt;
	memcpy(t->sched_type_list, opt->sched_type_list,
			sizeof(opt->sched_type_list));
	return 0;
nomem:
	return -ENOMEM;
}

void
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);

	rte_free(test->test_priv);
}