4 * Copyright (C) Cavium, Inc 2017.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
16 * * Neither the name of Cavium, Inc nor the names of its
17 * contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 #include "test_perf_common.h"
/*
 * perf_test_result: fetches the test's private struct test_perf through
 * evt_test_priv(test).
 * NOTE(review): the return type and the rest of the body are not visible in
 * this excerpt -- presumably the function reports t->result; confirm against
 * the full file.
 */
36 perf_test_result(struct evt_test *test, struct evt_options *opt)
39 struct test_perf *t = evt_test_priv(test);
/*
 * perf_producer: producer entry point; it is handed to
 * rte_eal_remote_launch() elsewhere in this file.
 *
 * arg is a struct prod_data describing the event device, event port and
 * queue this producer feeds, plus a back-pointer to the shared test_perf
 * state. While count < nb_pkts and t->done is false, the visible code takes
 * an element from the test mempool, picks a flow id, timestamps the element
 * and enqueues the event `ev` one at a time.
 * NOTE(review): the declarations of `ev`, `m` and `count`, the statement
 * that attaches `m` to `ev`, the increment of `count` and the return value
 * are not visible in this excerpt.
 */
45 perf_producer(void *arg)
47 struct prod_data *p = arg;
48 struct test_perf *t = p->t;
49 struct evt_options *opt = t->opt;
50 const uint8_t dev_id = p->dev_id;
51 const uint8_t port = p->port_id;
52 struct rte_mempool *pool = t->pool;
53 const uint64_t nb_pkts = t->nb_pkts;
54 const uint32_t nb_flows = t->nb_flows;
55 uint32_t flow_counter = 0;
/* Only log the lcore/device/port/queue assignment when verbose_level > 1. */
60 if (opt->verbose_level > 1)
61 printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
62 rte_lcore_id(), dev_id, port, p->queue_id);
/*
 * Event fields assigned once, ahead of the produce loop; flow_id is the
 * only field the visible loop body rewrites per iteration.
 */
65 ev.op = RTE_EVENT_OP_NEW;
66 ev.queue_id = p->queue_id;
/* The first entry of the sched type list is used for the injected event;
 * t->opt is the same pointer as the local `opt` above. */
67 ev.sched_type = t->opt->sched_type_list[0];
68 ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
69 ev.event_type = RTE_EVENT_TYPE_CPU;
70 ev.sub_event_type = 0; /* stage 0 */
/* Produce until the per-producer packet budget is reached or the test is
 * flagged done. */
72 while (count < nb_pkts && t->done == false) {
/* A negative return means no element was available from the pool.
 * NOTE(review): the action taken on failure is not visible here. */
73 if (rte_mempool_get(pool, (void **)&m) < 0)
/* Flow ids cycle through 0 .. nb_flows-1.
 * NOTE(review): nb_flows == 0 would divide by zero; no check is visible in
 * this function -- confirm it is validated elsewhere. */
76 ev.flow_id = flow_counter++ % nb_flows;
/* Timestamp the element with the current timer cycle count before enqueue. */
78 m->timestamp = rte_get_timer_cycles();
/* Keep retrying until the device accepts exactly one event. */
79 while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
/* Refresh the timestamp; this appears to sit inside the retry loop so that
 * time spent retrying is not counted -- TODO confirm against full source. */
83 m->timestamp = rte_get_timer_cycles();
/*
 * processed_pkts: accumulates worker[i].processed_pkts over the first
 * t->nb_workers entries of t->worker.
 * NOTE(review): the declarations of `i` and `total` and the return statement
 * are not visible in this excerpt; presumably `total` starts at 0 and is
 * returned.
 */
91 static inline uint64_t
92 processed_pkts(struct test_perf *t)
98 for (i = 0; i < t->nb_workers; i++)
99 total += t->worker[i].processed_pkts;
/*
 * total_latency: accumulates worker[i].latency over the first
 * t->nb_workers entries of t->worker.
 * NOTE(review): the declarations of `i` and `total` and the return statement
 * are not visible in this excerpt; presumably `total` starts at 0 and is
 * returned. The unit of `latency` is not established here (the caller
 * divides it by the timer frequency in MHz, which suggests timer cycles).
 */
104 static inline uint64_t
105 total_latency(struct test_perf *t)
111 for (i = 0; i < t->nb_workers; i++)
112 total += t->worker[i].latency;
/*
 * perf_launch_lcores: starts the `worker` callback on every lcore enabled in
 * opt->wlcores and perf_producer on every lcore enabled in opt->plcores, then
 * monitors progress on the calling lcore until t->done becomes true.
 *
 * While monitoring it:
 *  - prints a throughput line (and, when opt->fwd_latency is set and at
 *    least one packet was processed, an average forward latency) each time
 *    more than one timer-second of cycles has elapsed;
 *  - sets t->result to EVT_TEST_SUCCESS once no packets remain outstanding;
 *  - dumps the event device and reports a deadlock when the outstanding
 *    count has not changed between two samples taken more than five
 *    timer-seconds apart.
 *
 * NOTE(review): the return type/value, the declarations of `ret`, `lcore_id`
 * and `port_idx`, the error-path statements after each evt_err(), and the
 * updates of `total_mpps`, `samples` and t->done are not visible in this
 * excerpt.
 */
119 perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
120 int (*worker)(void *))
123 struct test_perf *t = evt_test_priv(test);
/* Launch one worker per lcore flagged in opt->wlcores; unflagged lcores are
 * tested by the condition below (the skip statement itself is not shown). */
127 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
128 if (!(opt->wlcores[lcore_id]))
131 ret = rte_eal_remote_launch(worker,
132 &t->worker[port_idx], lcore_id);
134 evt_err("failed to launch worker %d", lcore_id);
140 /* launch producers */
141 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
142 if (!(opt->plcores[lcore_id]))
/* NOTE(review): t->prod is indexed with the same port_idx variable used for
 * workers; how port_idx advances between the two loops is not visible. */
145 ret = rte_eal_remote_launch(perf_producer, &t->prod[port_idx],
148 evt_err("failed to launch perf_producer %d", lcore_id);
/* Total packets expected: per-producer budget times active producer lcores. */
154 const uint64_t total_pkts = opt->nb_pkts *
155 evt_nr_active_lcores(opt->plcores);
/* Deadlock detector state: last sample time, last outstanding count, and a
 * sampling period of five seconds expressed in timer cycles. */
157 uint64_t dead_lock_cycles = rte_get_timer_cycles();
158 int64_t dead_lock_remaining = total_pkts;
159 const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;
/* Throughput sampler state: same idea with a one-second period. */
161 uint64_t perf_cycles = rte_get_timer_cycles();
162 int64_t perf_remaining = total_pkts;
163 const uint64_t perf_sample = rte_get_timer_hz();
/* Running aggregates for the average printed below. Being `static`, they
 * keep their values across calls to this function. */
165 static float total_mpps;
166 static uint64_t samples;
/* Integer division: evaluates to 0 if the timer runs below 1 MHz, which
 * would make the latency division below a float divide by zero.
 * NOTE(review): confirm the timer frequency is always >= 1 MHz. */
168 const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
169 int64_t remaining = t->outstand_pkts - processed_pkts(t);
171 while (t->done == false) {
172 const uint64_t new_cycles = rte_get_timer_cycles();
/* Throughput sample: taken once more than perf_sample cycles have elapsed. */
174 if ((new_cycles - perf_cycles) > perf_sample) {
175 const uint64_t latency = total_latency(t);
176 const uint64_t pkts = processed_pkts(t);
178 remaining = t->outstand_pkts - pkts;
/* Packets completed since the previous sample, in millions. The elapsed
 * time is only known to exceed one second, so this approximates Mpps. */
179 float mpps = (float)(perf_remaining-remaining)/1000000;
181 perf_remaining = remaining;
182 perf_cycles = new_cycles;
/* Latency is only reported when requested and when pkts is non-zero, which
 * also guards the latency/pkts division. */
185 if (opt->fwd_latency && pkts > 0) {
186 printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
187 mpps, total_mpps/samples,
/* latency/pkts is a uint64_t division, so the per-packet average is
 * truncated to a whole number before the float conversion. */
188 (float)(latency/pkts)/freq_mhz);
190 printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
191 mpps, total_mpps/samples);
/* Nothing left outstanding: mark the run successful. */
195 if (remaining <= 0) {
197 t->result = EVT_TEST_SUCCESS;
/* Deadlock sample: taken once more than dead_lock_sample cycles have elapsed. */
203 if (new_cycles - dead_lock_cycles > dead_lock_sample) {
204 remaining = t->outstand_pkts - processed_pkts(t);
/* No change in the outstanding count since the previous deadlock sample. */
205 if (dead_lock_remaining == remaining) {
206 rte_event_dev_dump(opt->dev_id, stdout);
207 evt_err("No schedules for seconds, deadlock");
212 dead_lock_remaining = remaining;
213 dead_lock_cycles = new_cycles;
/*
 * perf_event_dev_port_setup: configures the event ports of opt->dev_id.
 *
 * Ports 0 .. (active worker lcores - 1) are set up with the worker port
 * configuration and linked to queues; the remaining ports up to
 * perf_nb_event_ports(opt) are set up for producers and left unlinked.
 * The matching worker_data / prod_data entries in the test's private state
 * are initialised along the way.
 *
 * stride    - multiplied by the producer index to pick each producer's
 *             queue_id.
 * nb_queues - number of queues rte_event_port_link() is expected to report
 *             as linked for every worker port.
 *
 * NOTE(review): the return type/value, the declarations of `ret`, `port`
 * and `prod`, several struct-initializer fields and several per-port
 * assignments are not visible in this excerpt.
 */
221 perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
222 uint8_t stride, uint8_t nb_queues)
224 struct test_perf *t = evt_test_priv(test);
228 /* port configuration */
229 const struct rte_event_port_conf wkr_p_conf = {
230 .dequeue_depth = opt->wkr_deq_dep,
232 .new_event_threshold = 4096,
235 /* setup one port per worker, linking to all queues */
236 for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
238 struct worker_data *w = &t->worker[port];
240 w->dev_id = opt->dev_id;
/* Reset the counter that processed_pkts() sums across workers. */
243 w->processed_pkts = 0;
246 ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
248 evt_err("failed to setup port %d", port);
/* NULL queue and priority arrays are passed; the call is treated as a
 * failure unless it reports exactly nb_queues links. */
252 ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
253 if (ret != nb_queues) {
254 evt_err("failed to link all queues to port %d", port);
259 /* port for producers, no links */
260 const struct rte_event_port_conf prod_conf = {
263 .new_event_threshold = 1200,
/* `port` carries on from where the worker loop stopped. */
266 for ( ; port < perf_nb_event_ports(opt); port++) {
/* t->prod is indexed by the port number, not by the producer index. */
267 struct prod_data *p = &t->prod[port];
269 p->dev_id = opt->dev_id;
/* NOTE(review): the declaration and increment of `prod` are not visible. */
271 p->queue_id = prod * stride;
274 ret = rte_event_port_setup(opt->dev_id, port, &prod_conf);
276 evt_err("failed to setup port %d", port);
/*
 * perf_opt_check: validates the command-line options for the perf tests and
 * fixes up a few of them.
 *
 * Checks visible here, each reported through evt_err():
 *  - enough lcores are available;
 *  - worker lcores do not overlap the master lcore or the producer lcores,
 *    are all enabled, and at least one is active;
 *  - producer lcores do not overlap the master lcore, are all enabled, and
 *    at least one is active;
 *  - stage and sched-type settings are valid;
 *  - nb_queues and the number of event ports stay within EVT_MAX_QUEUES /
 *    EVT_MAX_PORTS.
 * Fix-ups: fwd_latency is cleared for single-stage runs, and nb_pkts == 0
 * is replaced by a very large per-producer budget.
 *
 * NOTE(review): the return type, every return statement and the computation
 * of `lcores` are not visible in this excerpt. nb_queues is uint64_t here
 * but uint8_t in perf_opt_dump() -- confirm the intended width.
 */
286 perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
290 /* N producer + N worker + 1 master */
293 if (rte_lcore_count() < lcores) {
294 evt_err("test need minimum %d lcores", lcores);
298 /* Validate worker lcores */
299 if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
300 evt_err("worker lcores overlaps with master lcore");
303 if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
304 evt_err("worker lcores overlaps producer lcores");
307 if (evt_has_disabled_lcore(opt->wlcores)) {
308 evt_err("one or more workers lcores are not enabled");
311 if (!evt_has_active_lcore(opt->wlcores)) {
312 evt_err("minimum one worker is required");
316 /* Validate producer lcores */
317 if (evt_lcores_has_overlap(opt->plcores, rte_get_master_lcore())) {
318 evt_err("producer lcores overlaps with master lcore");
321 if (evt_has_disabled_lcore(opt->plcores)) {
322 evt_err("one or more producer lcores are not enabled");
325 if (!evt_has_active_lcore(opt->plcores)) {
326 evt_err("minimum one producer is required");
/* NOTE(review): the helpers below presumably log their own error; the
 * statements executed when they return true are not visible. */
330 if (evt_has_invalid_stage(opt))
333 if (evt_has_invalid_sched_type(opt))
336 if (nb_queues > EVT_MAX_QUEUES) {
337 evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
340 if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
341 evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
/* Forward latency is only measured across multiple stages. */
346 if (opt->nb_stages == 1 && opt->fwd_latency) {
347 evt_info("fwd_latency is valid when nb_stages > 1, disabling");
348 opt->fwd_latency = 0;
/* NOTE(review): the message says queue priority gets enabled; the
 * assignment that does so is not visible in this excerpt. */
350 if (opt->fwd_latency && !opt->q_priority) {
351 evt_info("enabled queue priority for latency measurement");
/* nb_pkts == 0 is replaced by INT64_MAX split across the active producers.
 * The divisor is non-zero only if the "minimum one producer" check above
 * bails out on failure -- presumably it does; confirm. */
354 if (opt->nb_pkts == 0)
355 opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);
/*
 * perf_opt_dump: prints the effective perf-test configuration through the
 * evt_dump* helpers -- producer and worker lcore counts and lists, stage
 * count, number of event ports, nb_queues, queue priority and the sched
 * type list.
 * NOTE(review): the return type is not visible in this excerpt.
 */
361 perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
363 evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
364 evt_dump_producer_lcores(opt);
365 evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
366 evt_dump_worker_lcores(opt);
367 evt_dump_nb_stages(opt);
368 evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
369 evt_dump("nb_evdev_queues", "%d", nb_queues);
370 evt_dump_queue_priority(opt);
371 evt_dump_sched_type_list(opt);
/*
 * perf_eventdev_destroy: stops and then closes the event device identified
 * by opt->dev_id. `test` is not used by the visible statements.
 * NOTE(review): the return value of rte_event_dev_close() is not inspected
 * in the lines shown here.
 */
375 perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
379 rte_event_dev_stop(opt->dev_id);
380 rte_event_dev_close(opt->dev_id);
/*
 * perf_elt_init: mempool object constructor (passed to rte_mempool_create()
 * in perf_mempool_setup()). Zero-fills the object using the pool's element
 * size; the opaque argument and the object index are unused.
 */
384 perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
385 void *obj, unsigned i __rte_unused)
387 memset(obj, 0, mp->elt_size);
/*
 * perf_mempool_setup: creates the mempool used by the producers and stores
 * it in the test's private state (t->pool). The pool is named after the
 * test, holds opt->pool_sz elements of sizeof(struct perf_elt) bytes, uses
 * perf_elt_init as the object constructor and is allocated on
 * opt->socket_id. A NULL result is reported through evt_err().
 * NOTE(review): some rte_mempool_create() arguments and the return
 * type/value of this function are not visible in this excerpt.
 */
391 perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
393 struct test_perf *t = evt_test_priv(test);
395 t->pool = rte_mempool_create(test->name, /* mempool name */
396 opt->pool_sz, /* number of elements*/
397 sizeof(struct perf_elt), /* element size*/
400 perf_elt_init, /* obj constructor */
401 NULL, opt->socket_id, 0); /* flags */
402 if (t->pool == NULL) {
403 evt_err("failed to create mempool");
/*
 * perf_mempool_destroy: releases the mempool stored in the test's private
 * state (t->pool) with rte_mempool_free(). `opt` is not used by the visible
 * statements.
 */
411 perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
414 struct test_perf *t = evt_test_priv(test);
416 rte_mempool_free(t->pool);
/*
 * perf_test_setup: allocates the zeroed, cache-line aligned struct test_perf
 * for this test on opt->socket_id, attaches it to test->test_priv and seeds
 * it from the options.
 * NOTE(review): the declaration of `test_perf`, the statements following the
 * allocation-failure message and the return type/value are not visible in
 * this excerpt.
 */
420 perf_test_setup(struct evt_test *test, struct evt_options *opt)
424 test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
425 RTE_CACHE_LINE_SIZE, opt->socket_id);
426 if (test_perf == NULL) {
427 evt_err("failed to allocate test_perf memory");
430 test->test_priv = test_perf;
432 struct test_perf *t = evt_test_priv(test);
/* Total packets the run must complete: per-producer budget times the number
 * of active producer lcores. */
434 t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
435 t->nb_workers = evt_nr_active_lcores(opt->wlcores);
437 t->nb_pkts = opt->nb_pkts;
438 t->nb_flows = opt->nb_flows;
/* Start out as failed; perf_launch_lcores() switches this to
 * EVT_TEST_SUCCESS once nothing remains outstanding. */
439 t->result = EVT_TEST_FAILED;
/* Copies sizeof(opt->sched_type_list) bytes.
 * NOTE(review): assumes t->sched_type_list is at least that large -- confirm
 * against the struct definitions. */
441 memcpy(t->sched_type_list, opt->sched_type_list,
442 sizeof(opt->sched_type_list));
/*
 * perf_test_destroy: frees the private state stored in test->test_priv with
 * rte_free(). `opt` is not used by the visible statement.
 * NOTE(review): test->test_priv is not reset to NULL in the lines shown.
 */
449 perf_test_destroy(struct evt_test *test, struct evt_options *opt)
453 rte_free(test->test_priv);