/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#include "test_perf_common.h"
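
/* Print how evenly traffic spread across the worker cores and return
 * the verdict recorded by perf_launch_lcores().
 */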
int
perf_test_result(struct evt_test *test, struct evt_options *opt)
{
    RTE_SET_USED(opt);
    int i;
    uint64_t total = 0;
    struct test_perf *t = evt_test_priv(test);

    printf("Packet distribution across worker cores :\n");
    for (i = 0; i < t->nb_workers; i++)
        total += t->worker[i].processed_pkts;
    for (i = 0; i < t->nb_workers; i++)
        printf("Worker %d packets: "CLGRN"%"PRIx64" "CLNRM"percentage:"
                CLGRN" %3.2f\n"CLNRM, i,
                t->worker[i].processed_pkts,
                (((double)t->worker[i].processed_pkts)/total)
                * 100);

    return t->result;
}
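
/* Synthetic producer: pulls a perf element from the mempool,
 * timestamps it and enqueues it as a NEW event, retrying until the
 * event device accepts it or the test is stopped.
 */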
static inline int
perf_producer(void *arg)
{
    struct prod_data *p = arg;
    struct test_perf *t = p->t;
    struct evt_options *opt = t->opt;
    const uint8_t dev_id = p->dev_id;
    const uint8_t port = p->port_id;
    struct rte_mempool *pool = t->pool;
    const uint64_t nb_pkts = t->nb_pkts;
    const uint32_t nb_flows = t->nb_flows;
    uint32_t flow_counter = 0;
    uint64_t count = 0;
    struct perf_elt *m;
    struct rte_event ev;

    if (opt->verbose_level > 1)
        printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
                rte_lcore_id(), dev_id, port, p->queue_id);

    ev.event = 0;
    ev.op = RTE_EVENT_OP_NEW;
    ev.queue_id = p->queue_id;
    ev.sched_type = t->opt->sched_type_list[0];
    ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
    ev.event_type = RTE_EVENT_TYPE_CPU;
    ev.sub_event_type = 0; /* stage 0 */

    while (count < nb_pkts && t->done == false) {
        if (rte_mempool_get(pool, (void **)&m) < 0)
            continue;

        ev.flow_id = flow_counter++ % nb_flows;
        ev.event_ptr = m;
        m->timestamp = rte_get_timer_cycles();
        while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
            if (t->done)
                break;
            rte_pause();
            m->timestamp = rte_get_timer_cycles();
        }
        count++;
    }

    return 0;
}
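
/* Timer-adapter producer (single arm): each element embeds an event
 * timer that is armed individually; the cycles spent between the
 * pre-arm timestamp and arm completion accumulate into arm_latency.
 */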
static inline int
perf_event_timer_producer(void *arg)
{
    struct prod_data *p = arg;
    struct test_perf *t = p->t;
    struct evt_options *opt = t->opt;
    uint32_t flow_counter = 0;
    uint64_t count = 0;
    uint64_t arm_latency = 0;
    const uint8_t nb_timer_adptrs = opt->nb_timer_adptrs;
    const uint32_t nb_flows = t->nb_flows;
    const uint64_t nb_timers = opt->nb_timers;
    struct rte_mempool *pool = t->pool;
    struct perf_elt *m;
    struct rte_event_timer_adapter **adptr = t->timer_adptr;
    struct rte_event_timer tim;
    uint64_t timeout_ticks = opt->expiry_nsec / opt->timer_tick_nsec;

    memset(&tim, 0, sizeof(struct rte_event_timer));
    /* Rescale the tick count to the adapter's achieved resolution, if
     * it differs from the requested tick; never arm with zero ticks.
     */
    timeout_ticks = opt->optm_timer_tick_nsec ?
            (timeout_ticks * opt->timer_tick_nsec)
            / opt->optm_timer_tick_nsec : timeout_ticks;
    timeout_ticks += timeout_ticks ? 0 : 1;
    tim.ev.event_type = RTE_EVENT_TYPE_TIMER;
    tim.ev.op = RTE_EVENT_OP_NEW;
    tim.ev.sched_type = t->opt->sched_type_list[0];
    tim.ev.queue_id = p->queue_id;
    tim.ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
    tim.state = RTE_EVENT_TIMER_NOT_ARMED;
    tim.timeout_ticks = timeout_ticks;

    if (opt->verbose_level > 1)
        printf("%s(): lcore %d\n", __func__, rte_lcore_id());

    while (count < nb_timers && t->done == false) {
        if (rte_mempool_get(pool, (void **)&m) < 0)
            continue;

        m->tim = tim;
        m->tim.ev.flow_id = flow_counter++ % nb_flows;
        m->tim.ev.event_ptr = m;
        m->timestamp = rte_get_timer_cycles();
        while (rte_event_timer_arm_burst(
                adptr[flow_counter % nb_timer_adptrs],
                (struct rte_event_timer **)&m, 1) != 1) {
            if (t->done)
                break;
            m->timestamp = rte_get_timer_cycles();
        }
        arm_latency += rte_get_timer_cycles() - m->timestamp;
        count++;
    }
    fflush(stdout);
    rte_delay_ms(1000);
    printf("%s(): lcore %d Average event timer arm latency = %.3f us\n",
            __func__, rte_lcore_id(), (float)(arm_latency / count) /
            (rte_get_timer_hz() / 1000000));
    return 0;
}
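
/* Timer-adapter producer (burst arm): identical tick setup, but arms
 * BURST_SIZE timers per rte_event_timer_arm_tmo_tick_burst() call,
 * which requires a common timeout for the whole burst.
 */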
static inline int
perf_event_timer_producer_burst(void *arg)
{
    int i;
    struct prod_data *p = arg;
    struct test_perf *t = p->t;
    struct evt_options *opt = t->opt;
    uint32_t flow_counter = 0;
    uint64_t count = 0;
    uint64_t arm_latency = 0;
    const uint8_t nb_timer_adptrs = opt->nb_timer_adptrs;
    const uint32_t nb_flows = t->nb_flows;
    const uint64_t nb_timers = opt->nb_timers;
    struct rte_mempool *pool = t->pool;
    /* extra NULL slot so prefetching m[i + 1] is safe on the last
     * iteration
     */
    struct perf_elt *m[BURST_SIZE + 1] = {NULL};
    struct rte_event_timer_adapter **adptr = t->timer_adptr;
    struct rte_event_timer tim;
    uint64_t timeout_ticks = opt->expiry_nsec / opt->timer_tick_nsec;

    memset(&tim, 0, sizeof(struct rte_event_timer));
    timeout_ticks = opt->optm_timer_tick_nsec ?
            (timeout_ticks * opt->timer_tick_nsec)
            / opt->optm_timer_tick_nsec : timeout_ticks;
    timeout_ticks += timeout_ticks ? 0 : 1;
    tim.ev.event_type = RTE_EVENT_TYPE_TIMER;
    tim.ev.op = RTE_EVENT_OP_NEW;
    tim.ev.sched_type = t->opt->sched_type_list[0];
    tim.ev.queue_id = p->queue_id;
    tim.ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
    tim.state = RTE_EVENT_TIMER_NOT_ARMED;
    tim.timeout_ticks = timeout_ticks;

    if (opt->verbose_level > 1)
        printf("%s(): lcore %d\n", __func__, rte_lcore_id());

    while (count < nb_timers && t->done == false) {
        if (rte_mempool_get_bulk(pool, (void **)m, BURST_SIZE) < 0)
            continue;
        for (i = 0; i < BURST_SIZE; i++) {
            rte_prefetch0(m[i + 1]);
            m[i]->tim = tim;
            m[i]->tim.ev.flow_id = flow_counter++ % nb_flows;
            m[i]->tim.ev.event_ptr = m[i];
            m[i]->timestamp = rte_get_timer_cycles();
        }
        rte_event_timer_arm_tmo_tick_burst(
                adptr[flow_counter % nb_timer_adptrs],
                (struct rte_event_timer **)m,
                tim.timeout_ticks,
                BURST_SIZE);
        arm_latency += rte_get_timer_cycles() - m[i - 1]->timestamp;
        count += BURST_SIZE;
    }
    fflush(stdout);
    rte_delay_ms(1000);
    printf("%s(): lcore %d Average event timer arm latency = %.3f us\n",
            __func__, rte_lcore_id(), (float)(arm_latency / count) /
            (rte_get_timer_hz() / 1000000));
    return 0;
}
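
/* Dispatch to the producer flavour selected on the command line. */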
static int
perf_producer_wrapper(void *arg)
{
    struct prod_data *p = arg;
    struct test_perf *t = p->t;
    /* Launch the producer function only in case of synthetic producer. */
    if (t->opt->prod_type == EVT_PROD_TYPE_SYNT)
        return perf_producer(arg);
    else if (t->opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR &&
            !t->opt->timdev_use_burst)
        return perf_event_timer_producer(arg);
    else if (t->opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR &&
            t->opt->timdev_use_burst)
        return perf_event_timer_producer_burst(arg);
    return 0;
}
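
/* Sum the per-worker counters; the read barrier orders these reads
 * against the workers' updates so the master sees recent values.
 */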
static inline uint64_t
processed_pkts(struct test_perf *t)
{
    uint8_t i;
    uint64_t total = 0;

    rte_smp_rmb();
    for (i = 0; i < t->nb_workers; i++)
        total += t->worker[i].processed_pkts;

    return total;
}
static inline uint64_t
total_latency(struct test_perf *t)
{
    uint8_t i;
    uint64_t total = 0;

    rte_smp_rmb();
    for (i = 0; i < t->nb_workers; i++)
        total += t->worker[i].latency;

    return total;
}
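
/* Launch workers then producers on their assigned lcores and busy-poll
 * from the master core: print a throughput sample every second, finish
 * when all outstanding packets are accounted for, and dump the event
 * device state if no progress is seen for five seconds.
 */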
int
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
        int (*worker)(void *))
{
    int ret, lcore_id;
    struct test_perf *t = evt_test_priv(test);

    int port_idx = 0;
    /* launch workers */
    RTE_LCORE_FOREACH_SLAVE(lcore_id) {
        if (!(opt->wlcores[lcore_id]))
            continue;

        ret = rte_eal_remote_launch(worker,
                &t->worker[port_idx], lcore_id);
        if (ret) {
            evt_err("failed to launch worker %d", lcore_id);
            return ret;
        }
        port_idx++;
    }

    /* launch producers */
    RTE_LCORE_FOREACH_SLAVE(lcore_id) {
        if (!(opt->plcores[lcore_id]))
            continue;

        ret = rte_eal_remote_launch(perf_producer_wrapper,
                &t->prod[port_idx], lcore_id);
        if (ret) {
            evt_err("failed to launch perf_producer %d", lcore_id);
            return ret;
        }
        port_idx++;
    }

    const uint64_t total_pkts = t->outstand_pkts;

    uint64_t dead_lock_cycles = rte_get_timer_cycles();
    int64_t dead_lock_remaining = total_pkts;
    const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;

    uint64_t perf_cycles = rte_get_timer_cycles();
    int64_t perf_remaining = total_pkts;
    const uint64_t perf_sample = rte_get_timer_hz();

    static float total_mpps;
    static uint64_t samples;

    const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
    int64_t remaining = t->outstand_pkts - processed_pkts(t);

    while (t->done == false) {
        const uint64_t new_cycles = rte_get_timer_cycles();

        if ((new_cycles - perf_cycles) > perf_sample) {
            const uint64_t latency = total_latency(t);
            const uint64_t pkts = processed_pkts(t);

            remaining = t->outstand_pkts - pkts;
            float mpps = (float)(perf_remaining-remaining)/1000000;

            perf_remaining = remaining;
            perf_cycles = new_cycles;
            total_mpps += mpps;
            ++samples;
            if (opt->fwd_latency && pkts > 0) {
                printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
                    mpps, total_mpps/samples,
                    (float)(latency/pkts)/freq_mhz);
            } else {
                printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
                    mpps, total_mpps/samples);
            }
            fflush(stdout);

            if (remaining <= 0) {
                t->result = EVT_TEST_SUCCESS;
                if (opt->prod_type == EVT_PROD_TYPE_SYNT ||
                    opt->prod_type ==
                    EVT_PROD_TYPE_EVENT_TIMER_ADPTR) {
                    t->done = true;
                    rte_smp_wmb();
                    break;
                }
            }
        }

        if (new_cycles - dead_lock_cycles > dead_lock_sample &&
                opt->prod_type == EVT_PROD_TYPE_SYNT) {
            remaining = t->outstand_pkts - processed_pkts(t);
            if (dead_lock_remaining == remaining) {
                rte_event_dev_dump(opt->dev_id, stdout);
                evt_err("No schedules for seconds, deadlock");
                t->done = true;
                rte_smp_wmb();
                break;
            }
            dead_lock_remaining = remaining;
            dead_lock_cycles = new_cycles;
        }
    }

    return 0;
}
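
/* One Rx adapter is created per ethdev and all of its Rx queues (-1)
 * are mapped to the ethdev's event queue; a service core is set up
 * when the adapter lacks an internal port.
 */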
static int
perf_event_rx_adapter_setup(struct evt_options *opt, uint8_t stride,
        struct rte_event_port_conf prod_conf)
{
    int ret = 0;
    uint16_t prod;
    struct rte_event_eth_rx_adapter_queue_conf queue_conf;

    memset(&queue_conf, 0,
            sizeof(struct rte_event_eth_rx_adapter_queue_conf));
    queue_conf.ev.sched_type = opt->sched_type_list[0];
    RTE_ETH_FOREACH_DEV(prod) {
        uint32_t cap;

        ret = rte_event_eth_rx_adapter_caps_get(opt->dev_id,
                prod, &cap);
        if (ret) {
            evt_err("failed to get event rx adapter[%d] capabilities",
                    opt->dev_id);
            return ret;
        }
        queue_conf.ev.queue_id = prod * stride;
        ret = rte_event_eth_rx_adapter_create(prod, opt->dev_id,
                &prod_conf);
        if (ret) {
            evt_err("failed to create rx adapter[%d]", prod);
            return ret;
        }
        ret = rte_event_eth_rx_adapter_queue_add(prod, prod, -1,
                &queue_conf);
        if (ret) {
            evt_err("failed to add rx queues to adapter[%d]", prod);
            return ret;
        }

        if (!(cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT)) {
            uint32_t service_id;

            rte_event_eth_rx_adapter_service_id_get(prod,
                    &service_id);
            ret = evt_service_setup(service_id);
            if (ret) {
                evt_err("failed to setup service core for Rx adapter");
                return ret;
            }
        }

        ret = rte_eth_dev_start(prod);
        if (ret) {
            evt_err("Ethernet dev [%d] failed to start. Using synthetic producer",
                    prod);
            return ret;
        }

        ret = rte_event_eth_rx_adapter_start(prod);
        if (ret) {
            evt_err("Rx adapter[%d] start failed", prod);
            return ret;
        }
        printf("%s: Port[%d] using Rx adapter[%d] started\n", __func__,
                prod, prod);
    }

    return ret;
}
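
/* Create and start opt->nb_timer_adptrs timer adapters. The adapter's
 * achieved resolution is saved in optm_timer_tick_nsec so that the
 * producers can rescale their timeout ticks accordingly.
 */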
static int
perf_event_timer_adapter_setup(struct test_perf *t)
{
    int i;
    int ret;
    struct rte_event_timer_adapter_info adapter_info;
    struct rte_event_timer_adapter *wl;
    uint8_t nb_producers = evt_nr_active_lcores(t->opt->plcores);
    uint8_t flags = RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES;

    if (nb_producers == 1)
        flags |= RTE_EVENT_TIMER_ADAPTER_F_SP_PUT;

    for (i = 0; i < t->opt->nb_timer_adptrs; i++) {
        struct rte_event_timer_adapter_conf config = {
            .event_dev_id = t->opt->dev_id,
            .timer_adapter_id = i,
            .timer_tick_ns = t->opt->timer_tick_nsec,
            .max_tmo_ns = t->opt->max_tmo_nsec,
            .nb_timers = 2 * 1024 * 1024,
            .flags = flags,
        };

        wl = rte_event_timer_adapter_create(&config);
        if (wl == NULL) {
            evt_err("failed to create event timer ring %d", i);
            return rte_errno;
        }

        memset(&adapter_info, 0,
                sizeof(struct rte_event_timer_adapter_info));
        rte_event_timer_adapter_get_info(wl, &adapter_info);
        t->opt->optm_timer_tick_nsec = adapter_info.min_resolution_ns;

        if (!(adapter_info.caps &
                RTE_EVENT_TIMER_ADAPTER_CAP_INTERNAL_PORT)) {
            uint32_t service_id;

            rte_event_timer_adapter_service_id_get(wl,
                    &service_id);
            ret = evt_service_setup(service_id);
            if (ret) {
                evt_err("failed to setup service core for timer adapter");
                return ret;
            }
            rte_service_runstate_set(service_id, 1);
        }

        ret = rte_event_timer_adapter_start(wl);
        if (ret) {
            evt_err("failed to start event timer adapter %d", i);
            return ret;
        }
        t->timer_adptr[i] = wl;
    }
    return 0;
}
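
/* Worker ports are linked to every queue; producer ports follow and
 * are wired according to the producer type (Rx adapter, timer adapter
 * or synthetic enqueue).
 */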
int
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
                uint8_t stride, uint8_t nb_queues,
                const struct rte_event_port_conf *port_conf)
{
    struct test_perf *t = evt_test_priv(test);
    uint16_t port, prod;
    int ret = -1;

    /* setup one port per worker, linking to all queues */
    for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
                port++) {
        struct worker_data *w = &t->worker[port];

        w->dev_id = opt->dev_id;
        w->port_id = port;
        w->t = t;
        w->processed_pkts = 0;
        w->latency = 0;

        ret = rte_event_port_setup(opt->dev_id, port, port_conf);
        if (ret) {
            evt_err("failed to setup port %d", port);
            return ret;
        }

        ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
        if (ret != nb_queues) {
            evt_err("failed to link all queues to port %d", port);
            return -EINVAL;
        }
    }

    /* port for producers, no links */
    if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
        for ( ; port < perf_nb_event_ports(opt); port++) {
            struct prod_data *p = &t->prod[port];
            p->t = t;
        }

        ret = perf_event_rx_adapter_setup(opt, stride, *port_conf);
        if (ret)
            return ret;
    } else if (opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR) {
        prod = 0;
        for ( ; port < perf_nb_event_ports(opt); port++) {
            struct prod_data *p = &t->prod[port];
            p->queue_id = prod * stride;
            p->t = t;
            prod++;
        }

        ret = perf_event_timer_adapter_setup(t);
        if (ret)
            return ret;
    } else {
        prod = 0;
        for ( ; port < perf_nb_event_ports(opt); port++) {
            struct prod_data *p = &t->prod[port];

            p->dev_id = opt->dev_id;
            p->port_id = port;
            p->queue_id = prod * stride;
            p->t = t;

            ret = rte_event_port_setup(opt->dev_id, port,
                    port_conf);
            if (ret) {
                evt_err("failed to setup port %d", port);
                return ret;
            }
            prod++;
        }
    }

    return ret;
}
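
/* Sanity-check the lcore masks and queue/port budget, then apply
 * fixups such as disabling fwd_latency for single-stage pipelines.
 */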
int
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
{
    unsigned int lcores;

    /* N producer + N worker + 1 master when producer cores are used
     * Else N worker + 1 master when Rx adapter is used
     */
    lcores = opt->prod_type == EVT_PROD_TYPE_SYNT ? 3 : 2;

    if (rte_lcore_count() < lcores) {
        evt_err("test needs minimum %d lcores", lcores);
        return -1;
    }

    /* Validate worker lcores */
    if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
        evt_err("worker lcores overlap with master lcore");
        return -1;
    }
    if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
        evt_err("worker lcores overlap producer lcores");
        return -1;
    }
    if (evt_has_disabled_lcore(opt->wlcores)) {
        evt_err("one or more worker lcores are not enabled");
        return -1;
    }
    if (!evt_has_active_lcore(opt->wlcores)) {
        evt_err("minimum one worker is required");
        return -1;
    }

    if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
        /* Validate producer lcores */
        if (evt_lcores_has_overlap(opt->plcores,
                    rte_get_master_lcore())) {
            evt_err("producer lcores overlap with master lcore");
            return -1;
        }
        if (evt_has_disabled_lcore(opt->plcores)) {
            evt_err("one or more producer lcores are not enabled");
            return -1;
        }
        if (!evt_has_active_lcore(opt->plcores)) {
            evt_err("minimum one producer is required");
            return -1;
        }
    }

    if (evt_has_invalid_stage(opt))
        return -1;

    if (evt_has_invalid_sched_type(opt))
        return -1;

    if (nb_queues > EVT_MAX_QUEUES) {
        evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
        return -1;
    }
    if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
        evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
        return -1;
    }

    /* Fixups */
    if ((opt->nb_stages == 1 &&
            opt->prod_type != EVT_PROD_TYPE_EVENT_TIMER_ADPTR) &&
            opt->fwd_latency) {
        evt_info("fwd_latency is valid when nb_stages > 1, disabling");
        opt->fwd_latency = 0;
    }

    if (opt->fwd_latency && !opt->q_priority) {
        evt_info("enabled queue priority for latency measurement");
        opt->q_priority = 1;
    }
    if (opt->nb_pkts == 0)
        opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);

    return 0;
}
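
/* Dump the effective test configuration before the run starts. */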
void
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
{
    evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
    evt_dump_producer_lcores(opt);
    evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
    evt_dump_worker_lcores(opt);
    evt_dump_nb_stages(opt);
    evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
    evt_dump("nb_evdev_queues", "%d", nb_queues);
    evt_dump_queue_priority(opt);
    evt_dump_sched_type_list(opt);
    evt_dump_producer_type(opt);
}
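
/* Stop timer adapters (if any) before stopping and closing the event
 * device.
 */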
void
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
{
    int i;
    struct test_perf *t = evt_test_priv(test);

    if (opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR) {
        for (i = 0; i < opt->nb_timer_adptrs; i++)
            rte_event_timer_adapter_stop(t->timer_adptr[i]);
    }
    rte_event_dev_stop(opt->dev_id);
    rte_event_dev_close(opt->dev_id);
}
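
/* Mempool object constructor: zero every perf element at pool
 * creation time.
 */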
static inline void
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
        void *obj, unsigned i __rte_unused)
{
    memset(obj, 0, mp->elt_size);
}

#define NB_RX_DESC 128
#define NB_TX_DESC 512
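
/* Configure each available ethdev with one RSS Rx queue and one Tx
 * queue; skipped for the synthetic and timer producers, which never
 * touch the NIC.
 */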
int
perf_ethdev_setup(struct evt_test *test, struct evt_options *opt)
{
    uint16_t i;
    struct test_perf *t = evt_test_priv(test);
    struct rte_eth_conf port_conf = {
        .rxmode = {
            .mq_mode = ETH_MQ_RX_RSS,
            .max_rx_pkt_len = ETHER_MAX_LEN,
            .split_hdr_size = 0,
        },
        .rx_adv_conf = {
            .rss_conf = {
                .rss_key = NULL,
                .rss_hf = ETH_RSS_IP,
            },
        },
    };

    if (opt->prod_type == EVT_PROD_TYPE_SYNT ||
            opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR)
        return 0;

    if (!rte_eth_dev_count_avail()) {
        evt_err("No ethernet ports found.");
        return -ENODEV;
    }

    RTE_ETH_FOREACH_DEV(i) {

        if (rte_eth_dev_configure(i, 1, 1,
                    &port_conf) < 0) {
            evt_err("Failed to configure eth port [%d]", i);
            return -EINVAL;
        }

        if (rte_eth_rx_queue_setup(i, 0, NB_RX_DESC,
                rte_socket_id(), NULL, t->pool) < 0) {
            evt_err("Failed to setup eth port [%d] rx_queue: %d.",
                    i, 0);
            return -EINVAL;
        }

        if (rte_eth_tx_queue_setup(i, 0, NB_TX_DESC,
                rte_socket_id(), NULL) < 0) {
            evt_err("Failed to setup eth port [%d] tx_queue: %d.",
                    i, 0);
            return -EINVAL;
        }

        rte_eth_promiscuous_enable(i);
    }

    return 0;
}
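
/* Tear down the NICs only when the Rx adapter producer was in use. */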
void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt)
{
    uint16_t i;
    RTE_SET_USED(test);

    if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
        RTE_ETH_FOREACH_DEV(i) {
            rte_event_eth_rx_adapter_stop(i);
            rte_eth_dev_stop(i);
            rte_eth_dev_close(i);
        }
    }
}
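
/* Synthetic and timer producers move fixed-size perf elements, so a
 * plain mempool is enough; the Rx adapter path needs real mbufs.
 */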
int
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
{
    struct test_perf *t = evt_test_priv(test);

    if (opt->prod_type == EVT_PROD_TYPE_SYNT ||
            opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR) {
        t->pool = rte_mempool_create(test->name, /* mempool name */
                opt->pool_sz, /* number of elements*/
                sizeof(struct perf_elt), /* element size*/
                512, /* cache size*/
                0, NULL, NULL,
                perf_elt_init, /* obj constructor */
                NULL, opt->socket_id, 0); /* flags */
    } else {
        t->pool = rte_pktmbuf_pool_create(test->name, /* mempool name */
                opt->pool_sz, /* number of elements*/
                512, /* cache size*/
                0,
                RTE_MBUF_DEFAULT_BUF_SIZE,
                opt->socket_id); /* socket id */
    }

    if (t->pool == NULL) {
        evt_err("failed to create mempool");
        return -ENOMEM;
    }

    return 0;
}
void
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
{
    RTE_SET_USED(opt);
    struct test_perf *t = evt_test_priv(test);

    rte_mempool_free(t->pool);
}
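
/* Allocate the per-test context and derive the outstanding packet
 * budget: timer producers are bounded by nb_timers, the rest by
 * nb_pkts, each scaled by the number of producer cores.
 */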
int
perf_test_setup(struct evt_test *test, struct evt_options *opt)
{
    void *test_perf;

    test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
                RTE_CACHE_LINE_SIZE, opt->socket_id);
    if (test_perf == NULL) {
        evt_err("failed to allocate test_perf memory");
        goto nomem;
    }
    test->test_priv = test_perf;

    struct test_perf *t = evt_test_priv(test);

    if (opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR) {
        t->outstand_pkts = opt->nb_timers *
            evt_nr_active_lcores(opt->plcores);
        t->nb_pkts = opt->nb_timers;
    } else {
        t->outstand_pkts = opt->nb_pkts *
            evt_nr_active_lcores(opt->plcores);
        t->nb_pkts = opt->nb_pkts;
    }

    t->nb_workers = evt_nr_active_lcores(opt->wlcores);
    t->done = false;
    t->nb_flows = opt->nb_flows;
    t->result = EVT_TEST_FAILED;
    t->opt = opt;
    memcpy(t->sched_type_list, opt->sched_type_list,
            sizeof(opt->sched_type_list));

    return 0;
nomem:
    return -ENOMEM;
}
void
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
{
    RTE_SET_USED(opt);

    rte_free(test->test_priv);
}