373c9ce5c66b2f83d2d1ce710faf1af23c68a8b2
[dpdk.git] / app/test-eventdev/test_perf_common.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#include "test_perf_common.h"

int
perf_test_result(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(opt);
        struct test_perf *t = evt_test_priv(test);

        return t->result;
}

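/*
 * Synthetic producer loop: each producer lcore injects nb_pkts new events
 * into its queue, cycling the flow id over nb_flows and timestamping every
 * mempool element so the workers can account forwarding latency.
 */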
static inline int
perf_producer(void *arg)
{
        struct prod_data *p = arg;
        struct test_perf *t = p->t;
        struct evt_options *opt = t->opt;
        const uint8_t dev_id = p->dev_id;
        const uint8_t port = p->port_id;
        struct rte_mempool *pool = t->pool;
        const uint64_t nb_pkts = t->nb_pkts;
        const uint32_t nb_flows = t->nb_flows;
        uint32_t flow_counter = 0;
        uint64_t count = 0;
        struct perf_elt *m;
        struct rte_event ev;

        if (opt->verbose_level > 1)
                printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
                                rte_lcore_id(), dev_id, port, p->queue_id);

        ev.event = 0;
        ev.op = RTE_EVENT_OP_NEW;
        ev.queue_id = p->queue_id;
        ev.sched_type = t->opt->sched_type_list[0];
        ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
        ev.event_type = RTE_EVENT_TYPE_CPU;
        ev.sub_event_type = 0; /* stage 0 */

        while (count < nb_pkts && t->done == false) {
                if (rte_mempool_get(pool, (void **)&m) < 0)
                        continue;

                ev.flow_id = flow_counter++ % nb_flows;
                ev.event_ptr = m;
                m->timestamp = rte_get_timer_cycles();
                while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
                        if (t->done)
                                break;
                        rte_pause();
                        m->timestamp = rte_get_timer_cycles();
                }
                count++;
        }

        return 0;
}

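/* Sum of events processed so far across all worker lcores. */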
static inline uint64_t
processed_pkts(struct test_perf *t)
{
        uint8_t i;
        uint64_t total = 0;

        rte_smp_rmb();
        for (i = 0; i < t->nb_workers; i++)
                total += t->worker[i].processed_pkts;

        return total;
}

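/* Sum of the per-worker accumulated forwarding latency, in timer cycles. */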
static inline uint64_t
total_latency(struct test_perf *t)
{
        uint8_t i;
        uint64_t total = 0;

        rte_smp_rmb();
        for (i = 0; i < t->nb_workers; i++)
                total += t->worker[i].latency;

        return total;
}

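/*
 * Launch the worker and producer lcores, then poll from the master lcore:
 * print the throughput (and average forwarding latency) once per second,
 * stop when all outstanding packets have been processed, and abort with a
 * device dump if no progress is seen for 5 consecutive seconds.
 */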
int
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
                int (*worker)(void *))
{
        int ret, lcore_id;
        struct test_perf *t = evt_test_priv(test);

        int port_idx = 0;
        /* launch workers */
        RTE_LCORE_FOREACH_SLAVE(lcore_id) {
                if (!(opt->wlcores[lcore_id]))
                        continue;

                ret = rte_eal_remote_launch(worker,
                                 &t->worker[port_idx], lcore_id);
                if (ret) {
                        evt_err("failed to launch worker %d", lcore_id);
                        return ret;
                }
                port_idx++;
        }

        /* launch producers */
        RTE_LCORE_FOREACH_SLAVE(lcore_id) {
                if (!(opt->plcores[lcore_id]))
                        continue;

                ret = rte_eal_remote_launch(perf_producer, &t->prod[port_idx],
                                         lcore_id);
                if (ret) {
                        evt_err("failed to launch perf_producer %d", lcore_id);
                        return ret;
                }
                port_idx++;
        }

        const uint64_t total_pkts = opt->nb_pkts *
                        evt_nr_active_lcores(opt->plcores);

        uint64_t dead_lock_cycles = rte_get_timer_cycles();
        int64_t dead_lock_remaining = total_pkts;
        const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;

        uint64_t perf_cycles = rte_get_timer_cycles();
        int64_t perf_remaining = total_pkts;
        const uint64_t perf_sample = rte_get_timer_hz();

        static float total_mpps;
        static uint64_t samples;

        const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
        int64_t remaining = t->outstand_pkts - processed_pkts(t);

        while (t->done == false) {
                const uint64_t new_cycles = rte_get_timer_cycles();

                /* report throughput (and latency) once per second */
                if ((new_cycles - perf_cycles) > perf_sample) {
                        const uint64_t latency = total_latency(t);
                        const uint64_t pkts = processed_pkts(t);

                        remaining = t->outstand_pkts - pkts;
                        float mpps = (float)(perf_remaining-remaining)/1000000;

                        perf_remaining = remaining;
                        perf_cycles = new_cycles;
                        total_mpps += mpps;
                        ++samples;
                        if (opt->fwd_latency && pkts > 0) {
                                printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
                                        mpps, total_mpps/samples,
                                        (float)(latency/pkts)/freq_mhz);
                        } else {
                                printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
                                        mpps, total_mpps/samples);
                        }
                        fflush(stdout);

                        if (remaining <= 0) {
                                t->done = true;
                                t->result = EVT_TEST_SUCCESS;
                                rte_smp_wmb();
                                break;
                        }
                }

                /* abort if no events were processed in the last 5 seconds */
                if (new_cycles - dead_lock_cycles > dead_lock_sample) {
                        remaining = t->outstand_pkts - processed_pkts(t);
                        if (dead_lock_remaining == remaining) {
                                rte_event_dev_dump(opt->dev_id, stdout);
                                evt_err("no schedules for 5 seconds, deadlock");
                                t->done = true;
                                rte_smp_wmb();
                                break;
                        }
                        dead_lock_remaining = remaining;
                        dead_lock_cycles = new_cycles;
                }
        }
        printf("\n");
        return 0;
}

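/*
 * Set up one event port per worker lcore, linked to all queues, followed by
 * one unlinked port per producer; 'stride' spaces the producer queue ids.
 */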
int
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
                                uint8_t stride, uint8_t nb_queues)
{
        struct test_perf *t = evt_test_priv(test);
        uint8_t port, prod;
        int ret = -1;

        /* port configuration */
        const struct rte_event_port_conf wkr_p_conf = {
                        .dequeue_depth = opt->wkr_deq_dep,
                        .enqueue_depth = 64,
                        .new_event_threshold = 4096,
        };

        /* setup one port per worker, linking to all queues */
        for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
                                port++) {
                struct worker_data *w = &t->worker[port];

                w->dev_id = opt->dev_id;
                w->port_id = port;
                w->t = t;
                w->processed_pkts = 0;
                w->latency = 0;

                ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
                if (ret) {
                        evt_err("failed to setup port %d", port);
                        return ret;
                }

                ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
                if (ret != nb_queues) {
                        evt_err("failed to link all queues to port %d", port);
                        return -EINVAL;
                }
        }

        /* port for producers, no links */
        const struct rte_event_port_conf prod_conf = {
                        .dequeue_depth = 8,
                        .enqueue_depth = 32,
                        .new_event_threshold = 1200,
        };
        prod = 0;
        for ( ; port < perf_nb_event_ports(opt); port++) {
                struct prod_data *p = &t->prod[port];

                p->dev_id = opt->dev_id;
                p->port_id = port;
                p->queue_id = prod * stride;
                p->t = t;

                ret = rte_event_port_setup(opt->dev_id, port, &prod_conf);
                if (ret) {
                        evt_err("failed to setup port %d", port);
                        return ret;
                }
                prod++;
        }

        return ret;
}

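/*
 * Validate the command line options: lcore masks must not overlap the master
 * lcore or each other, stage/sched-type/queue/port counts must be in range,
 * and a few option combinations are fixed up with a notice.
 */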
int
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
{
        unsigned int lcores;

        /* N producers + N workers + 1 master when synthetic producer cores
         * are used, else N workers + 1 master when the Rx adapter is used
         */
        lcores = opt->prod_type == EVT_PROD_TYPE_SYNT ? 3 : 2;

        if (rte_lcore_count() < lcores) {
                evt_err("test needs a minimum of %d lcores", lcores);
                return -1;
        }

        /* Validate worker lcores */
        if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
                evt_err("worker lcores overlap with master lcore");
                return -1;
        }
        if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
                evt_err("worker lcores overlap with producer lcores");
                return -1;
        }
        if (evt_has_disabled_lcore(opt->wlcores)) {
                evt_err("one or more worker lcores are not enabled");
                return -1;
        }
        if (!evt_has_active_lcore(opt->wlcores)) {
                evt_err("minimum one worker is required");
                return -1;
        }

        if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
                /* Validate producer lcores */
                if (evt_lcores_has_overlap(opt->plcores,
                                        rte_get_master_lcore())) {
                        evt_err("producer lcores overlap with master lcore");
                        return -1;
                }
                if (evt_has_disabled_lcore(opt->plcores)) {
                        evt_err("one or more producer lcores are not enabled");
                        return -1;
                }
                if (!evt_has_active_lcore(opt->plcores)) {
                        evt_err("minimum one producer is required");
                        return -1;
                }
        }

        if (evt_has_invalid_stage(opt))
                return -1;

        if (evt_has_invalid_sched_type(opt))
                return -1;

        if (nb_queues > EVT_MAX_QUEUES) {
                evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
                return -1;
        }
        if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
                evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
                return -1;
        }

        /* Fixups */
        if (opt->nb_stages == 1 && opt->fwd_latency) {
                evt_info("fwd_latency is valid only when nb_stages > 1, disabling");
                opt->fwd_latency = 0;
        }
        if (opt->fwd_latency && !opt->q_priority) {
                evt_info("enabled queue priority for latency measurement");
                opt->q_priority = 1;
        }
        if (opt->nb_pkts == 0)
                opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);

        return 0;
}

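/* Print the effective test configuration before the run starts. */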
void
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
{
        evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
        evt_dump_producer_lcores(opt);
        evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
        evt_dump_worker_lcores(opt);
        evt_dump_nb_stages(opt);
        evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
        evt_dump("nb_evdev_queues", "%d", nb_queues);
        evt_dump_queue_priority(opt);
        evt_dump_sched_type_list(opt);
        evt_dump_producer_type(opt);
}

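/* Stop and close the event device once the test has finished. */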
void
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(test);

        rte_event_dev_stop(opt->dev_id);
        rte_event_dev_close(opt->dev_id);
}

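/* Mempool object constructor: zero every perf_elt at pool creation time. */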
static inline void
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
            void *obj, unsigned i __rte_unused)
{
        memset(obj, 0, mp->elt_size);
}

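/*
 * Create the mempool that backs the synthetic events; each element is a
 * struct perf_elt carrying the enqueue timestamp used for latency accounting.
 */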
int
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
{
        struct test_perf *t = evt_test_priv(test);

        t->pool = rte_mempool_create(test->name, /* mempool name */
                                opt->pool_sz, /* number of elements */
                                sizeof(struct perf_elt), /* element size */
                                512, /* cache size */
                                0, NULL, NULL,
                                perf_elt_init, /* obj constructor */
                                NULL, opt->socket_id, 0); /* flags */
        if (t->pool == NULL) {
                evt_err("failed to create mempool");
                return -ENOMEM;
        }

        return 0;
}

void
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(opt);
        struct test_perf *t = evt_test_priv(test);

        rte_mempool_free(t->pool);
}

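/*
 * Allocate the per-test private state (struct test_perf) from the requested
 * socket and derive the packet, flow and worker counts from the options.
 */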
int
perf_test_setup(struct evt_test *test, struct evt_options *opt)
{
        void *test_perf;

        test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
                                RTE_CACHE_LINE_SIZE, opt->socket_id);
        if (test_perf == NULL) {
                evt_err("failed to allocate test_perf memory");
                goto nomem;
        }
        test->test_priv = test_perf;

        struct test_perf *t = evt_test_priv(test);

        t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
        t->nb_workers = evt_nr_active_lcores(opt->wlcores);
        t->done = false;
        t->nb_pkts = opt->nb_pkts;
        t->nb_flows = opt->nb_flows;
        t->result = EVT_TEST_FAILED;
        t->opt = opt;
        memcpy(t->sched_type_list, opt->sched_type_list,
                        sizeof(opt->sched_type_list));
        return 0;
nomem:
        return -ENOMEM;
}

void
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(opt);

        rte_free(test->test_priv);
}