app/eventdev: add service core configuration
[dpdk.git] / app / test-eventdev / test_perf_common.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Cavium, Inc
3  */
4
5 #include "test_perf_common.h"
6
7 int
8 perf_test_result(struct evt_test *test, struct evt_options *opt)
9 {
10         RTE_SET_USED(opt);
11         struct test_perf *t = evt_test_priv(test);
12
13         return t->result;
14 }
15
16 static inline int
17 perf_producer(void *arg)
18 {
19         struct prod_data *p  = arg;
20         struct test_perf *t = p->t;
21         struct evt_options *opt = t->opt;
22         const uint8_t dev_id = p->dev_id;
23         const uint8_t port = p->port_id;
24         struct rte_mempool *pool = t->pool;
25         const uint64_t nb_pkts = t->nb_pkts;
26         const uint32_t nb_flows = t->nb_flows;
27         uint32_t flow_counter = 0;
28         uint64_t count = 0;
29         struct perf_elt *m;
30         struct rte_event ev;
31
32         if (opt->verbose_level > 1)
33                 printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
34                                 rte_lcore_id(), dev_id, port, p->queue_id);
35
36         ev.event = 0;
37         ev.op = RTE_EVENT_OP_NEW;
38         ev.queue_id = p->queue_id;
39         ev.sched_type = t->opt->sched_type_list[0];
40         ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
41         ev.event_type =  RTE_EVENT_TYPE_CPU;
42         ev.sub_event_type = 0; /* stage 0 */
43
44         while (count < nb_pkts && t->done == false) {
45                 if (rte_mempool_get(pool, (void **)&m) < 0)
46                         continue;
47
48                 ev.flow_id = flow_counter++ % nb_flows;
49                 ev.event_ptr = m;
50                 m->timestamp = rte_get_timer_cycles();
51                 while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
52                         if (t->done)
53                                 break;
54                         rte_pause();
55                         m->timestamp = rte_get_timer_cycles();
56                 }
57                 count++;
58         }
59
60         return 0;
61 }
62
63 static int
64 perf_producer_wrapper(void *arg)
65 {
66         struct prod_data *p  = arg;
67         struct test_perf *t = p->t;
68         /* Launch the producer function only in case of synthetic producer. */
69         if (t->opt->prod_type == EVT_PROD_TYPE_SYNT)
70                 return perf_producer(arg);
71         return 0;
72 }
73
74 static inline uint64_t
75 processed_pkts(struct test_perf *t)
76 {
77         uint8_t i;
78         uint64_t total = 0;
79
80         rte_smp_rmb();
81         for (i = 0; i < t->nb_workers; i++)
82                 total += t->worker[i].processed_pkts;
83
84         return total;
85 }
86
87 static inline uint64_t
88 total_latency(struct test_perf *t)
89 {
90         uint8_t i;
91         uint64_t total = 0;
92
93         rte_smp_rmb();
94         for (i = 0; i < t->nb_workers; i++)
95                 total += t->worker[i].latency;
96
97         return total;
98 }
99
100
101 int
102 perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
103                 int (*worker)(void *))
104 {
105         int ret, lcore_id;
106         struct test_perf *t = evt_test_priv(test);
107
108         int port_idx = 0;
109         /* launch workers */
110         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
111                 if (!(opt->wlcores[lcore_id]))
112                         continue;
113
114                 ret = rte_eal_remote_launch(worker,
115                                  &t->worker[port_idx], lcore_id);
116                 if (ret) {
117                         evt_err("failed to launch worker %d", lcore_id);
118                         return ret;
119                 }
120                 port_idx++;
121         }
122
123         /* launch producers */
124         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
125                 if (!(opt->plcores[lcore_id]))
126                         continue;
127
128                 ret = rte_eal_remote_launch(perf_producer_wrapper,
129                                 &t->prod[port_idx], lcore_id);
130                 if (ret) {
131                         evt_err("failed to launch perf_producer %d", lcore_id);
132                         return ret;
133                 }
134                 port_idx++;
135         }
136
137         const uint64_t total_pkts = opt->nb_pkts *
138                         evt_nr_active_lcores(opt->plcores);
139
140         uint64_t dead_lock_cycles = rte_get_timer_cycles();
141         int64_t dead_lock_remaining  =  total_pkts;
142         const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;
143
144         uint64_t perf_cycles = rte_get_timer_cycles();
145         int64_t perf_remaining  = total_pkts;
146         const uint64_t perf_sample = rte_get_timer_hz();
147
148         static float total_mpps;
149         static uint64_t samples;
150
151         const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
152         int64_t remaining = t->outstand_pkts - processed_pkts(t);
153
154         while (t->done == false) {
155                 const uint64_t new_cycles = rte_get_timer_cycles();
156
157                 if ((new_cycles - perf_cycles) > perf_sample) {
158                         const uint64_t latency = total_latency(t);
159                         const uint64_t pkts = processed_pkts(t);
160
161                         remaining = t->outstand_pkts - pkts;
162                         float mpps = (float)(perf_remaining-remaining)/1000000;
163
164                         perf_remaining = remaining;
165                         perf_cycles = new_cycles;
166                         total_mpps += mpps;
167                         ++samples;
168                         if (opt->fwd_latency && pkts > 0) {
169                                 printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
170                                         mpps, total_mpps/samples,
171                                         (float)(latency/pkts)/freq_mhz);
172                         } else {
173                                 printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
174                                         mpps, total_mpps/samples);
175                         }
176                         fflush(stdout);
177
178                         if (remaining <= 0) {
179                                 t->result = EVT_TEST_SUCCESS;
180                                 if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
181                                         t->done = true;
182                                         rte_smp_wmb();
183                                         break;
184                                 }
185                         }
186                 }
187
188                 if (new_cycles - dead_lock_cycles > dead_lock_sample &&
189                                 opt->prod_type == EVT_PROD_TYPE_SYNT) {
190                         remaining = t->outstand_pkts - processed_pkts(t);
191                         if (dead_lock_remaining == remaining) {
192                                 rte_event_dev_dump(opt->dev_id, stdout);
193                                 evt_err("No schedules for seconds, deadlock");
194                                 t->done = true;
195                                 rte_smp_wmb();
196                                 break;
197                         }
198                         dead_lock_remaining = remaining;
199                         dead_lock_cycles = new_cycles;
200                 }
201         }
202         printf("\n");
203         return 0;
204 }
205
206 static int
207 perf_event_rx_adapter_setup(struct evt_options *opt, uint8_t stride,
208                 struct rte_event_port_conf prod_conf)
209 {
210         int ret = 0;
211         uint16_t prod;
212         struct rte_event_eth_rx_adapter_queue_conf queue_conf;
213
214         memset(&queue_conf, 0,
215                         sizeof(struct rte_event_eth_rx_adapter_queue_conf));
216         queue_conf.ev.sched_type = opt->sched_type_list[0];
217         for (prod = 0; prod < rte_eth_dev_count(); prod++) {
218                 uint32_t cap;
219
220                 ret = rte_event_eth_rx_adapter_caps_get(opt->dev_id,
221                                 prod, &cap);
222                 if (ret) {
223                         evt_err("failed to get event rx adapter[%d]"
224                                         " capabilities",
225                                         opt->dev_id);
226                         return ret;
227                 }
228                 queue_conf.ev.queue_id = prod * stride;
229                 ret = rte_event_eth_rx_adapter_create(prod, opt->dev_id,
230                                 &prod_conf);
231                 if (ret) {
232                         evt_err("failed to create rx adapter[%d]", prod);
233                         return ret;
234                 }
235                 ret = rte_event_eth_rx_adapter_queue_add(prod, prod, -1,
236                                 &queue_conf);
237                 if (ret) {
238                         evt_err("failed to add rx queues to adapter[%d]", prod);
239                         return ret;
240                 }
241
242                 if (!(cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT)) {
243                         uint32_t service_id;
244
245                         rte_event_eth_rx_adapter_service_id_get(prod,
246                                         &service_id);
247                         ret = evt_service_setup(service_id);
248                         if (ret) {
249                                 evt_err("Failed to setup service core"
250                                                 " for Rx adapter\n");
251                                 return ret;
252                         }
253                 }
254
255                 ret = rte_eth_dev_start(prod);
256                 if (ret) {
257                         evt_err("Ethernet dev [%d] failed to start."
258                                         " Using synthetic producer", prod);
259                         return ret;
260                 }
261
262                 ret = rte_event_eth_rx_adapter_start(prod);
263                 if (ret) {
264                         evt_err("Rx adapter[%d] start failed", prod);
265                         return ret;
266                 }
267                 printf("%s: Port[%d] using Rx adapter[%d] started\n", __func__,
268                                 prod, prod);
269         }
270
271         return ret;
272 }
273
274 int
275 perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
276                                 uint8_t stride, uint8_t nb_queues)
277 {
278         struct test_perf *t = evt_test_priv(test);
279         uint16_t port, prod;
280         int ret = -1;
281         struct rte_event_port_conf port_conf;
282
283         memset(&port_conf, 0, sizeof(struct rte_event_port_conf));
284         rte_event_port_default_conf_get(opt->dev_id, 0, &port_conf);
285
286         /* port configuration */
287         const struct rte_event_port_conf wkr_p_conf = {
288                         .dequeue_depth = opt->wkr_deq_dep,
289                         .enqueue_depth = port_conf.enqueue_depth,
290                         .new_event_threshold = port_conf.new_event_threshold,
291         };
292
293         /* setup one port per worker, linking to all queues */
294         for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
295                                 port++) {
296                 struct worker_data *w = &t->worker[port];
297
298                 w->dev_id = opt->dev_id;
299                 w->port_id = port;
300                 w->t = t;
301                 w->processed_pkts = 0;
302                 w->latency = 0;
303
304                 ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
305                 if (ret) {
306                         evt_err("failed to setup port %d", port);
307                         return ret;
308                 }
309
310                 ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
311                 if (ret != nb_queues) {
312                         evt_err("failed to link all queues to port %d", port);
313                         return -EINVAL;
314                 }
315         }
316
317         /* port for producers, no links */
318         struct rte_event_port_conf prod_conf = {
319                         .dequeue_depth = port_conf.dequeue_depth,
320                         .enqueue_depth = port_conf.enqueue_depth,
321                         .new_event_threshold = port_conf.new_event_threshold,
322         };
323         if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
324                 for ( ; port < perf_nb_event_ports(opt); port++) {
325                         struct prod_data *p = &t->prod[port];
326                         p->t = t;
327                 }
328
329                 ret = perf_event_rx_adapter_setup(opt, stride, prod_conf);
330                 if (ret)
331                         return ret;
332         } else {
333                 prod = 0;
334                 for ( ; port < perf_nb_event_ports(opt); port++) {
335                         struct prod_data *p = &t->prod[port];
336
337                         p->dev_id = opt->dev_id;
338                         p->port_id = port;
339                         p->queue_id = prod * stride;
340                         p->t = t;
341
342                         ret = rte_event_port_setup(opt->dev_id, port,
343                                         &prod_conf);
344                         if (ret) {
345                                 evt_err("failed to setup port %d", port);
346                                 return ret;
347                         }
348                         prod++;
349                 }
350         }
351
352         return ret;
353 }
354
355 int
356 perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
357 {
358         unsigned int lcores;
359
360         /* N producer + N worker + 1 master when producer cores are used
361          * Else N worker + 1 master when Rx adapter is used
362          */
363         lcores = opt->prod_type == EVT_PROD_TYPE_SYNT ? 3 : 2;
364
365         if (rte_lcore_count() < lcores) {
366                 evt_err("test need minimum %d lcores", lcores);
367                 return -1;
368         }
369
370         /* Validate worker lcores */
371         if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
372                 evt_err("worker lcores overlaps with master lcore");
373                 return -1;
374         }
375         if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
376                 evt_err("worker lcores overlaps producer lcores");
377                 return -1;
378         }
379         if (evt_has_disabled_lcore(opt->wlcores)) {
380                 evt_err("one or more workers lcores are not enabled");
381                 return -1;
382         }
383         if (!evt_has_active_lcore(opt->wlcores)) {
384                 evt_err("minimum one worker is required");
385                 return -1;
386         }
387
388         if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
389                 /* Validate producer lcores */
390                 if (evt_lcores_has_overlap(opt->plcores,
391                                         rte_get_master_lcore())) {
392                         evt_err("producer lcores overlaps with master lcore");
393                         return -1;
394                 }
395                 if (evt_has_disabled_lcore(opt->plcores)) {
396                         evt_err("one or more producer lcores are not enabled");
397                         return -1;
398                 }
399                 if (!evt_has_active_lcore(opt->plcores)) {
400                         evt_err("minimum one producer is required");
401                         return -1;
402                 }
403         }
404
405         if (evt_has_invalid_stage(opt))
406                 return -1;
407
408         if (evt_has_invalid_sched_type(opt))
409                 return -1;
410
411         if (nb_queues > EVT_MAX_QUEUES) {
412                 evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
413                 return -1;
414         }
415         if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
416                 evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
417                 return -1;
418         }
419
420         /* Fixups */
421         if (opt->nb_stages == 1 && opt->fwd_latency) {
422                 evt_info("fwd_latency is valid when nb_stages > 1, disabling");
423                 opt->fwd_latency = 0;
424         }
425         if (opt->fwd_latency && !opt->q_priority) {
426                 evt_info("enabled queue priority for latency measurement");
427                 opt->q_priority = 1;
428         }
429         if (opt->nb_pkts == 0)
430                 opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);
431
432         return 0;
433 }
434
435 void
436 perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
437 {
438         evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
439         evt_dump_producer_lcores(opt);
440         evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
441         evt_dump_worker_lcores(opt);
442         evt_dump_nb_stages(opt);
443         evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
444         evt_dump("nb_evdev_queues", "%d", nb_queues);
445         evt_dump_queue_priority(opt);
446         evt_dump_sched_type_list(opt);
447         evt_dump_producer_type(opt);
448 }
449
450 void
451 perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
452 {
453         RTE_SET_USED(test);
454
455         rte_event_dev_stop(opt->dev_id);
456         rte_event_dev_close(opt->dev_id);
457 }
458
459 static inline void
460 perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
461             void *obj, unsigned i __rte_unused)
462 {
463         memset(obj, 0, mp->elt_size);
464 }
465
466 #define NB_RX_DESC                      128
467 #define NB_TX_DESC                      512
468 int
469 perf_ethdev_setup(struct evt_test *test, struct evt_options *opt)
470 {
471         int i;
472         struct test_perf *t = evt_test_priv(test);
473         struct rte_eth_conf port_conf = {
474                 .rxmode = {
475                         .mq_mode = ETH_MQ_RX_RSS,
476                         .max_rx_pkt_len = ETHER_MAX_LEN,
477                         .split_hdr_size = 0,
478                         .header_split   = 0,
479                         .hw_ip_checksum = 0,
480                         .hw_vlan_filter = 0,
481                         .hw_vlan_strip  = 0,
482                         .hw_vlan_extend = 0,
483                         .jumbo_frame    = 0,
484                         .hw_strip_crc   = 1,
485                 },
486                 .rx_adv_conf = {
487                         .rss_conf = {
488                                 .rss_key = NULL,
489                                 .rss_hf = ETH_RSS_IP,
490                         },
491                 },
492         };
493
494         if (opt->prod_type == EVT_PROD_TYPE_SYNT)
495                 return 0;
496
497         if (!rte_eth_dev_count()) {
498                 evt_err("No ethernet ports found.");
499                 return -ENODEV;
500         }
501
502         for (i = 0; i < rte_eth_dev_count(); i++) {
503
504                 if (rte_eth_dev_configure(i, 1, 1,
505                                         &port_conf)
506                                 < 0) {
507                         evt_err("Failed to configure eth port [%d]", i);
508                         return -EINVAL;
509                 }
510
511                 if (rte_eth_rx_queue_setup(i, 0, NB_RX_DESC,
512                                 rte_socket_id(), NULL, t->pool) < 0) {
513                         evt_err("Failed to setup eth port [%d] rx_queue: %d.",
514                                         i, 0);
515                         return -EINVAL;
516                 }
517
518                 if (rte_eth_tx_queue_setup(i, 0, NB_TX_DESC,
519                                         rte_socket_id(), NULL) < 0) {
520                         evt_err("Failed to setup eth port [%d] tx_queue: %d.",
521                                         i, 0);
522                         return -EINVAL;
523                 }
524
525                 rte_eth_promiscuous_enable(i);
526         }
527
528         return 0;
529 }
530
531 void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt)
532 {
533         int i;
534         RTE_SET_USED(test);
535
536         if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
537                 for (i = 0; i < rte_eth_dev_count(); i++) {
538                         rte_event_eth_rx_adapter_stop(i);
539                         rte_eth_dev_stop(i);
540                         rte_eth_dev_close(i);
541                 }
542         }
543 }
544
545 int
546 perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
547 {
548         struct test_perf *t = evt_test_priv(test);
549
550         if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
551                 t->pool = rte_mempool_create(test->name, /* mempool name */
552                                 opt->pool_sz, /* number of elements*/
553                                 sizeof(struct perf_elt), /* element size*/
554                                 512, /* cache size*/
555                                 0, NULL, NULL,
556                                 perf_elt_init, /* obj constructor */
557                                 NULL, opt->socket_id, 0); /* flags */
558         } else {
559                 t->pool = rte_pktmbuf_pool_create(test->name, /* mempool name */
560                                 opt->pool_sz, /* number of elements*/
561                                 512, /* cache size*/
562                                 0,
563                                 RTE_MBUF_DEFAULT_BUF_SIZE,
564                                 opt->socket_id); /* flags */
565
566         }
567
568         if (t->pool == NULL) {
569                 evt_err("failed to create mempool");
570                 return -ENOMEM;
571         }
572
573         return 0;
574 }
575
576 void
577 perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
578 {
579         RTE_SET_USED(opt);
580         struct test_perf *t = evt_test_priv(test);
581
582         rte_mempool_free(t->pool);
583 }
584
585 int
586 perf_test_setup(struct evt_test *test, struct evt_options *opt)
587 {
588         void *test_perf;
589
590         test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
591                                 RTE_CACHE_LINE_SIZE, opt->socket_id);
592         if (test_perf  == NULL) {
593                 evt_err("failed to allocate test_perf memory");
594                 goto nomem;
595         }
596         test->test_priv = test_perf;
597
598         struct test_perf *t = evt_test_priv(test);
599
600         t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
601         t->nb_workers = evt_nr_active_lcores(opt->wlcores);
602         t->done = false;
603         t->nb_pkts = opt->nb_pkts;
604         t->nb_flows = opt->nb_flows;
605         t->result = EVT_TEST_FAILED;
606         t->opt = opt;
607         memcpy(t->sched_type_list, opt->sched_type_list,
608                         sizeof(opt->sched_type_list));
609         return 0;
610 nomem:
611         return -ENOMEM;
612 }
613
614 void
615 perf_test_destroy(struct evt_test *test, struct evt_options *opt)
616 {
617         RTE_SET_USED(opt);
618
619         rte_free(test->test_priv);
620 }