/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <signal.h>
#include <sys/types.h>
#include <unistd.h>

#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_compressdev.h>

#include "comp_perf_options.h"
#include "comp_perf_test_verify.h"
#include "comp_perf_test_benchmark.h"
#include "comp_perf.h"
#include "comp_perf_test_common.h"

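/* NUM_MAX_XFORMS bounds the private xforms reserved per device and
 * NUM_MAX_INFLIGHT_OPS bounds the operations outstanding on each
 * queue pair (see rte_compressdev_queue_pair_setup() below).
 */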
#define NUM_MAX_XFORMS 16
#define NUM_MAX_INFLIGHT_OPS 512

__extension__
const char *comp_perf_test_type_strs[] = {
        [CPERF_TEST_TYPE_BENCHMARK] = "benchmark",
        [CPERF_TEST_TYPE_VERIFY] = "verify"
};

__extension__
static const struct cperf_test cperf_testmap[] = {
        [CPERF_TEST_TYPE_BENCHMARK] = {
                        cperf_benchmark_test_constructor,
                        cperf_benchmark_test_runner,
                        cperf_benchmark_test_destructor
        },
        [CPERF_TEST_TYPE_VERIFY] = {
                        cperf_verify_test_constructor,
                        cperf_verify_test_runner,
                        cperf_verify_test_destructor
        }
};

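/* Global test state, shared with the signal handler so that SIGINT or
 * SIGTERM can request the workers to stop.
 */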
static struct comp_test_data *test_data;

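/* Check that the device supports DEFLATE with the requested Huffman
 * encoding, window size, chained mbufs and compression level range.
 */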
static int
comp_perf_check_capabilities(struct comp_test_data *test_data, uint8_t cdev_id)
{
        const struct rte_compressdev_capabilities *cap;

        cap = rte_compressdev_capability_get(cdev_id,
                                             RTE_COMP_ALGO_DEFLATE);

        if (cap == NULL) {
                RTE_LOG(ERR, USER1,
                        "Compress device does not support DEFLATE\n");
                return -1;
        }

        uint64_t comp_flags = cap->comp_feature_flags;

        /* Huffman encoding */
        if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
                        (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
                RTE_LOG(ERR, USER1,
                        "Compress device does not support Fixed Huffman\n");
                return -1;
        }

        if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
                        (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
                RTE_LOG(ERR, USER1,
                        "Compress device does not support Dynamic Huffman\n");
                return -1;
        }

        /* Window size */
        if (test_data->window_sz != -1) {
                if (param_range_check(test_data->window_sz, &cap->window_size)
                                < 0) {
                        RTE_LOG(ERR, USER1,
                                "Compress device does not support "
                                "this window size\n");
                        return -1;
                }
        } else
                /* Set window size to PMD maximum if none was specified */
                test_data->window_sz = cap->window_size.max;

        /* Check if chained mbufs are supported */
        if (test_data->max_sgl_segs > 1 &&
                        (comp_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT) == 0) {
                RTE_LOG(INFO, USER1, "Compress device does not support "
                                "chained mbufs. Max SGL segments set to 1\n");
                test_data->max_sgl_segs = 1;
        }

        /* Level 0 support */
        if (test_data->level_lst.min == 0 &&
                        (comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) == 0) {
                RTE_LOG(ERR, USER1, "Compress device does not support "
                                "level 0 (no compression)\n");
                return -1;
        }

        return 0;
}

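/* Find the devices for the given driver, spread the available worker
 * lcores across them as queue pairs, then configure and start each
 * device. Returns the number of devices in use, or a negative errno.
 */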
static int
comp_perf_initialize_compressdev(struct comp_test_data *test_data,
                                 uint8_t *enabled_cdevs)
{
        uint8_t enabled_cdev_count, nb_lcores, cdev_id;
        unsigned int i, j;
        int ret;

        enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
                        enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
        if (enabled_cdev_count == 0) {
                RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
                                test_data->driver_name);
                return -EINVAL;
        }

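        /* The main lcore only launches and waits for the workers, so one
         * core is subtracted from the count available for queue pairs.
         */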
        nb_lcores = rte_lcore_count() - 1;
        /* Use fewer devices if more are available than cores. */
        if (enabled_cdev_count > nb_lcores) {
                if (nb_lcores == 0) {
                        RTE_LOG(ERR, USER1, "Cannot run with 0 cores! Increase the number of cores\n");
                        return -EINVAL;
                }
                enabled_cdev_count = nb_lcores;
                RTE_LOG(INFO, USER1,
                        "More devices are available than cores!"
                        " The number of devices has been reduced to %d\n",
                        nb_lcores);
        }

        /*
         * Calculate the number of queue pairs needed per device, based on
         * the number of available logical cores and compression devices,
         * with one queue pair per core. For instance, with 4 cores and
         * 2 compression devices, 2 queue pairs are set up on each device.
         * With 3 cores and 2 compression devices, 2 queue pairs per device
         * are still needed, but one queue pair on the last device will be
         * left unused.
         */
        test_data->nb_qps = (nb_lcores % enabled_cdev_count) ?
                                (nb_lcores / enabled_cdev_count) + 1 :
                                nb_lcores / enabled_cdev_count;

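        /* nb_lcores now tracks the cores not yet assigned a queue pair, so
         * the last device can be configured with fewer queue pairs than
         * test_data->nb_qps when the split is uneven.
         */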
        for (i = 0; i < enabled_cdev_count &&
                        i < RTE_COMPRESS_MAX_DEVS; i++,
                                        nb_lcores -= test_data->nb_qps) {
                cdev_id = enabled_cdevs[i];

                struct rte_compressdev_info cdev_info;
                uint8_t socket_id = rte_compressdev_socket_id(cdev_id);

                rte_compressdev_info_get(cdev_id, &cdev_info);
                if (cdev_info.max_nb_queue_pairs &&
                        test_data->nb_qps > cdev_info.max_nb_queue_pairs) {
                        RTE_LOG(ERR, USER1,
                                "Number of needed queue pairs is higher "
                                "than the maximum number of queue pairs "
                                "per device.\n");
                        RTE_LOG(ERR, USER1,
                                "Lower the number of cores or increase "
                                "the number of compress devices\n");
                        return -EINVAL;
                }

                if (comp_perf_check_capabilities(test_data, cdev_id) < 0)
                        return -EINVAL;

                /* Configure compressdev */
                struct rte_compressdev_config config = {
                        .socket_id = socket_id,
                        .nb_queue_pairs = nb_lcores > test_data->nb_qps
                                        ? test_data->nb_qps : nb_lcores,
                        .max_nb_priv_xforms = NUM_MAX_XFORMS,
                        .max_nb_streams = 0
                };

                if (rte_compressdev_configure(cdev_id, &config) < 0) {
                        RTE_LOG(ERR, USER1, "Device configuration failed\n");
                        return -EINVAL;
                }

                for (j = 0; j < test_data->nb_qps; j++) {
                        ret = rte_compressdev_queue_pair_setup(cdev_id, j,
                                        NUM_MAX_INFLIGHT_OPS, socket_id);
                        if (ret < 0) {
                                RTE_LOG(ERR, USER1,
                              "Failed to setup queue pair %u on compressdev %u\n",
                                        j, cdev_id);
                                return -EINVAL;
                        }
                }

                ret = rte_compressdev_start(cdev_id);
                if (ret < 0) {
                        RTE_LOG(ERR, USER1,
                                "Failed to start device %u: error %d\n",
                                cdev_id, ret);
                        return -EPERM;
                }
        }

        return enabled_cdev_count;
}

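/* Read the input file into a single buffer. If the requested input data
 * size exceeds the file size, the file contents are repeated until the
 * buffer is filled.
 */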
static int
comp_perf_dump_input_data(struct comp_test_data *test_data)
{
        FILE *f = fopen(test_data->input_file, "r");
        int ret = -1;

        if (f == NULL) {
                RTE_LOG(ERR, USER1, "Input file could not be opened\n");
                return -1;
        }

        if (fseek(f, 0, SEEK_END) != 0) {
                RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
                goto end;
        }
        long obtained_file_sz = ftell(f);
        if (obtained_file_sz < 0) {
                RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
                goto end;
        }
        size_t actual_file_sz = obtained_file_sz;

        /* If an extended input data size has not been set,
         * the input data size equals the file size.
         */
        if (test_data->input_data_sz == 0)
                test_data->input_data_sz = actual_file_sz;

        if (test_data->input_data_sz == 0 || actual_file_sz == 0 ||
                        fseek(f, 0, SEEK_SET) != 0) {
                RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
                goto end;
        }

        test_data->input_data = rte_zmalloc_socket(NULL,
                                test_data->input_data_sz, 0, rte_socket_id());

        if (test_data->input_data == NULL) {
                RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
                                "file could not be allocated\n");
                goto end;
        }

        size_t remaining_data = test_data->input_data_sz;
        uint8_t *data = test_data->input_data;

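        /* Copy the whole file, then rewind and copy it again until the
         * requested input size has been filled.
         */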
        while (remaining_data > 0) {
                size_t data_to_read = RTE_MIN(remaining_data, actual_file_sz);

                if (fread(data, data_to_read, 1, f) != 1) {
                        RTE_LOG(ERR, USER1, "Input file could not be read\n");
                        goto end;
                }
                if (fseek(f, 0, SEEK_SET) != 0) {
                        RTE_LOG(ERR, USER1,
                                "Size of input could not be calculated\n");
                        goto end;
                }
                remaining_data -= data_to_read;
                data += data_to_read;
        }

        if (test_data->input_data_sz > actual_file_sz)
                RTE_LOG(INFO, USER1,
                  "%zu bytes read from file %s, extending it %.2f times\n",
                        test_data->input_data_sz, test_data->input_file,
                        (double)test_data->input_data_sz/actual_file_sz);
        else
                RTE_LOG(INFO, USER1,
                        "%zu bytes read from file %s\n",
                        test_data->input_data_sz, test_data->input_file);

        ret = 0;

end:
        fclose(f);
        return ret;
}

static void
comp_perf_cleanup_on_signal(int signal_number __rte_unused)
{
        test_data->perf_comp_force_stop = 1;
}

static void
comp_perf_register_cleanup_on_signal(void)
{
        signal(SIGTERM, comp_perf_cleanup_on_signal);
        signal(SIGINT, comp_perf_cleanup_on_signal);
}

int
main(int argc, char **argv)
{
        uint8_t level_idx = 0;
        int ret, i;
        void *ctx[RTE_MAX_LCORE] = {};
        uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
        int nb_compressdevs = 0;
        uint16_t total_nb_qps = 0;
        uint8_t cdev_id;
        uint32_t lcore_id;

        /* Initialise DPDK EAL */
        ret = rte_eal_init(argc, argv);
        if (ret < 0)
                rte_exit(EXIT_FAILURE, "Invalid EAL arguments!\n");
        argc -= ret;
        argv += ret;

        test_data = rte_zmalloc_socket(NULL, sizeof(struct comp_test_data),
                                        0, rte_socket_id());

        if (test_data == NULL)
                rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
                                rte_socket_id());

        comp_perf_register_cleanup_on_signal();

        ret = EXIT_SUCCESS;
        test_data->cleanup = ST_TEST_DATA;
        comp_perf_options_default(test_data);

        if (comp_perf_options_parse(test_data, argc, argv) < 0) {
                RTE_LOG(ERR, USER1,
                        "Parsing one or more user options failed\n");
                ret = EXIT_FAILURE;
                goto end;
        }

        if (comp_perf_options_check(test_data) < 0) {
                ret = EXIT_FAILURE;
                goto end;
        }

        nb_compressdevs =
                comp_perf_initialize_compressdev(test_data, enabled_cdevs);

        if (nb_compressdevs < 1) {
                ret = EXIT_FAILURE;
                goto end;
        }

        test_data->cleanup = ST_COMPDEV;
        if (comp_perf_dump_input_data(test_data) < 0) {
                ret = EXIT_FAILURE;
                goto end;
        }

        test_data->cleanup = ST_INPUT_DATA;

        if (test_data->level_lst.inc != 0)
                test_data->level = test_data->level_lst.min;
        else
                test_data->level = test_data->level_lst.list[0];

        printf("App uses socket: %u\n", rte_socket_id());
        printf("Burst size = %u\n", test_data->burst_sz);
        printf("Input data size = %zu\n", test_data->input_data_sz);

        test_data->cleanup = ST_DURING_TEST;
        total_nb_qps = nb_compressdevs * test_data->nb_qps;

        i = 0;
        uint8_t qp_id = 0, cdev_index = 0;

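        /* Build one test context per queue pair: worker lcores are assigned
         * queue pairs round-robin within a device, advancing to the next
         * device once its queue pairs are exhausted.
         */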
        RTE_LCORE_FOREACH_SLAVE(lcore_id) {

                if (i == total_nb_qps)
                        break;

                cdev_id = enabled_cdevs[cdev_index];
                ctx[i] = cperf_testmap[test_data->test].constructor(
                                                        cdev_id, qp_id,
                                                        test_data);
                if (ctx[i] == NULL) {
                        RTE_LOG(ERR, USER1, "Test run constructor failed\n");
                        ret = EXIT_FAILURE;
                        goto end;
                }
                qp_id = (qp_id + 1) % test_data->nb_qps;
                if (qp_id == 0)
                        cdev_index++;
                i++;
        }

        print_test_dynamics(); /* constructors must be executed first */

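        /* Run the configured test once per compression level: either step
         * from min to max by level_lst.inc, or walk the explicit list of
         * levels given in the test options.
         */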
        while (test_data->level <= test_data->level_lst.max) {

                i = 0;
                RTE_LCORE_FOREACH_SLAVE(lcore_id) {

                        if (i == total_nb_qps)
                                break;

                        rte_eal_remote_launch(
                                        cperf_testmap[test_data->test].runner,
                                        ctx[i], lcore_id);
                        i++;
                }
                i = 0;
                RTE_LCORE_FOREACH_SLAVE(lcore_id) {

                        if (i == total_nb_qps)
                                break;
                        ret |= rte_eal_wait_lcore(lcore_id);
                        i++;
                }

                if (ret != EXIT_SUCCESS)
                        break;

                if (test_data->level_lst.inc != 0)
                        test_data->level += test_data->level_lst.inc;
                else {
                        if (++level_idx == test_data->level_lst.count)
                                break;
                        test_data->level = test_data->level_lst.list[level_idx];
                }
        }

end:
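        /* Tear down in reverse order of initialization; each case falls
         * through so that everything acquired before the failure point
         * is released.
         */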
        switch (test_data->cleanup) {

        case ST_DURING_TEST:
                i = 0;
                RTE_LCORE_FOREACH_SLAVE(lcore_id) {
                        if (i == total_nb_qps)
                                break;

                        if (ctx[i] && cperf_testmap[test_data->test].destructor)
                                cperf_testmap[test_data->test].destructor(
                                                                        ctx[i]);
                        i++;
                }
                /* fallthrough */
        case ST_INPUT_DATA:
                rte_free(test_data->input_data);
                /* fallthrough */
        case ST_COMPDEV:
                for (i = 0; i < nb_compressdevs &&
                     i < RTE_COMPRESS_MAX_DEVS; i++) {
                        rte_compressdev_stop(enabled_cdevs[i]);
                        rte_compressdev_close(enabled_cdevs[i]);
                }
                /* fallthrough */
        case ST_TEST_DATA:
                rte_free(test_data);
                /* fallthrough */
        case ST_CLEAR:
        default:
                i = rte_eal_cleanup();
                if (i) {
                        RTE_LOG(ERR, USER1,
                                "Error from rte_eal_cleanup(), %d\n", i);
                        ret = i;
                }
                break;
        }
        return ret;
}

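/* Weak default implementations: they let the binary link when a given
 * test implementation is not built in, and simply report the test as
 * unsupported.
 */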
__rte_weak void *
cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
                                 uint16_t qp_id __rte_unused,
                                 struct comp_test_data *options __rte_unused)
{
        RTE_LOG(INFO, USER1, "Benchmark test is not supported yet\n");
        return NULL;
}

__rte_weak void
cperf_benchmark_test_destructor(void *arg __rte_unused)
{
}

__rte_weak int
cperf_benchmark_test_runner(void *test_ctx __rte_unused)
{
        return 0;
}

__rte_weak void *
cperf_verify_test_constructor(uint8_t dev_id __rte_unused,
                                 uint16_t qp_id __rte_unused,
                                 struct comp_test_data *options __rte_unused)
{
        RTE_LOG(INFO, USER1, "Verify test is not supported yet\n");
        return NULL;
}

__rte_weak void
cperf_verify_test_destructor(void *arg __rte_unused)
{
}

__rte_weak int
cperf_verify_test_runner(void *test_ctx __rte_unused)
{
        return 0;
}