/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
 */

#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_spinlock.h>
#include <rte_compressdev.h>

#include "comp_perf_test_cyclecount.h"

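/*
 * Per-lcore context for the cycle-count test. It embeds the verification
 * context (reused for buffer allocation and preparation) and accumulates
 * retry counters and TSC cycle totals for the op-setup, enqueue and
 * dequeue phases.
 */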
struct cperf_cyclecount_ctx {
        struct cperf_verify_ctx ver;

        uint32_t ops_enq_retries;
        uint32_t ops_deq_retries;

        uint64_t duration_op;
        uint64_t duration_enq;
        uint64_t duration_deq;
};

void
cperf_cyclecount_test_destructor(void *arg)
{
        struct cperf_cyclecount_ctx *ctx = arg;

        if (arg) {
                comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem);
                rte_free(arg);
        }
}

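/*
 * Create the test context: reuse the verify-test helpers to allocate the
 * device memory and prepare the input/output buffers. The verification
 * stage itself is later run silently from the runner.
 */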
void *
cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
                struct comp_test_data *options)
{
        struct cperf_cyclecount_ctx *ctx = NULL;

        ctx = rte_malloc(NULL, sizeof(struct cperf_cyclecount_ctx), 0);

        if (ctx == NULL)
                return NULL;

        ctx->ver.mem.dev_id = dev_id;
        ctx->ver.mem.qp_id = qp_id;
        ctx->ver.options = options;
        ctx->ver.silent = 1; /* the verification part will run silently */

        if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
                        && !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
                return ctx;

        cperf_cyclecount_test_destructor(ctx);
        return NULL;
}

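/*
 * Mimic the datapath loop without touching the device: get ops from the
 * mempool, fill them exactly as the real loop does, then put them back.
 * Timed by the caller, this isolates the pure software cost of preparing
 * the operations.
 */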
static int
cperf_cyclecount_op_setup(struct rte_comp_op **ops,
                                 struct cperf_cyclecount_ctx *ctx,
                                 struct rte_mbuf **input_bufs,
                                 struct rte_mbuf **output_bufs,
                                 void *priv_xform,
                                 uint32_t out_seg_sz)
{
        struct comp_test_data *test_data = ctx->ver.options;
        struct cperf_mem_resources *mem = &ctx->ver.mem;

        uint32_t i, iter, num_iter;
        int res = 0;
        uint16_t ops_needed;

        num_iter = test_data->num_iter;

        for (iter = 0; iter < num_iter; iter++) {
                uint32_t remaining_ops = mem->total_bufs;
                uint32_t total_deq_ops = 0;
                uint32_t total_enq_ops = 0;
                uint16_t num_enq = 0;
                uint16_t num_deq = 0;

                while (remaining_ops > 0) {
                        uint16_t num_ops = RTE_MIN(remaining_ops,
                                                   test_data->burst_sz);
                        ops_needed = num_ops;

                        /* Allocate compression operations */
                        if (ops_needed && rte_mempool_get_bulk(
                                                mem->op_pool,
                                                (void **)ops,
                                                ops_needed) != 0) {
                                RTE_LOG(ERR, USER1,
                                      "Cyclecount: could not allocate enough operations\n");
                                res = -1;
                                goto end;
                        }

                        for (i = 0; i < ops_needed; i++) {

                                /*
                                 * Calculate the next buffer to attach
                                 * to the operation
                                 */
                                uint32_t buf_id = total_enq_ops + i;
                                uint16_t op_id = i;

                                /* Reset all data in output buffers */
                                struct rte_mbuf *m = output_bufs[buf_id];

                                m->pkt_len = out_seg_sz * m->nb_segs;
                                while (m) {
                                        m->data_len = m->buf_len - m->data_off;
                                        m = m->next;
                                }
                                ops[op_id]->m_src = input_bufs[buf_id];
                                ops[op_id]->m_dst = output_bufs[buf_id];
                                ops[op_id]->src.offset = 0;
                                ops[op_id]->src.length =
                                        rte_pktmbuf_pkt_len(input_bufs[buf_id]);
                                ops[op_id]->dst.offset = 0;
                                ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
                                ops[op_id]->input_chksum = buf_id;
                                ops[op_id]->private_xform = priv_xform;
                        }

                        /*
                         * E N Q U E U I N G
                         * Assume that all ops are enqueued, instead of
                         * doing the real enqueue operation.
                         */
                        num_enq = num_ops;

                        remaining_ops -= num_enq;
                        total_enq_ops += num_enq;

                        /*
                         * D E Q U E U I N G
                         * Assume that all ops are dequeued, instead of
                         * doing the real dequeue operation.
                         */
                        num_deq = num_ops;

                        total_deq_ops += num_deq;
                        rte_mempool_put_bulk(mem->op_pool,
                                             (void **)ops, num_deq);
                }
        }
        return res;
end:
        /*
         * Only reached when rte_mempool_get_bulk() fails, in which case no
         * ops are outstanding and the caller still owns (and frees) the ops
         * array. Putting never-allocated ops back into the pool, or freeing
         * the array here as well, would corrupt the pool and double-free.
         */
        return res;
}

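/*
 * Run one timed pass in the given direction (compress or decompress):
 * build the xform, measure the op setup in isolation, then drive the
 * full enqueue/dequeue pipeline while accumulating per-phase TSC cycles
 * and retry counts in the context.
 */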
static int
main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type)
{
        struct comp_test_data *test_data = ctx->ver.options;
        struct cperf_mem_resources *mem = &ctx->ver.mem;
        uint8_t dev_id = mem->dev_id;
        uint32_t i, iter, num_iter;
        struct rte_comp_op **ops, **deq_ops;
        void *priv_xform = NULL;
        struct rte_comp_xform xform;
        struct rte_mbuf **input_bufs, **output_bufs;
        int ret, res = 0;
        int allocated = 0;
        uint32_t out_seg_sz;

        uint64_t tsc_start, tsc_end, tsc_duration;

        if (test_data == NULL || !test_data->burst_sz) {
                RTE_LOG(ERR, USER1, "Unknown burst size\n");
                return -1;
        }
        ctx->duration_enq = 0;
        ctx->duration_deq = 0;
        ctx->ops_enq_retries = 0;
        ctx->ops_deq_retries = 0;

        /* one array for both enqueue and dequeue */
        ops = rte_zmalloc_socket(NULL,
                2 * mem->total_bufs * sizeof(struct rte_comp_op *),
                0, rte_socket_id());

        if (ops == NULL) {
                RTE_LOG(ERR, USER1,
                        "Can't allocate memory for ops structures\n");
                return -1;
        }

        deq_ops = &ops[mem->total_bufs];

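        /*
         * Build the direction-specific xform and pick the buffer arrays:
         * compression reads the plain-data buffers and writes into the
         * compressed buffers; decompression goes the other way.
         */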
        if (type == RTE_COMP_COMPRESS) {
                xform = (struct rte_comp_xform) {
                        .type = RTE_COMP_COMPRESS,
                        .compress = {
                                .algo = RTE_COMP_ALGO_DEFLATE,
                                .deflate.huffman = test_data->huffman_enc,
                                .level = test_data->level,
                                .window_size = test_data->window_sz,
                                .chksum = RTE_COMP_CHECKSUM_NONE,
                                .hash_algo = RTE_COMP_HASH_ALGO_NONE
                        }
                };
                input_bufs = mem->decomp_bufs;
                output_bufs = mem->comp_bufs;
                out_seg_sz = test_data->out_seg_sz;
        } else {
                xform = (struct rte_comp_xform) {
                        .type = RTE_COMP_DECOMPRESS,
                        .decompress = {
                                .algo = RTE_COMP_ALGO_DEFLATE,
                                .chksum = RTE_COMP_CHECKSUM_NONE,
                                .window_size = test_data->window_sz,
                                .hash_algo = RTE_COMP_HASH_ALGO_NONE
                        }
                };
                input_bufs = mem->comp_bufs;
                output_bufs = mem->decomp_bufs;
                out_seg_sz = test_data->seg_sz;
        }

        /* Create private xform */
        if (rte_compressdev_private_xform_create(dev_id, &xform,
                                                &priv_xform) < 0) {
                RTE_LOG(ERR, USER1, "Private xform could not be created\n");
                res = -1;
                goto end;
        }

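        /*
         * Time cperf_cyclecount_op_setup() as a whole: it exercises the
         * mempool and op-filling code without touching the device, and
         * the result is reported as the "setup/op" column.
         */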
        tsc_start = rte_rdtsc_precise();
        ret = cperf_cyclecount_op_setup(ops,
                                ctx,
                                input_bufs,
                                output_bufs,
                                priv_xform,
                                out_seg_sz);

        tsc_end = rte_rdtsc_precise();

        /*
         * The return value check is deferred until after the second
         * timestamp, so the extra 'if' does not bias the measurement.
         */
        if (ret < 0) {
                RTE_LOG(ERR, USER1, "Setup function failed\n");
                res = -1;
                goto end;
        }

        tsc_duration = tsc_end - tsc_start;
        ctx->duration_op = tsc_duration;

        num_iter = test_data->num_iter;
        for (iter = 0; iter < num_iter; iter++) {
                uint32_t total_ops = mem->total_bufs;
                uint32_t remaining_ops = mem->total_bufs;
                uint32_t total_deq_ops = 0;
                uint32_t total_enq_ops = 0;
                uint16_t ops_unused = 0;
                uint16_t num_enq = 0;
                uint16_t num_deq = 0;

                while (remaining_ops > 0) {
                        uint16_t num_ops = RTE_MIN(remaining_ops,
                                                   test_data->burst_sz);
                        uint16_t ops_needed = num_ops - ops_unused;

                        /*
                         * Move the unused operations from the previous
                         * enqueue_burst call to the front, to maintain order
                         */
                        if ((ops_unused > 0) && (num_enq > 0)) {
                                size_t nb_b_to_mov =
                                      ops_unused * sizeof(struct rte_comp_op *);

                                memmove(ops, &ops[num_enq], nb_b_to_mov);
                        }

                        /*
                         * Allocate new compression operations, placing them
                         * after any carried-over unused ones
                         */
                        if (ops_needed && rte_mempool_get_bulk(
                                                mem->op_pool,
                                                (void **)&ops[ops_unused],
                                                ops_needed) != 0) {
                                RTE_LOG(ERR, USER1,
                                      "Could not allocate enough operations\n");
                                res = -1;
                                goto end;
                        }
                        allocated += ops_needed;

                        for (i = 0; i < ops_needed; i++) {
                                /*
                                 * Calculate next buffer to attach to operation
                                 */
                                uint32_t buf_id = total_enq_ops + i +
                                                ops_unused;
                                uint16_t op_id = ops_unused + i;
                                /* Reset all data in output buffers */
                                struct rte_mbuf *m = output_bufs[buf_id];

                                m->pkt_len = out_seg_sz * m->nb_segs;
                                while (m) {
                                        m->data_len = m->buf_len - m->data_off;
                                        m = m->next;
                                }
                                ops[op_id]->m_src = input_bufs[buf_id];
                                ops[op_id]->m_dst = output_bufs[buf_id];
                                ops[op_id]->src.offset = 0;
                                ops[op_id]->src.length =
                                        rte_pktmbuf_pkt_len(input_bufs[buf_id]);
                                ops[op_id]->dst.offset = 0;
                                ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
                                ops[op_id]->input_chksum = buf_id;
                                ops[op_id]->private_xform = priv_xform;
                        }

                        if (unlikely(test_data->perf_comp_force_stop))
                                goto end;

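                        /*
                         * Time only the enqueue call itself. Ops the PMD
                         * does not accept in this burst stay in the array,
                         * are counted as an enqueue retry and resubmitted
                         * on the next pass.
                         */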
                        tsc_start = rte_rdtsc_precise();
                        num_enq = rte_compressdev_enqueue_burst(dev_id,
                                                                mem->qp_id, ops,
                                                                num_ops);
                        tsc_end = rte_rdtsc_precise();
                        tsc_duration = tsc_end - tsc_start;
                        ctx->duration_enq += tsc_duration;

                        if (num_enq < num_ops)
                                ctx->ops_enq_retries++;

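                        /*
                         * Optional busy-wait between enqueue and dequeue,
                         * giving the device time to finish its work, so
                         * dequeues are less likely to come back empty.
                         */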
                        if (test_data->cyclecount_delay)
                                rte_delay_us_block(test_data->cyclecount_delay);

                        if (num_enq == 0) {
                                struct rte_compressdev_stats stats;

                                rte_compressdev_stats_get(dev_id, &stats);
                                if (stats.enqueue_err_count) {
                                        res = -1;
                                        goto end;
                                }
                        }

                        ops_unused = num_ops - num_enq;
                        remaining_ops -= num_enq;
                        total_enq_ops += num_enq;

                        tsc_start = rte_rdtsc_precise();
                        num_deq = rte_compressdev_dequeue_burst(dev_id,
                                                           mem->qp_id,
                                                           deq_ops,
                                                           allocated);
                        tsc_end = rte_rdtsc_precise();
                        tsc_duration = tsc_end - tsc_start;
                        ctx->duration_deq += tsc_duration;

                        if (num_deq < allocated)
                                ctx->ops_deq_retries++;

                        total_deq_ops += num_deq;

                        if (iter == num_iter - 1) {
                                for (i = 0; i < num_deq; i++) {
                                        struct rte_comp_op *op = deq_ops[i];

                                        if (op->status !=
                                                RTE_COMP_OP_STATUS_SUCCESS) {
                                                RTE_LOG(ERR, USER1,
                                                        "Some operations were not successful\n");
                                                res = -1;
                                                goto end;
                                        }

                                        struct rte_mbuf *m = op->m_dst;

                                        m->pkt_len = op->produced;
                                        uint32_t remaining_data = op->produced;
                                        uint16_t data_to_append;

                                        while (remaining_data > 0) {
                                                data_to_append =
                                                        RTE_MIN(remaining_data,
                                                                out_seg_sz);
                                                m->data_len = data_to_append;
                                                remaining_data -=
                                                                data_to_append;
                                                m = m->next;
                                        }
                                }
                        }
                        rte_mempool_put_bulk(mem->op_pool,
                                             (void **)deq_ops, num_deq);
                        allocated -= num_deq;
                }

                /* Dequeue the last operations */
                while (total_deq_ops < total_ops) {
                        if (unlikely(test_data->perf_comp_force_stop))
                                goto end;

                        tsc_start = rte_rdtsc_precise();
                        num_deq = rte_compressdev_dequeue_burst(dev_id,
                                                mem->qp_id,
                                                deq_ops,
                                                test_data->burst_sz);
                        tsc_end = rte_rdtsc_precise();
                        tsc_duration = tsc_end - tsc_start;
                        ctx->duration_deq += tsc_duration;
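                        /*
                         * Every pass through this drain loop counts as a
                         * dequeue retry, since all remaining ops were
                         * already enqueued above.
                         */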
                        ctx->ops_deq_retries++;

                        if (num_deq == 0) {
                                struct rte_compressdev_stats stats;

                                rte_compressdev_stats_get(dev_id, &stats);
                                if (stats.dequeue_err_count) {
                                        res = -1;
                                        goto end;
                                }
                        }
                        total_deq_ops += num_deq;

                        if (iter == num_iter - 1) {
                                for (i = 0; i < num_deq; i++) {
                                        struct rte_comp_op *op = deq_ops[i];

                                        if (op->status !=
                                                RTE_COMP_OP_STATUS_SUCCESS) {
                                                RTE_LOG(ERR, USER1,
                                                        "Some operations were not successful\n");
                                                res = -1;
                                                goto end;
                                        }

                                        struct rte_mbuf *m = op->m_dst;

                                        m->pkt_len = op->produced;
                                        uint32_t remaining_data = op->produced;
                                        uint16_t data_to_append;

                                        while (remaining_data > 0) {
                                                data_to_append =
                                                        RTE_MIN(remaining_data,
                                                                out_seg_sz);
                                                m->data_len = data_to_append;
                                                remaining_data -=
                                                                data_to_append;
                                                m = m->next;
                                        }
                                }
                        }
                        rte_mempool_put_bulk(mem->op_pool,
                                             (void **)deq_ops, num_deq);
                        allocated -= num_deq;
                }
        }
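        /*
         * Normal completion: every op taken from the pool was returned
         * above, so clear the counter before the shared cleanup path.
         */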
        allocated = 0;

end:
        if (allocated)
                rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
        rte_compressdev_private_xform_free(dev_id, priv_xform);
        rte_free(ops);

        if (test_data->perf_comp_force_stop) {
                RTE_LOG(ERR, USER1,
                      "lcore: %d Perf. test has been aborted by user\n",
                        mem->lcore_id);
                res = -1;
        }
        return res;
}

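/*
 * Per-lcore test entry point: print device info once, run the silent
 * verification pass, then time compression and decompression. Each
 * direction runs twice and only the second, cache-warm run is reported.
 */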
int
cperf_cyclecount_test_runner(void *test_ctx)
{
        struct cperf_cyclecount_ctx *ctx = test_ctx;
        struct comp_test_data *test_data = ctx->ver.options;
        uint32_t lcore = rte_lcore_id();
        static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
        static rte_spinlock_t print_spinlock;
        int i;

        uint32_t ops_enq_retries_comp;
        uint32_t ops_deq_retries_comp;

        uint32_t ops_enq_retries_decomp;
        uint32_t ops_deq_retries_decomp;

        uint32_t duration_setup_per_op;

        uint32_t duration_enq_per_op_comp;
        uint32_t duration_deq_per_op_comp;

        uint32_t duration_enq_per_op_decomp;
        uint32_t duration_deq_per_op_decomp;

        ctx->ver.mem.lcore_id = lcore;

        /*
         * Print information about the current compression thread
         */
        if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
                printf("    lcore: %u,"
                                " driver name: %s,"
                                " device name: %s,"
                                " device id: %u,"
                                " socket id: %u,"
                                " queue pair id: %u\n",
                        lcore,
                        ctx->ver.options->driver_name,
                        rte_compressdev_name_get(ctx->ver.mem.dev_id),
                        ctx->ver.mem.dev_id,
                        rte_compressdev_socket_id(ctx->ver.mem.dev_id),
                        ctx->ver.mem.qp_id);

        /*
         * Run the verification part first
         */
        if (cperf_verify_test_runner(&ctx->ver))
                return EXIT_FAILURE;

        /*
         * Run each test twice: the first run warms up the cache and its
         * results are overwritten by the second, measured run
         */

        /* C O M P R E S S */
        for (i = 0; i < 2; i++) {
                if (main_loop(ctx, RTE_COMP_COMPRESS) < 0)
                        return EXIT_FAILURE;
        }

        ops_enq_retries_comp = ctx->ops_enq_retries;
        ops_deq_retries_comp = ctx->ops_deq_retries;

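        /*
         * Average the accumulated cycle counts over every processed op
         * (total_bufs ops per iteration, num_iter iterations).
         */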
        duration_enq_per_op_comp = ctx->duration_enq /
                        (ctx->ver.mem.total_bufs * test_data->num_iter);
        duration_deq_per_op_comp = ctx->duration_deq /
                        (ctx->ver.mem.total_bufs * test_data->num_iter);

        /* D E C O M P R E S S */
        for (i = 0; i < 2; i++) {
                if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0)
                        return EXIT_FAILURE;
        }

        ops_enq_retries_decomp = ctx->ops_enq_retries;
        ops_deq_retries_decomp = ctx->ops_deq_retries;

        duration_enq_per_op_decomp = ctx->duration_enq /
                        (ctx->ver.mem.total_bufs * test_data->num_iter);
        duration_deq_per_op_decomp = ctx->duration_deq /
                        (ctx->ver.mem.total_bufs * test_data->num_iter);

        duration_setup_per_op = ctx->duration_op /
                        (ctx->ver.mem.total_bufs * test_data->num_iter);

        /* R E P O R T processing */
        if (rte_atomic16_test_and_set(&display_once)) {

                rte_spinlock_lock(&print_spinlock);

                printf("\nLegend for the table\n"
                "  - Retries section: number of retries for the following operations:\n"
                "    [C-e] - compression enqueue\n"
                "    [C-d] - compression dequeue\n"
                "    [D-e] - decompression enqueue\n"
                "    [D-d] - decompression dequeue\n"
                "  - Cycles section: number of cycles per 'op' for the following operations:\n"
                "    setup/op - memory allocation, op configuration and memory deallocation\n"
                "    [C-e] - compression enqueue\n"
                "    [C-d] - compression dequeue\n"
                "    [D-e] - decompression enqueue\n"
                "    [D-d] - decompression dequeue\n\n");

                printf("\n%12s%6s%12s%17s",
                        "lcore id", "Level", "Comp size", "Comp ratio [%]");

                printf("  |%10s %6s %8s %6s %8s",
                        " Retries:",
                        "[C-e]", "[C-d]",
                        "[D-e]", "[D-d]");

                printf("  |%9s %9s %9s %9s %9s %9s\n",
                        " Cycles:",
                        "setup/op",
                        "[C-e]", "[C-d]",
                        "[D-e]", "[D-d]");

                rte_spinlock_unlock(&print_spinlock);
        }

        rte_spinlock_lock(&print_spinlock);

        printf("%12u"
               "%6u"
               "%12zu"
               "%17.2f",
                ctx->ver.mem.lcore_id,
                test_data->level,
                ctx->ver.comp_data_sz,
                ctx->ver.ratio);

        printf("  |%10s %6u %8u %6u %8u",
               " ",
                ops_enq_retries_comp,
                ops_deq_retries_comp,
                ops_enq_retries_decomp,
                ops_deq_retries_decomp);

        printf("  |%9s %9u %9u %9u %9u %9u\n",
               " ",
                duration_setup_per_op,
                duration_enq_per_op_comp,
                duration_deq_per_op_comp,
                duration_enq_per_op_decomp,
                duration_deq_per_op_decomp);

        rte_spinlock_unlock(&print_spinlock);

        return EXIT_SUCCESS;
}