app/test-compress-perf/comp_perf_test_cyclecount.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
 */

#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_cycles.h>
9 #include "rte_spinlock.h"
#include <rte_compressdev.h>

#include "comp_perf_test_cyclecount.h"

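/*
 * Cycle-count test context: embeds the verification context and
 * accumulates retry counts and TSC cycle totals for the op-setup,
 * enqueue and dequeue phases.
 */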
struct cperf_cyclecount_ctx {
	struct cperf_verify_ctx ver;

	uint32_t ops_enq_retries;
	uint32_t ops_deq_retries;

	uint64_t duration_op;
	uint64_t duration_enq;
	uint64_t duration_deq;
};

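/*
 * Free the memory resources held by the embedded verification context,
 * then the context itself.
 */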
void
cperf_cyclecount_test_destructor(void *arg)
{
	struct cperf_cyclecount_ctx *ctx = arg;

	if (arg) {
		comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem);
		rte_free(arg);
	}
}

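/*
 * Allocate a cycle-count context for the given device and queue pair,
 * set its embedded verification context to run silently, and allocate
 * and prepare the test buffers. Returns NULL and tears down the partial
 * context on any failure.
 */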
void *
cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
		struct comp_test_data *options)
{
	struct cperf_cyclecount_ctx *ctx = NULL;

	ctx = rte_malloc(NULL, sizeof(struct cperf_cyclecount_ctx), 0);

	if (ctx == NULL)
		return NULL;

	ctx->ver.mem.dev_id = dev_id;
	ctx->ver.mem.qp_id = qp_id;
	ctx->ver.options = options;
	ctx->ver.silent = 1; /* ver. part will be silent */

	if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
		return ctx;

	cperf_cyclecount_test_destructor(ctx);
	return NULL;
}

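/*
 * Mirror of the main loop's op preparation, with the actual
 * enqueue/dequeue calls left out: ops are taken from the pool and
 * attached to source/destination mbufs exactly as in the timed loop,
 * so the caller can measure pure setup cost.
 */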
static int
cperf_cyclecount_op_setup(struct rte_comp_op **ops,
				 struct cperf_cyclecount_ctx *ctx,
				 struct rte_mbuf **input_bufs,
				 struct rte_mbuf **output_bufs,
				 void *priv_xform,
				 uint32_t out_seg_sz)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;

	uint32_t i, iter, num_iter;
	int res = 0;
	uint16_t ops_needed;

	num_iter = test_data->num_iter;

	for (iter = 0; iter < num_iter; iter++) {
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_enq_ops = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
						   test_data->burst_sz);
			ops_needed = num_ops;

			/* Allocate compression operations */
			if (ops_needed && rte_mempool_get_bulk(
						mem->op_pool,
						(void **)ops,
						ops_needed) != 0) {
				RTE_LOG(ERR, USER1,
				      "Cyclecount: could not allocate enough operations\n");
				res = -1;
				goto end;
			}

			for (i = 0; i < ops_needed; i++) {

				/* Calculate next buffer to attach to operation */
				uint32_t buf_id = total_enq_ops + i;
				uint16_t op_id = i;

				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			/* E N Q U E U I N G */
			/* Assume all ops were enqueued, instead of doing the real enqueue */
			num_enq = num_ops;

			remaining_ops -= num_enq;
			total_enq_ops += num_enq;

			/* D E Q U E U I N G */
			/* Assume all ops were dequeued, instead of doing the real dequeue */
			num_deq = num_ops;

			rte_mempool_put_bulk(mem->op_pool,
					     (void **)ops, num_deq);
		}
	}
	return res;
end:
	/*
	 * No ops are held here: the failed rte_mempool_get_bulk() allocated
	 * nothing, and ops from earlier bursts were already returned to the
	 * pool. The 'ops' array itself is owned and freed by the caller;
	 * freeing it here as well would be a double free.
	 */
	return res;
}

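/*
 * Timed loop: run the configured number of iterations of real
 * enqueue/dequeue bursts for one xform type (compress or decompress),
 * accumulating TSC cycle counts and retry statistics in the context.
 */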
static int
main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;
	uint8_t dev_id = mem->dev_id;
	uint32_t i, iter, num_iter;
	struct rte_comp_op **ops, **deq_ops;
	void *priv_xform = NULL;
	struct rte_comp_xform xform;
	struct rte_mbuf **input_bufs, **output_bufs;
	int ret, res = 0;
	int allocated = 0;
	uint32_t out_seg_sz;

	uint64_t tsc_start, tsc_end, tsc_duration;

	if (test_data == NULL || !test_data->burst_sz) {
		RTE_LOG(ERR, USER1, "Unknown burst size\n");
		return -1;
	}
	ctx->duration_enq = 0;
	ctx->duration_deq = 0;
	ctx->ops_enq_retries = 0;
	ctx->ops_deq_retries = 0;

	/* one array for both enqueue and dequeue */
	ops = rte_zmalloc_socket(NULL,
		(test_data->burst_sz + mem->total_bufs) *
		sizeof(struct rte_comp_op *),
		0, rte_socket_id());

	if (ops == NULL) {
		RTE_LOG(ERR, USER1,
			"Can't allocate memory for ops structures\n");
		return -1;
	}

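	/* deq_ops aliases the tail of the same allocation (total_bufs entries) */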
	deq_ops = &ops[test_data->burst_sz];

	if (type == RTE_COMP_COMPRESS) {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_COMPRESS,
			.compress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.deflate.huffman = test_data->huffman_enc,
				.level = test_data->level,
				.window_size = test_data->window_sz,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->decomp_bufs;
		output_bufs = mem->comp_bufs;
		out_seg_sz = test_data->out_seg_sz;
	} else {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_DECOMPRESS,
			.decompress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.window_size = test_data->window_sz,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->comp_bufs;
		output_bufs = mem->decomp_bufs;
		out_seg_sz = test_data->seg_sz;
	}

	/* Create private xform */
	if (rte_compressdev_private_xform_create(dev_id, &xform,
						&priv_xform) < 0) {
		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
		res = -1;
		goto end;
	}

	tsc_start = rte_rdtsc_precise();
	ret = cperf_cyclecount_op_setup(ops,
				ctx,
				input_bufs,
				output_bufs,
				priv_xform,
				out_seg_sz);

	tsc_end = rte_rdtsc_precise();

	/* The ret check is postponed until after the timestamp, so the extra 'if' does not bias the measurement */
	if (ret < 0) {
		RTE_LOG(ERR, USER1, "Setup function failed\n");
		res = -1;
		goto end;
	}

	tsc_duration = tsc_end - tsc_start;
	ctx->duration_op = tsc_duration;

	num_iter = test_data->num_iter;
	for (iter = 0; iter < num_iter; iter++) {
		uint32_t total_ops = mem->total_bufs;
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_deq_ops = 0;
		uint32_t total_enq_ops = 0;
		uint16_t ops_unused = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
						   test_data->burst_sz);
			uint16_t ops_needed = num_ops - ops_unused;

			/*
			 * Move the unused operations from the previous
			 * enqueue_burst call to the front, to maintain order
			 */
			if ((ops_unused > 0) && (num_enq > 0)) {
				size_t nb_b_to_mov =
				      ops_unused * sizeof(struct rte_comp_op *);

				memmove(ops, &ops[num_enq], nb_b_to_mov);
			}

			/* Allocate compression operations */
			if (ops_needed && rte_mempool_get_bulk(
						mem->op_pool,
						(void **)&ops[ops_unused],
						ops_needed) != 0) {
				RTE_LOG(ERR, USER1,
				      "Could not allocate enough operations\n");
				res = -1;
				goto end;
			}
			allocated += ops_needed;

			for (i = 0; i < ops_needed; i++) {
				/*
				 * Calculate next buffer to attach to operation
				 */
				uint32_t buf_id = total_enq_ops + i +
						ops_unused;
				uint16_t op_id = ops_unused + i;
				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			tsc_start = rte_rdtsc_precise();
			num_enq = rte_compressdev_enqueue_burst(dev_id,
								mem->qp_id, ops,
								num_ops);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_enq += tsc_duration;

			if (num_enq < num_ops)
				ctx->ops_enq_retries++;

			if (test_data->cyclecount_delay)
				rte_delay_us_block(test_data->cyclecount_delay);

			if (num_enq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.enqueue_err_count) {
					res = -1;
					goto end;
				}
			}

			ops_unused = num_ops - num_enq;
			remaining_ops -= num_enq;
			total_enq_ops += num_enq;

			tsc_start = rte_rdtsc_precise();
			num_deq = rte_compressdev_dequeue_burst(dev_id,
							   mem->qp_id,
							   deq_ops,
							   allocated);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_deq += tsc_duration;

			if (num_deq < allocated)
				ctx->ops_deq_retries++;

			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
						RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1, "Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					     (void **)deq_ops, num_deq);
			allocated -= num_deq;
		}

		/* Dequeue the last operations */
		while (total_deq_ops < total_ops) {
			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			tsc_start = rte_rdtsc_precise();
			num_deq = rte_compressdev_dequeue_burst(dev_id,
						mem->qp_id,
						deq_ops,
						test_data->burst_sz);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_deq += tsc_duration;
			ctx->ops_deq_retries++;

			if (num_deq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.dequeue_err_count) {
					res = -1;
					goto end;
				}
			}
			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
						RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1, "Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					     (void **)deq_ops, num_deq);
			allocated -= num_deq;
		}
	}
	allocated = 0;

end:
	if (allocated)
		rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
	rte_compressdev_private_xform_free(dev_id, priv_xform);
	rte_free(ops);

	if (test_data->perf_comp_force_stop) {
		RTE_LOG(ERR, USER1,
457                       "lcore: %d Perf. test has been aborted by user\n",
			mem->lcore_id);
		res = -1;
	}
	return res;
}

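/*
 * Worker entry point: run the verification pass first, then the timed
 * compression and decompression loops (twice each, keeping only the
 * warm-cache results), and print the per-operation cycle report.
 */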
int
cperf_cyclecount_test_runner(void *test_ctx)
{
	struct cperf_cyclecount_ctx *ctx = test_ctx;
	struct comp_test_data *test_data = ctx->ver.options;
	uint32_t lcore = rte_lcore_id();
	static uint16_t display_once;
	static rte_spinlock_t print_spinlock;
	int i;

	uint32_t ops_enq_retries_comp;
	uint32_t ops_deq_retries_comp;

	uint32_t ops_enq_retries_decomp;
	uint32_t ops_deq_retries_decomp;

	uint32_t duration_setup_per_op;

	uint32_t duration_enq_per_op_comp;
	uint32_t duration_deq_per_op_comp;

	uint32_t duration_enq_per_op_decomp;
	uint32_t duration_deq_per_op_decomp;

	ctx->ver.mem.lcore_id = lcore;

	uint16_t exp = 0;
	/*
	 * Print information about the current compression thread
	 */
	if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp,
				1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
		printf("    lcore: %u,"
				" driver name: %s,"
				" device name: %s,"
				" device id: %u,"
				" socket id: %u,"
				" queue pair id: %u\n",
			lcore,
			ctx->ver.options->driver_name,
			rte_compressdev_name_get(ctx->ver.mem.dev_id),
			ctx->ver.mem.dev_id,
			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
			ctx->ver.mem.qp_id);

	/*
	 * First the verification part is needed
	 */
	if (cperf_verify_test_runner(&ctx->ver))
		return EXIT_FAILURE;

	/*
	 * Run the tests twice, discarding the first performance
	 * results, which are taken before the cache is warmed up
	 */

	/* C O M P R E S S */
	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_COMPRESS) < 0)
			return EXIT_FAILURE;
	}

	ops_enq_retries_comp = ctx->ops_enq_retries;
	ops_deq_retries_comp = ctx->ops_deq_retries;

	duration_enq_per_op_comp = ctx->duration_enq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);
	duration_deq_per_op_comp = ctx->duration_deq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	/* D E C O M P R E S S */
	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0)
			return EXIT_FAILURE;
	}

	ops_enq_retries_decomp = ctx->ops_enq_retries;
	ops_deq_retries_decomp = ctx->ops_deq_retries;

	duration_enq_per_op_decomp = ctx->duration_enq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);
	duration_deq_per_op_decomp = ctx->duration_deq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	duration_setup_per_op = ctx->duration_op /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	/* R E P O R T processing */
	rte_spinlock_lock(&print_spinlock);

	if (display_once == 0) {
		display_once = 1;

		printf("\nLegend for the table\n"
		"  - Retries section: number of retries for the following operations:\n"
		"    [C-e] - compression enqueue\n"
		"    [C-d] - compression dequeue\n"
		"    [D-e] - decompression enqueue\n"
		"    [D-d] - decompression dequeue\n"
		"  - Cycles section: number of cycles per 'op' for the following operations:\n"
564                 "    setup/op - memory allocation, op configuration and memory dealocation\n"
565                 "    [C-e] - compression enqueue\n"
566                 "    [C-d] - compression dequeue\n"
567                 "    [D-e] - decompression enqueue\n"
568                 "    [D-d] - decompression dequeue\n\n");
569
570                 printf("\n%12s%6s%12s%17s",
571                         "lcore id", "Level", "Comp size", "Comp ratio [%]");
572
573                 printf("  |%10s %6s %8s %6s %8s",
574                         " Retries:",
575                         "[C-e]", "[C-d]",
576                         "[D-e]", "[D-d]");
577
578                 printf("  |%9s %9s %9s %9s %9s %9s\n",
579                         " Cycles:",
580                         "setup/op",
581                         "[C-e]", "[C-d]",
582                         "[D-e]", "[D-d]");
583         }
584
585         printf("%12u"
586                "%6u"
587                "%12zu"
588                "%17.2f",
589                 ctx->ver.mem.lcore_id,
590                 test_data->level,
591                 ctx->ver.comp_data_sz,
592                 ctx->ver.ratio);
593
594         printf("  |%10s %6u %8u %6u %8u",
595                " ",
596                 ops_enq_retries_comp,
597                 ops_deq_retries_comp,
598                 ops_enq_retries_decomp,
599                 ops_deq_retries_decomp);
600
601         printf("  |%9s %9u %9u %9u %9u %9u\n",
602                " ",
603                 duration_setup_per_op,
604                 duration_enq_per_op_comp,
605                 duration_deq_per_op_comp,
606                 duration_enq_per_op_decomp,
607                 duration_deq_per_op_decomp);
608
609         rte_spinlock_unlock(&print_spinlock);
610
611         return EXIT_SUCCESS;
612 }