/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_compressdev.h>

#include "comp_perf_test_throughput.h"
13 void
14 cperf_throughput_test_destructor(void *arg)
15 {
16         if (arg) {
17                 comp_perf_free_memory(
18                         ((struct cperf_benchmark_ctx *)arg)->ver.options,
19                         &((struct cperf_benchmark_ctx *)arg)->ver.mem);
20                 rte_free(arg);
21         }
22 }
24 void *
25 cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
26                 struct comp_test_data *options)
27 {
28         struct cperf_benchmark_ctx *ctx = NULL;
29
30         ctx = rte_malloc(NULL, sizeof(struct cperf_benchmark_ctx), 0);
31
32         if (ctx == NULL)
33                 return NULL;
34
35         ctx->ver.mem.dev_id = dev_id;
36         ctx->ver.mem.qp_id = qp_id;
37         ctx->ver.options = options;
38         ctx->ver.silent = 1; /* ver. part will be silent */
39
40         if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
41                         && !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
42                 return ctx;
43
44         cperf_throughput_test_destructor(ctx);
45         return NULL;
46 }
48 static int
49 main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type)
50 {
51         struct comp_test_data *test_data = ctx->ver.options;
52         struct cperf_mem_resources *mem = &ctx->ver.mem;
53         uint8_t dev_id = mem->dev_id;
54         uint32_t i, iter, num_iter;
55         struct rte_comp_op **ops, **deq_ops;
56         void *priv_xform = NULL;
57         struct rte_comp_xform xform;
58         struct rte_mbuf **input_bufs, **output_bufs;
59         int res = 0;
60         int allocated = 0;
61         uint32_t out_seg_sz;
62
63         if (test_data == NULL || !test_data->burst_sz) {
64                 RTE_LOG(ERR, USER1,
65                         "Unknown burst size\n");
66                 return -1;
67         }
68
69         ops = rte_zmalloc_socket(NULL,
70                 2 * mem->total_bufs * sizeof(struct rte_comp_op *),
71                 0, rte_socket_id());
72
73         if (ops == NULL) {
74                 RTE_LOG(ERR, USER1,
75                         "Can't allocate memory for ops strucures\n");
76                 return -1;
77         }
78
79         deq_ops = &ops[mem->total_bufs];
80
81         if (type == RTE_COMP_COMPRESS) {
82                 xform = (struct rte_comp_xform) {
83                         .type = RTE_COMP_COMPRESS,
84                         .compress = {
85                                 .algo = RTE_COMP_ALGO_DEFLATE,
86                                 .deflate.huffman = test_data->huffman_enc,
87                                 .level = test_data->level,
88                                 .window_size = test_data->window_sz,
89                                 .chksum = RTE_COMP_CHECKSUM_NONE,
90                                 .hash_algo = RTE_COMP_HASH_ALGO_NONE
91                         }
92                 };
93                 input_bufs = mem->decomp_bufs;
94                 output_bufs = mem->comp_bufs;
95                 out_seg_sz = test_data->out_seg_sz;
96         } else {
97                 xform = (struct rte_comp_xform) {
98                         .type = RTE_COMP_DECOMPRESS,
99                         .decompress = {
100                                 .algo = RTE_COMP_ALGO_DEFLATE,
101                                 .chksum = RTE_COMP_CHECKSUM_NONE,
102                                 .window_size = test_data->window_sz,
103                                 .hash_algo = RTE_COMP_HASH_ALGO_NONE
104                         }
105                 };
106                 input_bufs = mem->comp_bufs;
107                 output_bufs = mem->decomp_bufs;
108                 out_seg_sz = test_data->seg_sz;
109         }
110
111         /* Create private xform */
112         if (rte_compressdev_private_xform_create(dev_id, &xform,
113                         &priv_xform) < 0) {
114                 RTE_LOG(ERR, USER1, "Private xform could not be created\n");
115                 res = -1;
116                 goto end;
117         }
118
119         uint64_t tsc_start, tsc_end, tsc_duration;
120
121         num_iter = test_data->num_iter;
122         tsc_start = tsc_end = tsc_duration = 0;
123         tsc_start = rte_rdtsc_precise();
124
125         for (iter = 0; iter < num_iter; iter++) {
126                 uint32_t total_ops = mem->total_bufs;
127                 uint32_t remaining_ops = mem->total_bufs;
128                 uint32_t total_deq_ops = 0;
129                 uint32_t total_enq_ops = 0;
130                 uint16_t ops_unused = 0;
131                 uint16_t num_enq = 0;
132                 uint16_t num_deq = 0;
133
134                 while (remaining_ops > 0) {
135                         uint16_t num_ops = RTE_MIN(remaining_ops,
136                                                    test_data->burst_sz);
137                         uint16_t ops_needed = num_ops - ops_unused;
138
139                         /*
140                          * Move the unused operations from the previous
141                          * enqueue_burst call to the front, to maintain order
142                          */
143                         if ((ops_unused > 0) && (num_enq > 0)) {
144                                 size_t nb_b_to_mov =
145                                       ops_unused * sizeof(struct rte_comp_op *);
146
147                                 memmove(ops, &ops[num_enq], nb_b_to_mov);
148                         }
149
150                         /* Allocate compression operations */
151                         if (ops_needed && !rte_comp_op_bulk_alloc(
152                                                 mem->op_pool,
153                                                 &ops[ops_unused],
154                                                 ops_needed)) {
155                                 RTE_LOG(ERR, USER1,
156                                       "Could not allocate enough operations\n");
157                                 res = -1;
158                                 goto end;
159                         }
160                         allocated += ops_needed;
161
162                         for (i = 0; i < ops_needed; i++) {
163                                 /*
164                                  * Calculate next buffer to attach to operation
165                                  */
166                                 uint32_t buf_id = total_enq_ops + i +
167                                                 ops_unused;
168                                 uint16_t op_id = ops_unused + i;
169                                 /* Reset all data in output buffers */
170                                 struct rte_mbuf *m = output_bufs[buf_id];
171
172                                 m->pkt_len = out_seg_sz * m->nb_segs;
173                                 while (m) {
174                                         m->data_len = m->buf_len - m->data_off;
175                                         m = m->next;
176                                 }
177                                 ops[op_id]->m_src = input_bufs[buf_id];
178                                 ops[op_id]->m_dst = output_bufs[buf_id];
179                                 ops[op_id]->src.offset = 0;
180                                 ops[op_id]->src.length =
181                                         rte_pktmbuf_pkt_len(input_bufs[buf_id]);
182                                 ops[op_id]->dst.offset = 0;
183                                 ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
184                                 ops[op_id]->input_chksum = buf_id;
185                                 ops[op_id]->private_xform = priv_xform;
186                         }
187
188                         if (unlikely(test_data->perf_comp_force_stop))
189                                 goto end;
190
191                         num_enq = rte_compressdev_enqueue_burst(dev_id,
192                                                                 mem->qp_id, ops,
193                                                                 num_ops);
194                         if (num_enq == 0) {
195                                 struct rte_compressdev_stats stats;
196
197                                 rte_compressdev_stats_get(dev_id, &stats);
198                                 if (stats.enqueue_err_count) {
199                                         res = -1;
200                                         goto end;
201                                 }
202                         }
203
204                         ops_unused = num_ops - num_enq;
205                         remaining_ops -= num_enq;
206                         total_enq_ops += num_enq;
207
208                         num_deq = rte_compressdev_dequeue_burst(dev_id,
209                                                            mem->qp_id,
210                                                            deq_ops,
211                                                            test_data->burst_sz);
212                         total_deq_ops += num_deq;
213
214                         if (iter == num_iter - 1) {
215                                 for (i = 0; i < num_deq; i++) {
216                                         struct rte_comp_op *op = deq_ops[i];
217
218                                         if (op->status !=
219                                                 RTE_COMP_OP_STATUS_SUCCESS) {
220                                                 RTE_LOG(ERR, USER1,
221                                        "Some operations were not successful\n");
222                                                 goto end;
223                                         }
224
225                                         struct rte_mbuf *m = op->m_dst;
226
227                                         m->pkt_len = op->produced;
228                                         uint32_t remaining_data = op->produced;
229                                         uint16_t data_to_append;
230
231                                         while (remaining_data > 0) {
232                                                 data_to_append =
233                                                         RTE_MIN(remaining_data,
234                                                              out_seg_sz);
235                                                 m->data_len = data_to_append;
236                                                 remaining_data -=
237                                                                 data_to_append;
238                                                 m = m->next;
239                                         }
240                                 }
241                         }
242                         rte_mempool_put_bulk(mem->op_pool,
243                                              (void **)deq_ops, num_deq);
244                         allocated -= num_deq;
245                 }
246
247                 /* Dequeue the last operations */
248                 while (total_deq_ops < total_ops) {
249                         if (unlikely(test_data->perf_comp_force_stop))
250                                 goto end;
251
252                         num_deq = rte_compressdev_dequeue_burst(dev_id,
253                                                            mem->qp_id,
254                                                            deq_ops,
255                                                            test_data->burst_sz);
256                         if (num_deq == 0) {
257                                 struct rte_compressdev_stats stats;
258
259                                 rte_compressdev_stats_get(dev_id, &stats);
260                                 if (stats.dequeue_err_count) {
261                                         res = -1;
262                                         goto end;
263                                 }
264                         }
265
266                         total_deq_ops += num_deq;
267
268                         if (iter == num_iter - 1) {
269                                 for (i = 0; i < num_deq; i++) {
270                                         struct rte_comp_op *op = deq_ops[i];
271
272                                         if (op->status !=
273                                                 RTE_COMP_OP_STATUS_SUCCESS) {
274                                                 RTE_LOG(ERR, USER1,
275                                        "Some operations were not successful\n");
276                                                 goto end;
277                                         }
278
279                                         struct rte_mbuf *m = op->m_dst;
280
281                                         m->pkt_len = op->produced;
282                                         uint32_t remaining_data = op->produced;
283                                         uint16_t data_to_append;
284
285                                         while (remaining_data > 0) {
286                                                 data_to_append =
287                                                 RTE_MIN(remaining_data,
288                                                         out_seg_sz);
289                                                 m->data_len = data_to_append;
290                                                 remaining_data -=
291                                                                 data_to_append;
292                                                 m = m->next;
293                                         }
294                                 }
295                         }
296                         rte_mempool_put_bulk(mem->op_pool,
297                                              (void **)deq_ops, num_deq);
298                         allocated -= num_deq;
299                 }
300         }
301
302         tsc_end = rte_rdtsc_precise();
303         tsc_duration = tsc_end - tsc_start;
304
305         if (type == RTE_COMP_COMPRESS)
306                 ctx->comp_tsc_duration[test_data->level] =
307                                 tsc_duration / num_iter;
308         else
309                 ctx->decomp_tsc_duration[test_data->level] =
310                                 tsc_duration / num_iter;
311
312 end:
313         rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
314         rte_compressdev_private_xform_free(dev_id, priv_xform);
315         rte_free(ops);
316
317         if (test_data->perf_comp_force_stop) {
318                 RTE_LOG(ERR, USER1,
319                       "lcore: %d Perf. test has been aborted by user\n",
320                         mem->lcore_id);
321                 res = -1;
322         }
323         return res;
324 }
326 int
327 cperf_throughput_test_runner(void *test_ctx)
328 {
329         struct cperf_benchmark_ctx *ctx = test_ctx;
330         struct comp_test_data *test_data = ctx->ver.options;
331         uint32_t lcore = rte_lcore_id();
332         static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
333         int i, ret = EXIT_SUCCESS;
334
335         ctx->ver.mem.lcore_id = lcore;
336
337         /*
338          * printing information about current compression thread
339          */
340         if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
341                 printf("    lcore: %u,"
342                                 " driver name: %s,"
343                                 " device name: %s,"
344                                 " device id: %u,"
345                                 " socket id: %u,"
346                                 " queue pair id: %u\n",
347                         lcore,
348                         ctx->ver.options->driver_name,
349                         rte_compressdev_name_get(ctx->ver.mem.dev_id),
350                         ctx->ver.mem.dev_id,
351                         rte_compressdev_socket_id(ctx->ver.mem.dev_id),
352                         ctx->ver.mem.qp_id);
353
354         /*
355          * First the verification part is needed
356          */
357         if (cperf_verify_test_runner(&ctx->ver)) {
358                 ret =  EXIT_FAILURE;
359                 goto end;
360         }
361
362         /*
363          * Run the tests twice, discarding the first performance
364          * results, before the cache is warmed up
365          */
366         for (i = 0; i < 2; i++) {
367                 if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) {
368                         ret = EXIT_FAILURE;
369                         goto end;
370                 }
371         }
372
373         for (i = 0; i < 2; i++) {
374                 if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) {
375                         ret = EXIT_FAILURE;
376                         goto end;
377                 }
378         }
379
380         ctx->comp_tsc_byte =
381                         (double)(ctx->comp_tsc_duration[test_data->level]) /
382                                         test_data->input_data_sz;
383
384         ctx->decomp_tsc_byte =
385                         (double)(ctx->decomp_tsc_duration[test_data->level]) /
386                                         test_data->input_data_sz;
387
388         ctx->comp_gbps = rte_get_tsc_hz() / ctx->comp_tsc_byte * 8 /
389                         1000000000;
390
391         ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 /
392                         1000000000;
393
394         if (rte_atomic16_test_and_set(&display_once)) {
395                 printf("\n%12s%6s%12s%17s%15s%16s\n",
396                         "lcore id", "Level", "Comp size", "Comp ratio [%]",
397                         "Comp [Gbps]", "Decomp [Gbps]");
398         }
399
400         printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n",
401                 ctx->ver.mem.lcore_id,
402                 test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio,
403                 ctx->comp_gbps,
404                 ctx->decomp_gbps);
405
406 end:
407         return ret;
408 }