/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_compressdev.h>

#include "comp_perf_test_benchmark.h"

void
cperf_benchmark_test_destructor(void *arg)
{
	if (arg) {
		comp_perf_free_memory(
			((struct cperf_benchmark_ctx *)arg)->ver.options,
			&((struct cperf_benchmark_ctx *)arg)->ver.mem);
		rte_free(arg);
	}
}

void *
cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id,
		struct comp_test_data *options)
{
	struct cperf_benchmark_ctx *ctx = NULL;

	ctx = rte_malloc(NULL, sizeof(struct cperf_benchmark_ctx), 0);

	if (ctx == NULL)
		return NULL;

	ctx->ver.mem.dev_id = dev_id;
	ctx->ver.mem.qp_id = qp_id;
	ctx->ver.options = options;
	ctx->ver.silent = 1; /* ver. part will be silent */

	if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
		return ctx;

	cperf_benchmark_test_destructor(ctx);
	return NULL;
}

static int
main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;
	uint8_t dev_id = mem->dev_id;
	uint32_t i, iter, num_iter;
	struct rte_comp_op **ops, **deq_ops;
	void *priv_xform = NULL;
	struct rte_comp_xform xform;
	struct rte_mbuf **input_bufs, **output_bufs;
	int res = 0;
	int allocated = 0;
	uint32_t out_seg_sz;

	if (test_data == NULL || !test_data->burst_sz) {
		RTE_LOG(ERR, USER1,
			"Unknown burst size\n");
		return -1;
	}
	ops = rte_zmalloc_socket(NULL,
		2 * mem->total_bufs * sizeof(struct rte_comp_op *),
		0, rte_socket_id());

	if (ops == NULL) {
		RTE_LOG(ERR, USER1,
			"Can't allocate memory for ops structures\n");
		return -1;
	}

	deq_ops = &ops[mem->total_bufs];

	if (type == RTE_COMP_COMPRESS) {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_COMPRESS,
			.compress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.deflate.huffman = test_data->huffman_enc,
				.level = test_data->level,
				.window_size = test_data->window_sz,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->decomp_bufs;
		output_bufs = mem->comp_bufs;
		out_seg_sz = test_data->out_seg_sz;
	} else {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_DECOMPRESS,
			.decompress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.window_size = test_data->window_sz,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->comp_bufs;
		output_bufs = mem->decomp_bufs;
		out_seg_sz = test_data->seg_sz;
	}

	/* Create private xform */
	if (rte_compressdev_private_xform_create(dev_id, &xform,
			&priv_xform) < 0) {
		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
		res = -1;
		goto end;
	}

	uint64_t tsc_start, tsc_end, tsc_duration;

	num_iter = test_data->num_iter;
	tsc_start = tsc_end = tsc_duration = 0;
	tsc_start = rte_rdtsc_precise();

	for (iter = 0; iter < num_iter; iter++) {
		uint32_t total_ops = mem->total_bufs;
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_deq_ops = 0;
		uint32_t total_enq_ops = 0;
		uint16_t ops_unused = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
					test_data->burst_sz);
			uint16_t ops_needed = num_ops - ops_unused;

			/*
			 * Move the unused operations from the previous
			 * enqueue_burst call to the front, to maintain order
			 */
			if ((ops_unused > 0) && (num_enq > 0)) {
				size_t nb_b_to_mov =
					ops_unused * sizeof(struct rte_comp_op *);

				memmove(ops, &ops[num_enq], nb_b_to_mov);
			}

			/* Allocate compression operations */
			if (ops_needed && !rte_comp_op_bulk_alloc(
						mem->op_pool,
						&ops[ops_unused],
						ops_needed)) {
				RTE_LOG(ERR, USER1,
					"Could not allocate enough operations\n");
				res = -1;
				goto end;
			}
			allocated += ops_needed;
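			/*
			 * "allocated" tracks how many ops are currently held
			 * outside the mempool, so the cleanup path can return
			 * exactly that many.
			 */
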
			for (i = 0; i < ops_needed; i++) {
				/*
				 * Calculate next buffer to attach to operation
				 */
				uint32_t buf_id = total_enq_ops + i +
						ops_unused;
				uint16_t op_id = ops_unused + i;
				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
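				/*
				 * Checksums are disabled
				 * (RTE_COMP_CHECKSUM_NONE), so input_chksum
				 * is free to carry the buffer index.
				 */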
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			num_enq = rte_compressdev_enqueue_burst(dev_id,
						mem->qp_id, ops,
						num_ops);
			if (num_enq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.enqueue_err_count) {
					res = -1;
					goto end;
				}
			}

			ops_unused = num_ops - num_enq;
			remaining_ops -= num_enq;
			total_enq_ops += num_enq;
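
			/*
			 * Dequeue in the same pass, so the device queue is
			 * drained while ops are still being enqueued.
			 */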
			num_deq = rte_compressdev_dequeue_burst(dev_id,
						mem->qp_id,
						deq_ops,
						test_data->burst_sz);
			total_deq_ops += num_deq;
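
			/*
			 * On the final iteration, adjust each dst mbuf chain
			 * to the produced size, so the output buffers reflect
			 * what the device actually wrote.
			 */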
			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
						RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1,
							"Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					(void **)deq_ops, num_deq);
			allocated -= num_deq;
		}

		/* Dequeue the last operations */
		while (total_deq_ops < total_ops) {
			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			num_deq = rte_compressdev_dequeue_burst(dev_id,
						mem->qp_id,
						deq_ops,
						test_data->burst_sz);
			if (num_deq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.dequeue_err_count) {
					res = -1;
					goto end;
				}
			}

			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
						RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1,
							"Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					(void **)deq_ops, num_deq);
			allocated -= num_deq;
		}
	}

	tsc_end = rte_rdtsc_precise();
	tsc_duration = tsc_end - tsc_start;
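
	/*
	 * Store the average cycle count of a single iteration, indexed by
	 * compression level; the runner converts it to throughput later.
	 */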
	if (type == RTE_COMP_COMPRESS)
		ctx->comp_tsc_duration[test_data->level] =
				tsc_duration / num_iter;
	else
		ctx->decomp_tsc_duration[test_data->level] =
				tsc_duration / num_iter;

end:
	rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
	rte_compressdev_private_xform_free(dev_id, priv_xform);
	rte_free(ops);

	if (test_data->perf_comp_force_stop) {
		RTE_LOG(ERR, USER1,
			"lcore: %d Perf. test has been aborted by user\n",
			mem->lcore_id);
		res = -1;
	}

	return res;
}

int
cperf_benchmark_test_runner(void *test_ctx)
{
	struct cperf_benchmark_ctx *ctx = test_ctx;
	struct comp_test_data *test_data = ctx->ver.options;
	uint32_t lcore = rte_lcore_id();
	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
	int i, ret = EXIT_SUCCESS;

	ctx->ver.mem.lcore_id = lcore;

	/*
	 * Print information about the current compression thread
	 */
	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
		printf("    lcore: %u,"
			" driver name: %s,"
			" device name: %s,"
			" device id: %u,"
			" socket id: %u,"
			" queue pair id: %u\n",
			lcore,
			ctx->ver.options->driver_name,
			rte_compressdev_name_get(ctx->ver.mem.dev_id),
			ctx->ver.mem.dev_id,
			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
			ctx->ver.mem.qp_id);

	/*
	 * Run the verification part first; it also fills in the compressed
	 * data size and ratio reported in the summary below.
	 */
	if (cperf_verify_test_runner(&ctx->ver)) {
		ret = EXIT_FAILURE;
		goto end;
	}

	/*
	 * Run each test twice, discarding the first results, so that the
	 * measured run benefits from warmed-up caches.
	 */
	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) {
			ret = EXIT_FAILURE;
			goto end;
		}
	}

	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) {
			ret = EXIT_FAILURE;
			goto end;
		}
	}

	ctx->comp_tsc_byte =
			(double)(ctx->comp_tsc_duration[test_data->level]) /
					test_data->input_data_sz;

	ctx->decomp_tsc_byte =
			(double)(ctx->decomp_tsc_duration[test_data->level]) /
					test_data->input_data_sz;
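
	/*
	 * tsc_byte is TSC cycles spent per input byte, so dividing the TSC
	 * frequency by it yields bytes per second; "* 8 / 1000000000"
	 * converts that to Gbps.
	 */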
	ctx->comp_gbps = rte_get_tsc_hz() / ctx->comp_tsc_byte * 8 /
			1000000000;

	ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 /
			1000000000;

	if (rte_atomic16_test_and_set(&display_once)) {
		printf("\n%12s%6s%12s%17s%15s%16s\n",
			"lcore id", "Level", "Comp size", "Comp ratio [%]",
			"Comp [Gbps]", "Decomp [Gbps]");
	}

	printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n",
		ctx->ver.mem.lcore_id,
		test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio,
		ctx->comp_gbps,
		ctx->decomp_gbps);