From 7c14a7a93b7cb7e26900ed5c55b02d1210bf5b9b Mon Sep 17 00:00:00 2001 From: Tomasz Jozwiak Date: Mon, 8 Jul 2019 20:16:16 +0200 Subject: [PATCH] app/compress-perf: add benchmark test case This patch adds a benchmark part to compression-perf-tool as a separate test case, which can be executed multi-threaded. Also updates release notes. Signed-off-by: Tomasz Jozwiak Acked-by: Artur Trybula Acked-by: Shally Verma --- app/test-compress-perf/Makefile | 1 + .../comp_perf_test_benchmark.c | 140 +++++++++++++----- .../comp_perf_test_benchmark.h | 25 +++- app/test-compress-perf/main.c | 1 + app/test-compress-perf/meson.build | 1 + doc/guides/rel_notes/release_19_08.rst | 4 + 6 files changed, 134 insertions(+), 38 deletions(-) diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile index f54d9a4dd9..d1a6820e62 100644 --- a/app/test-compress-perf/Makefile +++ b/app/test-compress-perf/Makefile @@ -13,6 +13,7 @@ CFLAGS += -O3 SRCS-y := main.c SRCS-y += comp_perf_options_parse.c SRCS-y += comp_perf_test_verify.c +SRCS-y += comp_perf_test_benchmark.c SRCS-y += comp_perf_test_common.c include $(RTE_SDK)/mk/rte.app.mk diff --git a/app/test-compress-perf/comp_perf_test_benchmark.c b/app/test-compress-perf/comp_perf_test_benchmark.c index 5752906b3b..e0f852bf0c 100644 --- a/app/test-compress-perf/comp_perf_test_benchmark.c +++ b/app/test-compress-perf/comp_perf_test_benchmark.c @@ -10,11 +10,46 @@ #include "comp_perf_test_benchmark.h" +void +cperf_benchmark_test_destructor(void *arg) +{ + if (arg) { + comp_perf_free_memory( + &((struct cperf_benchmark_ctx *)arg)->ver.mem); + rte_free(arg); + } +} + +void * +cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id, + struct comp_test_data *options) +{ + struct cperf_benchmark_ctx *ctx = NULL; + + ctx = rte_malloc(NULL, sizeof(struct cperf_benchmark_ctx), 0); + + if (ctx == NULL) + return NULL; + + ctx->ver.mem.dev_id = dev_id; + ctx->ver.mem.qp_id = qp_id; + ctx->ver.options = options; + ctx->ver.silent = 1; /* ver. part will be silent */ + + if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem) + && !prepare_bufs(ctx->ver.options, &ctx->ver.mem)) + return ctx; + + cperf_benchmark_test_destructor(ctx); + return NULL; +} + static int -main_loop(struct comp_test_data *test_data, uint8_t level, - enum rte_comp_xform_type type) +main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type) { - uint8_t dev_id = test_data->cdev_id; + struct comp_test_data *test_data = ctx->ver.options; + struct cperf_mem_resources *mem = &ctx->ver.mem; + uint8_t dev_id = mem->dev_id; uint32_t i, iter, num_iter; struct rte_comp_op **ops, **deq_ops; void *priv_xform = NULL; @@ -31,7 +66,7 @@ main_loop(struct comp_test_data *test_data, uint8_t level, } ops = rte_zmalloc_socket(NULL, - 2 * test_data->total_bufs * sizeof(struct rte_comp_op *), + 2 * mem->total_bufs * sizeof(struct rte_comp_op *), 0, rte_socket_id()); if (ops == NULL) { @@ -40,7 +75,7 @@ main_loop(struct comp_test_data *test_data, uint8_t level, return -1; } - deq_ops = &ops[test_data->total_bufs]; + deq_ops = &ops[mem->total_bufs]; if (type == RTE_COMP_COMPRESS) { xform = (struct rte_comp_xform) { @@ -48,14 +83,14 @@ main_loop(struct comp_test_data *test_data, uint8_t level, .compress = { .algo = RTE_COMP_ALGO_DEFLATE, .deflate.huffman = test_data->huffman_enc, - .level = level, + .level = test_data->level, .window_size = test_data->window_sz, .chksum = RTE_COMP_CHECKSUM_NONE, .hash_algo = RTE_COMP_HASH_ALGO_NONE } }; - input_bufs = test_data->decomp_bufs; - output_bufs = test_data->comp_bufs; + input_bufs = mem->decomp_bufs; + output_bufs = mem->comp_bufs; out_seg_sz = test_data->out_seg_sz; } else { xform = (struct rte_comp_xform) { @@ -67,8 +102,8 @@ main_loop(struct comp_test_data *test_data, uint8_t level, .hash_algo = RTE_COMP_HASH_ALGO_NONE } }; - input_bufs = test_data->comp_bufs; - output_bufs = test_data->decomp_bufs; + input_bufs = mem->comp_bufs; + output_bufs = mem->decomp_bufs; out_seg_sz = test_data->seg_sz; } @@ -82,13 +117,13 @@ main_loop(struct comp_test_data *test_data, uint8_t level, uint64_t tsc_start, tsc_end, tsc_duration; - tsc_start = tsc_end = tsc_duration = 0; - tsc_start = rte_rdtsc(); num_iter = test_data->num_iter; + tsc_start = tsc_end = tsc_duration = 0; + tsc_start = rte_rdtsc_precise(); for (iter = 0; iter < num_iter; iter++) { - uint32_t total_ops = test_data->total_bufs; - uint32_t remaining_ops = test_data->total_bufs; + uint32_t total_ops = mem->total_bufs; + uint32_t remaining_ops = mem->total_bufs; uint32_t total_deq_ops = 0; uint32_t total_enq_ops = 0; uint16_t ops_unused = 0; @@ -113,7 +148,7 @@ main_loop(struct comp_test_data *test_data, uint8_t level, /* Allocate compression operations */ if (ops_needed && !rte_comp_op_bulk_alloc( - test_data->op_pool, + mem->op_pool, &ops[ops_unused], ops_needed)) { RTE_LOG(ERR, USER1, @@ -149,7 +184,8 @@ main_loop(struct comp_test_data *test_data, uint8_t level, ops[op_id]->private_xform = priv_xform; } - num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops, + num_enq = rte_compressdev_enqueue_burst(dev_id, + mem->qp_id, ops, num_ops); if (num_enq == 0) { struct rte_compressdev_stats stats; @@ -165,7 +201,8 @@ main_loop(struct comp_test_data *test_data, uint8_t level, remaining_ops -= num_enq; total_enq_ops += num_enq; - num_deq = rte_compressdev_dequeue_burst(dev_id, 0, + num_deq = rte_compressdev_dequeue_burst(dev_id, + mem->qp_id, deq_ops, test_data->burst_sz); total_deq_ops += num_deq; @@ -177,7 +214,7 @@ main_loop(struct comp_test_data *test_data, uint8_t level, if (op->status != RTE_COMP_OP_STATUS_SUCCESS) { RTE_LOG(ERR, USER1, - "Some operations were not successful\n"); + "Some operations were not successful\n"); goto end; } @@ -198,15 +235,17 @@ main_loop(struct comp_test_data *test_data, uint8_t level, } } } - rte_mempool_put_bulk(test_data->op_pool, + rte_mempool_put_bulk(mem->op_pool, (void **)deq_ops, num_deq); allocated -= num_deq; } /* Dequeue the last operations */ while (total_deq_ops < total_ops) { - num_deq = rte_compressdev_dequeue_burst(dev_id, 0, - deq_ops, test_data->burst_sz); + num_deq = rte_compressdev_dequeue_burst(dev_id, + mem->qp_id, + deq_ops, + test_data->burst_sz); if (num_deq == 0) { struct rte_compressdev_stats stats; @@ -226,7 +265,7 @@ main_loop(struct comp_test_data *test_data, uint8_t level, if (op->status != RTE_COMP_OP_STATUS_SUCCESS) { RTE_LOG(ERR, USER1, - "Some operations were not successful\n"); + "Some operations were not successful\n"); goto end; } @@ -247,65 +286,92 @@ main_loop(struct comp_test_data *test_data, uint8_t level, } } } - rte_mempool_put_bulk(test_data->op_pool, + rte_mempool_put_bulk(mem->op_pool, (void **)deq_ops, num_deq); allocated -= num_deq; } } - tsc_end = rte_rdtsc(); + tsc_end = rte_rdtsc_precise(); tsc_duration = tsc_end - tsc_start; if (type == RTE_COMP_COMPRESS) - test_data->comp_tsc_duration[level] = + ctx->comp_tsc_duration[test_data->level] = tsc_duration / num_iter; else - test_data->decomp_tsc_duration[level] = + ctx->decomp_tsc_duration[test_data->level] = tsc_duration / num_iter; end: - rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated); + rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated); rte_compressdev_private_xform_free(dev_id, priv_xform); rte_free(ops); return res; } int -cperf_benchmark(struct comp_test_data *test_data, uint8_t level) +cperf_benchmark_test_runner(void *test_ctx) { + struct cperf_benchmark_ctx *ctx = test_ctx; + struct comp_test_data *test_data = ctx->ver.options; + uint32_t lcore = rte_lcore_id(); + static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0); + + ctx->ver.mem.lcore_id = lcore; int i, ret = EXIT_SUCCESS; + /* + * First the verification part is needed + */ + if (cperf_verify_test_runner(&ctx->ver)) { + ret = EXIT_FAILURE; + goto end; + } + /* * Run the tests twice, discarding the first performance * results, before the cache is warmed up */ for (i = 0; i < 2; i++) { - if (main_loop(test_data, level, RTE_COMP_COMPRESS) < 0) { + if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) { ret = EXIT_FAILURE; goto end; } } for (i = 0; i < 2; i++) { - if (main_loop(test_data, level, RTE_COMP_DECOMPRESS) < 0) { + if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) { ret = EXIT_FAILURE; goto end; } } - test_data->comp_tsc_byte = - (double)(test_data->comp_tsc_duration[level]) / + ctx->comp_tsc_byte = + (double)(ctx->comp_tsc_duration[test_data->level]) / test_data->input_data_sz; - test_data->decomp_tsc_byte = - (double)(test_data->decomp_tsc_duration[level]) / + ctx->decomp_tsc_byte = + (double)(ctx->decomp_tsc_duration[test_data->level]) / test_data->input_data_sz; - test_data->comp_gbps = rte_get_tsc_hz() / test_data->comp_tsc_byte * 8 / + ctx->comp_gbps = rte_get_tsc_hz() / ctx->comp_tsc_byte * 8 / + 1000000000; + + ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 / 1000000000; - test_data->decomp_gbps = rte_get_tsc_hz() / test_data->decomp_tsc_byte - * 8 / 1000000000; + if (rte_atomic16_test_and_set(&display_once)) { + printf("%12s%6s%12s%17s%15s%16s\n", + "lcore id", "Level", "Comp size", "Comp ratio [%]", + "Comp [Gbps]", "Decomp [Gbps]"); + } + + printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n", + ctx->ver.mem.lcore_id, + test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio, + ctx->comp_gbps, + ctx->decomp_gbps); + end: return ret; } diff --git a/app/test-compress-perf/comp_perf_test_benchmark.h b/app/test-compress-perf/comp_perf_test_benchmark.h index b193445a3f..d9b2694b89 100644 --- a/app/test-compress-perf/comp_perf_test_benchmark.h +++ b/app/test-compress-perf/comp_perf_test_benchmark.h @@ -5,9 +5,32 @@ #ifndef _COMP_PERF_TEST_BENCHMARK_ #define _COMP_PERF_TEST_BENCHMARK_ +#include + #include "comp_perf_options.h" +#include "comp_perf_test_common.h" +#include "comp_perf_test_verify.h" + +struct cperf_benchmark_ctx { + struct cperf_verify_ctx ver; + + /* Store TSC duration for all levels (including level 0) */ + uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1]; + uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1]; + double comp_gbps; + double decomp_gbps; + double comp_tsc_byte; + double decomp_tsc_byte; +}; + +void +cperf_benchmark_test_destructor(void *arg); int -cperf_benchmark(struct comp_test_data *test_data, uint8_t level); +cperf_benchmark_test_runner(void *test_ctx); + +void * +cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id, + struct comp_test_data *options); #endif diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c index 51ca9f5b74..dd6ef9a6d6 100644 --- a/app/test-compress-perf/main.c +++ b/app/test-compress-perf/main.c @@ -9,6 +9,7 @@ #include "comp_perf_options.h" #include "comp_perf_test_verify.h" +#include "comp_perf_test_benchmark.h" #include "comp_perf.h" #include "comp_perf_test_common.h" diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build index c6246e5033..1136f04bca 100644 --- a/app/test-compress-perf/meson.build +++ b/app/test-compress-perf/meson.build @@ -5,5 +5,6 @@ allow_experimental_apis = true sources = files('comp_perf_options_parse.c', 'main.c', 'comp_perf_test_verify.c', + 'comp_perf_test_benchmark.c', 'comp_perf_test_common.c') deps = ['compressdev'] diff --git a/doc/guides/rel_notes/release_19_08.rst b/doc/guides/rel_notes/release_19_08.rst index 4a1fd8dd8a..f8f63f7412 100644 --- a/doc/guides/rel_notes/release_19_08.rst +++ b/doc/guides/rel_notes/release_19_08.rst @@ -208,6 +208,10 @@ New Features Add support for pdump to exit with primary process. +* **Updated test-compress-perf tool application.** + + Added multiple cores feature to compression perf tool application. + Removed Items ------------- -- 2.20.1