From: Artur Trybula Date: Wed, 11 Dec 2019 15:50:00 +0000 (+0100) Subject: test/compress: add cycle-count mode to perf tool X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=2695db95a1474e07d0105d31b9c52562dc6fb89a;p=dpdk.git test/compress: add cycle-count mode to perf tool This commit adds cycle-count mode to the compression perf tool. The new mode enhances the compression performance tool to allow cycle-count measurement of both hardware and software PMDs. Signed-off-by: Artur Trybula Acked-by: Fiona Trahe --- diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile index d1a6820e62..ad3b91d0a2 100644 --- a/app/test-compress-perf/Makefile +++ b/app/test-compress-perf/Makefile @@ -13,7 +13,8 @@ CFLAGS += -O3 SRCS-y := main.c SRCS-y += comp_perf_options_parse.c SRCS-y += comp_perf_test_verify.c -SRCS-y += comp_perf_test_benchmark.c +SRCS-y += comp_perf_test_throughput.c +SRCS-y += comp_perf_test_cyclecount.c SRCS-y += comp_perf_test_common.c include $(RTE_SDK)/mk/rte.app.mk diff --git a/app/test-compress-perf/comp_perf.h b/app/test-compress-perf/comp_perf.h index 57289b07a4..997d46b59b 100644 --- a/app/test-compress-perf/comp_perf.h +++ b/app/test-compress-perf/comp_perf.h @@ -26,15 +26,15 @@ struct cperf_test { /* Needed for weak functions*/ void * -cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused, +cperf_throughput_test_constructor(uint8_t dev_id __rte_unused, uint16_t qp_id __rte_unused, struct comp_test_data *options __rte_unused); void -cperf_benchmark_test_destructor(void *arg __rte_unused); +cperf_throughput_test_destructor(void *arg __rte_unused); int -cperf_benchmark_test_runner(void *test_ctx __rte_unused); +cperf_throughput_test_runner(void *test_ctx __rte_unused); void * cperf_verify_test_constructor(uint8_t dev_id __rte_unused, diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h index 2c26511ef0..0b777521c5 100644 --- 
a/app/test-compress-perf/comp_perf_options.h +++ b/app/test-compress-perf/comp_perf_options.h @@ -24,8 +24,9 @@ enum cleanup_st { }; enum cperf_test_type { - CPERF_TEST_TYPE_BENCHMARK, - CPERF_TEST_TYPE_VERIFY + CPERF_TEST_TYPE_THROUGHPUT, + CPERF_TEST_TYPE_VERIFY, + CPERF_TEST_TYPE_PMDCC }; enum comp_operation { @@ -68,6 +69,8 @@ struct comp_test_data { double ratio; enum cleanup_st cleanup; int perf_comp_force_stop; + + uint32_t cyclecount_delay; }; int diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c index 12d0a6caf0..04a8d2fbee 100644 --- a/app/test-compress-perf/comp_perf_options_parse.c +++ b/app/test-compress-perf/comp_perf_options_parse.c @@ -30,6 +30,9 @@ #define CPERF_WINDOW_SIZE ("window-sz") #define CPERF_EXTERNAL_MBUFS ("external-mbufs") +/* cyclecount-specific options */ +#define CPERF_CYCLECOUNT_DELAY_US ("cc-delay-us") + struct name_id_map { const char *name; uint32_t id; @@ -39,7 +42,7 @@ static void usage(char *progname) { printf("%s [EAL options] --\n" - " --ptest benchmark / verify :" + " --ptest throughput / verify / pmd-cyclecount\n" " --driver-name NAME: compress driver to use\n" " --input-file NAME: file to compress and decompress\n" " --extended-input-sz N: extend file data up to this size (default: no extension)\n" @@ -61,6 +64,8 @@ usage(char *progname) " (e.g.: 15 => 32k, default: max supported by PMD)\n" " --external-mbufs: use memzones as external buffers instead of\n" " keeping the data directly in mbuf area\n" + " --cc-delay-us N: delay between enqueue and dequeue operations in microseconds\n" + " valid only for cyclecount perf test (default: 500 us)\n" " -h: prints this help\n", progname); } @@ -85,12 +90,16 @@ parse_cperf_test_type(struct comp_test_data *test_data, const char *arg) { struct name_id_map cperftest_namemap[] = { { - comp_perf_test_type_strs[CPERF_TEST_TYPE_BENCHMARK], - CPERF_TEST_TYPE_BENCHMARK + comp_perf_test_type_strs[CPERF_TEST_TYPE_THROUGHPUT], + 
CPERF_TEST_TYPE_THROUGHPUT }, { comp_perf_test_type_strs[CPERF_TEST_TYPE_VERIFY], CPERF_TEST_TYPE_VERIFY + }, + { + comp_perf_test_type_strs[CPERF_TEST_TYPE_PMDCC], + CPERF_TEST_TYPE_PMDCC } }; @@ -531,17 +540,28 @@ parse_external_mbufs(struct comp_test_data *test_data, return 0; } +static int +parse_cyclecount_delay_us(struct comp_test_data *test_data, + const char *arg) +{ + int ret = parse_uint32_t(&(test_data->cyclecount_delay), arg); + + if (ret) { + RTE_LOG(ERR, USER1, "Failed to parse cyclecount delay\n"); + return -1; + } + return 0; +} + typedef int (*option_parser_t)(struct comp_test_data *test_data, const char *arg); struct long_opt_parser { const char *lgopt_name; option_parser_t parser_fn; - }; static struct option lgopts[] = { - { CPERF_PTEST_TYPE, required_argument, 0, 0 }, { CPERF_DRIVER_NAME, required_argument, 0, 0 }, { CPERF_TEST_FILE, required_argument, 0, 0 }, @@ -556,6 +576,7 @@ static struct option lgopts[] = { { CPERF_LEVEL, required_argument, 0, 0 }, { CPERF_WINDOW_SIZE, required_argument, 0, 0 }, { CPERF_EXTERNAL_MBUFS, 0, 0, 0 }, + { CPERF_CYCLECOUNT_DELAY_US, required_argument, 0, 0 }, { NULL, 0, 0, 0 } }; @@ -577,6 +598,7 @@ comp_perf_opts_parse_long(int opt_idx, struct comp_test_data *test_data) { CPERF_LEVEL, parse_level }, { CPERF_WINDOW_SIZE, parse_window_sz }, { CPERF_EXTERNAL_MBUFS, parse_external_mbufs }, + { CPERF_CYCLECOUNT_DELAY_US, parse_cyclecount_delay_us }, }; unsigned int i; @@ -631,8 +653,9 @@ comp_perf_options_default(struct comp_test_data *test_data) test_data->level_lst.min = RTE_COMP_LEVEL_MIN; test_data->level_lst.max = RTE_COMP_LEVEL_MAX; test_data->level_lst.inc = 1; - test_data->test = CPERF_TEST_TYPE_BENCHMARK; + test_data->test = CPERF_TEST_TYPE_THROUGHPUT; test_data->use_external_mbufs = 0; + test_data->cyclecount_delay = 500; } int diff --git a/app/test-compress-perf/comp_perf_test_benchmark.c b/app/test-compress-perf/comp_perf_test_benchmark.c deleted file mode 100644 index 0c6bb9b454..0000000000 --- 
a/app/test-compress-perf/comp_perf_test_benchmark.c +++ /dev/null @@ -1,408 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2018 Intel Corporation - */ - -#include -#include -#include -#include -#include - -#include "comp_perf_test_benchmark.h" - -void -cperf_benchmark_test_destructor(void *arg) -{ - if (arg) { - comp_perf_free_memory( - ((struct cperf_benchmark_ctx *)arg)->ver.options, - &((struct cperf_benchmark_ctx *)arg)->ver.mem); - rte_free(arg); - } -} - -void * -cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id, - struct comp_test_data *options) -{ - struct cperf_benchmark_ctx *ctx = NULL; - - ctx = rte_malloc(NULL, sizeof(struct cperf_benchmark_ctx), 0); - - if (ctx == NULL) - return NULL; - - ctx->ver.mem.dev_id = dev_id; - ctx->ver.mem.qp_id = qp_id; - ctx->ver.options = options; - ctx->ver.silent = 1; /* ver. part will be silent */ - - if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem) - && !prepare_bufs(ctx->ver.options, &ctx->ver.mem)) - return ctx; - - cperf_benchmark_test_destructor(ctx); - return NULL; -} - -static int -main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type) -{ - struct comp_test_data *test_data = ctx->ver.options; - struct cperf_mem_resources *mem = &ctx->ver.mem; - uint8_t dev_id = mem->dev_id; - uint32_t i, iter, num_iter; - struct rte_comp_op **ops, **deq_ops; - void *priv_xform = NULL; - struct rte_comp_xform xform; - struct rte_mbuf **input_bufs, **output_bufs; - int res = 0; - int allocated = 0; - uint32_t out_seg_sz; - - if (test_data == NULL || !test_data->burst_sz) { - RTE_LOG(ERR, USER1, - "Unknown burst size\n"); - return -1; - } - - ops = rte_zmalloc_socket(NULL, - 2 * mem->total_bufs * sizeof(struct rte_comp_op *), - 0, rte_socket_id()); - - if (ops == NULL) { - RTE_LOG(ERR, USER1, - "Can't allocate memory for ops strucures\n"); - return -1; - } - - deq_ops = &ops[mem->total_bufs]; - - if (type == RTE_COMP_COMPRESS) { - xform = (struct rte_comp_xform) { - 
.type = RTE_COMP_COMPRESS, - .compress = { - .algo = RTE_COMP_ALGO_DEFLATE, - .deflate.huffman = test_data->huffman_enc, - .level = test_data->level, - .window_size = test_data->window_sz, - .chksum = RTE_COMP_CHECKSUM_NONE, - .hash_algo = RTE_COMP_HASH_ALGO_NONE - } - }; - input_bufs = mem->decomp_bufs; - output_bufs = mem->comp_bufs; - out_seg_sz = test_data->out_seg_sz; - } else { - xform = (struct rte_comp_xform) { - .type = RTE_COMP_DECOMPRESS, - .decompress = { - .algo = RTE_COMP_ALGO_DEFLATE, - .chksum = RTE_COMP_CHECKSUM_NONE, - .window_size = test_data->window_sz, - .hash_algo = RTE_COMP_HASH_ALGO_NONE - } - }; - input_bufs = mem->comp_bufs; - output_bufs = mem->decomp_bufs; - out_seg_sz = test_data->seg_sz; - } - - /* Create private xform */ - if (rte_compressdev_private_xform_create(dev_id, &xform, - &priv_xform) < 0) { - RTE_LOG(ERR, USER1, "Private xform could not be created\n"); - res = -1; - goto end; - } - - uint64_t tsc_start, tsc_end, tsc_duration; - - num_iter = test_data->num_iter; - tsc_start = tsc_end = tsc_duration = 0; - tsc_start = rte_rdtsc_precise(); - - for (iter = 0; iter < num_iter; iter++) { - uint32_t total_ops = mem->total_bufs; - uint32_t remaining_ops = mem->total_bufs; - uint32_t total_deq_ops = 0; - uint32_t total_enq_ops = 0; - uint16_t ops_unused = 0; - uint16_t num_enq = 0; - uint16_t num_deq = 0; - - while (remaining_ops > 0) { - uint16_t num_ops = RTE_MIN(remaining_ops, - test_data->burst_sz); - uint16_t ops_needed = num_ops - ops_unused; - - /* - * Move the unused operations from the previous - * enqueue_burst call to the front, to maintain order - */ - if ((ops_unused > 0) && (num_enq > 0)) { - size_t nb_b_to_mov = - ops_unused * sizeof(struct rte_comp_op *); - - memmove(ops, &ops[num_enq], nb_b_to_mov); - } - - /* Allocate compression operations */ - if (ops_needed && !rte_comp_op_bulk_alloc( - mem->op_pool, - &ops[ops_unused], - ops_needed)) { - RTE_LOG(ERR, USER1, - "Could not allocate enough operations\n"); - res = 
-1; - goto end; - } - allocated += ops_needed; - - for (i = 0; i < ops_needed; i++) { - /* - * Calculate next buffer to attach to operation - */ - uint32_t buf_id = total_enq_ops + i + - ops_unused; - uint16_t op_id = ops_unused + i; - /* Reset all data in output buffers */ - struct rte_mbuf *m = output_bufs[buf_id]; - - m->pkt_len = out_seg_sz * m->nb_segs; - while (m) { - m->data_len = m->buf_len - m->data_off; - m = m->next; - } - ops[op_id]->m_src = input_bufs[buf_id]; - ops[op_id]->m_dst = output_bufs[buf_id]; - ops[op_id]->src.offset = 0; - ops[op_id]->src.length = - rte_pktmbuf_pkt_len(input_bufs[buf_id]); - ops[op_id]->dst.offset = 0; - ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL; - ops[op_id]->input_chksum = buf_id; - ops[op_id]->private_xform = priv_xform; - } - - if (unlikely(test_data->perf_comp_force_stop)) - goto end; - - num_enq = rte_compressdev_enqueue_burst(dev_id, - mem->qp_id, ops, - num_ops); - if (num_enq == 0) { - struct rte_compressdev_stats stats; - - rte_compressdev_stats_get(dev_id, &stats); - if (stats.enqueue_err_count) { - res = -1; - goto end; - } - } - - ops_unused = num_ops - num_enq; - remaining_ops -= num_enq; - total_enq_ops += num_enq; - - num_deq = rte_compressdev_dequeue_burst(dev_id, - mem->qp_id, - deq_ops, - test_data->burst_sz); - total_deq_ops += num_deq; - - if (iter == num_iter - 1) { - for (i = 0; i < num_deq; i++) { - struct rte_comp_op *op = deq_ops[i]; - - if (op->status != - RTE_COMP_OP_STATUS_SUCCESS) { - RTE_LOG(ERR, USER1, - "Some operations were not successful\n"); - goto end; - } - - struct rte_mbuf *m = op->m_dst; - - m->pkt_len = op->produced; - uint32_t remaining_data = op->produced; - uint16_t data_to_append; - - while (remaining_data > 0) { - data_to_append = - RTE_MIN(remaining_data, - out_seg_sz); - m->data_len = data_to_append; - remaining_data -= - data_to_append; - m = m->next; - } - } - } - rte_mempool_put_bulk(mem->op_pool, - (void **)deq_ops, num_deq); - allocated -= num_deq; - } - - /* Dequeue 
the last operations */ - while (total_deq_ops < total_ops) { - if (unlikely(test_data->perf_comp_force_stop)) - goto end; - - num_deq = rte_compressdev_dequeue_burst(dev_id, - mem->qp_id, - deq_ops, - test_data->burst_sz); - if (num_deq == 0) { - struct rte_compressdev_stats stats; - - rte_compressdev_stats_get(dev_id, &stats); - if (stats.dequeue_err_count) { - res = -1; - goto end; - } - } - - total_deq_ops += num_deq; - - if (iter == num_iter - 1) { - for (i = 0; i < num_deq; i++) { - struct rte_comp_op *op = deq_ops[i]; - - if (op->status != - RTE_COMP_OP_STATUS_SUCCESS) { - RTE_LOG(ERR, USER1, - "Some operations were not successful\n"); - goto end; - } - - struct rte_mbuf *m = op->m_dst; - - m->pkt_len = op->produced; - uint32_t remaining_data = op->produced; - uint16_t data_to_append; - - while (remaining_data > 0) { - data_to_append = - RTE_MIN(remaining_data, - out_seg_sz); - m->data_len = data_to_append; - remaining_data -= - data_to_append; - m = m->next; - } - } - } - rte_mempool_put_bulk(mem->op_pool, - (void **)deq_ops, num_deq); - allocated -= num_deq; - } - } - - tsc_end = rte_rdtsc_precise(); - tsc_duration = tsc_end - tsc_start; - - if (type == RTE_COMP_COMPRESS) - ctx->comp_tsc_duration[test_data->level] = - tsc_duration / num_iter; - else - ctx->decomp_tsc_duration[test_data->level] = - tsc_duration / num_iter; - -end: - rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated); - rte_compressdev_private_xform_free(dev_id, priv_xform); - rte_free(ops); - - if (test_data->perf_comp_force_stop) { - RTE_LOG(ERR, USER1, - "lcore: %d Perf. 
test has been aborted by user\n", - mem->lcore_id); - res = -1; - } - return res; -} - -int -cperf_benchmark_test_runner(void *test_ctx) -{ - struct cperf_benchmark_ctx *ctx = test_ctx; - struct comp_test_data *test_data = ctx->ver.options; - uint32_t lcore = rte_lcore_id(); - static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0); - int i, ret = EXIT_SUCCESS; - - ctx->ver.mem.lcore_id = lcore; - - /* - * printing information about current compression thread - */ - if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once)) - printf(" lcore: %u," - " driver name: %s," - " device name: %s," - " device id: %u," - " socket id: %u," - " queue pair id: %u\n", - lcore, - ctx->ver.options->driver_name, - rte_compressdev_name_get(ctx->ver.mem.dev_id), - ctx->ver.mem.dev_id, - rte_compressdev_socket_id(ctx->ver.mem.dev_id), - ctx->ver.mem.qp_id); - - /* - * First the verification part is needed - */ - if (cperf_verify_test_runner(&ctx->ver)) { - ret = EXIT_FAILURE; - goto end; - } - - /* - * Run the tests twice, discarding the first performance - * results, before the cache is warmed up - */ - for (i = 0; i < 2; i++) { - if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) { - ret = EXIT_FAILURE; - goto end; - } - } - - for (i = 0; i < 2; i++) { - if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) { - ret = EXIT_FAILURE; - goto end; - } - } - - ctx->comp_tsc_byte = - (double)(ctx->comp_tsc_duration[test_data->level]) / - test_data->input_data_sz; - - ctx->decomp_tsc_byte = - (double)(ctx->decomp_tsc_duration[test_data->level]) / - test_data->input_data_sz; - - ctx->comp_gbps = rte_get_tsc_hz() / ctx->comp_tsc_byte * 8 / - 1000000000; - - ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 / - 1000000000; - - if (rte_atomic16_test_and_set(&display_once)) { - printf("\n%12s%6s%12s%17s%15s%16s\n", - "lcore id", "Level", "Comp size", "Comp ratio [%]", - "Comp [Gbps]", "Decomp [Gbps]"); - } - - printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n", - ctx->ver.mem.lcore_id, - 
test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio, - ctx->comp_gbps, - ctx->decomp_gbps); - -end: - return ret; -} diff --git a/app/test-compress-perf/comp_perf_test_benchmark.h b/app/test-compress-perf/comp_perf_test_benchmark.h deleted file mode 100644 index d9b2694b89..0000000000 --- a/app/test-compress-perf/comp_perf_test_benchmark.h +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2018 Intel Corporation - */ - -#ifndef _COMP_PERF_TEST_BENCHMARK_ -#define _COMP_PERF_TEST_BENCHMARK_ - -#include - -#include "comp_perf_options.h" -#include "comp_perf_test_common.h" -#include "comp_perf_test_verify.h" - -struct cperf_benchmark_ctx { - struct cperf_verify_ctx ver; - - /* Store TSC duration for all levels (including level 0) */ - uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1]; - uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1]; - double comp_gbps; - double decomp_gbps; - double comp_tsc_byte; - double decomp_tsc_byte; -}; - -void -cperf_benchmark_test_destructor(void *arg); - -int -cperf_benchmark_test_runner(void *test_ctx); - -void * -cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id, - struct comp_test_data *options); - -#endif diff --git a/app/test-compress-perf/comp_perf_test_common.c b/app/test-compress-perf/comp_perf_test_common.c index 1b8985b430..b402a0d839 100644 --- a/app/test-compress-perf/comp_perf_test_common.c +++ b/app/test-compress-perf/comp_perf_test_common.c @@ -9,7 +9,8 @@ #include "comp_perf.h" #include "comp_perf_options.h" -#include "comp_perf_test_benchmark.h" +#include "comp_perf_test_throughput.h" +#include "comp_perf_test_cyclecount.h" #include "comp_perf_test_common.h" #include "comp_perf_test_verify.h" @@ -276,9 +277,11 @@ comp_perf_allocate_memory(struct comp_test_data *test_data, snprintf(pool_name, sizeof(pool_name), "op_pool_%u_qp_%u", mem->dev_id, mem->qp_id); + + /* one mempool for both src and dst mbufs */ mem->op_pool = rte_comp_op_pool_create(pool_name, - 
mem->total_bufs, - 0, 0, rte_socket_id()); + mem->total_bufs * 2, + 0, 0, rte_socket_id()); if (mem->op_pool == NULL) { RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n"); return -1; @@ -495,20 +498,24 @@ prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem) } void -print_test_dynamics(void) +print_test_dynamics(const struct comp_test_data *test_data) { uint32_t opt_total_segs = DIV_CEIL(buffer_info.input_data_sz, MAX_SEG_SIZE); if (buffer_info.total_buffs > 1) { - printf("\nWarning: for the current input parameters, number" + if (test_data->test == CPERF_TEST_TYPE_THROUGHPUT) { + printf("\nWarning: for the current input parameters, number" " of ops is higher than one, which may result" " in sub-optimal performance.\n"); - printf("To improve the performance (for the current" + printf("To improve the performance (for the current" " input data) following parameters are" " suggested:\n"); - printf(" * Segment size: %d\n", MAX_SEG_SIZE); - printf(" * Number of segments: %u\n", opt_total_segs); + printf(" * Segment size: %d\n", + MAX_SEG_SIZE); + printf(" * Number of segments: %u\n", + opt_total_segs); + } } else if (buffer_info.total_buffs == 1) { printf("\nInfo: there is only one op with %u segments -" " the compression ratio is the best.\n", diff --git a/app/test-compress-perf/comp_perf_test_common.h b/app/test-compress-perf/comp_perf_test_common.h index 9206428886..72705c6a2b 100644 --- a/app/test-compress-perf/comp_perf_test_common.h +++ b/app/test-compress-perf/comp_perf_test_common.h @@ -49,6 +49,6 @@ int prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem); void -print_test_dynamics(void); +print_test_dynamics(const struct comp_test_data *test_data); #endif /* _COMP_PERF_TEST_COMMON_H_ */ diff --git a/app/test-compress-perf/comp_perf_test_cyclecount.c b/app/test-compress-perf/comp_perf_test_cyclecount.c new file mode 100644 index 0000000000..55559a7d5a --- /dev/null +++ 
b/app/test-compress-perf/comp_perf_test_cyclecount.c @@ -0,0 +1,614 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#include +#include +#include +#include +#include "rte_spinlock.h" +#include + +#include "comp_perf_test_cyclecount.h" + +struct cperf_cyclecount_ctx { + struct cperf_verify_ctx ver; + + uint32_t ops_enq_retries; + uint32_t ops_deq_retries; + + uint64_t duration_op; + uint64_t duration_enq; + uint64_t duration_deq; +}; + +void +cperf_cyclecount_test_destructor(void *arg) +{ + struct cperf_cyclecount_ctx *ctx = arg; + + if (arg) { + comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem); + rte_free(arg); + } +} + +void * +cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id, + struct comp_test_data *options) +{ + struct cperf_cyclecount_ctx *ctx = NULL; + + ctx = rte_malloc(NULL, sizeof(struct cperf_cyclecount_ctx), 0); + + if (ctx == NULL) + return NULL; + + ctx->ver.mem.dev_id = dev_id; + ctx->ver.mem.qp_id = qp_id; + ctx->ver.options = options; + ctx->ver.silent = 1; /* ver. 
part will be silent */ + + if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem) + && !prepare_bufs(ctx->ver.options, &ctx->ver.mem)) + return ctx; + + cperf_cyclecount_test_destructor(ctx); + return NULL; +} + +static int +cperf_cyclecount_op_setup(struct rte_comp_op **ops, + struct cperf_cyclecount_ctx *ctx, + struct rte_mbuf **input_bufs, + struct rte_mbuf **output_bufs, + void *priv_xform, + uint32_t out_seg_sz) +{ + struct comp_test_data *test_data = ctx->ver.options; + struct cperf_mem_resources *mem = &ctx->ver.mem; + + uint32_t i, iter, num_iter; + int res = 0; + uint16_t ops_needed; + + num_iter = test_data->num_iter; + + for (iter = 0; iter < num_iter; iter++) { + uint32_t remaining_ops = mem->total_bufs; + uint32_t total_deq_ops = 0; + uint32_t total_enq_ops = 0; + uint16_t num_enq = 0; + uint16_t num_deq = 0; + + while (remaining_ops > 0) { + uint16_t num_ops = RTE_MIN(remaining_ops, + test_data->burst_sz); + ops_needed = num_ops; + + /* Allocate compression operations */ + if (ops_needed && rte_mempool_get_bulk( + mem->op_pool, + (void **)ops, + ops_needed) != 0) { + RTE_LOG(ERR, USER1, + "Cyclecount: could not allocate enough operations\n"); + res = -1; + goto end; + } + + for (i = 0; i < ops_needed; i++) { + + /* Calculate next buffer to attach */ + /* to operation */ + uint32_t buf_id = total_enq_ops + i; + uint16_t op_id = i; + + /* Reset all data in output buffers */ + struct rte_mbuf *m = output_bufs[buf_id]; + + m->pkt_len = out_seg_sz * m->nb_segs; + while (m) { + m->data_len = m->buf_len - m->data_off; + m = m->next; + } + ops[op_id]->m_src = input_bufs[buf_id]; + ops[op_id]->m_dst = output_bufs[buf_id]; + ops[op_id]->src.offset = 0; + ops[op_id]->src.length = + rte_pktmbuf_pkt_len(input_bufs[buf_id]); + ops[op_id]->dst.offset = 0; + ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL; + ops[op_id]->input_chksum = buf_id; + ops[op_id]->private_xform = priv_xform; + } + + /* E N Q U E U I N G */ + /* assuming that all ops are enqueued */ + 
/* instead of the real enqueue operation */ + num_enq = num_ops; + + remaining_ops -= num_enq; + total_enq_ops += num_enq; + + /* D E Q U E U I N G */ + /* assuming that all ops dequeued */ + /* instead of the real dequeue operation */ + num_deq = num_ops; + + total_deq_ops += num_deq; + rte_mempool_put_bulk(mem->op_pool, + (void **)ops, num_deq); + } + } + return res; +end: + rte_mempool_put_bulk(mem->op_pool, (void **)ops, ops_needed); + rte_free(ops); + + return res; +} + +static int +main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type) +{ + struct comp_test_data *test_data = ctx->ver.options; + struct cperf_mem_resources *mem = &ctx->ver.mem; + uint8_t dev_id = mem->dev_id; + uint32_t i, iter, num_iter; + struct rte_comp_op **ops, **deq_ops; + void *priv_xform = NULL; + struct rte_comp_xform xform; + struct rte_mbuf **input_bufs, **output_bufs; + int ret, res = 0; + int allocated = 0; + uint32_t out_seg_sz; + + uint64_t tsc_start, tsc_end, tsc_duration; + + if (test_data == NULL || !test_data->burst_sz) { + RTE_LOG(ERR, USER1, "Unknown burst size\n"); + return -1; + } + ctx->duration_enq = 0; + ctx->duration_deq = 0; + ctx->ops_enq_retries = 0; + ctx->ops_deq_retries = 0; + + /* one array for both enqueue and dequeue */ + ops = rte_zmalloc_socket(NULL, + 2 * mem->total_bufs * sizeof(struct rte_comp_op *), + 0, rte_socket_id()); + + if (ops == NULL) { + RTE_LOG(ERR, USER1, + "Can't allocate memory for ops strucures\n"); + return -1; + } + + deq_ops = &ops[mem->total_bufs]; + + if (type == RTE_COMP_COMPRESS) { + xform = (struct rte_comp_xform) { + .type = RTE_COMP_COMPRESS, + .compress = { + .algo = RTE_COMP_ALGO_DEFLATE, + .deflate.huffman = test_data->huffman_enc, + .level = test_data->level, + .window_size = test_data->window_sz, + .chksum = RTE_COMP_CHECKSUM_NONE, + .hash_algo = RTE_COMP_HASH_ALGO_NONE + } + }; + input_bufs = mem->decomp_bufs; + output_bufs = mem->comp_bufs; + out_seg_sz = test_data->out_seg_sz; + } else { + xform = 
(struct rte_comp_xform) { + .type = RTE_COMP_DECOMPRESS, + .decompress = { + .algo = RTE_COMP_ALGO_DEFLATE, + .chksum = RTE_COMP_CHECKSUM_NONE, + .window_size = test_data->window_sz, + .hash_algo = RTE_COMP_HASH_ALGO_NONE + } + }; + input_bufs = mem->comp_bufs; + output_bufs = mem->decomp_bufs; + out_seg_sz = test_data->seg_sz; + } + + /* Create private xform */ + if (rte_compressdev_private_xform_create(dev_id, &xform, + &priv_xform) < 0) { + RTE_LOG(ERR, USER1, "Private xform could not be created\n"); + res = -1; + goto end; + } + + tsc_start = rte_rdtsc_precise(); + ret = cperf_cyclecount_op_setup(ops, + ctx, + input_bufs, + output_bufs, + priv_xform, + out_seg_sz); + + tsc_end = rte_rdtsc_precise(); + + /* ret value check postponed a bit to cancel extra 'if' bias */ + if (ret < 0) { + RTE_LOG(ERR, USER1, "Setup function failed\n"); + res = -1; + goto end; + } + + tsc_duration = tsc_end - tsc_start; + ctx->duration_op = tsc_duration; + + num_iter = test_data->num_iter; + for (iter = 0; iter < num_iter; iter++) { + uint32_t total_ops = mem->total_bufs; + uint32_t remaining_ops = mem->total_bufs; + uint32_t total_deq_ops = 0; + uint32_t total_enq_ops = 0; + uint16_t ops_unused = 0; + uint16_t num_enq = 0; + uint16_t num_deq = 0; + + while (remaining_ops > 0) { + uint16_t num_ops = RTE_MIN(remaining_ops, + test_data->burst_sz); + uint16_t ops_needed = num_ops - ops_unused; + + /* + * Move the unused operations from the previous + * enqueue_burst call to the front, to maintain order + */ + if ((ops_unused > 0) && (num_enq > 0)) { + size_t nb_b_to_mov = + ops_unused * sizeof(struct rte_comp_op *); + + memmove(ops, &ops[num_enq], nb_b_to_mov); + } + + /* Allocate compression operations */ + if (ops_needed && rte_mempool_get_bulk( + mem->op_pool, + (void **)ops, + ops_needed) != 0) { + RTE_LOG(ERR, USER1, + "Could not allocate enough operations\n"); + res = -1; + goto end; + } + allocated += ops_needed; + + for (i = 0; i < ops_needed; i++) { + /* + * Calculate next 
buffer to attach to operation + */ + uint32_t buf_id = total_enq_ops + i + + ops_unused; + uint16_t op_id = ops_unused + i; + /* Reset all data in output buffers */ + struct rte_mbuf *m = output_bufs[buf_id]; + + m->pkt_len = out_seg_sz * m->nb_segs; + while (m) { + m->data_len = m->buf_len - m->data_off; + m = m->next; + } + ops[op_id]->m_src = input_bufs[buf_id]; + ops[op_id]->m_dst = output_bufs[buf_id]; + ops[op_id]->src.offset = 0; + ops[op_id]->src.length = + rte_pktmbuf_pkt_len(input_bufs[buf_id]); + ops[op_id]->dst.offset = 0; + ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL; + ops[op_id]->input_chksum = buf_id; + ops[op_id]->private_xform = priv_xform; + } + + if (unlikely(test_data->perf_comp_force_stop)) + goto end; + + tsc_start = rte_rdtsc_precise(); + num_enq = rte_compressdev_enqueue_burst(dev_id, + mem->qp_id, ops, + num_ops); + tsc_end = rte_rdtsc_precise(); + tsc_duration = tsc_end - tsc_start; + ctx->duration_enq += tsc_duration; + + if (num_enq < num_ops) + ctx->ops_enq_retries++; + + if (test_data->cyclecount_delay) + rte_delay_us_block(test_data->cyclecount_delay); + + if (num_enq == 0) { + struct rte_compressdev_stats stats; + + rte_compressdev_stats_get(dev_id, &stats); + if (stats.enqueue_err_count) { + res = -1; + goto end; + } + } + + ops_unused = num_ops - num_enq; + remaining_ops -= num_enq; + total_enq_ops += num_enq; + + tsc_start = rte_rdtsc_precise(); + num_deq = rte_compressdev_dequeue_burst(dev_id, + mem->qp_id, + deq_ops, + allocated); + tsc_end = rte_rdtsc_precise(); + tsc_duration = tsc_end - tsc_start; + ctx->duration_deq += tsc_duration; + + if (num_deq < allocated) + ctx->ops_deq_retries++; + + total_deq_ops += num_deq; + + if (iter == num_iter - 1) { + for (i = 0; i < num_deq; i++) { + struct rte_comp_op *op = deq_ops[i]; + + if (op->status != + RTE_COMP_OP_STATUS_SUCCESS) { + RTE_LOG(ERR, USER1, "Some operations were not successful\n"); + goto end; + } + + struct rte_mbuf *m = op->m_dst; + + m->pkt_len = op->produced; + 
uint32_t remaining_data = op->produced; + uint16_t data_to_append; + + while (remaining_data > 0) { + data_to_append = + RTE_MIN(remaining_data, + out_seg_sz); + m->data_len = data_to_append; + remaining_data -= + data_to_append; + m = m->next; + } + } + } + rte_mempool_put_bulk(mem->op_pool, + (void **)deq_ops, num_deq); + allocated -= num_deq; + } + + /* Dequeue the last operations */ + while (total_deq_ops < total_ops) { + if (unlikely(test_data->perf_comp_force_stop)) + goto end; + + tsc_start = rte_rdtsc_precise(); + num_deq = rte_compressdev_dequeue_burst(dev_id, + mem->qp_id, + deq_ops, + test_data->burst_sz); + tsc_end = rte_rdtsc_precise(); + tsc_duration = tsc_end - tsc_start; + ctx->duration_deq += tsc_duration; + ctx->ops_deq_retries++; + + if (num_deq == 0) { + struct rte_compressdev_stats stats; + + rte_compressdev_stats_get(dev_id, &stats); + if (stats.dequeue_err_count) { + res = -1; + goto end; + } + } + total_deq_ops += num_deq; + + if (iter == num_iter - 1) { + for (i = 0; i < num_deq; i++) { + struct rte_comp_op *op = deq_ops[i]; + + if (op->status != + RTE_COMP_OP_STATUS_SUCCESS) { + RTE_LOG(ERR, USER1, "Some operations were not successful\n"); + goto end; + } + + struct rte_mbuf *m = op->m_dst; + + m->pkt_len = op->produced; + uint32_t remaining_data = op->produced; + uint16_t data_to_append; + + while (remaining_data > 0) { + data_to_append = + RTE_MIN(remaining_data, + out_seg_sz); + m->data_len = data_to_append; + remaining_data -= + data_to_append; + m = m->next; + } + } + } + rte_mempool_put_bulk(mem->op_pool, + (void **)deq_ops, num_deq); + allocated -= num_deq; + } + } + allocated = 0; + +end: + if (allocated) + rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated); + rte_compressdev_private_xform_free(dev_id, priv_xform); + rte_free(ops); + + if (test_data->perf_comp_force_stop) { + RTE_LOG(ERR, USER1, + "lcore: %d Perf. 
test has been aborted by user\n",
+			mem->lcore_id);
+		res = -1;
+	}
+	return res;
+}
+
+/*
+ * Runner of the pmd-cyclecount test for one context (one lcore).
+ *
+ * Runs the verification pass first, then the compression loop twice and the
+ * decompression loop twice, and finally prints one result row with the retry
+ * counters and the per-op cycle counts left in the context by the last run
+ * of each loop. The legend and the column headers are printed once, by
+ * whichever lcore reaches the report section first.
+ *
+ * test_ctx: pointer to the struct cperf_cyclecount_ctx of this lcore.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE when verification or one of the
+ * measurement loops fails, or when no operation was executed.
+ */
+int
+cperf_cyclecount_test_runner(void *test_ctx)
+{
+	struct cperf_cyclecount_ctx *ctx = test_ctx;
+	struct comp_test_data *test_data = ctx->ver.options;
+	uint32_t lcore = rte_lcore_id();
+	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+	static rte_spinlock_t print_spinlock;
+	uint64_t total_ops;
+	int i;
+
+	uint32_t ops_enq_retries_comp;
+	uint32_t ops_deq_retries_comp;
+
+	uint32_t ops_enq_retries_decomp;
+	uint32_t ops_deq_retries_decomp;
+
+	uint32_t duration_setup_per_op;
+
+	uint32_t duration_enq_per_op_comp;
+	uint32_t duration_deq_per_op_comp;
+
+	uint32_t duration_enq_per_op_decomp;
+	uint32_t duration_deq_per_op_decomp;
+
+	ctx->ver.mem.lcore_id = lcore;
+
+	/*
+	 * printing information about current compression thread
+	 */
+	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
+		printf(" lcore: %u,"
+				" driver name: %s,"
+				" device name: %s,"
+				" device id: %u,"
+				" socket id: %u,"
+				" queue pair id: %u\n",
+			lcore,
+			ctx->ver.options->driver_name,
+			rte_compressdev_name_get(ctx->ver.mem.dev_id),
+			ctx->ver.mem.dev_id,
+			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
+			ctx->ver.mem.qp_id);
+
+	/*
+	 * First the verification part is needed
+	 */
+	if (cperf_verify_test_runner(&ctx->ver))
+		return EXIT_FAILURE;
+
+	/*
+	 * Run the tests twice, discarding the first performance
+	 * results, before the cache is warmed up
+	 */
+
+	/* C O M P R E S S */
+	for (i = 0; i < 2; i++) {
+		if (main_loop(ctx, RTE_COMP_COMPRESS) < 0)
+			return EXIT_FAILURE;
+	}
+
+	/*
+	 * Every per-op figure below is divided by the same number of
+	 * operations: compute it once, in 64 bits so the product cannot
+	 * wrap, and refuse to divide by zero.
+	 */
+	total_ops = (uint64_t)ctx->ver.mem.total_bufs * test_data->num_iter;
+	if (total_ops == 0) {
+		RTE_LOG(ERR, USER1,
+			"No operations were run, cycle counts unavailable\n");
+		return EXIT_FAILURE;
+	}
+
+	ops_enq_retries_comp = ctx->ops_enq_retries;
+	ops_deq_retries_comp = ctx->ops_deq_retries;
+
+	duration_enq_per_op_comp = ctx->duration_enq / total_ops;
+	duration_deq_per_op_comp = ctx->duration_deq / total_ops;
+
+	/* D E C O M P R E S S */
+	for (i = 0; i < 2; i++) {
+		if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0)
+			return EXIT_FAILURE;
+	}
+
+	ops_enq_retries_decomp = ctx->ops_enq_retries;
+	ops_deq_retries_decomp = ctx->ops_deq_retries;
+
+	duration_enq_per_op_decomp = ctx->duration_enq / total_ops;
+	duration_deq_per_op_decomp = ctx->duration_deq / total_ops;
+
+	duration_setup_per_op = ctx->duration_op / total_ops;
+
+	/* R E P O R T processing */
+
+	/*
+	 * Take the lock before testing display_once: if the flag were tested
+	 * outside the lock, another lcore could lose the test, grab the lock
+	 * first and print its result row ahead of the legend and headers.
+	 */
+	rte_spinlock_lock(&print_spinlock);
+
+	if (rte_atomic16_test_and_set(&display_once)) {
+
+		printf("\nLegend for the table\n"
+		" - Retries section: number of retries for the following operations:\n"
+		" [C-e] - compression enqueue\n"
+		" [C-d] - compression dequeue\n"
+		" [D-e] - decompression enqueue\n"
+		" [D-d] - decompression dequeue\n"
+		" - Cycles section: number of cycles per 'op' for the following operations:\n"
+		" setup/op - memory allocation, op configuration and memory deallocation\n"
+		" [C-e] - compression enqueue\n"
+		" [C-d] - compression dequeue\n"
+		" [D-e] - decompression enqueue\n"
+		" [D-d] - decompression dequeue\n\n");
+
+		printf("\n%12s%6s%12s%17s",
+			"lcore id", "Level", "Comp size", "Comp ratio [%]");
+
+		printf(" |%10s %6s %8s %6s %8s",
+			" Retries:",
+			"[C-e]", "[C-d]",
+			"[D-e]", "[D-d]");
+
+		printf(" |%9s %9s %9s %9s %9s %9s\n",
+			" Cycles:",
+			"setup/op",
+			"[C-e]", "[C-d]",
+			"[D-e]", "[D-d]");
+	}
+
+	printf("%12u"
+	       "%6u"
+	       "%12zu"
+	       "%17.2f",
+		ctx->ver.mem.lcore_id,
+		test_data->level,
+		ctx->ver.comp_data_sz,
+		ctx->ver.ratio);
+
+	printf(" |%10s %6u %8u %6u %8u",
+		" ",
+		ops_enq_retries_comp,
+		ops_deq_retries_comp,
+		ops_enq_retries_decomp,
+		ops_deq_retries_decomp);
+
+	printf(" |%9s %9u %9u %9u %9u %9u\n",
+		" ",
+		duration_setup_per_op,
+		duration_enq_per_op_comp,
+		duration_deq_per_op_comp,
+		duration_enq_per_op_decomp,
+		duration_deq_per_op_decomp);
+
+	rte_spinlock_unlock(&print_spinlock);
+
+	return EXIT_SUCCESS;
+}
diff --git a/app/test-compress-perf/comp_perf_test_cyclecount.h b/app/test-compress-perf/comp_perf_test_cyclecount.h
new file mode 100644
index 0000000000..8e1b4d9e96
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_cyclecount.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_CYCLECOUNT_
+#define _COMP_PERF_TEST_CYCLECOUNT_
+
+#include <stdint.h>
+
+#include "comp_perf_options.h"
+#include "comp_perf_test_common.h"
+#include "comp_perf_test_verify.h"
+
+void
+cperf_cyclecount_test_destructor(void *arg);
+
+int
+cperf_cyclecount_test_runner(void *test_ctx);
+
+void *
+cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
+		struct comp_test_data *options);
+
+#endif
diff --git a/app/test-compress-perf/comp_perf_test_throughput.c b/app/test-compress-perf/comp_perf_test_throughput.c
new file mode 100644
index 0000000000..13922b658c
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_throughput.c
@@ -0,0 +1,408 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_test_throughput.h"
+
+void
+cperf_throughput_test_destructor(void *arg)
+{
+	if (arg) {
+		comp_perf_free_memory(
+			((struct cperf_benchmark_ctx *)arg)->ver.options,
+			&((struct cperf_benchmark_ctx *)arg)->ver.mem);
+		rte_free(arg);
+	}
+}
+
+void *
+cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
+		struct comp_test_data *options)
+{
+	struct cperf_benchmark_ctx *ctx = NULL;
+
+	ctx = rte_malloc(NULL, sizeof(struct cperf_benchmark_ctx), 0);
+
+	if (ctx == NULL)
+		return NULL;
+
+	ctx->ver.mem.dev_id = dev_id;
+	ctx->ver.mem.qp_id = qp_id;
+	ctx->ver.options = options;
+	ctx->ver.silent = 1; /* ver.
part will be silent */
+
+	if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
+			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
+		return ctx;
+
+	cperf_throughput_test_destructor(ctx);
+	return NULL;
+}
+
+/*
+ * Timed enqueue/dequeue loop of the throughput test.
+ *
+ * Builds a private xform for the requested direction, then for num_iter
+ * iterations pushes every buffer through the device in bursts of burst_sz
+ * and drains the results. The TSC time of the whole run divided by num_iter
+ * is stored in ctx->comp_tsc_duration[] or ctx->decomp_tsc_duration[] at the
+ * current level. Op status and produced lengths are only inspected during
+ * the last iteration.
+ *
+ * ctx:  throughput test context of this lcore.
+ * type: RTE_COMP_COMPRESS, anything else is treated as decompression.
+ *
+ * Returns 0 on success and -1 on any failure: bad options, allocation
+ * failure, device enqueue/dequeue errors, an op that did not complete
+ * successfully, or a user-requested stop.
+ */
+static int
+main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type)
+{
+	struct comp_test_data *test_data = ctx->ver.options;
+	struct cperf_mem_resources *mem = &ctx->ver.mem;
+	uint8_t dev_id = mem->dev_id;
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops;
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	int res = 0;
+	int allocated = 0;
+	uint32_t out_seg_sz;
+
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1,
+			"Unknown burst size\n");
+		return -1;
+	}
+
+	/* One array, two halves: ops to enqueue, then dequeued ops */
+	ops = rte_zmalloc_socket(NULL,
+		2 * mem->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops structures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[mem->total_bufs];
+
+	if (type == RTE_COMP_COMPRESS) {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_COMPRESS,
+			.compress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.deflate.huffman = test_data->huffman_enc,
+				.level = test_data->level,
+				.window_size = test_data->window_sz,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = mem->decomp_bufs;
+		output_bufs = mem->comp_bufs;
+		out_seg_sz = test_data->out_seg_sz;
+	} else {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_DECOMPRESS,
+			.decompress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.window_size = test_data->window_sz,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = mem->comp_bufs;
+		output_bufs = mem->decomp_bufs;
+		out_seg_sz = test_data->seg_sz;
+	}
+
+	/* Create private xform */
+	if (rte_compressdev_private_xform_create(dev_id, &xform,
+			&priv_xform) < 0) {
+		RTE_LOG(ERR, USER1,
+			"Private xform could not be created\n");
+		res = -1;
+		goto end;
+	}
+
+	uint64_t tsc_start, tsc_end, tsc_duration;
+
+	num_iter = test_data->num_iter;
+	tsc_start = tsc_end = tsc_duration = 0;
+	tsc_start = rte_rdtsc_precise();
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t total_ops = mem->total_bufs;
+		uint32_t remaining_ops = mem->total_bufs;
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t ops_unused = 0;
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+						   test_data->burst_sz);
+			uint16_t ops_needed = num_ops - ops_unused;
+
+			/*
+			 * Move the unused operations from the previous
+			 * enqueue_burst call to the front, to maintain order
+			 */
+			if ((ops_unused > 0) && (num_enq > 0)) {
+				size_t nb_b_to_mov =
+				      ops_unused * sizeof(struct rte_comp_op *);
+
+				memmove(ops, &ops[num_enq], nb_b_to_mov);
+			}
+
+			/* Allocate compression operations */
+			if (ops_needed && !rte_comp_op_bulk_alloc(
+						mem->op_pool,
+						&ops[ops_unused],
+						ops_needed)) {
+				RTE_LOG(ERR, USER1,
+				      "Could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+			allocated += ops_needed;
+
+			for (i = 0; i < ops_needed; i++) {
+				/*
+				 * Calculate next buffer to attach to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i +
+						ops_unused;
+				uint16_t op_id = ops_unused + i;
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = out_seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				ops[op_id]->input_chksum = buf_id;
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			if (unlikely(test_data->perf_comp_force_stop))
+				goto end;
+
+			num_enq = rte_compressdev_enqueue_burst(dev_id,
+								mem->qp_id, ops,
+								num_ops);
+			if (num_enq == 0) {
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.enqueue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			ops_unused = num_ops - num_enq;
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			num_deq = rte_compressdev_dequeue_burst(dev_id,
+							   mem->qp_id,
+							   deq_ops,
+							   test_data->burst_sz);
+			total_deq_ops += num_deq;
+
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+
+					if (op->status !=
+						RTE_COMP_OP_STATUS_SUCCESS) {
+						RTE_LOG(ERR, USER1,
+				       "Some operations were not successful\n");
+						/* report the failure to the caller */
+						res = -1;
+						goto end;
+					}
+
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+							RTE_MIN(remaining_data,
+							     out_seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(mem->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+
+		/* Dequeue the last operations */
+		while (total_deq_ops < total_ops) {
+			if (unlikely(test_data->perf_comp_force_stop))
+				goto end;
+
+			num_deq = rte_compressdev_dequeue_burst(dev_id,
+						mem->qp_id,
+						deq_ops,
+						test_data->burst_sz);
+			if (num_deq == 0) {
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.dequeue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			total_deq_ops += num_deq;
+
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+
+					if (op->status !=
+						RTE_COMP_OP_STATUS_SUCCESS) {
+						RTE_LOG(ERR, USER1,
+				       "Some operations were not successful\n");
+						/* report the failure to the caller */
+						res = -1;
+						goto end;
+					}
+
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+						RTE_MIN(remaining_data,
+							out_seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(mem->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+	}
+
+	tsc_end = rte_rdtsc_precise();
+	tsc_duration = tsc_end - tsc_start;
+
+	if (type == RTE_COMP_COMPRESS)
+		ctx->comp_tsc_duration[test_data->level] =
+				tsc_duration / num_iter;
+	else
+		ctx->decomp_tsc_duration[test_data->level] =
+				tsc_duration / num_iter;
+
+end:
+	rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
+	rte_compressdev_private_xform_free(dev_id, priv_xform);
+	rte_free(ops);
+
+	if (test_data->perf_comp_force_stop) {
+		RTE_LOG(ERR, USER1,
+		      "lcore: %d Perf. test has been aborted by user\n",
+			mem->lcore_id);
+		res = -1;
+	}
+	return res;
+}
+
+/*
+ * Runner of the throughput test for one context (one lcore).
+ *
+ * Runs the verification pass, then the compression and the decompression
+ * loops twice each, converts the stored TSC durations into cycles per byte
+ * and Gbps, and prints one result row (plus the column headers, once).
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE when verification or one of the
+ * measurement loops fails.
+ */
+int
+cperf_throughput_test_runner(void *test_ctx)
+{
+	struct cperf_benchmark_ctx *ctx = test_ctx;
+	struct comp_test_data *test_data = ctx->ver.options;
+	uint32_t lcore = rte_lcore_id();
+	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+	int i, ret = EXIT_SUCCESS;
+
+	ctx->ver.mem.lcore_id = lcore;
+
+	/*
+	 * printing information about current compression thread
+	 */
+	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
+		printf(" lcore: %u,"
+				" driver name: %s,"
+				" device name: %s,"
+				" device id: %u,"
+				" socket id: %u,"
+				" queue pair id: %u\n",
+			lcore,
+			ctx->ver.options->driver_name,
+			rte_compressdev_name_get(ctx->ver.mem.dev_id),
+			ctx->ver.mem.dev_id,
+			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
+			ctx->ver.mem.qp_id);
+
+	/*
+	 * First the verification part is needed
+	 */
+	if (cperf_verify_test_runner(&ctx->ver)) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	/*
+	 * Run the tests twice, discarding the first performance
+	 * results, before the cache is warmed up
+	 */
+	for (i = 0; i < 2; i++) {
+		if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) {
+			ret = EXIT_FAILURE;
+			goto end;
+		}
+	}
+
+	for (i = 0; i < 2; i++) {
+		if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) {
+			ret = EXIT_FAILURE;
+			goto end;
+		}
+	}
+
+	ctx->comp_tsc_byte =
+			(double)(ctx->comp_tsc_duration[test_data->level]) /
+					test_data->input_data_sz;
+
+	ctx->decomp_tsc_byte =
+			(double)(ctx->decomp_tsc_duration[test_data->level]) /
+					test_data->input_data_sz;
+
+	ctx->comp_gbps = rte_get_tsc_hz() / ctx->comp_tsc_byte * 8 /
+			1000000000;
+
+	ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 /
+			1000000000;
+
+	if (rte_atomic16_test_and_set(&display_once)) {
+		printf("\n%12s%6s%12s%17s%15s%16s\n",
+			"lcore id", "Level", "Comp size", "Comp ratio [%]",
+			"Comp [Gbps]", "Decomp [Gbps]");
+	}
+
+	printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n",
+		ctx->ver.mem.lcore_id,
+		test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio,
+		ctx->comp_gbps,
+		ctx->decomp_gbps);
+
+end:
+	return ret;
+}
diff --git a/app/test-compress-perf/comp_perf_test_throughput.h b/app/test-compress-perf/comp_perf_test_throughput.h
new file mode 100644
index 0000000000..467e3aa78a
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_throughput.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_THROUGHPUT_
+#define _COMP_PERF_TEST_THROUGHPUT_
+
+#include <stdint.h>
+
+#include "comp_perf_options.h"
+#include "comp_perf_test_common.h"
+#include "comp_perf_test_verify.h"
+
+struct cperf_benchmark_ctx {
+	struct cperf_verify_ctx ver;
+
+	/* Store TSC duration for all levels (including level 0) */
+	uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+	uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+	double comp_gbps;
+	double decomp_gbps;
+	double comp_tsc_byte;
+	double decomp_tsc_byte;
+};
+
+void
+cperf_throughput_test_destructor(void *arg);
+
+int
+cperf_throughput_test_runner(void *test_ctx);
+
+void *
+cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
+		struct comp_test_data *options);
+
+#endif
diff --git a/app/test-compress-perf/comp_perf_test_verify.c
b/app/test-compress-perf/comp_perf_test_verify.c index 758a22ff5b..5e13257b79 100644 --- a/app/test-compress-perf/comp_perf_test_verify.c +++ b/app/test-compress-perf/comp_perf_test_verify.c @@ -48,8 +48,8 @@ static int main_loop(struct cperf_verify_ctx *ctx, enum rte_comp_xform_type type) { struct comp_test_data *test_data = ctx->options; - uint8_t *output_data_ptr; - size_t *output_data_sz; + uint8_t *output_data_ptr = NULL; + size_t *output_data_sz = NULL; struct cperf_mem_resources *mem = &ctx->mem; uint8_t dev_id = mem->dev_id; diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c index 6b56dd6809..ed21605d89 100644 --- a/app/test-compress-perf/main.c +++ b/app/test-compress-perf/main.c @@ -11,32 +11,41 @@ #include #include -#include "comp_perf_options.h" -#include "comp_perf_test_verify.h" -#include "comp_perf_test_benchmark.h" #include "comp_perf.h" +#include "comp_perf_options.h" #include "comp_perf_test_common.h" +#include "comp_perf_test_cyclecount.h" +#include "comp_perf_test_throughput.h" +#include "comp_perf_test_verify.h" #define NUM_MAX_XFORMS 16 #define NUM_MAX_INFLIGHT_OPS 512 __extension__ const char *comp_perf_test_type_strs[] = { - [CPERF_TEST_TYPE_BENCHMARK] = "benchmark", - [CPERF_TEST_TYPE_VERIFY] = "verify" + [CPERF_TEST_TYPE_THROUGHPUT] = "throughput", + [CPERF_TEST_TYPE_VERIFY] = "verify", + [CPERF_TEST_TYPE_PMDCC] = "pmd-cyclecount" }; __extension__ static const struct cperf_test cperf_testmap[] = { - [CPERF_TEST_TYPE_BENCHMARK] = { - cperf_benchmark_test_constructor, - cperf_benchmark_test_runner, - cperf_benchmark_test_destructor + [CPERF_TEST_TYPE_THROUGHPUT] = { + cperf_throughput_test_constructor, + cperf_throughput_test_runner, + cperf_throughput_test_destructor + }, [CPERF_TEST_TYPE_VERIFY] = { cperf_verify_test_constructor, cperf_verify_test_runner, cperf_verify_test_destructor + }, + + [CPERF_TEST_TYPE_PMDCC] = { + cperf_cyclecount_test_constructor, + cperf_cyclecount_test_runner, + 
cperf_cyclecount_test_destructor } }; @@ -116,7 +125,8 @@ comp_perf_initialize_compressdev(struct comp_test_data *test_data, enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name, enabled_cdevs, RTE_COMPRESS_MAX_DEVS); if (enabled_cdev_count == 0) { - RTE_LOG(ERR, USER1, "No compress devices type %s available\n", + RTE_LOG(ERR, USER1, "No compress devices type %s available," + " please check the list of specified devices in EAL section\n", test_data->driver_name); return -EINVAL; } @@ -270,6 +280,7 @@ comp_perf_dump_input_data(struct comp_test_data *test_data) data += data_to_read; } + printf("\n"); if (test_data->input_data_sz > actual_file_sz) RTE_LOG(INFO, USER1, "%zu bytes read from file %s, extending the file %.2f times\n", @@ -365,9 +376,12 @@ main(int argc, char **argv) else test_data->level = test_data->level_lst.list[0]; - printf("App uses socket: %u\n", rte_socket_id()); + printf("\nApp uses socket: %u\n", rte_socket_id()); printf("Burst size = %u\n", test_data->burst_sz); printf("Input data size = %zu\n", test_data->input_data_sz); + if (test_data->test == CPERF_TEST_TYPE_PMDCC) + printf("Cycle-count delay = %u [us]\n", + test_data->cyclecount_delay); test_data->cleanup = ST_DURING_TEST; total_nb_qps = nb_compressdevs * test_data->nb_qps; @@ -394,7 +408,7 @@ main(int argc, char **argv) i++; } - print_test_dynamics(); /* constructors must be executed first */ + print_test_dynamics(test_data); while (test_data->level <= test_data->level_lst.max) { @@ -472,7 +486,28 @@ end: } __rte_weak void * -cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused, +cperf_cyclecount_test_constructor(uint8_t dev_id __rte_unused, + uint16_t qp_id __rte_unused, + struct comp_test_data *options __rte_unused) +{ + RTE_LOG(INFO, USER1, "Cycle count test is not supported yet\n"); + return NULL; +} + +__rte_weak void +cperf_cyclecount_test_destructor(void *arg __rte_unused) +{ + RTE_LOG(INFO, USER1, "Something wrong happened!!!\n"); +} + +__rte_weak int 
+cperf_cyclecount_test_runner(void *test_ctx __rte_unused) +{ + return 0; +} + +__rte_weak void * +cperf_throughput_test_constructor(uint8_t dev_id __rte_unused, uint16_t qp_id __rte_unused, struct comp_test_data *options __rte_unused) { @@ -481,13 +516,13 @@ cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused, } __rte_weak void -cperf_benchmark_test_destructor(void *arg __rte_unused) +cperf_throughput_test_destructor(void *arg __rte_unused) { } __rte_weak int -cperf_benchmark_test_runner(void *test_ctx __rte_unused) +cperf_throughput_test_runner(void *test_ctx __rte_unused) { return 0; } diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build index 1136f04bca..1fe26cc14a 100644 --- a/app/test-compress-perf/meson.build +++ b/app/test-compress-perf/meson.build @@ -5,6 +5,7 @@ allow_experimental_apis = true sources = files('comp_perf_options_parse.c', 'main.c', 'comp_perf_test_verify.c', - 'comp_perf_test_benchmark.c', + 'comp_perf_test_throughput.c', + 'comp_perf_test_cyclecount.c', 'comp_perf_test_common.c') deps = ['compressdev']