test/compress: add cycle-count mode to perf tool
author Artur Trybula <arturx.trybula@intel.com>
Wed, 11 Dec 2019 15:50:00 +0000 (16:50 +0100)
committer Akhil Goyal <akhil.goyal@nxp.com>
Wed, 5 Feb 2020 14:20:51 +0000 (15:20 +0100)
This commit adds cycle-count mode to the compression perf tool.
The new mode enhances the compression performance tool to allow
cycle-count measurement of both hardware and software PMDs.

Signed-off-by: Artur Trybula <arturx.trybula@intel.com>
Acked-by: Fiona Trahe <fiona.trahe@intel.com>
15 files changed:
app/test-compress-perf/Makefile
app/test-compress-perf/comp_perf.h
app/test-compress-perf/comp_perf_options.h
app/test-compress-perf/comp_perf_options_parse.c
app/test-compress-perf/comp_perf_test_benchmark.c [deleted file]
app/test-compress-perf/comp_perf_test_benchmark.h [deleted file]
app/test-compress-perf/comp_perf_test_common.c
app/test-compress-perf/comp_perf_test_common.h
app/test-compress-perf/comp_perf_test_cyclecount.c [new file with mode: 0644]
app/test-compress-perf/comp_perf_test_cyclecount.h [new file with mode: 0644]
app/test-compress-perf/comp_perf_test_throughput.c [new file with mode: 0644]
app/test-compress-perf/comp_perf_test_throughput.h [new file with mode: 0644]
app/test-compress-perf/comp_perf_test_verify.c
app/test-compress-perf/main.c
app/test-compress-perf/meson.build

index d1a6820..ad3b91d 100644 (file)
@@ -13,7 +13,8 @@ CFLAGS += -O3
 SRCS-y := main.c
 SRCS-y += comp_perf_options_parse.c
 SRCS-y += comp_perf_test_verify.c
-SRCS-y += comp_perf_test_benchmark.c
+SRCS-y += comp_perf_test_throughput.c
+SRCS-y += comp_perf_test_cyclecount.c
 SRCS-y += comp_perf_test_common.c
 
 include $(RTE_SDK)/mk/rte.app.mk
index 57289b0..997d46b 100644 (file)
@@ -26,15 +26,15 @@ struct cperf_test {
 /* Needed for weak functions*/
 
 void *
-cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
+cperf_throughput_test_constructor(uint8_t dev_id __rte_unused,
                                 uint16_t qp_id __rte_unused,
                                 struct comp_test_data *options __rte_unused);
 
 void
-cperf_benchmark_test_destructor(void *arg __rte_unused);
+cperf_throughput_test_destructor(void *arg __rte_unused);
 
 int
-cperf_benchmark_test_runner(void *test_ctx __rte_unused);
+cperf_throughput_test_runner(void *test_ctx __rte_unused);
 
 void *
 cperf_verify_test_constructor(uint8_t dev_id __rte_unused,
index 2c26511..0b77752 100644 (file)
@@ -24,8 +24,9 @@ enum cleanup_st {
 };
 
 enum cperf_test_type {
-       CPERF_TEST_TYPE_BENCHMARK,
-       CPERF_TEST_TYPE_VERIFY
+       CPERF_TEST_TYPE_THROUGHPUT,
+       CPERF_TEST_TYPE_VERIFY,
+       CPERF_TEST_TYPE_PMDCC
 };
 
 enum comp_operation {
@@ -68,6 +69,8 @@ struct comp_test_data {
        double ratio;
        enum cleanup_st cleanup;
        int perf_comp_force_stop;
+
+       uint32_t cyclecount_delay;
 };
 
 int
index 12d0a6c..04a8d2f 100644 (file)
@@ -30,6 +30,9 @@
 #define CPERF_WINDOW_SIZE      ("window-sz")
 #define CPERF_EXTERNAL_MBUFS   ("external-mbufs")
 
+/* cyclecount-specific options */
+#define CPERF_CYCLECOUNT_DELAY_US ("cc-delay-us")
+
 struct name_id_map {
        const char *name;
        uint32_t id;
@@ -39,7 +42,7 @@ static void
 usage(char *progname)
 {
        printf("%s [EAL options] --\n"
-               " --ptest benchmark / verify :"
+               " --ptest throughput / verify / pmd-cyclecount\n"
                " --driver-name NAME: compress driver to use\n"
                " --input-file NAME: file to compress and decompress\n"
                " --extended-input-sz N: extend file data up to this size (default: no extension)\n"
@@ -61,6 +64,8 @@ usage(char *progname)
                "               (e.g.: 15 => 32k, default: max supported by PMD)\n"
                " --external-mbufs: use memzones as external buffers instead of\n"
                "               keeping the data directly in mbuf area\n"
+               " --cc-delay-us N: delay between enqueue and dequeue operations in microseconds\n"
+               "               valid only for cyclecount perf test (default: 500 us)\n"
                " -h: prints this help\n",
                progname);
 }
@@ -85,12 +90,16 @@ parse_cperf_test_type(struct comp_test_data *test_data, const char *arg)
 {
        struct name_id_map cperftest_namemap[] = {
                {
-                       comp_perf_test_type_strs[CPERF_TEST_TYPE_BENCHMARK],
-                       CPERF_TEST_TYPE_BENCHMARK
+                       comp_perf_test_type_strs[CPERF_TEST_TYPE_THROUGHPUT],
+                       CPERF_TEST_TYPE_THROUGHPUT
                },
                {
                        comp_perf_test_type_strs[CPERF_TEST_TYPE_VERIFY],
                        CPERF_TEST_TYPE_VERIFY
+               },
+               {
+                       comp_perf_test_type_strs[CPERF_TEST_TYPE_PMDCC],
+                       CPERF_TEST_TYPE_PMDCC
                }
        };
 
@@ -531,17 +540,28 @@ parse_external_mbufs(struct comp_test_data *test_data,
        return 0;
 }
 
+static int
+parse_cyclecount_delay_us(struct comp_test_data *test_data,
+                       const char *arg)
+{
+       int ret = parse_uint32_t(&(test_data->cyclecount_delay), arg);
+
+       if (ret) {
+               RTE_LOG(ERR, USER1, "Failed to parse cyclecount delay\n");
+               return -1;
+       }
+       return 0;
+}
+
 typedef int (*option_parser_t)(struct comp_test_data *test_data,
                const char *arg);
 
 struct long_opt_parser {
        const char *lgopt_name;
        option_parser_t parser_fn;
-
 };
 
 static struct option lgopts[] = {
-
        { CPERF_PTEST_TYPE, required_argument, 0, 0 },
        { CPERF_DRIVER_NAME, required_argument, 0, 0 },
        { CPERF_TEST_FILE, required_argument, 0, 0 },
@@ -556,6 +576,7 @@ static struct option lgopts[] = {
        { CPERF_LEVEL, required_argument, 0, 0 },
        { CPERF_WINDOW_SIZE, required_argument, 0, 0 },
        { CPERF_EXTERNAL_MBUFS, 0, 0, 0 },
+       { CPERF_CYCLECOUNT_DELAY_US, required_argument, 0, 0 },
        { NULL, 0, 0, 0 }
 };
 
@@ -577,6 +598,7 @@ comp_perf_opts_parse_long(int opt_idx, struct comp_test_data *test_data)
                { CPERF_LEVEL,          parse_level },
                { CPERF_WINDOW_SIZE,    parse_window_sz },
                { CPERF_EXTERNAL_MBUFS, parse_external_mbufs },
+               { CPERF_CYCLECOUNT_DELAY_US,    parse_cyclecount_delay_us },
        };
        unsigned int i;
 
@@ -631,8 +653,9 @@ comp_perf_options_default(struct comp_test_data *test_data)
        test_data->level_lst.min = RTE_COMP_LEVEL_MIN;
        test_data->level_lst.max = RTE_COMP_LEVEL_MAX;
        test_data->level_lst.inc = 1;
-       test_data->test = CPERF_TEST_TYPE_BENCHMARK;
+       test_data->test = CPERF_TEST_TYPE_THROUGHPUT;
        test_data->use_external_mbufs = 0;
+       test_data->cyclecount_delay = 500;
 }
 
 int
diff --git a/app/test-compress-perf/comp_perf_test_benchmark.c b/app/test-compress-perf/comp_perf_test_benchmark.c
deleted file mode 100644 (file)
index 0c6bb9b..0000000
+++ /dev/null
@@ -1,408 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2018 Intel Corporation
- */
-
-#include <rte_malloc.h>
-#include <rte_eal.h>
-#include <rte_log.h>
-#include <rte_cycles.h>
-#include <rte_compressdev.h>
-
-#include "comp_perf_test_benchmark.h"
-
-void
-cperf_benchmark_test_destructor(void *arg)
-{
-       if (arg) {
-               comp_perf_free_memory(
-                       ((struct cperf_benchmark_ctx *)arg)->ver.options,
-                       &((struct cperf_benchmark_ctx *)arg)->ver.mem);
-               rte_free(arg);
-       }
-}
-
-void *
-cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id,
-               struct comp_test_data *options)
-{
-       struct cperf_benchmark_ctx *ctx = NULL;
-
-       ctx = rte_malloc(NULL, sizeof(struct cperf_benchmark_ctx), 0);
-
-       if (ctx == NULL)
-               return NULL;
-
-       ctx->ver.mem.dev_id = dev_id;
-       ctx->ver.mem.qp_id = qp_id;
-       ctx->ver.options = options;
-       ctx->ver.silent = 1; /* ver. part will be silent */
-
-       if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
-                       && !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
-               return ctx;
-
-       cperf_benchmark_test_destructor(ctx);
-       return NULL;
-}
-
-static int
-main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type)
-{
-       struct comp_test_data *test_data = ctx->ver.options;
-       struct cperf_mem_resources *mem = &ctx->ver.mem;
-       uint8_t dev_id = mem->dev_id;
-       uint32_t i, iter, num_iter;
-       struct rte_comp_op **ops, **deq_ops;
-       void *priv_xform = NULL;
-       struct rte_comp_xform xform;
-       struct rte_mbuf **input_bufs, **output_bufs;
-       int res = 0;
-       int allocated = 0;
-       uint32_t out_seg_sz;
-
-       if (test_data == NULL || !test_data->burst_sz) {
-               RTE_LOG(ERR, USER1,
-                       "Unknown burst size\n");
-               return -1;
-       }
-
-       ops = rte_zmalloc_socket(NULL,
-               2 * mem->total_bufs * sizeof(struct rte_comp_op *),
-               0, rte_socket_id());
-
-       if (ops == NULL) {
-               RTE_LOG(ERR, USER1,
-                       "Can't allocate memory for ops strucures\n");
-               return -1;
-       }
-
-       deq_ops = &ops[mem->total_bufs];
-
-       if (type == RTE_COMP_COMPRESS) {
-               xform = (struct rte_comp_xform) {
-                       .type = RTE_COMP_COMPRESS,
-                       .compress = {
-                               .algo = RTE_COMP_ALGO_DEFLATE,
-                               .deflate.huffman = test_data->huffman_enc,
-                               .level = test_data->level,
-                               .window_size = test_data->window_sz,
-                               .chksum = RTE_COMP_CHECKSUM_NONE,
-                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
-                       }
-               };
-               input_bufs = mem->decomp_bufs;
-               output_bufs = mem->comp_bufs;
-               out_seg_sz = test_data->out_seg_sz;
-       } else {
-               xform = (struct rte_comp_xform) {
-                       .type = RTE_COMP_DECOMPRESS,
-                       .decompress = {
-                               .algo = RTE_COMP_ALGO_DEFLATE,
-                               .chksum = RTE_COMP_CHECKSUM_NONE,
-                               .window_size = test_data->window_sz,
-                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
-                       }
-               };
-               input_bufs = mem->comp_bufs;
-               output_bufs = mem->decomp_bufs;
-               out_seg_sz = test_data->seg_sz;
-       }
-
-       /* Create private xform */
-       if (rte_compressdev_private_xform_create(dev_id, &xform,
-                       &priv_xform) < 0) {
-               RTE_LOG(ERR, USER1, "Private xform could not be created\n");
-               res = -1;
-               goto end;
-       }
-
-       uint64_t tsc_start, tsc_end, tsc_duration;
-
-       num_iter = test_data->num_iter;
-       tsc_start = tsc_end = tsc_duration = 0;
-       tsc_start = rte_rdtsc_precise();
-
-       for (iter = 0; iter < num_iter; iter++) {
-               uint32_t total_ops = mem->total_bufs;
-               uint32_t remaining_ops = mem->total_bufs;
-               uint32_t total_deq_ops = 0;
-               uint32_t total_enq_ops = 0;
-               uint16_t ops_unused = 0;
-               uint16_t num_enq = 0;
-               uint16_t num_deq = 0;
-
-               while (remaining_ops > 0) {
-                       uint16_t num_ops = RTE_MIN(remaining_ops,
-                                                  test_data->burst_sz);
-                       uint16_t ops_needed = num_ops - ops_unused;
-
-                       /*
-                        * Move the unused operations from the previous
-                        * enqueue_burst call to the front, to maintain order
-                        */
-                       if ((ops_unused > 0) && (num_enq > 0)) {
-                               size_t nb_b_to_mov =
-                                     ops_unused * sizeof(struct rte_comp_op *);
-
-                               memmove(ops, &ops[num_enq], nb_b_to_mov);
-                       }
-
-                       /* Allocate compression operations */
-                       if (ops_needed && !rte_comp_op_bulk_alloc(
-                                               mem->op_pool,
-                                               &ops[ops_unused],
-                                               ops_needed)) {
-                               RTE_LOG(ERR, USER1,
-                                     "Could not allocate enough operations\n");
-                               res = -1;
-                               goto end;
-                       }
-                       allocated += ops_needed;
-
-                       for (i = 0; i < ops_needed; i++) {
-                               /*
-                                * Calculate next buffer to attach to operation
-                                */
-                               uint32_t buf_id = total_enq_ops + i +
-                                               ops_unused;
-                               uint16_t op_id = ops_unused + i;
-                               /* Reset all data in output buffers */
-                               struct rte_mbuf *m = output_bufs[buf_id];
-
-                               m->pkt_len = out_seg_sz * m->nb_segs;
-                               while (m) {
-                                       m->data_len = m->buf_len - m->data_off;
-                                       m = m->next;
-                               }
-                               ops[op_id]->m_src = input_bufs[buf_id];
-                               ops[op_id]->m_dst = output_bufs[buf_id];
-                               ops[op_id]->src.offset = 0;
-                               ops[op_id]->src.length =
-                                       rte_pktmbuf_pkt_len(input_bufs[buf_id]);
-                               ops[op_id]->dst.offset = 0;
-                               ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
-                               ops[op_id]->input_chksum = buf_id;
-                               ops[op_id]->private_xform = priv_xform;
-                       }
-
-                       if (unlikely(test_data->perf_comp_force_stop))
-                               goto end;
-
-                       num_enq = rte_compressdev_enqueue_burst(dev_id,
-                                                               mem->qp_id, ops,
-                                                               num_ops);
-                       if (num_enq == 0) {
-                               struct rte_compressdev_stats stats;
-
-                               rte_compressdev_stats_get(dev_id, &stats);
-                               if (stats.enqueue_err_count) {
-                                       res = -1;
-                                       goto end;
-                               }
-                       }
-
-                       ops_unused = num_ops - num_enq;
-                       remaining_ops -= num_enq;
-                       total_enq_ops += num_enq;
-
-                       num_deq = rte_compressdev_dequeue_burst(dev_id,
-                                                          mem->qp_id,
-                                                          deq_ops,
-                                                          test_data->burst_sz);
-                       total_deq_ops += num_deq;
-
-                       if (iter == num_iter - 1) {
-                               for (i = 0; i < num_deq; i++) {
-                                       struct rte_comp_op *op = deq_ops[i];
-
-                                       if (op->status !=
-                                               RTE_COMP_OP_STATUS_SUCCESS) {
-                                               RTE_LOG(ERR, USER1,
-                                      "Some operations were not successful\n");
-                                               goto end;
-                                       }
-
-                                       struct rte_mbuf *m = op->m_dst;
-
-                                       m->pkt_len = op->produced;
-                                       uint32_t remaining_data = op->produced;
-                                       uint16_t data_to_append;
-
-                                       while (remaining_data > 0) {
-                                               data_to_append =
-                                                       RTE_MIN(remaining_data,
-                                                            out_seg_sz);
-                                               m->data_len = data_to_append;
-                                               remaining_data -=
-                                                               data_to_append;
-                                               m = m->next;
-                                       }
-                               }
-                       }
-                       rte_mempool_put_bulk(mem->op_pool,
-                                            (void **)deq_ops, num_deq);
-                       allocated -= num_deq;
-               }
-
-               /* Dequeue the last operations */
-               while (total_deq_ops < total_ops) {
-                       if (unlikely(test_data->perf_comp_force_stop))
-                               goto end;
-
-                       num_deq = rte_compressdev_dequeue_burst(dev_id,
-                                                          mem->qp_id,
-                                                          deq_ops,
-                                                          test_data->burst_sz);
-                       if (num_deq == 0) {
-                               struct rte_compressdev_stats stats;
-
-                               rte_compressdev_stats_get(dev_id, &stats);
-                               if (stats.dequeue_err_count) {
-                                       res = -1;
-                                       goto end;
-                               }
-                       }
-
-                       total_deq_ops += num_deq;
-
-                       if (iter == num_iter - 1) {
-                               for (i = 0; i < num_deq; i++) {
-                                       struct rte_comp_op *op = deq_ops[i];
-
-                                       if (op->status !=
-                                               RTE_COMP_OP_STATUS_SUCCESS) {
-                                               RTE_LOG(ERR, USER1,
-                                      "Some operations were not successful\n");
-                                               goto end;
-                                       }
-
-                                       struct rte_mbuf *m = op->m_dst;
-
-                                       m->pkt_len = op->produced;
-                                       uint32_t remaining_data = op->produced;
-                                       uint16_t data_to_append;
-
-                                       while (remaining_data > 0) {
-                                               data_to_append =
-                                               RTE_MIN(remaining_data,
-                                                       out_seg_sz);
-                                               m->data_len = data_to_append;
-                                               remaining_data -=
-                                                               data_to_append;
-                                               m = m->next;
-                                       }
-                               }
-                       }
-                       rte_mempool_put_bulk(mem->op_pool,
-                                            (void **)deq_ops, num_deq);
-                       allocated -= num_deq;
-               }
-       }
-
-       tsc_end = rte_rdtsc_precise();
-       tsc_duration = tsc_end - tsc_start;
-
-       if (type == RTE_COMP_COMPRESS)
-               ctx->comp_tsc_duration[test_data->level] =
-                               tsc_duration / num_iter;
-       else
-               ctx->decomp_tsc_duration[test_data->level] =
-                               tsc_duration / num_iter;
-
-end:
-       rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
-       rte_compressdev_private_xform_free(dev_id, priv_xform);
-       rte_free(ops);
-
-       if (test_data->perf_comp_force_stop) {
-               RTE_LOG(ERR, USER1,
-                     "lcore: %d Perf. test has been aborted by user\n",
-                       mem->lcore_id);
-               res = -1;
-       }
-       return res;
-}
-
-int
-cperf_benchmark_test_runner(void *test_ctx)
-{
-       struct cperf_benchmark_ctx *ctx = test_ctx;
-       struct comp_test_data *test_data = ctx->ver.options;
-       uint32_t lcore = rte_lcore_id();
-       static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
-       int i, ret = EXIT_SUCCESS;
-
-       ctx->ver.mem.lcore_id = lcore;
-
-       /*
-        * printing information about current compression thread
-        */
-       if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
-               printf("    lcore: %u,"
-                               " driver name: %s,"
-                               " device name: %s,"
-                               " device id: %u,"
-                               " socket id: %u,"
-                               " queue pair id: %u\n",
-                       lcore,
-                       ctx->ver.options->driver_name,
-                       rte_compressdev_name_get(ctx->ver.mem.dev_id),
-                       ctx->ver.mem.dev_id,
-                       rte_compressdev_socket_id(ctx->ver.mem.dev_id),
-                       ctx->ver.mem.qp_id);
-
-       /*
-        * First the verification part is needed
-        */
-       if (cperf_verify_test_runner(&ctx->ver)) {
-               ret =  EXIT_FAILURE;
-               goto end;
-       }
-
-       /*
-        * Run the tests twice, discarding the first performance
-        * results, before the cache is warmed up
-        */
-       for (i = 0; i < 2; i++) {
-               if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) {
-                       ret = EXIT_FAILURE;
-                       goto end;
-               }
-       }
-
-       for (i = 0; i < 2; i++) {
-               if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) {
-                       ret = EXIT_FAILURE;
-                       goto end;
-               }
-       }
-
-       ctx->comp_tsc_byte =
-                       (double)(ctx->comp_tsc_duration[test_data->level]) /
-                                       test_data->input_data_sz;
-
-       ctx->decomp_tsc_byte =
-                       (double)(ctx->decomp_tsc_duration[test_data->level]) /
-                                       test_data->input_data_sz;
-
-       ctx->comp_gbps = rte_get_tsc_hz() / ctx->comp_tsc_byte * 8 /
-                       1000000000;
-
-       ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 /
-                       1000000000;
-
-       if (rte_atomic16_test_and_set(&display_once)) {
-               printf("\n%12s%6s%12s%17s%15s%16s\n",
-                       "lcore id", "Level", "Comp size", "Comp ratio [%]",
-                       "Comp [Gbps]", "Decomp [Gbps]");
-       }
-
-       printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n",
-               ctx->ver.mem.lcore_id,
-               test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio,
-               ctx->comp_gbps,
-               ctx->decomp_gbps);
-
-end:
-       return ret;
-}
diff --git a/app/test-compress-perf/comp_perf_test_benchmark.h b/app/test-compress-perf/comp_perf_test_benchmark.h
deleted file mode 100644 (file)
index d9b2694..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2018 Intel Corporation
- */
-
-#ifndef _COMP_PERF_TEST_BENCHMARK_
-#define _COMP_PERF_TEST_BENCHMARK_
-
-#include <stdint.h>
-
-#include "comp_perf_options.h"
-#include "comp_perf_test_common.h"
-#include "comp_perf_test_verify.h"
-
-struct cperf_benchmark_ctx {
-       struct cperf_verify_ctx ver;
-
-       /* Store TSC duration for all levels (including level 0) */
-       uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
-       uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
-       double comp_gbps;
-       double decomp_gbps;
-       double comp_tsc_byte;
-       double decomp_tsc_byte;
-};
-
-void
-cperf_benchmark_test_destructor(void *arg);
-
-int
-cperf_benchmark_test_runner(void *test_ctx);
-
-void *
-cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id,
-               struct comp_test_data *options);
-
-#endif
index 1b8985b..b402a0d 100644 (file)
@@ -9,7 +9,8 @@
 
 #include "comp_perf.h"
 #include "comp_perf_options.h"
-#include "comp_perf_test_benchmark.h"
+#include "comp_perf_test_throughput.h"
+#include "comp_perf_test_cyclecount.h"
 #include "comp_perf_test_common.h"
 #include "comp_perf_test_verify.h"
 
@@ -276,9 +277,11 @@ comp_perf_allocate_memory(struct comp_test_data *test_data,
 
        snprintf(pool_name, sizeof(pool_name), "op_pool_%u_qp_%u",
                        mem->dev_id, mem->qp_id);
+
+       /* one mempool for both src and dst mbufs */
        mem->op_pool = rte_comp_op_pool_create(pool_name,
-                                 mem->total_bufs,
-                                 0, 0, rte_socket_id());
+                               mem->total_bufs * 2,
+                               0, 0, rte_socket_id());
        if (mem->op_pool == NULL) {
                RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
                return -1;
@@ -495,20 +498,24 @@ prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem)
 }
 
 void
-print_test_dynamics(void)
+print_test_dynamics(const struct comp_test_data *test_data)
 {
        uint32_t opt_total_segs = DIV_CEIL(buffer_info.input_data_sz,
                        MAX_SEG_SIZE);
 
        if (buffer_info.total_buffs > 1) {
-               printf("\nWarning: for the current input parameters, number"
+               if (test_data->test == CPERF_TEST_TYPE_THROUGHPUT) {
+                       printf("\nWarning: for the current input parameters, number"
                                " of ops is higher than one, which may result"
                                " in sub-optimal performance.\n");
-               printf("To improve the performance (for the current"
+                       printf("To improve the performance (for the current"
                                " input data) following parameters are"
                                " suggested:\n");
-               printf("        * Segment size: %d\n", MAX_SEG_SIZE);
-               printf("        * Number of segments: %u\n", opt_total_segs);
+                       printf("        * Segment size: %d\n",
+                              MAX_SEG_SIZE);
+                       printf("        * Number of segments: %u\n",
+                              opt_total_segs);
+               }
        } else if (buffer_info.total_buffs == 1) {
                printf("\nInfo: there is only one op with %u segments -"
                                " the compression ratio is the best.\n",
index 9206428..72705c6 100644 (file)
@@ -49,6 +49,6 @@ int
 prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem);
 
 void
-print_test_dynamics(void);
+print_test_dynamics(const struct comp_test_data *test_data);
 
 #endif /* _COMP_PERF_TEST_COMMON_H_ */
diff --git a/app/test-compress-perf/comp_perf_test_cyclecount.c b/app/test-compress-perf/comp_perf_test_cyclecount.c
new file mode 100644 (file)
index 0000000..55559a7
--- /dev/null
@@ -0,0 +1,614 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include "rte_spinlock.h"
+#include <rte_compressdev.h>
+
+#include "comp_perf_test_cyclecount.h"
+
+/*
+ * Context of one cycle-count test instance. It embeds the verify-test
+ * context (cperf_cyclecount_test_runner() runs the verification first via
+ * &ctx->ver) and adds the counters that main_loop() fills in for a single
+ * direction (compression or decompression).
+ */
+struct cperf_cyclecount_ctx {
+       struct cperf_verify_ctx ver;
+
+       /* enqueue bursts that accepted fewer ops than were offered */
+       uint32_t ops_enq_retries;
+       /*
+        * dequeue bursts that returned fewer ops than were outstanding, plus
+        * every dequeue call issued while draining the last operations
+        */
+       uint32_t ops_deq_retries;
+
+       /* TSC cycles measured around cperf_cyclecount_op_setup() */
+       uint64_t duration_op;
+       /* TSC cycles accumulated around rte_compressdev_enqueue_burst() */
+       uint64_t duration_enq;
+       /* TSC cycles accumulated around rte_compressdev_dequeue_burst() */
+       uint64_t duration_deq;
+};
+
+/*
+ * Release everything owned by a cycle-count test context: the buffers and
+ * pools tracked in ver.mem, then the context itself. A NULL argument is
+ * accepted and ignored.
+ */
+void
+cperf_cyclecount_test_destructor(void *arg)
+{
+       struct cperf_cyclecount_ctx *cc_ctx = arg;
+
+       if (cc_ctx == NULL)
+               return;
+
+       comp_perf_free_memory(cc_ctx->ver.options, &cc_ctx->ver.mem);
+       rte_free(cc_ctx);
+}
+
+/*
+ * Allocate a cycle-count test context for the given device / queue pair and
+ * prepare its memory resources and buffers.
+ *
+ * Returns the new context (to be released with
+ * cperf_cyclecount_test_destructor()) or NULL on any failure.
+ */
+void *
+cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
+               struct comp_test_data *options)
+{
+       struct cperf_cyclecount_ctx *ctx;
+
+       /*
+        * Zeroed allocation: on failure below the context is handed to the
+        * destructor, which passes ver.mem to comp_perf_free_memory(), and
+        * the runner later test-and-sets ver.mem.print_info_once. Zeroing
+        * guarantees that every field not explicitly filled in here holds a
+        * well-defined 0/NULL instead of heap garbage.
+        */
+       ctx = rte_zmalloc(NULL, sizeof(*ctx), 0);
+       if (ctx == NULL)
+               return NULL;
+
+       ctx->ver.mem.dev_id = dev_id;
+       ctx->ver.mem.qp_id = qp_id;
+       ctx->ver.options = options;
+       ctx->ver.silent = 1; /* ver. part will be silent */
+
+       if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
+                       && !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
+               return ctx;
+
+       cperf_cyclecount_test_destructor(ctx);
+       return NULL;
+}
+
+/*
+ * Dry run of the per-op bookkeeping done by main_loop(): for num_iter
+ * iterations it obtains ops from the op pool in bursts, resets the output
+ * mbufs, fills in the op fields and returns the ops to the pool. The real
+ * enqueue/dequeue calls are replaced by the assumption that every op of a
+ * burst was enqueued and dequeued. main_loop() measures the TSC cycles spent
+ * here and reports them as the "setup" cost.
+ *
+ * The 'ops' array is owned by the caller and is never freed here.
+ *
+ * Returns 0 on success, -1 if ops could not be obtained from the pool.
+ */
+static int
+cperf_cyclecount_op_setup(struct rte_comp_op **ops,
+                                struct cperf_cyclecount_ctx *ctx,
+                                struct rte_mbuf **input_bufs,
+                                struct rte_mbuf **output_bufs,
+                                void *priv_xform,
+                                uint32_t out_seg_sz)
+{
+       struct comp_test_data *test_data = ctx->ver.options;
+       struct cperf_mem_resources *mem = &ctx->ver.mem;
+
+       uint32_t i, iter, num_iter;
+       uint16_t ops_needed;
+
+       num_iter = test_data->num_iter;
+
+       for (iter = 0; iter < num_iter; iter++) {
+               uint32_t remaining_ops = mem->total_bufs;
+               uint32_t total_deq_ops = 0;
+               uint32_t total_enq_ops = 0;
+               uint16_t num_enq = 0;
+               uint16_t num_deq = 0;
+
+               while (remaining_ops > 0) {
+                       uint16_t num_ops = RTE_MIN(remaining_ops,
+                                                  test_data->burst_sz);
+                       ops_needed = num_ops;
+
+                       /* Allocate compression operations */
+                       if (ops_needed && rte_mempool_get_bulk(
+                                               mem->op_pool,
+                                               (void **)ops,
+                                               ops_needed) != 0) {
+                               RTE_LOG(ERR, USER1,
+                                     "Cyclecount: could not allocate enough operations\n");
+                               /*
+                                * Nothing was obtained by the failed bulk
+                                * get (every earlier burst has already been
+                                * returned), so there is nothing to put back,
+                                * and 'ops' is released by the caller.
+                                */
+                               return -1;
+                       }
+
+                       for (i = 0; i < ops_needed; i++) {
+
+                               /* Calculate next buffer to attach */
+                               /* to operation */
+                               uint32_t buf_id = total_enq_ops + i;
+                               uint16_t op_id = i;
+
+                               /* Reset all data in output buffers */
+                               struct rte_mbuf *m = output_bufs[buf_id];
+
+                               m->pkt_len = out_seg_sz * m->nb_segs;
+                               while (m) {
+                                       m->data_len = m->buf_len - m->data_off;
+                                       m = m->next;
+                               }
+                               ops[op_id]->m_src = input_bufs[buf_id];
+                               ops[op_id]->m_dst = output_bufs[buf_id];
+                               ops[op_id]->src.offset = 0;
+                               ops[op_id]->src.length =
+                                       rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+                               ops[op_id]->dst.offset = 0;
+                               ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+                               ops[op_id]->input_chksum = buf_id;
+                               ops[op_id]->private_xform = priv_xform;
+                       }
+
+                       /* E N Q U E U I N G */
+                       /* assuming that all ops are enqueued */
+                       /* instead of the real enqueue operation */
+                       num_enq = num_ops;
+
+                       remaining_ops -= num_enq;
+                       total_enq_ops += num_enq;
+
+                       /* D E Q U E U I N G */
+                       /* assuming that all ops dequeued */
+                       /* instead of the real dequeue operation */
+                       num_deq = num_ops;
+
+                       total_deq_ops += num_deq;
+                       rte_mempool_put_bulk(mem->op_pool,
+                                            (void **)ops, num_deq);
+               }
+       }
+       return 0;
+}
+
+/*
+ * Run one timed pass (compression or decompression, selected by 'type') over
+ * all buffers for test_data->num_iter iterations.
+ *
+ * Fills in ctx->duration_op (cycles spent in cperf_cyclecount_op_setup()),
+ * ctx->duration_enq / ctx->duration_deq (cycles accumulated around the
+ * enqueue / dequeue burst calls) and the enqueue / dequeue retry counters.
+ *
+ * Returns 0 on success, -1 on any failure or when the test was aborted
+ * through test_data->perf_comp_force_stop.
+ */
+static int
+main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type)
+{
+       struct comp_test_data *test_data = ctx->ver.options;
+       struct cperf_mem_resources *mem = &ctx->ver.mem;
+       uint8_t dev_id = mem->dev_id;
+       uint32_t i, iter, num_iter;
+       struct rte_comp_op **ops, **deq_ops;
+       void *priv_xform = NULL;
+       struct rte_comp_xform xform;
+       struct rte_mbuf **input_bufs, **output_bufs;
+       int ret, res = 0;
+       int allocated = 0;
+       uint32_t out_seg_sz;
+
+       uint64_t tsc_start, tsc_end, tsc_duration;
+
+       if (test_data == NULL || !test_data->burst_sz) {
+               RTE_LOG(ERR, USER1, "Unknown burst size\n");
+               return -1;
+       }
+       ctx->duration_enq = 0;
+       ctx->duration_deq = 0;
+       ctx->ops_enq_retries = 0;
+       ctx->ops_deq_retries = 0;
+
+       /* one array for both enqueue and dequeue */
+       ops = rte_zmalloc_socket(NULL,
+               2 * mem->total_bufs * sizeof(struct rte_comp_op *),
+               0, rte_socket_id());
+
+       if (ops == NULL) {
+               RTE_LOG(ERR, USER1,
+                       "Can't allocate memory for ops strucures\n");
+               return -1;
+       }
+
+       /* second half of the array receives the dequeued ops */
+       deq_ops = &ops[mem->total_bufs];
+
+       if (type == RTE_COMP_COMPRESS) {
+               xform = (struct rte_comp_xform) {
+                       .type = RTE_COMP_COMPRESS,
+                       .compress = {
+                               .algo = RTE_COMP_ALGO_DEFLATE,
+                               .deflate.huffman = test_data->huffman_enc,
+                               .level = test_data->level,
+                               .window_size = test_data->window_sz,
+                               .chksum = RTE_COMP_CHECKSUM_NONE,
+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
+                       }
+               };
+               input_bufs = mem->decomp_bufs;
+               output_bufs = mem->comp_bufs;
+               out_seg_sz = test_data->out_seg_sz;
+       } else {
+               xform = (struct rte_comp_xform) {
+                       .type = RTE_COMP_DECOMPRESS,
+                       .decompress = {
+                               .algo = RTE_COMP_ALGO_DEFLATE,
+                               .chksum = RTE_COMP_CHECKSUM_NONE,
+                               .window_size = test_data->window_sz,
+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
+                       }
+               };
+               input_bufs = mem->comp_bufs;
+               output_bufs = mem->decomp_bufs;
+               out_seg_sz = test_data->seg_sz;
+       }
+
+       /* Create private xform */
+       if (rte_compressdev_private_xform_create(dev_id, &xform,
+                                               &priv_xform) < 0) {
+               RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+               res = -1;
+               goto end;
+       }
+
+       /* measure the cost of op allocation/configuration/release alone */
+       tsc_start = rte_rdtsc_precise();
+       ret = cperf_cyclecount_op_setup(ops,
+                               ctx,
+                               input_bufs,
+                               output_bufs,
+                               priv_xform,
+                               out_seg_sz);
+
+       tsc_end = rte_rdtsc_precise();
+
+       /* ret value check postponed a bit to cancel extra 'if' bias */
+       if (ret < 0) {
+               RTE_LOG(ERR, USER1, "Setup function failed\n");
+               res = -1;
+               goto end;
+       }
+
+       tsc_duration = tsc_end - tsc_start;
+       ctx->duration_op = tsc_duration;
+
+       num_iter = test_data->num_iter;
+       for (iter = 0; iter < num_iter; iter++) {
+               uint32_t total_ops = mem->total_bufs;
+               uint32_t remaining_ops = mem->total_bufs;
+               uint32_t total_deq_ops = 0;
+               uint32_t total_enq_ops = 0;
+               uint16_t ops_unused = 0;
+               uint16_t num_enq = 0;
+               uint16_t num_deq = 0;
+
+               while (remaining_ops > 0) {
+                       uint16_t num_ops = RTE_MIN(remaining_ops,
+                                                  test_data->burst_sz);
+                       uint16_t ops_needed = num_ops - ops_unused;
+
+                       /*
+                        * Move the unused operations from the previous
+                        * enqueue_burst call to the front, to maintain order
+                        */
+                       if ((ops_unused > 0) && (num_enq > 0)) {
+                               size_t nb_b_to_mov =
+                                     ops_unused * sizeof(struct rte_comp_op *);
+
+                               memmove(ops, &ops[num_enq], nb_b_to_mov);
+                       }
+
+                       /*
+                        * Allocate compression operations. The new ops go
+                        * after the 'ops_unused' entries kept at the front;
+                        * writing them at ops[0] would overwrite (and leak)
+                        * the ops that are still waiting to be enqueued.
+                        */
+                       if (ops_needed && rte_mempool_get_bulk(
+                                               mem->op_pool,
+                                               (void **)&ops[ops_unused],
+                                               ops_needed) != 0) {
+                               RTE_LOG(ERR, USER1,
+                                     "Could not allocate enough operations\n");
+                               res = -1;
+                               goto end;
+                       }
+                       allocated += ops_needed;
+
+                       for (i = 0; i < ops_needed; i++) {
+                               /*
+                                * Calculate next buffer to attach to operation
+                                */
+                               uint32_t buf_id = total_enq_ops + i +
+                                               ops_unused;
+                               uint16_t op_id = ops_unused + i;
+                               /* Reset all data in output buffers */
+                               struct rte_mbuf *m = output_bufs[buf_id];
+
+                               m->pkt_len = out_seg_sz * m->nb_segs;
+                               while (m) {
+                                       m->data_len = m->buf_len - m->data_off;
+                                       m = m->next;
+                               }
+                               ops[op_id]->m_src = input_bufs[buf_id];
+                               ops[op_id]->m_dst = output_bufs[buf_id];
+                               ops[op_id]->src.offset = 0;
+                               ops[op_id]->src.length =
+                                       rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+                               ops[op_id]->dst.offset = 0;
+                               ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+                               ops[op_id]->input_chksum = buf_id;
+                               ops[op_id]->private_xform = priv_xform;
+                       }
+
+                       if (unlikely(test_data->perf_comp_force_stop))
+                               goto end;
+
+                       /* only the burst call itself is inside the timed span */
+                       tsc_start = rte_rdtsc_precise();
+                       num_enq = rte_compressdev_enqueue_burst(dev_id,
+                                                               mem->qp_id, ops,
+                                                               num_ops);
+                       tsc_end = rte_rdtsc_precise();
+                       tsc_duration = tsc_end - tsc_start;
+                       ctx->duration_enq += tsc_duration;
+
+                       if (num_enq < num_ops)
+                               ctx->ops_enq_retries++;
+
+                       if (test_data->cyclecount_delay)
+                               rte_delay_us_block(test_data->cyclecount_delay);
+
+                       if (num_enq == 0) {
+                               struct rte_compressdev_stats stats;
+
+                               rte_compressdev_stats_get(dev_id, &stats);
+                               if (stats.enqueue_err_count) {
+                                       res = -1;
+                                       goto end;
+                               }
+                       }
+
+                       ops_unused = num_ops - num_enq;
+                       remaining_ops -= num_enq;
+                       total_enq_ops += num_enq;
+
+                       tsc_start = rte_rdtsc_precise();
+                       num_deq = rte_compressdev_dequeue_burst(dev_id,
+                                                          mem->qp_id,
+                                                          deq_ops,
+                                                          allocated);
+                       tsc_end = rte_rdtsc_precise();
+                       tsc_duration = tsc_end - tsc_start;
+                       ctx->duration_deq += tsc_duration;
+
+                       if (num_deq < allocated)
+                               ctx->ops_deq_retries++;
+
+                       total_deq_ops += num_deq;
+
+                       /* results are only post-processed on the last pass */
+                       if (iter == num_iter - 1) {
+                               for (i = 0; i < num_deq; i++) {
+                                       struct rte_comp_op *op = deq_ops[i];
+
+                                       if (op->status !=
+                                               RTE_COMP_OP_STATUS_SUCCESS) {
+                                               RTE_LOG(ERR, USER1, "Some operations were not successful\n");
+                                               /* report the failure to the caller */
+                                               res = -1;
+                                               goto end;
+                                       }
+
+                                       struct rte_mbuf *m = op->m_dst;
+
+                                       m->pkt_len = op->produced;
+                                       uint32_t remaining_data = op->produced;
+                                       uint16_t data_to_append;
+
+                                       while (remaining_data > 0) {
+                                               data_to_append =
+                                                       RTE_MIN(remaining_data,
+                                                            out_seg_sz);
+                                               m->data_len = data_to_append;
+                                               remaining_data -=
+                                                               data_to_append;
+                                               m = m->next;
+                                       }
+                               }
+                       }
+                       rte_mempool_put_bulk(mem->op_pool,
+                                            (void **)deq_ops, num_deq);
+                       allocated -= num_deq;
+               }
+
+               /* Dequeue the last operations */
+               while (total_deq_ops < total_ops) {
+                       if (unlikely(test_data->perf_comp_force_stop))
+                               goto end;
+
+                       tsc_start = rte_rdtsc_precise();
+                       num_deq = rte_compressdev_dequeue_burst(dev_id,
+                                               mem->qp_id,
+                                               deq_ops,
+                                               test_data->burst_sz);
+                       tsc_end = rte_rdtsc_precise();
+                       tsc_duration = tsc_end - tsc_start;
+                       ctx->duration_deq += tsc_duration;
+                       ctx->ops_deq_retries++;
+
+                       if (num_deq == 0) {
+                               struct rte_compressdev_stats stats;
+
+                               rte_compressdev_stats_get(dev_id, &stats);
+                               if (stats.dequeue_err_count) {
+                                       res = -1;
+                                       goto end;
+                               }
+                       }
+                       total_deq_ops += num_deq;
+
+                       if (iter == num_iter - 1) {
+                               for (i = 0; i < num_deq; i++) {
+                                       struct rte_comp_op *op = deq_ops[i];
+
+                                       if (op->status !=
+                                               RTE_COMP_OP_STATUS_SUCCESS) {
+                                               RTE_LOG(ERR, USER1, "Some operations were not successful\n");
+                                               /* report the failure to the caller */
+                                               res = -1;
+                                               goto end;
+                                       }
+
+                                       struct rte_mbuf *m = op->m_dst;
+
+                                       m->pkt_len = op->produced;
+                                       uint32_t remaining_data = op->produced;
+                                       uint16_t data_to_append;
+
+                                       while (remaining_data > 0) {
+                                               data_to_append =
+                                               RTE_MIN(remaining_data,
+                                                       out_seg_sz);
+                                               m->data_len = data_to_append;
+                                               remaining_data -=
+                                                               data_to_append;
+                                               m = m->next;
+                                       }
+                               }
+                       }
+                       rte_mempool_put_bulk(mem->op_pool,
+                                            (void **)deq_ops, num_deq);
+                       allocated -= num_deq;
+               }
+       }
+       allocated = 0;
+
+end:
+       if (allocated)
+               rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
+       rte_compressdev_private_xform_free(dev_id, priv_xform);
+       rte_free(ops);
+
+       if (test_data->perf_comp_force_stop) {
+               RTE_LOG(ERR, USER1,
+                     "lcore: %d Perf. test has been aborted by user\n",
+                       mem->lcore_id);
+               res = -1;
+       }
+       return res;
+}
+
+/*
+ * Entry point of the cycle-count test for one context.
+ *
+ * Runs the verification test first, then the compression and decompression
+ * main loops (each twice, keeping only the second, warmed-up result) and
+ * prints one result row. The legend and table header are printed once, by
+ * whichever caller gets to the report section first.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if verification or any main loop
+ * fails, or if there is nothing to measure (zero buffers or iterations).
+ */
+int
+cperf_cyclecount_test_runner(void *test_ctx)
+{
+       struct cperf_cyclecount_ctx *ctx = test_ctx;
+       struct comp_test_data *test_data = ctx->ver.options;
+       uint32_t lcore = rte_lcore_id();
+       static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+       static rte_spinlock_t print_spinlock;
+       uint64_t ops_total;
+       int i;
+
+       uint32_t ops_enq_retries_comp;
+       uint32_t ops_deq_retries_comp;
+
+       uint32_t ops_enq_retries_decomp;
+       uint32_t ops_deq_retries_decomp;
+
+       uint32_t duration_setup_per_op;
+
+       uint32_t duration_enq_per_op_comp;
+       uint32_t duration_deq_per_op_comp;
+
+       uint32_t duration_enq_per_op_decomp;
+       uint32_t duration_deq_per_op_decomp;
+
+       ctx->ver.mem.lcore_id = lcore;
+
+       /*
+        * printing information about current compression thread
+        */
+       if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
+               printf("    lcore: %u,"
+                               " driver name: %s,"
+                               " device name: %s,"
+                               " device id: %u,"
+                               " socket id: %u,"
+                               " queue pair id: %u\n",
+                       lcore,
+                       ctx->ver.options->driver_name,
+                       rte_compressdev_name_get(ctx->ver.mem.dev_id),
+                       ctx->ver.mem.dev_id,
+                       rte_compressdev_socket_id(ctx->ver.mem.dev_id),
+                       ctx->ver.mem.qp_id);
+
+       /*
+        * First the verification part is needed
+        */
+       if (cperf_verify_test_runner(&ctx->ver))
+               return EXIT_FAILURE;
+
+       /*
+        * Number of ops processed by one main_loop() call; every per-op
+        * figure below is divided by it. Computed in 64 bits so the product
+        * cannot wrap, and checked so the divisions cannot trap.
+        */
+       ops_total = (uint64_t)ctx->ver.mem.total_bufs * test_data->num_iter;
+       if (ops_total == 0) {
+               RTE_LOG(ERR, USER1,
+                       "Cyclecount: no operations to measure\n");
+               return EXIT_FAILURE;
+       }
+
+       /*
+        * Run the tests twice, discarding the first performance
+        * results, before the cache is warmed up
+        */
+
+       /* C O M P R E S S */
+       for (i = 0; i < 2; i++) {
+               if (main_loop(ctx, RTE_COMP_COMPRESS) < 0)
+                       return EXIT_FAILURE;
+       }
+
+       ops_enq_retries_comp = ctx->ops_enq_retries;
+       ops_deq_retries_comp = ctx->ops_deq_retries;
+
+       duration_enq_per_op_comp = ctx->duration_enq / ops_total;
+       duration_deq_per_op_comp = ctx->duration_deq / ops_total;
+
+       /* D E C O M P R E S S */
+       for (i = 0; i < 2; i++) {
+               if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0)
+                       return EXIT_FAILURE;
+       }
+
+       ops_enq_retries_decomp = ctx->ops_enq_retries;
+       ops_deq_retries_decomp = ctx->ops_deq_retries;
+
+       duration_enq_per_op_decomp = ctx->duration_enq / ops_total;
+       duration_deq_per_op_decomp = ctx->duration_deq / ops_total;
+
+       /* setup cost as measured by the last (decompression) main loop */
+       duration_setup_per_op = ctx->duration_op / ops_total;
+
+       /* R E P O R T processing */
+
+       /*
+        * The "print header once" decision is taken while holding the print
+        * lock, so no other caller can emit its result row between the
+        * decision and the header actually being printed.
+        */
+       rte_spinlock_lock(&print_spinlock);
+
+       if (rte_atomic16_test_and_set(&display_once)) {
+
+               printf("\nLegend for the table\n"
+               "  - Retries section: number of retries for the following operations:\n"
+               "    [C-e] - compression enqueue\n"
+               "    [C-d] - compression dequeue\n"
+               "    [D-e] - decompression enqueue\n"
+               "    [D-d] - decompression dequeue\n"
+               "  - Cycles section: number of cycles per 'op' for the following operations:\n"
+               "    setup/op - memory allocation, op configuration and memory dealocation\n"
+               "    [C-e] - compression enqueue\n"
+               "    [C-d] - compression dequeue\n"
+               "    [D-e] - decompression enqueue\n"
+               "    [D-d] - decompression dequeue\n\n");
+
+               printf("\n%12s%6s%12s%17s",
+                       "lcore id", "Level", "Comp size", "Comp ratio [%]");
+
+               printf("  |%10s %6s %8s %6s %8s",
+                       " Retries:",
+                       "[C-e]", "[C-d]",
+                       "[D-e]", "[D-d]");
+
+               printf("  |%9s %9s %9s %9s %9s %9s\n",
+                       " Cycles:",
+                       "setup/op",
+                       "[C-e]", "[C-d]",
+                       "[D-e]", "[D-d]");
+       }
+
+       printf("%12u"
+              "%6u"
+              "%12zu"
+              "%17.2f",
+               ctx->ver.mem.lcore_id,
+               test_data->level,
+               ctx->ver.comp_data_sz,
+               ctx->ver.ratio);
+
+       printf("  |%10s %6u %8u %6u %8u",
+              " ",
+               ops_enq_retries_comp,
+               ops_deq_retries_comp,
+               ops_enq_retries_decomp,
+               ops_deq_retries_decomp);
+
+       printf("  |%9s %9u %9u %9u %9u %9u\n",
+              " ",
+               duration_setup_per_op,
+               duration_enq_per_op_comp,
+               duration_deq_per_op_comp,
+               duration_enq_per_op_decomp,
+               duration_deq_per_op_decomp);
+
+       rte_spinlock_unlock(&print_spinlock);
+
+       return EXIT_SUCCESS;
+}
diff --git a/app/test-compress-perf/comp_perf_test_cyclecount.h b/app/test-compress-perf/comp_perf_test_cyclecount.h
new file mode 100644 (file)
index 0000000..8e1b4d9
--- /dev/null
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_CYCLECOUNT_
+#define _COMP_PERF_TEST_CYCLECOUNT_
+
+#include <stdint.h>
+
+#include "comp_perf_options.h"
+#include "comp_perf_test_common.h"
+#include "comp_perf_test_verify.h"
+
+/*
+ * Free a context created by cperf_cyclecount_test_constructor(), including
+ * the memory resources it holds. Passing NULL is a no-op.
+ */
+void
+cperf_cyclecount_test_destructor(void *arg);
+
+/*
+ * Run the cycle-count test on the given context: verification first, then
+ * the timed compression and decompression passes, then the result row.
+ * Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+int
+cperf_cyclecount_test_runner(void *test_ctx);
+
+/*
+ * Create a cycle-count test context bound to the given compress device and
+ * queue pair. Returns the context, or NULL if allocation or buffer
+ * preparation fails.
+ */
+void *
+cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
+               struct comp_test_data *options);
+
+#endif
diff --git a/app/test-compress-perf/comp_perf_test_throughput.c b/app/test-compress-perf/comp_perf_test_throughput.c
new file mode 100644 (file)
index 0000000..13922b6
--- /dev/null
@@ -0,0 +1,408 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_test_throughput.h"
+
+/*
+ * Release a throughput test context: first the memory resources tracked in
+ * ver.mem, then the context itself. A NULL argument is ignored.
+ */
+void
+cperf_throughput_test_destructor(void *arg)
+{
+       struct cperf_benchmark_ctx *bench_ctx = arg;
+
+       if (bench_ctx == NULL)
+               return;
+
+       comp_perf_free_memory(bench_ctx->ver.options, &bench_ctx->ver.mem);
+       rte_free(bench_ctx);
+}
+
+/*
+ * Allocate a throughput test context for the given device / queue pair and
+ * prepare its memory resources and buffers.
+ *
+ * Returns the new context (to be released with
+ * cperf_throughput_test_destructor()) or NULL on any failure.
+ */
+void *
+cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
+               struct comp_test_data *options)
+{
+       struct cperf_benchmark_ctx *ctx;
+
+       /*
+        * Zeroed allocation: on failure below the context is handed to the
+        * destructor, which passes ver.mem to comp_perf_free_memory().
+        * Zeroing guarantees that every field not explicitly filled in here
+        * holds a well-defined 0/NULL instead of heap garbage.
+        */
+       ctx = rte_zmalloc(NULL, sizeof(*ctx), 0);
+       if (ctx == NULL)
+               return NULL;
+
+       ctx->ver.mem.dev_id = dev_id;
+       ctx->ver.mem.qp_id = qp_id;
+       ctx->ver.options = options;
+       ctx->ver.silent = 1; /* ver. part will be silent */
+
+       if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
+                       && !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
+               return ctx;
+
+       cperf_throughput_test_destructor(ctx);
+       return NULL;
+}
+
+static int
+main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type)
+{
+       struct comp_test_data *test_data = ctx->ver.options;
+       struct cperf_mem_resources *mem = &ctx->ver.mem;
+       uint8_t dev_id = mem->dev_id;
+       uint32_t i, iter, num_iter;
+       struct rte_comp_op **ops, **deq_ops;
+       void *priv_xform = NULL;
+       struct rte_comp_xform xform;
+       struct rte_mbuf **input_bufs, **output_bufs;
+       int res = 0;
+       int allocated = 0;
+       uint32_t out_seg_sz;
+
+       if (test_data == NULL || !test_data->burst_sz) {
+               RTE_LOG(ERR, USER1,
+                       "Unknown burst size\n");
+               return -1;
+       }
+
+       ops = rte_zmalloc_socket(NULL,
+               2 * mem->total_bufs * sizeof(struct rte_comp_op *),
+               0, rte_socket_id());
+
+       if (ops == NULL) {
+               RTE_LOG(ERR, USER1,
+                       "Can't allocate memory for ops strucures\n");
+               return -1;
+       }
+
+       deq_ops = &ops[mem->total_bufs];
+
+       if (type == RTE_COMP_COMPRESS) {
+               xform = (struct rte_comp_xform) {
+                       .type = RTE_COMP_COMPRESS,
+                       .compress = {
+                               .algo = RTE_COMP_ALGO_DEFLATE,
+                               .deflate.huffman = test_data->huffman_enc,
+                               .level = test_data->level,
+                               .window_size = test_data->window_sz,
+                               .chksum = RTE_COMP_CHECKSUM_NONE,
+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
+                       }
+               };
+               input_bufs = mem->decomp_bufs;
+               output_bufs = mem->comp_bufs;
+               out_seg_sz = test_data->out_seg_sz;
+       } else {
+               xform = (struct rte_comp_xform) {
+                       .type = RTE_COMP_DECOMPRESS,
+                       .decompress = {
+                               .algo = RTE_COMP_ALGO_DEFLATE,
+                               .chksum = RTE_COMP_CHECKSUM_NONE,
+                               .window_size = test_data->window_sz,
+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
+                       }
+               };
+               input_bufs = mem->comp_bufs;
+               output_bufs = mem->decomp_bufs;
+               out_seg_sz = test_data->seg_sz;
+       }
+
+       /* Create private xform */
+       if (rte_compressdev_private_xform_create(dev_id, &xform,
+                       &priv_xform) < 0) {
+               RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+               res = -1;
+               goto end;
+       }
+
+       uint64_t tsc_start, tsc_end, tsc_duration;
+
+       num_iter = test_data->num_iter;
+       tsc_start = tsc_end = tsc_duration = 0;
+       tsc_start = rte_rdtsc_precise();
+
+       for (iter = 0; iter < num_iter; iter++) {
+               uint32_t total_ops = mem->total_bufs;
+               uint32_t remaining_ops = mem->total_bufs;
+               uint32_t total_deq_ops = 0;
+               uint32_t total_enq_ops = 0;
+               uint16_t ops_unused = 0;
+               uint16_t num_enq = 0;
+               uint16_t num_deq = 0;
+
+               while (remaining_ops > 0) {
+                       uint16_t num_ops = RTE_MIN(remaining_ops,
+                                                  test_data->burst_sz);
+                       uint16_t ops_needed = num_ops - ops_unused;
+
+                       /*
+                        * Move the unused operations from the previous
+                        * enqueue_burst call to the front, to maintain order
+                        */
+                       if ((ops_unused > 0) && (num_enq > 0)) {
+                               size_t nb_b_to_mov =
+                                     ops_unused * sizeof(struct rte_comp_op *);
+
+                               memmove(ops, &ops[num_enq], nb_b_to_mov);
+                       }
+
+                       /* Allocate compression operations */
+                       if (ops_needed && !rte_comp_op_bulk_alloc(
+                                               mem->op_pool,
+                                               &ops[ops_unused],
+                                               ops_needed)) {
+                               RTE_LOG(ERR, USER1,
+                                     "Could not allocate enough operations\n");
+                               res = -1;
+                               goto end;
+                       }
+                       allocated += ops_needed;
+
+                       for (i = 0; i < ops_needed; i++) {
+                               /*
+                                * Calculate next buffer to attach to operation
+                                */
+                               uint32_t buf_id = total_enq_ops + i +
+                                               ops_unused;
+                               uint16_t op_id = ops_unused + i;
+                               /* Reset all data in output buffers */
+                               struct rte_mbuf *m = output_bufs[buf_id];
+
+                               m->pkt_len = out_seg_sz * m->nb_segs;
+                               while (m) {
+                                       m->data_len = m->buf_len - m->data_off;
+                                       m = m->next;
+                               }
+                               ops[op_id]->m_src = input_bufs[buf_id];
+                               ops[op_id]->m_dst = output_bufs[buf_id];
+                               ops[op_id]->src.offset = 0;
+                               ops[op_id]->src.length =
+                                       rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+                               ops[op_id]->dst.offset = 0;
+                               ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+                               ops[op_id]->input_chksum = buf_id;
+                               ops[op_id]->private_xform = priv_xform;
+                       }
+
+                       if (unlikely(test_data->perf_comp_force_stop))
+                               goto end;
+
+                       num_enq = rte_compressdev_enqueue_burst(dev_id,
+                                                               mem->qp_id, ops,
+                                                               num_ops);
+                       if (num_enq == 0) {
+                               struct rte_compressdev_stats stats;
+
+                               rte_compressdev_stats_get(dev_id, &stats);
+                               if (stats.enqueue_err_count) {
+                                       res = -1;
+                                       goto end;
+                               }
+                       }
+
+                       ops_unused = num_ops - num_enq;
+                       remaining_ops -= num_enq;
+                       total_enq_ops += num_enq;
+
+                       num_deq = rte_compressdev_dequeue_burst(dev_id,
+                                                          mem->qp_id,
+                                                          deq_ops,
+                                                          test_data->burst_sz);
+                       total_deq_ops += num_deq;
+
+                       if (iter == num_iter - 1) {
+                               for (i = 0; i < num_deq; i++) {
+                                       struct rte_comp_op *op = deq_ops[i];
+
+                                       if (op->status !=
+                                               RTE_COMP_OP_STATUS_SUCCESS) {
+                                               RTE_LOG(ERR, USER1,
+                                      "Some operations were not successful\n");
+                                               goto end;
+                                       }
+
+                                       struct rte_mbuf *m = op->m_dst;
+
+                                       m->pkt_len = op->produced;
+                                       uint32_t remaining_data = op->produced;
+                                       uint16_t data_to_append;
+
+                                       while (remaining_data > 0) {
+                                               data_to_append =
+                                                       RTE_MIN(remaining_data,
+                                                            out_seg_sz);
+                                               m->data_len = data_to_append;
+                                               remaining_data -=
+                                                               data_to_append;
+                                               m = m->next;
+                                       }
+                               }
+                       }
+                       rte_mempool_put_bulk(mem->op_pool,
+                                            (void **)deq_ops, num_deq);
+                       allocated -= num_deq;
+               }
+
+               /* Dequeue the last operations */
+               while (total_deq_ops < total_ops) {
+                       if (unlikely(test_data->perf_comp_force_stop))
+                               goto end;
+
+                       num_deq = rte_compressdev_dequeue_burst(dev_id,
+                                                          mem->qp_id,
+                                                          deq_ops,
+                                                          test_data->burst_sz);
+                       if (num_deq == 0) {
+                               struct rte_compressdev_stats stats;
+
+                               rte_compressdev_stats_get(dev_id, &stats);
+                               if (stats.dequeue_err_count) {
+                                       res = -1;
+                                       goto end;
+                               }
+                       }
+
+                       total_deq_ops += num_deq;
+
+                       if (iter == num_iter - 1) {
+                               for (i = 0; i < num_deq; i++) {
+                                       struct rte_comp_op *op = deq_ops[i];
+
+                                       if (op->status !=
+                                               RTE_COMP_OP_STATUS_SUCCESS) {
+                                               RTE_LOG(ERR, USER1,
+                                      "Some operations were not successful\n");
+                                               goto end;
+                                       }
+
+                                       struct rte_mbuf *m = op->m_dst;
+
+                                       m->pkt_len = op->produced;
+                                       uint32_t remaining_data = op->produced;
+                                       uint16_t data_to_append;
+
+                                       while (remaining_data > 0) {
+                                               data_to_append =
+                                               RTE_MIN(remaining_data,
+                                                       out_seg_sz);
+                                               m->data_len = data_to_append;
+                                               remaining_data -=
+                                                               data_to_append;
+                                               m = m->next;
+                                       }
+                               }
+                       }
+                       rte_mempool_put_bulk(mem->op_pool,
+                                            (void **)deq_ops, num_deq);
+                       allocated -= num_deq;
+               }
+       }
+
+       tsc_end = rte_rdtsc_precise();
+       tsc_duration = tsc_end - tsc_start;
+
+       if (type == RTE_COMP_COMPRESS)
+               ctx->comp_tsc_duration[test_data->level] =
+                               tsc_duration / num_iter;
+       else
+               ctx->decomp_tsc_duration[test_data->level] =
+                               tsc_duration / num_iter;
+
+end:
+       rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
+       rte_compressdev_private_xform_free(dev_id, priv_xform);
+       rte_free(ops);
+
+       if (test_data->perf_comp_force_stop) {
+               RTE_LOG(ERR, USER1,
+                     "lcore: %d Perf. test has been aborted by user\n",
+                       mem->lcore_id);
+               res = -1;
+       }
+       return res;
+}
+
+/*
+ * Per-lcore entry point of the throughput test.
+ *
+ * Prints the thread/device description once per context, runs the
+ * verification test, then measures compression followed by decompression
+ * (each direction is executed twice) and prints one result row.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE when verification or any of the
+ * measurement passes fails.
+ */
+int
+cperf_throughput_test_runner(void *test_ctx)
+{
+       /* Directions, in the order in which they are measured */
+       static const enum rte_comp_xform_type phases[] = {
+               RTE_COMP_COMPRESS,
+               RTE_COMP_DECOMPRESS
+       };
+       /* Only the caller that manages to set this prints the table header */
+       static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+       struct cperf_benchmark_ctx *ctx = test_ctx;
+       struct cperf_mem_resources *mem = &ctx->ver.mem;
+       struct comp_test_data *test_data = ctx->ver.options;
+       uint32_t lcore = rte_lcore_id();
+       uint64_t tsc_hz;
+       unsigned int phase;
+       int pass;
+
+       mem->lcore_id = lcore;
+
+       /* Describe the current compression thread, once per context */
+       if (rte_atomic16_test_and_set(&mem->print_info_once))
+               printf("    lcore: %u, driver name: %s, device name: %s,"
+                      " device id: %u, socket id: %u, queue pair id: %u\n",
+                       lcore,
+                       test_data->driver_name,
+                       rte_compressdev_name_get(mem->dev_id),
+                       mem->dev_id,
+                       rte_compressdev_socket_id(mem->dev_id),
+                       mem->qp_id);
+
+       /* The verification part has to pass before anything is measured */
+       if (cperf_verify_test_runner(&ctx->ver))
+               return EXIT_FAILURE;
+
+       /*
+        * Every direction is run twice: the second pass overwrites the
+        * duration stored by the first one, which was taken before the
+        * cache was warmed up.
+        */
+       for (phase = 0; phase < sizeof(phases) / sizeof(phases[0]); phase++) {
+               for (pass = 0; pass < 2; pass++) {
+                       if (main_loop(ctx, phases[phase]) < 0)
+                               return EXIT_FAILURE;
+               }
+       }
+
+       /* TSC cycles spent per input byte */
+       ctx->comp_tsc_byte =
+               (double)(ctx->comp_tsc_duration[test_data->level]) /
+                               test_data->input_data_sz;
+       ctx->decomp_tsc_byte =
+               (double)(ctx->decomp_tsc_duration[test_data->level]) /
+                               test_data->input_data_sz;
+
+       /* cycles/s divided by cycles/byte gives bytes/s; scale to Gbit/s */
+       tsc_hz = rte_get_tsc_hz();
+       ctx->comp_gbps = tsc_hz / ctx->comp_tsc_byte * 8 / 1000000000;
+       ctx->decomp_gbps = tsc_hz / ctx->decomp_tsc_byte * 8 / 1000000000;
+
+       if (rte_atomic16_test_and_set(&display_once))
+               printf("\n%12s%6s%12s%17s%15s%16s\n",
+                       "lcore id", "Level", "Comp size", "Comp ratio [%]",
+                       "Comp [Gbps]", "Decomp [Gbps]");
+
+       printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n",
+               mem->lcore_id,
+               test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio,
+               ctx->comp_gbps,
+               ctx->decomp_gbps);
+
+       return EXIT_SUCCESS;
+}
diff --git a/app/test-compress-perf/comp_perf_test_throughput.h b/app/test-compress-perf/comp_perf_test_throughput.h
new file mode 100644 (file)
index 0000000..467e3aa
--- /dev/null
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_THROUGHPUT_
+#define _COMP_PERF_TEST_THROUGHPUT_
+
+#include <stdint.h>
+
+#include "comp_perf_options.h"
+#include "comp_perf_test_common.h"
+#include "comp_perf_test_verify.h"
+
+/* State used by cperf_throughput_test_runner(). */
+struct cperf_benchmark_ctx {
+       /* Verification context; the runner executes the verify test first */
+       struct cperf_verify_ctx ver;
+
+       /*
+        * Store TSC duration for all levels (including level 0).
+        * Each entry is the average number of TSC cycles per iteration.
+        */
+       uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+       uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+       /* Throughput in Gbit/s, computed by the runner from the TSC rate */
+       double comp_gbps;
+       double decomp_gbps;
+       /* TSC cycles per input byte for the current level */
+       double comp_tsc_byte;
+       double decomp_tsc_byte;
+};
+
+/* Throughput test destructor; arg is the test context. */
+void
+cperf_throughput_test_destructor(void *arg);
+
+/*
+ * Throughput test runner; test_ctx points to a struct cperf_benchmark_ctx.
+ * Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+int
+cperf_throughput_test_runner(void *test_ctx);
+
+/* Throughput test constructor for the given device and queue pair. */
+void *
+cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
+               struct comp_test_data *options);
+
+#endif /* _COMP_PERF_TEST_THROUGHPUT_ */
index 758a22f..5e13257 100644 (file)
@@ -48,8 +48,8 @@ static int
 main_loop(struct cperf_verify_ctx *ctx, enum rte_comp_xform_type type)
 {
        struct comp_test_data *test_data = ctx->options;
-       uint8_t *output_data_ptr;
-       size_t *output_data_sz;
+       uint8_t *output_data_ptr = NULL;
+       size_t *output_data_sz = NULL;
        struct cperf_mem_resources *mem = &ctx->mem;
 
        uint8_t dev_id = mem->dev_id;
index 6b56dd6..ed21605 100644 (file)
 #include <rte_log.h>
 #include <rte_compressdev.h>
 
-#include "comp_perf_options.h"
-#include "comp_perf_test_verify.h"
-#include "comp_perf_test_benchmark.h"
 #include "comp_perf.h"
+#include "comp_perf_options.h"
 #include "comp_perf_test_common.h"
+#include "comp_perf_test_cyclecount.h"
+#include "comp_perf_test_throughput.h"
+#include "comp_perf_test_verify.h"
 
 #define NUM_MAX_XFORMS 16
 #define NUM_MAX_INFLIGHT_OPS 512
 
 __extension__
 const char *comp_perf_test_type_strs[] = {
-       [CPERF_TEST_TYPE_BENCHMARK] = "benchmark",
-       [CPERF_TEST_TYPE_VERIFY] = "verify"
+       [CPERF_TEST_TYPE_THROUGHPUT] = "throughput",
+       [CPERF_TEST_TYPE_VERIFY] = "verify",
+       [CPERF_TEST_TYPE_PMDCC] = "pmd-cyclecount"
 };
 
 __extension__
 static const struct cperf_test cperf_testmap[] = {
-       [CPERF_TEST_TYPE_BENCHMARK] = {
-                       cperf_benchmark_test_constructor,
-                       cperf_benchmark_test_runner,
-                       cperf_benchmark_test_destructor
+       [CPERF_TEST_TYPE_THROUGHPUT] = {
+                       cperf_throughput_test_constructor,
+                       cperf_throughput_test_runner,
+                       cperf_throughput_test_destructor
+
        },
        [CPERF_TEST_TYPE_VERIFY] = {
                        cperf_verify_test_constructor,
                        cperf_verify_test_runner,
                        cperf_verify_test_destructor
+       },
+
+       [CPERF_TEST_TYPE_PMDCC] = {
+                       cperf_cyclecount_test_constructor,
+                       cperf_cyclecount_test_runner,
+                       cperf_cyclecount_test_destructor
        }
 };
 
@@ -116,7 +125,8 @@ comp_perf_initialize_compressdev(struct comp_test_data *test_data,
        enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
                        enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
        if (enabled_cdev_count == 0) {
-               RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
+               RTE_LOG(ERR, USER1, "No compress devices type %s available,"
+                                   " please check the list of specified devices in EAL section\n",
                                test_data->driver_name);
                return -EINVAL;
        }
@@ -270,6 +280,7 @@ comp_perf_dump_input_data(struct comp_test_data *test_data)
                data += data_to_read;
        }
 
+       printf("\n");
        if (test_data->input_data_sz > actual_file_sz)
                RTE_LOG(INFO, USER1,
                  "%zu bytes read from file %s, extending the file %.2f times\n",
@@ -365,9 +376,12 @@ main(int argc, char **argv)
        else
                test_data->level = test_data->level_lst.list[0];
 
-       printf("App uses socket: %u\n", rte_socket_id());
+       printf("\nApp uses socket: %u\n", rte_socket_id());
        printf("Burst size = %u\n", test_data->burst_sz);
        printf("Input data size = %zu\n", test_data->input_data_sz);
+       if (test_data->test == CPERF_TEST_TYPE_PMDCC)
+               printf("Cycle-count delay = %u [us]\n",
+                      test_data->cyclecount_delay);
 
        test_data->cleanup = ST_DURING_TEST;
        total_nb_qps = nb_compressdevs * test_data->nb_qps;
@@ -394,7 +408,7 @@ main(int argc, char **argv)
                i++;
        }
 
-       print_test_dynamics(); /* constructors must be executed first */
+       print_test_dynamics(test_data);
 
        while (test_data->level <= test_data->level_lst.max) {
 
@@ -472,7 +486,28 @@ end:
 }
 
 __rte_weak void *
-cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
+cperf_cyclecount_test_constructor(uint8_t dev_id __rte_unused,
+                                uint16_t qp_id __rte_unused,
+                                struct comp_test_data *options __rte_unused)
+{
+       RTE_LOG(INFO, USER1, "Cycle count test is not supported yet\n");
+       return NULL;
+}
+
+__rte_weak void
+cperf_cyclecount_test_destructor(void *arg __rte_unused)
+{ /* weak fallback: ignores arg and only logs an INFO message */
+       RTE_LOG(INFO, USER1, "Something wrong happened!!!\n");
+}
+
+__rte_weak int
+cperf_cyclecount_test_runner(void *test_ctx __rte_unused)
+{ /* weak fallback: ignores test_ctx, does no work and returns 0 */
+       return 0;
+}
+
+__rte_weak void *
+cperf_throughput_test_constructor(uint8_t dev_id __rte_unused,
                                 uint16_t qp_id __rte_unused,
                                 struct comp_test_data *options __rte_unused)
 {
@@ -481,13 +516,13 @@ cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
 }
 
 __rte_weak void
-cperf_benchmark_test_destructor(void *arg __rte_unused)
+cperf_throughput_test_destructor(void *arg __rte_unused)
 {
 
 }
 
 __rte_weak int
-cperf_benchmark_test_runner(void *test_ctx __rte_unused)
+cperf_throughput_test_runner(void *test_ctx __rte_unused)
 {
        return 0;
 }
index 1136f04..1fe26cc 100644 (file)
@@ -5,6 +5,7 @@ allow_experimental_apis = true
 sources = files('comp_perf_options_parse.c',
                'main.c',
                'comp_perf_test_verify.c',
-               'comp_perf_test_benchmark.c',
+               'comp_perf_test_throughput.c',
+               'comp_perf_test_cyclecount.c',
                'comp_perf_test_common.c')
 deps = ['compressdev']