app/compress-perf: add weak functions for multicore test
author Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
Mon, 8 Jul 2019 18:16:13 +0000 (20:16 +0200)
committer Akhil Goyal <akhil.goyal@nxp.com>
Fri, 19 Jul 2019 11:53:27 +0000 (13:53 +0200)
This patch adds template functions for the multi-core performance
version of the compress-perf tool.

Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
Acked-by: Artur Trybula <arturx.trybula@intel.com>
Acked-by: Shally Verma <shallyv@marvell.com>
app/test-compress-perf/Makefile
app/test-compress-perf/comp_perf.h [new file with mode: 0644]
app/test-compress-perf/comp_perf_options.h
app/test-compress-perf/comp_perf_options_parse.c
app/test-compress-perf/comp_perf_test_common.c [new file with mode: 0644]
app/test-compress-perf/comp_perf_test_common.h [new file with mode: 0644]
app/test-compress-perf/main.c
app/test-compress-perf/meson.build

index d20e17e..de74129 100644 (file)
@@ -12,7 +12,6 @@ CFLAGS += -O3
 # all source are stored in SRCS-y
 SRCS-y := main.c
 SRCS-y += comp_perf_options_parse.c
-SRCS-y += comp_perf_test_verify.c
-SRCS-y += comp_perf_test_benchmark.c
+SRCS-y += comp_perf_test_common.c
 
 include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-compress-perf/comp_perf.h b/app/test-compress-perf/comp_perf.h
new file mode 100644 (file)
index 0000000..57289b0
--- /dev/null
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_
+#define _COMP_PERF_
+
+#include <rte_mempool.h>
+
+struct comp_test_data;
+
+typedef void  *(*cperf_constructor_t)(
+               uint8_t dev_id,
+               uint16_t qp_id,
+               struct comp_test_data *options);
+
+typedef int (*cperf_runner_t)(void *test_ctx);
+typedef void (*cperf_destructor_t)(void *test_ctx);
+
+struct cperf_test {
+       cperf_constructor_t constructor;
+       cperf_runner_t runner;
+       cperf_destructor_t destructor;
+};
+
+/* Needed for weak functions */
+
+void *
+cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
+                                uint16_t qp_id __rte_unused,
+                                struct comp_test_data *options __rte_unused);
+
+void
+cperf_benchmark_test_destructor(void *arg __rte_unused);
+
+int
+cperf_benchmark_test_runner(void *test_ctx __rte_unused);
+
+void *
+cperf_verify_test_constructor(uint8_t dev_id __rte_unused,
+                                uint16_t qp_id __rte_unused,
+                                struct comp_test_data *options __rte_unused);
+
+void
+cperf_verify_test_destructor(void *arg __rte_unused);
+
+int
+cperf_verify_test_runner(void *test_ctx __rte_unused);
+
+#endif /* _COMP_PERF_ */
index f87751d..0aa29a5 100644 (file)
 #define MAX_MBUF_DATA_SIZE (UINT16_MAX - RTE_PKTMBUF_HEADROOM)
 #define MAX_SEG_SIZE ((int)(MAX_MBUF_DATA_SIZE / EXPANSE_RATIO))
 
+extern const char *cperf_test_type_strs[];
+
+/* Cleanup state machine */
+enum cleanup_st {
+       ST_CLEAR = 0,
+       ST_TEST_DATA,
+       ST_COMPDEV,
+       ST_INPUT_DATA,
+       ST_MEMORY_ALLOC,
+       ST_DURING_TEST
+};
+
+enum cperf_perf_test_type {
+       CPERF_TEST_TYPE_BENCHMARK,
+       CPERF_TEST_TYPE_VERIFY
+};
+
 enum comp_operation {
        COMPRESS_ONLY,
        DECOMPRESS_ONLY,
@@ -30,37 +47,26 @@ struct range_list {
 struct comp_test_data {
        char driver_name[64];
        char input_file[64];
-       struct rte_mbuf **comp_bufs;
-       struct rte_mbuf **decomp_bufs;
-       uint32_t total_bufs;
+       enum cperf_perf_test_type test;
+
        uint8_t *input_data;
        size_t input_data_sz;
-       uint8_t *compressed_data;
-       uint8_t *decompressed_data;
-       struct rte_mempool *comp_buf_pool;
-       struct rte_mempool *decomp_buf_pool;
-       struct rte_mempool *op_pool;
-       int8_t cdev_id;
+       uint16_t nb_qps;
        uint16_t seg_sz;
        uint16_t out_seg_sz;
        uint16_t burst_sz;
        uint32_t pool_sz;
        uint32_t num_iter;
        uint16_t max_sgl_segs;
+
        enum rte_comp_huffman huffman_enc;
        enum comp_operation test_op;
        int window_sz;
-       struct range_list level;
-       /* Store TSC duration for all levels (including level 0) */
-       uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
-       uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
-       size_t comp_data_sz;
-       size_t decomp_data_sz;
+       struct range_list level_lst;
+       uint8_t level;
+
        double ratio;
-       double comp_gbps;
-       double decomp_gbps;
-       double comp_tsc_byte;
-       double decomp_tsc_byte;
+       enum cleanup_st cleanup;
 };
 
 int
index a7a8c1f..74ea81d 100644 (file)
@@ -468,19 +468,20 @@ parse_level(struct comp_test_data *test_data, const char *arg)
         * Try parsing the argument as a range, if it fails,
         * parse it as a list
         */
-       if (parse_range(arg, &test_data->level.min, &test_data->level.max,
-                       &test_data->level.inc) < 0) {
-               ret = parse_list(arg, test_data->level.list,
-                                       &test_data->level.min,
-                                       &test_data->level.max);
+       if (parse_range(arg, &test_data->level_lst.min,
+                       &test_data->level_lst.max,
+                       &test_data->level_lst.inc) < 0) {
+               ret = parse_list(arg, test_data->level_lst.list,
+                                       &test_data->level_lst.min,
+                                       &test_data->level_lst.max);
                if (ret < 0) {
                        RTE_LOG(ERR, USER1,
                                "Failed to parse compression level/s\n");
                        return -1;
                }
-               test_data->level.count = ret;
+               test_data->level_lst.count = ret;
 
-               if (test_data->level.max > RTE_COMP_LEVEL_MAX) {
+               if (test_data->level_lst.max > RTE_COMP_LEVEL_MAX) {
                        RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n",
                                        RTE_COMP_LEVEL_MAX);
                        return -1;
@@ -500,7 +501,6 @@ struct long_opt_parser {
 };
 
 static struct option lgopts[] = {
-
        { CPERF_DRIVER_NAME, required_argument, 0, 0 },
        { CPERF_TEST_FILE, required_argument, 0, 0 },
        { CPERF_SEG_SIZE, required_argument, 0, 0 },
@@ -574,7 +574,6 @@ comp_perf_options_parse(struct comp_test_data *test_data, int argc, char **argv)
 void
 comp_perf_options_default(struct comp_test_data *test_data)
 {
-       test_data->cdev_id = -1;
        test_data->seg_sz = 2048;
        test_data->burst_sz = 32;
        test_data->pool_sz = 8192;
@@ -583,9 +582,10 @@ comp_perf_options_default(struct comp_test_data *test_data)
        test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
        test_data->test_op = COMPRESS_DECOMPRESS;
        test_data->window_sz = -1;
-       test_data->level.min = 1;
-       test_data->level.max = 9;
-       test_data->level.inc = 1;
+       test_data->level_lst.min = 1;
+       test_data->level_lst.max = 9;
+       test_data->level_lst.inc = 1;
+       test_data->test = CPERF_TEST_TYPE_BENCHMARK;
 }
 
 int
diff --git a/app/test-compress-perf/comp_perf_test_common.c b/app/test-compress-perf/comp_perf_test_common.c
new file mode 100644 (file)
index 0000000..dc9d0b0
--- /dev/null
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_options.h"
+#include "comp_perf_test_verify.h"
+#include "comp_perf_test_benchmark.h"
+#include "comp_perf.h"
+#include "comp_perf_test_common.h"
+
+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
+
+int
+param_range_check(uint16_t size, const struct rte_param_log2_range *range)
+{
+       unsigned int next_size;
+
+       /* Check lower/upper bounds */
+       if (size < range->min)
+               return -1;
+
+       if (size > range->max)
+               return -1;
+
+       /* If range is actually only one value, size is correct */
+       if (range->increment == 0)
+               return 0;
+
+       /* Check if value is one of the supported sizes */
+       for (next_size = range->min; next_size <= range->max;
+                       next_size += range->increment)
+               if (size == next_size)
+                       return 0;
+
+       return -1;
+}
+
+static uint32_t
+find_buf_size(uint32_t input_size)
+{
+       uint32_t i;
+
+       /* From a performance point of view, the buffer size should be a
+        * power of 2, yet large enough to store incompressible data.
+        */
+
+       /* We're looking for the nearest power-of-2 buffer size that is
+        * greater than input_size.
+        */
+       uint32_t size =
+               !input_size ? MIN_COMPRESSED_BUF_SIZE : (input_size << 1);
+
+       for (i = UINT16_MAX + 1; !(i & size); i >>= 1)
+               ;
+
+       return i > ((UINT16_MAX + 1) >> 1)
+                       ? (uint32_t)((float)input_size * EXPANSE_RATIO)
+                       : i;
+}
+
+void
+comp_perf_free_memory(struct cperf_mem_resources *mem)
+{
+       uint32_t i;
+
+       for (i = 0; i < mem->total_bufs; i++) {
+               rte_pktmbuf_free(mem->comp_bufs[i]);
+               rte_pktmbuf_free(mem->decomp_bufs[i]);
+       }
+
+       rte_free(mem->decomp_bufs);
+       rte_free(mem->comp_bufs);
+       rte_free(mem->decompressed_data);
+       rte_free(mem->compressed_data);
+       rte_mempool_free(mem->op_pool);
+       rte_mempool_free(mem->decomp_buf_pool);
+       rte_mempool_free(mem->comp_buf_pool);
+}
+
+int
+comp_perf_allocate_memory(struct comp_test_data *test_data,
+                         struct cperf_mem_resources *mem)
+{
+       test_data->out_seg_sz = find_buf_size(test_data->seg_sz);
+       /* Number of segments for input and output
+        * (compression and decompression)
+        */
+       uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
+                       test_data->seg_sz);
+       char pool_name[32] = "";
+
+       snprintf(pool_name, sizeof(pool_name), "comp_buf_pool_%u_qp_%u",
+                       mem->dev_id, mem->qp_id);
+       mem->comp_buf_pool = rte_pktmbuf_pool_create(pool_name,
+                               total_segs,
+                               0, 0,
+                               test_data->out_seg_sz + RTE_PKTMBUF_HEADROOM,
+                               rte_socket_id());
+       if (mem->comp_buf_pool == NULL) {
+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
+               return -1;
+       }
+
+       snprintf(pool_name, sizeof(pool_name), "decomp_buf_pool_%u_qp_%u",
+                       mem->dev_id, mem->qp_id);
+       mem->decomp_buf_pool = rte_pktmbuf_pool_create(pool_name,
+                               total_segs,
+                               0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
+                               rte_socket_id());
+       if (mem->decomp_buf_pool == NULL) {
+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
+               return -1;
+       }
+
+       mem->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs);
+
+       snprintf(pool_name, sizeof(pool_name), "op_pool_%u_qp_%u",
+                       mem->dev_id, mem->qp_id);
+       mem->op_pool = rte_comp_op_pool_create(pool_name,
+                                 mem->total_bufs,
+                                 0, 0, rte_socket_id());
+       if (mem->op_pool == NULL) {
+               RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
+               return -1;
+       }
+
+       /*
+        * Compressed data might be a bit larger than input data,
+        * if data cannot be compressed
+        */
+       mem->compressed_data = rte_zmalloc_socket(NULL,
+                               test_data->input_data_sz * EXPANSE_RATIO
+                                               + MIN_COMPRESSED_BUF_SIZE, 0,
+                               rte_socket_id());
+       if (mem->compressed_data == NULL) {
+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
+                               "file could not be allocated\n");
+               return -1;
+       }
+
+       mem->decompressed_data = rte_zmalloc_socket(NULL,
+                               test_data->input_data_sz, 0,
+                               rte_socket_id());
+       if (mem->decompressed_data == NULL) {
+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
+                               "file could not be allocated\n");
+               return -1;
+       }
+
+       mem->comp_bufs = rte_zmalloc_socket(NULL,
+                       mem->total_bufs * sizeof(struct rte_mbuf *),
+                       0, rte_socket_id());
+       if (mem->comp_bufs == NULL) {
+               RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
+                               " could not be allocated\n");
+               return -1;
+       }
+
+       mem->decomp_bufs = rte_zmalloc_socket(NULL,
+                       mem->total_bufs * sizeof(struct rte_mbuf *),
+                       0, rte_socket_id());
+       if (mem->decomp_bufs == NULL) {
+               RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
+                               " could not be allocated\n");
+               return -1;
+       }
+       return 0;
+}
+
+int
+prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem)
+{
+       uint32_t remaining_data = test_data->input_data_sz;
+       uint8_t *input_data_ptr = test_data->input_data;
+       size_t data_sz;
+       uint8_t *data_addr;
+       uint32_t i, j;
+
+       for (i = 0; i < mem->total_bufs; i++) {
+               /* Allocate data in input mbuf and copy data from input file */
+               mem->decomp_bufs[i] =
+                       rte_pktmbuf_alloc(mem->decomp_buf_pool);
+               if (mem->decomp_bufs[i] == NULL) {
+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
+                       return -1;
+               }
+
+               data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
+               data_addr = (uint8_t *) rte_pktmbuf_append(
+                                       mem->decomp_bufs[i], data_sz);
+               if (data_addr == NULL) {
+                       RTE_LOG(ERR, USER1, "Could not append data\n");
+                       return -1;
+               }
+               rte_memcpy(data_addr, input_data_ptr, data_sz);
+
+               input_data_ptr += data_sz;
+               remaining_data -= data_sz;
+
+               /* Already one segment in the mbuf */
+               uint16_t segs_per_mbuf = 1;
+
+               /* Chain mbufs if needed for input mbufs */
+               while (segs_per_mbuf < test_data->max_sgl_segs
+                               && remaining_data > 0) {
+                       struct rte_mbuf *next_seg =
+                               rte_pktmbuf_alloc(mem->decomp_buf_pool);
+
+                       if (next_seg == NULL) {
+                               RTE_LOG(ERR, USER1,
+                                       "Could not allocate mbuf\n");
+                               return -1;
+                       }
+
+                       data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
+                               data_sz);
+
+                       if (data_addr == NULL) {
+                               RTE_LOG(ERR, USER1, "Could not append data\n");
+                               return -1;
+                       }
+
+                       rte_memcpy(data_addr, input_data_ptr, data_sz);
+                       input_data_ptr += data_sz;
+                       remaining_data -= data_sz;
+
+                       if (rte_pktmbuf_chain(mem->decomp_bufs[i],
+                                       next_seg) < 0) {
+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
+                               return -1;
+                       }
+                       segs_per_mbuf++;
+               }
+
+               /* Allocate data in output mbuf */
+               mem->comp_bufs[i] =
+                       rte_pktmbuf_alloc(mem->comp_buf_pool);
+               if (mem->comp_bufs[i] == NULL) {
+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
+                       return -1;
+               }
+               data_addr = (uint8_t *) rte_pktmbuf_append(
+                                       mem->comp_bufs[i],
+                                       test_data->out_seg_sz);
+               if (data_addr == NULL) {
+                       RTE_LOG(ERR, USER1, "Could not append data\n");
+                       return -1;
+               }
+
+               /* Chain mbufs if needed for output mbufs */
+               for (j = 1; j < segs_per_mbuf; j++) {
+                       struct rte_mbuf *next_seg =
+                               rte_pktmbuf_alloc(mem->comp_buf_pool);
+
+                       if (next_seg == NULL) {
+                               RTE_LOG(ERR, USER1,
+                                       "Could not allocate mbuf\n");
+                               return -1;
+                       }
+
+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
+                               test_data->out_seg_sz);
+
+                       if (data_addr == NULL) {
+                               RTE_LOG(ERR, USER1, "Could not append data\n");
+                               return -1;
+                       }
+
+                       if (rte_pktmbuf_chain(mem->comp_bufs[i],
+                                       next_seg) < 0) {
+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
+                               return -1;
+                       }
+               }
+       }
+
+       return 0;
+}
diff --git a/app/test-compress-perf/comp_perf_test_common.h b/app/test-compress-perf/comp_perf_test_common.h
new file mode 100644 (file)
index 0000000..9c11e3a
--- /dev/null
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_COMMON_H_
+#define _COMP_PERF_TEST_COMMON_H_
+
+#include <stdint.h>
+
+#include <rte_mempool.h>
+
+struct cperf_mem_resources {
+       uint8_t dev_id;
+       uint16_t qp_id;
+       uint8_t lcore_id;
+       uint32_t total_bufs;
+       uint8_t *compressed_data;
+       uint8_t *decompressed_data;
+
+       struct rte_mbuf **comp_bufs;
+       struct rte_mbuf **decomp_bufs;
+
+       struct rte_mempool *comp_buf_pool;
+       struct rte_mempool *decomp_buf_pool;
+       struct rte_mempool *op_pool;
+};
+
+int
+param_range_check(uint16_t size, const struct rte_param_log2_range *range);
+
+void
+comp_perf_free_memory(struct cperf_mem_resources *mem);
+
+int
+comp_perf_allocate_memory(struct comp_test_data *test_data,
+                         struct cperf_mem_resources *mem);
+
+int
+prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem);
+
+#endif /* _COMP_PERF_TEST_COMMON_H_ */
index 7b9ee74..3a3da4c 100644 (file)
@@ -8,56 +8,38 @@
 #include <rte_compressdev.h>
 
 #include "comp_perf_options.h"
-#include "comp_perf_test_verify.h"
-#include "comp_perf_test_benchmark.h"
+#include "comp_perf.h"
+#include "comp_perf_test_common.h"
 
 #define NUM_MAX_XFORMS 16
 #define NUM_MAX_INFLIGHT_OPS 512
 
-#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
-
-/* Cleanup state machine */
-static enum cleanup_st {
-       ST_CLEAR = 0,
-       ST_TEST_DATA,
-       ST_COMPDEV,
-       ST_INPUT_DATA,
-       ST_MEMORY_ALLOC,
-       ST_PREPARE_BUF,
-       ST_DURING_TEST
-} cleanup = ST_CLEAR;
-
-static int
-param_range_check(uint16_t size, const struct rte_param_log2_range *range)
-{
-       unsigned int next_size;
-
-       /* Check lower/upper bounds */
-       if (size < range->min)
-               return -1;
-
-       if (size > range->max)
-               return -1;
-
-       /* If range is actually only one value, size is correct */
-       if (range->increment == 0)
-               return 0;
-
-       /* Check if value is one of the supported sizes */
-       for (next_size = range->min; next_size <= range->max;
-                       next_size += range->increment)
-               if (size == next_size)
-                       return 0;
-
-       return -1;
-}
+__extension__
+const char *cperf_test_type_strs[] = {
+       [CPERF_TEST_TYPE_BENCHMARK] = "benchmark",
+       [CPERF_TEST_TYPE_VERIFY] = "verify"
+};
+
+__extension__
+static const struct cperf_test cperf_testmap[] = {
+       [CPERF_TEST_TYPE_BENCHMARK] = {
+                       cperf_benchmark_test_constructor,
+                       cperf_benchmark_test_runner,
+                       cperf_benchmark_test_destructor
+       },
+       [CPERF_TEST_TYPE_VERIFY] = {
+                       cperf_verify_test_constructor,
+                       cperf_verify_test_runner,
+                       cperf_verify_test_destructor
+       }
+};
 
 static int
-comp_perf_check_capabilities(struct comp_test_data *test_data)
+comp_perf_check_capabilities(struct comp_test_data *test_data, uint8_t cdev_id)
 {
        const struct rte_compressdev_capabilities *cap;
 
-       cap = rte_compressdev_capability_get(test_data->cdev_id,
+       cap = rte_compressdev_capability_get(cdev_id,
                                             RTE_COMP_ALGO_DEFLATE);
 
        if (cap == NULL) {
@@ -105,7 +87,7 @@ comp_perf_check_capabilities(struct comp_test_data *test_data)
        }
 
        /* Level 0 support */
-       if (test_data->level.min == 0 &&
+       if (test_data->level_lst.min == 0 &&
                        (comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) == 0) {
                RTE_LOG(ERR, USER1, "Compress device does not support "
                                "level 0 (no compression)\n");
@@ -115,110 +97,108 @@ comp_perf_check_capabilities(struct comp_test_data *test_data)
        return 0;
 }
 
-static uint32_t
-find_buf_size(uint32_t input_size)
+static int
+comp_perf_initialize_compressdev(struct comp_test_data *test_data,
+                                uint8_t *enabled_cdevs)
 {
-       uint32_t i;
+       uint8_t enabled_cdev_count, nb_lcores, cdev_id;
+       unsigned int i, j;
+       int ret;
 
-       /* From performance point of view the buffer size should be a
-        * power of 2 but also should be enough to store incompressible data
-        */
+       enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
+                       enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
+       if (enabled_cdev_count == 0) {
+               RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
+                               test_data->driver_name);
+               return -EINVAL;
+       }
 
-       /* We're looking for nearest power of 2 buffer size, which is greather
-        * than input_size
+       nb_lcores = rte_lcore_count() - 1;
+       /*
+        * Use fewer devices,
+        * if there are more available than cores.
         */
-       uint32_t size =
-               !input_size ? MIN_COMPRESSED_BUF_SIZE : (input_size << 1);
-
-       for (i = UINT16_MAX + 1; !(i & size); i >>= 1)
-               ;
-
-       return i > ((UINT16_MAX + 1) >> 1)
-                       ? (uint32_t)((float)input_size * EXPANSE_RATIO)
-                       : i;
-}
-
-static int
-comp_perf_allocate_memory(struct comp_test_data *test_data)
-{
+       if (enabled_cdev_count > nb_lcores) {
+               enabled_cdev_count = nb_lcores;
+               RTE_LOG(INFO, USER1,
+                       " There's more available devices than cores!"
+                       " The number of devices has been aligned to %d cores\n",
+                       nb_lcores);
+       }
 
-       test_data->out_seg_sz = find_buf_size(test_data->seg_sz);
-       /* Number of segments for input and output
-        * (compression and decompression)
+       /*
+        * Calculate the number of queue pairs needed, based on the
+        * number of available logical cores and compression devices.
+        * For instance, if there are 4 cores and 2 compression devices,
+        * 2 queue pairs will be set up per device.
+        * One queue pair per one core.
+        * E.g. if there are 3 cores and 2 compression devices,
+        * 2 queue pairs will be set up per device, but one queue pair
+        * will be left unused on the last device.
         */
-       uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
-                       test_data->seg_sz);
-       test_data->comp_buf_pool = rte_pktmbuf_pool_create("comp_buf_pool",
-                               total_segs,
-                               0, 0,
-                               test_data->out_seg_sz + RTE_PKTMBUF_HEADROOM,
-                               rte_socket_id());
-       if (test_data->comp_buf_pool == NULL) {
-               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
-               return -1;
-       }
+       test_data->nb_qps = (nb_lcores % enabled_cdev_count) ?
+                               (nb_lcores / enabled_cdev_count) + 1 :
+                               nb_lcores / enabled_cdev_count;
 
-       cleanup = ST_MEMORY_ALLOC;
-       test_data->decomp_buf_pool = rte_pktmbuf_pool_create("decomp_buf_pool",
-                               total_segs,
-                               0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
-                               rte_socket_id());
-       if (test_data->decomp_buf_pool == NULL) {
-               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
-               return -1;
-       }
+       for (i = 0; i < enabled_cdev_count &&
+                       i < RTE_COMPRESS_MAX_DEVS; i++,
+                                       nb_lcores -= test_data->nb_qps) {
+               cdev_id = enabled_cdevs[i];
 
-       test_data->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs);
+               struct rte_compressdev_info cdev_info;
+               uint8_t socket_id = rte_compressdev_socket_id(cdev_id);
 
-       test_data->op_pool = rte_comp_op_pool_create("op_pool",
-                                 test_data->total_bufs,
-                                 0, 0, rte_socket_id());
-       if (test_data->op_pool == NULL) {
-               RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
-               return -1;
-       }
+               rte_compressdev_info_get(cdev_id, &cdev_info);
+               if (cdev_info.max_nb_queue_pairs &&
+                       test_data->nb_qps > cdev_info.max_nb_queue_pairs) {
+                       RTE_LOG(ERR, USER1,
+                               "Number of needed queue pairs is higher "
+                               "than the maximum number of queue pairs "
+                               "per device.\n");
+                       RTE_LOG(ERR, USER1,
+                               "Lower the number of cores or increase "
+                               "the number of crypto devices\n");
+                       return -EINVAL;
+               }
 
-       /*
-        * Compressed data might be a bit larger than input data,
-        * if data cannot be compressed
-        */
-       test_data->compressed_data = rte_zmalloc_socket(NULL,
-                               test_data->input_data_sz * EXPANSE_RATIO
-                                               + MIN_COMPRESSED_BUF_SIZE, 0,
-                               rte_socket_id());
-       if (test_data->compressed_data == NULL) {
-               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
-                               "file could not be allocated\n");
-               return -1;
-       }
+               if (comp_perf_check_capabilities(test_data, cdev_id) < 0)
+                       return -EINVAL;
+
+               /* Configure compressdev */
+               struct rte_compressdev_config config = {
+                       .socket_id = socket_id,
+                       .nb_queue_pairs = nb_lcores > test_data->nb_qps
+                                       ? test_data->nb_qps : nb_lcores,
+                       .max_nb_priv_xforms = NUM_MAX_XFORMS,
+                       .max_nb_streams = 0
+               };
+
+               if (rte_compressdev_configure(cdev_id, &config) < 0) {
+                       RTE_LOG(ERR, USER1, "Device configuration failed\n");
+                       return -EINVAL;
+               }
 
-       test_data->decompressed_data = rte_zmalloc_socket(NULL,
-                               test_data->input_data_sz, 0,
-                               rte_socket_id());
-       if (test_data->decompressed_data == NULL) {
-               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
-                               "file could not be allocated\n");
-               return -1;
-       }
+               for (j = 0; j < test_data->nb_qps; j++) {
+                       ret = rte_compressdev_queue_pair_setup(cdev_id, j,
+                                       NUM_MAX_INFLIGHT_OPS, socket_id);
+                       if (ret < 0) {
+                               RTE_LOG(ERR, USER1,
+                             "Failed to setup queue pair %u on compressdev %u",
+                                       j, cdev_id);
+                               return -EINVAL;
+                       }
+               }
 
-       test_data->comp_bufs = rte_zmalloc_socket(NULL,
-                       test_data->total_bufs * sizeof(struct rte_mbuf *),
-                       0, rte_socket_id());
-       if (test_data->comp_bufs == NULL) {
-               RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
-                               " could not be allocated\n");
-               return -1;
+               ret = rte_compressdev_start(cdev_id);
+               if (ret < 0) {
+                       RTE_LOG(ERR, USER1,
+                               "Failed to start device %u: error %d\n",
+                               cdev_id, ret);
+                       return -EPERM;
+               }
        }
 
-       test_data->decomp_bufs = rte_zmalloc_socket(NULL,
-                       test_data->total_bufs * sizeof(struct rte_mbuf *),
-                       0, rte_socket_id());
-       if (test_data->decomp_bufs == NULL) {
-               RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
-                               " could not be allocated\n");
-               return -1;
-       }
-       return 0;
+       return enabled_cdev_count;
 }
 
 static int
@@ -295,187 +275,18 @@ end:
        return ret;
 }
 
-static int
-comp_perf_initialize_compressdev(struct comp_test_data *test_data)
-{
-       uint8_t enabled_cdev_count;
-       uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
-
-       enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
-                       enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
-       if (enabled_cdev_count == 0) {
-               RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
-                               test_data->driver_name);
-               return -EINVAL;
-       }
-
-       if (enabled_cdev_count > 1)
-               RTE_LOG(INFO, USER1,
-                       "Only the first compress device will be used\n");
-
-       test_data->cdev_id = enabled_cdevs[0];
-
-       if (comp_perf_check_capabilities(test_data) < 0)
-               return -1;
-
-       /* Configure compressdev (one device, one queue pair) */
-       struct rte_compressdev_config config = {
-               .socket_id = rte_socket_id(),
-               .nb_queue_pairs = 1,
-               .max_nb_priv_xforms = NUM_MAX_XFORMS,
-               .max_nb_streams = 0
-       };
-
-       if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) {
-               RTE_LOG(ERR, USER1, "Device configuration failed\n");
-               return -1;
-       }
-
-       if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
-                       NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
-               RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
-               return -1;
-       }
-
-       if (rte_compressdev_start(test_data->cdev_id) < 0) {
-               RTE_LOG(ERR, USER1, "Device could not be started\n");
-               return -1;
-       }
-
-       return 0;
-}
-
-static int
-prepare_bufs(struct comp_test_data *test_data)
-{
-       uint32_t remaining_data = test_data->input_data_sz;
-       uint8_t *input_data_ptr = test_data->input_data;
-       size_t data_sz;
-       uint8_t *data_addr;
-       uint32_t i, j;
-
-       for (i = 0; i < test_data->total_bufs; i++) {
-               /* Allocate data in input mbuf and copy data from input file */
-               test_data->decomp_bufs[i] =
-                       rte_pktmbuf_alloc(test_data->decomp_buf_pool);
-               if (test_data->decomp_bufs[i] == NULL) {
-                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
-                       return -1;
-               }
-
-               cleanup = ST_PREPARE_BUF;
-               data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
-               data_addr = (uint8_t *) rte_pktmbuf_append(
-                                       test_data->decomp_bufs[i], data_sz);
-               if (data_addr == NULL) {
-                       RTE_LOG(ERR, USER1, "Could not append data\n");
-                       return -1;
-               }
-               rte_memcpy(data_addr, input_data_ptr, data_sz);
-
-               input_data_ptr += data_sz;
-               remaining_data -= data_sz;
-
-               /* Already one segment in the mbuf */
-               uint16_t segs_per_mbuf = 1;
-
-               /* Chain mbufs if needed for input mbufs */
-               while (segs_per_mbuf < test_data->max_sgl_segs
-                               && remaining_data > 0) {
-                       struct rte_mbuf *next_seg =
-                               rte_pktmbuf_alloc(test_data->decomp_buf_pool);
-
-                       if (next_seg == NULL) {
-                               RTE_LOG(ERR, USER1,
-                                       "Could not allocate mbuf\n");
-                               return -1;
-                       }
-
-                       data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
-                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
-                               data_sz);
-
-                       if (data_addr == NULL) {
-                               RTE_LOG(ERR, USER1, "Could not append data\n");
-                               return -1;
-                       }
-
-                       rte_memcpy(data_addr, input_data_ptr, data_sz);
-                       input_data_ptr += data_sz;
-                       remaining_data -= data_sz;
-
-                       if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
-                                       next_seg) < 0) {
-                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
-                               return -1;
-                       }
-                       segs_per_mbuf++;
-               }
-
-               /* Allocate data in output mbuf */
-               test_data->comp_bufs[i] =
-                       rte_pktmbuf_alloc(test_data->comp_buf_pool);
-               if (test_data->comp_bufs[i] == NULL) {
-                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
-                       return -1;
-               }
-               data_addr = (uint8_t *) rte_pktmbuf_append(
-                                       test_data->comp_bufs[i],
-                                       test_data->out_seg_sz);
-               if (data_addr == NULL) {
-                       RTE_LOG(ERR, USER1, "Could not append data\n");
-                       return -1;
-               }
-
-               /* Chain mbufs if needed for output mbufs */
-               for (j = 1; j < segs_per_mbuf; j++) {
-                       struct rte_mbuf *next_seg =
-                               rte_pktmbuf_alloc(test_data->comp_buf_pool);
-
-                       if (next_seg == NULL) {
-                               RTE_LOG(ERR, USER1,
-                                       "Could not allocate mbuf\n");
-                               return -1;
-                       }
-
-                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
-                               test_data->out_seg_sz);
-
-                       if (data_addr == NULL) {
-                               RTE_LOG(ERR, USER1, "Could not append data\n");
-                               return -1;
-                       }
-
-                       if (rte_pktmbuf_chain(test_data->comp_bufs[i],
-                                       next_seg) < 0) {
-                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
-                               return -1;
-                       }
-               }
-       }
-
-       return 0;
-}
-
-static void
-free_bufs(struct comp_test_data *test_data)
-{
-       uint32_t i;
-
-       for (i = 0; i < test_data->total_bufs; i++) {
-               rte_pktmbuf_free(test_data->comp_bufs[i]);
-               rte_pktmbuf_free(test_data->decomp_bufs[i]);
-       }
-}
-
-
-
 int
 main(int argc, char **argv)
 {
-       uint8_t level, level_idx = 0;
+       uint8_t level_idx = 0;
        int ret, i;
        struct comp_test_data *test_data;
+       void *ctx[RTE_MAX_LCORE] = {};
+       uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
+       int nb_compressdevs = 0;
+       uint16_t total_nb_qps = 0;
+       uint8_t cdev_id;
+       uint32_t lcore_id;
 
        /* Initialise DPDK EAL */
        ret = rte_eal_init(argc, argv);
@@ -492,7 +303,7 @@ main(int argc, char **argv)
                                rte_socket_id());
 
        ret = EXIT_SUCCESS;
-       cleanup = ST_TEST_DATA;
+       test_data->cleanup = ST_TEST_DATA;
        comp_perf_options_default(test_data);
 
        if (comp_perf_options_parse(test_data, argc, argv) < 0) {
@@ -507,99 +318,112 @@ main(int argc, char **argv)
                goto end;
        }
 
-       if (comp_perf_initialize_compressdev(test_data) < 0) {
-               ret = EXIT_FAILURE;
-               goto end;
-       }
+       nb_compressdevs =
+               comp_perf_initialize_compressdev(test_data, enabled_cdevs);
 
-       cleanup = ST_COMPDEV;
-       if (comp_perf_dump_input_data(test_data) < 0) {
+       if (nb_compressdevs < 1) {
                ret = EXIT_FAILURE;
                goto end;
        }
 
-       cleanup = ST_INPUT_DATA;
-       if (comp_perf_allocate_memory(test_data) < 0) {
+       test_data->cleanup = ST_COMPDEV;
+       if (comp_perf_dump_input_data(test_data) < 0) {
                ret = EXIT_FAILURE;
                goto end;
        }
 
-       if (prepare_bufs(test_data) < 0) {
-               ret = EXIT_FAILURE;
-               goto end;
-       }
+       test_data->cleanup = ST_INPUT_DATA;
 
-       if (test_data->level.inc != 0)
-               level = test_data->level.min;
+       if (test_data->level_lst.inc != 0)
+               test_data->level = test_data->level_lst.min;
        else
-               level = test_data->level.list[0];
+               test_data->level = test_data->level_lst.list[0];
 
        printf("App uses socket: %u\n", rte_socket_id());
-       printf("Driver uses socket: %u\n",
-              rte_compressdev_socket_id(test_data->cdev_id));
        printf("Burst size = %u\n", test_data->burst_sz);
        printf("File size = %zu\n", test_data->input_data_sz);
 
-       printf("%6s%12s%17s%19s%21s%15s%21s%23s%16s\n",
-               "Level", "Comp size", "Comp ratio [%]",
-               "Comp [Cycles/it]", "Comp [Cycles/Byte]", "Comp [Gbps]",
-               "Decomp [Cycles/it]", "Decomp [Cycles/Byte]", "Decomp [Gbps]");
+       test_data->cleanup = ST_DURING_TEST;
+       total_nb_qps = nb_compressdevs * test_data->nb_qps;
 
-       cleanup = ST_DURING_TEST;
-       while (level <= test_data->level.max) {
+       i = 0;
+       uint8_t qp_id = 0, cdev_index = 0;
 
-               /*
-                * Run a first iteration, to verify compression and
-                * get the compression ratio for the level
-                */
-               if (cperf_verification(test_data, level) != EXIT_SUCCESS)
-                       break;
+       RTE_LCORE_FOREACH_SLAVE(lcore_id) {
 
-               /*
-                * Run benchmarking test
-                */
-               if (cperf_benchmark(test_data, level) != EXIT_SUCCESS)
+               if (i == total_nb_qps)
                        break;
 
-               printf("%6u%12zu%17.2f%19"PRIu64"%21.2f"
-                                       "%15.2f%21"PRIu64"%23.2f%16.2f\n",
-                      level, test_data->comp_data_sz, test_data->ratio,
-                      test_data->comp_tsc_duration[level],
-                      test_data->comp_tsc_byte, test_data->comp_gbps,
-                      test_data->decomp_tsc_duration[level],
-                      test_data->decomp_tsc_byte, test_data->decomp_gbps);
+               cdev_id = enabled_cdevs[cdev_index];
+               ctx[i] = cperf_testmap[test_data->test].constructor(
+                                                       cdev_id, qp_id,
+                                                       test_data);
+               if (ctx[i] == NULL) {
+                       RTE_LOG(ERR, USER1, "Test run constructor failed\n");
+                       goto end;
+               }
+               qp_id = (qp_id + 1) % test_data->nb_qps;
+               if (qp_id == 0)
+                       cdev_index++;
+               i++;
+       }
+
+       while (test_data->level <= test_data->level_lst.max) {
 
-               if (test_data->level.inc != 0)
-                       level += test_data->level.inc;
+               i = 0;
+               RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+
+                       if (i == total_nb_qps)
+                               break;
+
+                       rte_eal_remote_launch(
+                                       cperf_testmap[test_data->test].runner,
+                                       ctx[i], lcore_id);
+                       i++;
+               }
+               i = 0;
+               RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+
+                       if (i == total_nb_qps)
+                               break;
+                       ret |= rte_eal_wait_lcore(lcore_id);
+                       i++;
+               }
+
+               if (ret != EXIT_SUCCESS)
+                       break;
+
+               if (test_data->level_lst.inc != 0)
+                       test_data->level += test_data->level_lst.inc;
                else {
-                       if (++level_idx == test_data->level.count)
+                       if (++level_idx == test_data->level_lst.count)
                                break;
-                       level = test_data->level.list[level_idx];
+                       test_data->level = test_data->level_lst.list[level_idx];
                }
        }
 
 end:
-       switch (cleanup) {
+       switch (test_data->cleanup) {
 
        case ST_DURING_TEST:
-       case ST_PREPARE_BUF:
-               free_bufs(test_data);
-               /* fallthrough */
-       case ST_MEMORY_ALLOC:
-               rte_free(test_data->decomp_bufs);
-               rte_free(test_data->comp_bufs);
-               rte_free(test_data->decompressed_data);
-               rte_free(test_data->compressed_data);
-               rte_mempool_free(test_data->op_pool);
-               rte_mempool_free(test_data->decomp_buf_pool);
-               rte_mempool_free(test_data->comp_buf_pool);
+               i = 0;
+               RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+                       if (i == total_nb_qps)
+                               break;
+
+                       if (ctx[i] && cperf_testmap[test_data->test].destructor)
+                               cperf_testmap[test_data->test].destructor(
+                                                                       ctx[i]);
+                       i++;
+               }
                /* fallthrough */
        case ST_INPUT_DATA:
                rte_free(test_data->input_data);
                /* fallthrough */
        case ST_COMPDEV:
-               if (test_data->cdev_id != -1)
-                       rte_compressdev_stop(test_data->cdev_id);
+               for (i = 0; i < nb_compressdevs &&
+                               i < RTE_COMPRESS_MAX_DEVS; i++)
+                       rte_compressdev_stop(enabled_cdevs[i]);
                /* fallthrough */
        case ST_TEST_DATA:
                rte_free(test_data);
@@ -616,3 +440,44 @@ end:
        }
        return ret;
 }
+
+__rte_weak void *
+cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
+                                uint16_t qp_id __rte_unused,
+                                struct comp_test_data *options __rte_unused)
+{
+       RTE_LOG(INFO, USER1, "Benchmark test is not supported yet\n");
+       return NULL;    /* weak stub: arguments unused, no context created */
+}
+
+__rte_weak void
+cperf_benchmark_test_destructor(void *arg __rte_unused)
+{
+       /* weak stub: argument unused, nothing to release */
+}
+
+__rte_weak int
+cperf_benchmark_test_runner(void *test_ctx __rte_unused)
+{
+       return 0;       /* weak stub: context unused, no work performed */
+}
+__rte_weak void *
+cperf_verify_test_constructor(uint8_t dev_id __rte_unused,
+                                uint16_t qp_id __rte_unused,
+                                struct comp_test_data *options __rte_unused)
+{
+       RTE_LOG(INFO, USER1, "Verify test is not supported yet\n");
+       return NULL;    /* weak stub: arguments unused, no context created */
+}
+
+__rte_weak void
+cperf_verify_test_destructor(void *arg __rte_unused)
+{
+       /* weak stub: argument unused, nothing to release */
+}
+
+__rte_weak int
+cperf_verify_test_runner(void *test_ctx __rte_unused)
+{
+       return 0;       /* weak stub: context unused, no work performed */
+}
index ec73e5e..00413c6 100644 (file)
@@ -4,6 +4,5 @@
 allow_experimental_apis = true
 sources = files('comp_perf_options_parse.c',
                'main.c',
-               'comp_perf_test_verify.c',
-               'comp_perf_test_benchmark.c')
+               'comp_perf_test_common.c')
 deps = ['compressdev']