Add cycle-count mode to the compression perf tool.
The new mode allows cycle-count measurement of both hardware
and software PMDs.
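
For example, the new mode can be selected with "--ptest pmd-cyclecount".
A hypothetical invocation (binary path, core list, driver name and
input file all depend on the local build and setup):

    ./dpdk-test-compress-perf -l 4 -- --driver-name compress_qat \
        --input-file input.txt --ptest pmd-cyclecount --cc-delay-us 500
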
Signed-off-by: Artur Trybula <arturx.trybula@intel.com>
Acked-by: Fiona Trahe <fiona.trahe@intel.com>
SRCS-y := main.c
SRCS-y += comp_perf_options_parse.c
SRCS-y += comp_perf_test_verify.c
-SRCS-y += comp_perf_test_benchmark.c
+SRCS-y += comp_perf_test_throughput.c
+SRCS-y += comp_perf_test_cyclecount.c
SRCS-y += comp_perf_test_common.c
include $(RTE_SDK)/mk/rte.app.mk
/* Needed for weak functions */
void *
-cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
+cperf_throughput_test_constructor(uint8_t dev_id __rte_unused,
uint16_t qp_id __rte_unused,
struct comp_test_data *options __rte_unused);
void
-cperf_benchmark_test_destructor(void *arg __rte_unused);
+cperf_throughput_test_destructor(void *arg __rte_unused);
int
-cperf_benchmark_test_runner(void *test_ctx __rte_unused);
+cperf_throughput_test_runner(void *test_ctx __rte_unused);
void *
cperf_verify_test_constructor(uint8_t dev_id __rte_unused,
};
enum cperf_test_type {
- CPERF_TEST_TYPE_BENCHMARK,
- CPERF_TEST_TYPE_VERIFY
+ CPERF_TEST_TYPE_THROUGHPUT,
+ CPERF_TEST_TYPE_VERIFY,
+ CPERF_TEST_TYPE_PMDCC
};
enum comp_operation {
double ratio;
enum cleanup_st cleanup;
int perf_comp_force_stop;
+
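+	/* delay (in microseconds) between enqueue and dequeue in the
+	 * cyclecount test, set with --cc-delay-us
+	 */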
+ uint32_t cyclecount_delay;
};
int
#define CPERF_WINDOW_SIZE ("window-sz")
#define CPERF_EXTERNAL_MBUFS ("external-mbufs")
+/* cyclecount-specific options */
+#define CPERF_CYCLECOUNT_DELAY_US ("cc-delay-us")
+
struct name_id_map {
const char *name;
uint32_t id;
usage(char *progname)
{
printf("%s [EAL options] --\n"
- " --ptest benchmark / verify :"
+ " --ptest throughput / verify / pmd-cyclecount\n"
" --driver-name NAME: compress driver to use\n"
" --input-file NAME: file to compress and decompress\n"
" --extended-input-sz N: extend file data up to this size (default: no extension)\n"
" (e.g.: 15 => 32k, default: max supported by PMD)\n"
" --external-mbufs: use memzones as external buffers instead of\n"
" keeping the data directly in mbuf area\n"
+ " --cc-delay-us N: delay between enqueue and dequeue operations in microseconds\n"
+ " valid only for cyclecount perf test (default: 500 us)\n"
" -h: prints this help\n",
progname);
}
{
struct name_id_map cperftest_namemap[] = {
{
- comp_perf_test_type_strs[CPERF_TEST_TYPE_BENCHMARK],
- CPERF_TEST_TYPE_BENCHMARK
+ comp_perf_test_type_strs[CPERF_TEST_TYPE_THROUGHPUT],
+ CPERF_TEST_TYPE_THROUGHPUT
},
{
comp_perf_test_type_strs[CPERF_TEST_TYPE_VERIFY],
CPERF_TEST_TYPE_VERIFY
+ },
+ {
+ comp_perf_test_type_strs[CPERF_TEST_TYPE_PMDCC],
+ CPERF_TEST_TYPE_PMDCC
}
};
return 0;
}
+static int
+parse_cyclecount_delay_us(struct comp_test_data *test_data,
+ const char *arg)
+{
+ int ret = parse_uint32_t(&(test_data->cyclecount_delay), arg);
+
+ if (ret) {
+ RTE_LOG(ERR, USER1, "Failed to parse cyclecount delay\n");
+ return -1;
+ }
+ return 0;
+}
+
typedef int (*option_parser_t)(struct comp_test_data *test_data,
const char *arg);
struct long_opt_parser {
const char *lgopt_name;
option_parser_t parser_fn;
-
};
static struct option lgopts[] = {
-
{ CPERF_PTEST_TYPE, required_argument, 0, 0 },
{ CPERF_DRIVER_NAME, required_argument, 0, 0 },
{ CPERF_TEST_FILE, required_argument, 0, 0 },
{ CPERF_LEVEL, required_argument, 0, 0 },
{ CPERF_WINDOW_SIZE, required_argument, 0, 0 },
{ CPERF_EXTERNAL_MBUFS, 0, 0, 0 },
+ { CPERF_CYCLECOUNT_DELAY_US, required_argument, 0, 0 },
{ NULL, 0, 0, 0 }
};
{ CPERF_LEVEL, parse_level },
{ CPERF_WINDOW_SIZE, parse_window_sz },
{ CPERF_EXTERNAL_MBUFS, parse_external_mbufs },
+ { CPERF_CYCLECOUNT_DELAY_US, parse_cyclecount_delay_us },
};
unsigned int i;
test_data->level_lst.min = RTE_COMP_LEVEL_MIN;
test_data->level_lst.max = RTE_COMP_LEVEL_MAX;
test_data->level_lst.inc = 1;
- test_data->test = CPERF_TEST_TYPE_BENCHMARK;
+ test_data->test = CPERF_TEST_TYPE_THROUGHPUT;
test_data->use_external_mbufs = 0;
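+	/* default --cc-delay-us value, also quoted in usage() */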
+ test_data->cyclecount_delay = 500;
}
int
+++ /dev/null
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2018 Intel Corporation
- */
-
-#include <rte_malloc.h>
-#include <rte_eal.h>
-#include <rte_log.h>
-#include <rte_cycles.h>
-#include <rte_compressdev.h>
-
-#include "comp_perf_test_benchmark.h"
-
-void
-cperf_benchmark_test_destructor(void *arg)
-{
- if (arg) {
- comp_perf_free_memory(
- ((struct cperf_benchmark_ctx *)arg)->ver.options,
- &((struct cperf_benchmark_ctx *)arg)->ver.mem);
- rte_free(arg);
- }
-}
-
-void *
-cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id,
- struct comp_test_data *options)
-{
- struct cperf_benchmark_ctx *ctx = NULL;
-
- ctx = rte_malloc(NULL, sizeof(struct cperf_benchmark_ctx), 0);
-
- if (ctx == NULL)
- return NULL;
-
- ctx->ver.mem.dev_id = dev_id;
- ctx->ver.mem.qp_id = qp_id;
- ctx->ver.options = options;
- ctx->ver.silent = 1; /* ver. part will be silent */
-
- if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
- && !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
- return ctx;
-
- cperf_benchmark_test_destructor(ctx);
- return NULL;
-}
-
-static int
-main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type)
-{
- struct comp_test_data *test_data = ctx->ver.options;
- struct cperf_mem_resources *mem = &ctx->ver.mem;
- uint8_t dev_id = mem->dev_id;
- uint32_t i, iter, num_iter;
- struct rte_comp_op **ops, **deq_ops;
- void *priv_xform = NULL;
- struct rte_comp_xform xform;
- struct rte_mbuf **input_bufs, **output_bufs;
- int res = 0;
- int allocated = 0;
- uint32_t out_seg_sz;
-
- if (test_data == NULL || !test_data->burst_sz) {
- RTE_LOG(ERR, USER1,
- "Unknown burst size\n");
- return -1;
- }
-
- ops = rte_zmalloc_socket(NULL,
- 2 * mem->total_bufs * sizeof(struct rte_comp_op *),
- 0, rte_socket_id());
-
- if (ops == NULL) {
- RTE_LOG(ERR, USER1,
- "Can't allocate memory for ops strucures\n");
- return -1;
- }
-
- deq_ops = &ops[mem->total_bufs];
-
- if (type == RTE_COMP_COMPRESS) {
- xform = (struct rte_comp_xform) {
- .type = RTE_COMP_COMPRESS,
- .compress = {
- .algo = RTE_COMP_ALGO_DEFLATE,
- .deflate.huffman = test_data->huffman_enc,
- .level = test_data->level,
- .window_size = test_data->window_sz,
- .chksum = RTE_COMP_CHECKSUM_NONE,
- .hash_algo = RTE_COMP_HASH_ALGO_NONE
- }
- };
- input_bufs = mem->decomp_bufs;
- output_bufs = mem->comp_bufs;
- out_seg_sz = test_data->out_seg_sz;
- } else {
- xform = (struct rte_comp_xform) {
- .type = RTE_COMP_DECOMPRESS,
- .decompress = {
- .algo = RTE_COMP_ALGO_DEFLATE,
- .chksum = RTE_COMP_CHECKSUM_NONE,
- .window_size = test_data->window_sz,
- .hash_algo = RTE_COMP_HASH_ALGO_NONE
- }
- };
- input_bufs = mem->comp_bufs;
- output_bufs = mem->decomp_bufs;
- out_seg_sz = test_data->seg_sz;
- }
-
- /* Create private xform */
- if (rte_compressdev_private_xform_create(dev_id, &xform,
- &priv_xform) < 0) {
- RTE_LOG(ERR, USER1, "Private xform could not be created\n");
- res = -1;
- goto end;
- }
-
- uint64_t tsc_start, tsc_end, tsc_duration;
-
- num_iter = test_data->num_iter;
- tsc_start = tsc_end = tsc_duration = 0;
- tsc_start = rte_rdtsc_precise();
-
- for (iter = 0; iter < num_iter; iter++) {
- uint32_t total_ops = mem->total_bufs;
- uint32_t remaining_ops = mem->total_bufs;
- uint32_t total_deq_ops = 0;
- uint32_t total_enq_ops = 0;
- uint16_t ops_unused = 0;
- uint16_t num_enq = 0;
- uint16_t num_deq = 0;
-
- while (remaining_ops > 0) {
- uint16_t num_ops = RTE_MIN(remaining_ops,
- test_data->burst_sz);
- uint16_t ops_needed = num_ops - ops_unused;
-
- /*
- * Move the unused operations from the previous
- * enqueue_burst call to the front, to maintain order
- */
- if ((ops_unused > 0) && (num_enq > 0)) {
- size_t nb_b_to_mov =
- ops_unused * sizeof(struct rte_comp_op *);
-
- memmove(ops, &ops[num_enq], nb_b_to_mov);
- }
-
- /* Allocate compression operations */
- if (ops_needed && !rte_comp_op_bulk_alloc(
- mem->op_pool,
- &ops[ops_unused],
- ops_needed)) {
- RTE_LOG(ERR, USER1,
- "Could not allocate enough operations\n");
- res = -1;
- goto end;
- }
- allocated += ops_needed;
-
- for (i = 0; i < ops_needed; i++) {
- /*
- * Calculate next buffer to attach to operation
- */
- uint32_t buf_id = total_enq_ops + i +
- ops_unused;
- uint16_t op_id = ops_unused + i;
- /* Reset all data in output buffers */
- struct rte_mbuf *m = output_bufs[buf_id];
-
- m->pkt_len = out_seg_sz * m->nb_segs;
- while (m) {
- m->data_len = m->buf_len - m->data_off;
- m = m->next;
- }
- ops[op_id]->m_src = input_bufs[buf_id];
- ops[op_id]->m_dst = output_bufs[buf_id];
- ops[op_id]->src.offset = 0;
- ops[op_id]->src.length =
- rte_pktmbuf_pkt_len(input_bufs[buf_id]);
- ops[op_id]->dst.offset = 0;
- ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
- ops[op_id]->input_chksum = buf_id;
- ops[op_id]->private_xform = priv_xform;
- }
-
- if (unlikely(test_data->perf_comp_force_stop))
- goto end;
-
- num_enq = rte_compressdev_enqueue_burst(dev_id,
- mem->qp_id, ops,
- num_ops);
- if (num_enq == 0) {
- struct rte_compressdev_stats stats;
-
- rte_compressdev_stats_get(dev_id, &stats);
- if (stats.enqueue_err_count) {
- res = -1;
- goto end;
- }
- }
-
- ops_unused = num_ops - num_enq;
- remaining_ops -= num_enq;
- total_enq_ops += num_enq;
-
- num_deq = rte_compressdev_dequeue_burst(dev_id,
- mem->qp_id,
- deq_ops,
- test_data->burst_sz);
- total_deq_ops += num_deq;
-
- if (iter == num_iter - 1) {
- for (i = 0; i < num_deq; i++) {
- struct rte_comp_op *op = deq_ops[i];
-
- if (op->status !=
- RTE_COMP_OP_STATUS_SUCCESS) {
- RTE_LOG(ERR, USER1,
- "Some operations were not successful\n");
- goto end;
- }
-
- struct rte_mbuf *m = op->m_dst;
-
- m->pkt_len = op->produced;
- uint32_t remaining_data = op->produced;
- uint16_t data_to_append;
-
- while (remaining_data > 0) {
- data_to_append =
- RTE_MIN(remaining_data,
- out_seg_sz);
- m->data_len = data_to_append;
- remaining_data -=
- data_to_append;
- m = m->next;
- }
- }
- }
- rte_mempool_put_bulk(mem->op_pool,
- (void **)deq_ops, num_deq);
- allocated -= num_deq;
- }
-
- /* Dequeue the last operations */
- while (total_deq_ops < total_ops) {
- if (unlikely(test_data->perf_comp_force_stop))
- goto end;
-
- num_deq = rte_compressdev_dequeue_burst(dev_id,
- mem->qp_id,
- deq_ops,
- test_data->burst_sz);
- if (num_deq == 0) {
- struct rte_compressdev_stats stats;
-
- rte_compressdev_stats_get(dev_id, &stats);
- if (stats.dequeue_err_count) {
- res = -1;
- goto end;
- }
- }
-
- total_deq_ops += num_deq;
-
- if (iter == num_iter - 1) {
- for (i = 0; i < num_deq; i++) {
- struct rte_comp_op *op = deq_ops[i];
-
- if (op->status !=
- RTE_COMP_OP_STATUS_SUCCESS) {
- RTE_LOG(ERR, USER1,
- "Some operations were not successful\n");
- goto end;
- }
-
- struct rte_mbuf *m = op->m_dst;
-
- m->pkt_len = op->produced;
- uint32_t remaining_data = op->produced;
- uint16_t data_to_append;
-
- while (remaining_data > 0) {
- data_to_append =
- RTE_MIN(remaining_data,
- out_seg_sz);
- m->data_len = data_to_append;
- remaining_data -=
- data_to_append;
- m = m->next;
- }
- }
- }
- rte_mempool_put_bulk(mem->op_pool,
- (void **)deq_ops, num_deq);
- allocated -= num_deq;
- }
- }
-
- tsc_end = rte_rdtsc_precise();
- tsc_duration = tsc_end - tsc_start;
-
- if (type == RTE_COMP_COMPRESS)
- ctx->comp_tsc_duration[test_data->level] =
- tsc_duration / num_iter;
- else
- ctx->decomp_tsc_duration[test_data->level] =
- tsc_duration / num_iter;
-
-end:
- rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
- rte_compressdev_private_xform_free(dev_id, priv_xform);
- rte_free(ops);
-
- if (test_data->perf_comp_force_stop) {
- RTE_LOG(ERR, USER1,
- "lcore: %d Perf. test has been aborted by user\n",
- mem->lcore_id);
- res = -1;
- }
- return res;
-}
-
-int
-cperf_benchmark_test_runner(void *test_ctx)
-{
- struct cperf_benchmark_ctx *ctx = test_ctx;
- struct comp_test_data *test_data = ctx->ver.options;
- uint32_t lcore = rte_lcore_id();
- static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
- int i, ret = EXIT_SUCCESS;
-
- ctx->ver.mem.lcore_id = lcore;
-
- /*
- * printing information about current compression thread
- */
- if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
- printf(" lcore: %u,"
- " driver name: %s,"
- " device name: %s,"
- " device id: %u,"
- " socket id: %u,"
- " queue pair id: %u\n",
- lcore,
- ctx->ver.options->driver_name,
- rte_compressdev_name_get(ctx->ver.mem.dev_id),
- ctx->ver.mem.dev_id,
- rte_compressdev_socket_id(ctx->ver.mem.dev_id),
- ctx->ver.mem.qp_id);
-
- /*
- * First the verification part is needed
- */
- if (cperf_verify_test_runner(&ctx->ver)) {
- ret = EXIT_FAILURE;
- goto end;
- }
-
- /*
- * Run the tests twice, discarding the first performance
- * results, before the cache is warmed up
- */
- for (i = 0; i < 2; i++) {
- if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) {
- ret = EXIT_FAILURE;
- goto end;
- }
- }
-
- for (i = 0; i < 2; i++) {
- if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) {
- ret = EXIT_FAILURE;
- goto end;
- }
- }
-
- ctx->comp_tsc_byte =
- (double)(ctx->comp_tsc_duration[test_data->level]) /
- test_data->input_data_sz;
-
- ctx->decomp_tsc_byte =
- (double)(ctx->decomp_tsc_duration[test_data->level]) /
- test_data->input_data_sz;
-
- ctx->comp_gbps = rte_get_tsc_hz() / ctx->comp_tsc_byte * 8 /
- 1000000000;
-
- ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 /
- 1000000000;
-
- if (rte_atomic16_test_and_set(&display_once)) {
- printf("\n%12s%6s%12s%17s%15s%16s\n",
- "lcore id", "Level", "Comp size", "Comp ratio [%]",
- "Comp [Gbps]", "Decomp [Gbps]");
- }
-
- printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n",
- ctx->ver.mem.lcore_id,
- test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio,
- ctx->comp_gbps,
- ctx->decomp_gbps);
-
-end:
- return ret;
-}
+++ /dev/null
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2018 Intel Corporation
- */
-
-#ifndef _COMP_PERF_TEST_BENCHMARK_
-#define _COMP_PERF_TEST_BENCHMARK_
-
-#include <stdint.h>
-
-#include "comp_perf_options.h"
-#include "comp_perf_test_common.h"
-#include "comp_perf_test_verify.h"
-
-struct cperf_benchmark_ctx {
- struct cperf_verify_ctx ver;
-
- /* Store TSC duration for all levels (including level 0) */
- uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
- uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
- double comp_gbps;
- double decomp_gbps;
- double comp_tsc_byte;
- double decomp_tsc_byte;
-};
-
-void
-cperf_benchmark_test_destructor(void *arg);
-
-int
-cperf_benchmark_test_runner(void *test_ctx);
-
-void *
-cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id,
- struct comp_test_data *options);
-
-#endif
#include "comp_perf.h"
#include "comp_perf_options.h"
-#include "comp_perf_test_benchmark.h"
+#include "comp_perf_test_throughput.h"
+#include "comp_perf_test_cyclecount.h"
#include "comp_perf_test_common.h"
#include "comp_perf_test_verify.h"
snprintf(pool_name, sizeof(pool_name), "op_pool_%u_qp_%u",
mem->dev_id, mem->qp_id);
+
+ /* one mempool for both src and dst mbufs */
mem->op_pool = rte_comp_op_pool_create(pool_name,
- mem->total_bufs,
- 0, 0, rte_socket_id());
+ mem->total_bufs * 2,
+ 0, 0, rte_socket_id());
if (mem->op_pool == NULL) {
RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
return -1;
}
void
-print_test_dynamics(void)
+print_test_dynamics(const struct comp_test_data *test_data)
{
uint32_t opt_total_segs = DIV_CEIL(buffer_info.input_data_sz,
MAX_SEG_SIZE);
if (buffer_info.total_buffs > 1) {
- printf("\nWarning: for the current input parameters, number"
+ if (test_data->test == CPERF_TEST_TYPE_THROUGHPUT) {
+ printf("\nWarning: for the current input parameters, number"
" of ops is higher than one, which may result"
" in sub-optimal performance.\n");
- printf("To improve the performance (for the current"
+ printf("To improve the performance (for the current"
" input data) following parameters are"
" suggested:\n");
- printf(" * Segment size: %d\n", MAX_SEG_SIZE);
- printf(" * Number of segments: %u\n", opt_total_segs);
+ printf(" * Segment size: %d\n",
+ MAX_SEG_SIZE);
+ printf(" * Number of segments: %u\n",
+ opt_total_segs);
+ }
} else if (buffer_info.total_buffs == 1) {
printf("\nInfo: there is only one op with %u segments -"
" the compression ratio is the best.\n",
prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem);
void
-print_test_dynamics(void);
+print_test_dynamics(const struct comp_test_data *test_data);
#endif /* _COMP_PERF_TEST_COMMON_H_ */
--- /dev/null
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_spinlock.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_test_cyclecount.h"
+
+struct cperf_cyclecount_ctx {
+ struct cperf_verify_ctx ver;
+
+ uint32_t ops_enq_retries;
+ uint32_t ops_deq_retries;
+
+ uint64_t duration_op;
+ uint64_t duration_enq;
+ uint64_t duration_deq;
+};
+
+void
+cperf_cyclecount_test_destructor(void *arg)
+{
+ struct cperf_cyclecount_ctx *ctx = arg;
+
+ if (arg) {
+ comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem);
+ rte_free(arg);
+ }
+}
+
+void *
+cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
+ struct comp_test_data *options)
+{
+ struct cperf_cyclecount_ctx *ctx = NULL;
+
+ ctx = rte_malloc(NULL, sizeof(struct cperf_cyclecount_ctx), 0);
+
+ if (ctx == NULL)
+ return NULL;
+
+ ctx->ver.mem.dev_id = dev_id;
+ ctx->ver.mem.qp_id = qp_id;
+ ctx->ver.options = options;
+ ctx->ver.silent = 1; /* ver. part will be silent */
+
+ if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
+ && !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
+ return ctx;
+
+ cperf_cyclecount_test_destructor(ctx);
+ return NULL;
+}
+
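+/*
+ * Measures the cost of preparing bursts of operations: ops are taken
+ * from the mempool, fully configured and put back, while the enqueue
+ * and dequeue calls themselves are only simulated, so the caller can
+ * time pure op setup.
+ */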
+static int
+cperf_cyclecount_op_setup(struct rte_comp_op **ops,
+ struct cperf_cyclecount_ctx *ctx,
+ struct rte_mbuf **input_bufs,
+ struct rte_mbuf **output_bufs,
+ void *priv_xform,
+ uint32_t out_seg_sz)
+{
+ struct comp_test_data *test_data = ctx->ver.options;
+ struct cperf_mem_resources *mem = &ctx->ver.mem;
+
+ uint32_t i, iter, num_iter;
+ int res = 0;
+ uint16_t ops_needed;
+
+ num_iter = test_data->num_iter;
+
+ for (iter = 0; iter < num_iter; iter++) {
+ uint32_t remaining_ops = mem->total_bufs;
+ uint32_t total_deq_ops = 0;
+ uint32_t total_enq_ops = 0;
+ uint16_t num_enq = 0;
+ uint16_t num_deq = 0;
+
+ while (remaining_ops > 0) {
+ uint16_t num_ops = RTE_MIN(remaining_ops,
+ test_data->burst_sz);
+ ops_needed = num_ops;
+
+ /* Allocate compression operations */
+ if (ops_needed && rte_mempool_get_bulk(
+ mem->op_pool,
+ (void **)ops,
+ ops_needed) != 0) {
+ RTE_LOG(ERR, USER1,
+ "Cyclecount: could not allocate enough operations\n");
+ res = -1;
+ goto end;
+ }
+
+ for (i = 0; i < ops_needed; i++) {
+
+				/* Calculate next buffer to attach to the operation */
+ uint32_t buf_id = total_enq_ops + i;
+ uint16_t op_id = i;
+
+ /* Reset all data in output buffers */
+ struct rte_mbuf *m = output_bufs[buf_id];
+
+ m->pkt_len = out_seg_sz * m->nb_segs;
+ while (m) {
+ m->data_len = m->buf_len - m->data_off;
+ m = m->next;
+ }
+ ops[op_id]->m_src = input_bufs[buf_id];
+ ops[op_id]->m_dst = output_bufs[buf_id];
+ ops[op_id]->src.offset = 0;
+ ops[op_id]->src.length =
+ rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+ ops[op_id]->dst.offset = 0;
+ ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+ ops[op_id]->input_chksum = buf_id;
+ ops[op_id]->private_xform = priv_xform;
+ }
+
+			/* E N Q U E U I N G */
+			/* Assume that all ops were enqueued, in place of
+			 * the real enqueue operation.
+			 */
+ num_enq = num_ops;
+
+ remaining_ops -= num_enq;
+ total_enq_ops += num_enq;
+
+			/* D E Q U E U I N G */
+			/* Assume that all ops were dequeued, in place of
+			 * the real dequeue operation.
+			 */
+ num_deq = num_ops;
+
+ total_deq_ops += num_deq;
+ rte_mempool_put_bulk(mem->op_pool,
+ (void **)ops, num_deq);
+ }
+ }
+	return res;
+end:
+	/* The failed rte_mempool_get_bulk() takes nothing from the pool,
+	 * and 'ops' is owned by the caller, which frees it on the error
+	 * path, so nothing needs to be released here.
+	 */
+	return res;
+}
+
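+/*
+ * Runs the real enqueue/dequeue path. Each burst is timestamped
+ * separately, so per-op enqueue and dequeue cycle counts (and retry
+ * counts) can be reported for compression and decompression.
+ */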
+static int
+main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type)
+{
+ struct comp_test_data *test_data = ctx->ver.options;
+ struct cperf_mem_resources *mem = &ctx->ver.mem;
+ uint8_t dev_id = mem->dev_id;
+ uint32_t i, iter, num_iter;
+ struct rte_comp_op **ops, **deq_ops;
+ void *priv_xform = NULL;
+ struct rte_comp_xform xform;
+ struct rte_mbuf **input_bufs, **output_bufs;
+ int ret, res = 0;
+ int allocated = 0;
+ uint32_t out_seg_sz;
+
+ uint64_t tsc_start, tsc_end, tsc_duration;
+
+ if (test_data == NULL || !test_data->burst_sz) {
+ RTE_LOG(ERR, USER1, "Unknown burst size\n");
+ return -1;
+ }
+ ctx->duration_enq = 0;
+ ctx->duration_deq = 0;
+ ctx->ops_enq_retries = 0;
+ ctx->ops_deq_retries = 0;
+
+ /* one array for both enqueue and dequeue */
+ ops = rte_zmalloc_socket(NULL,
+ 2 * mem->total_bufs * sizeof(struct rte_comp_op *),
+ 0, rte_socket_id());
+
+ if (ops == NULL) {
+ RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops structures\n");
+ return -1;
+ }
+
+ deq_ops = &ops[mem->total_bufs];
+
+ if (type == RTE_COMP_COMPRESS) {
+ xform = (struct rte_comp_xform) {
+ .type = RTE_COMP_COMPRESS,
+ .compress = {
+ .algo = RTE_COMP_ALGO_DEFLATE,
+ .deflate.huffman = test_data->huffman_enc,
+ .level = test_data->level,
+ .window_size = test_data->window_sz,
+ .chksum = RTE_COMP_CHECKSUM_NONE,
+ .hash_algo = RTE_COMP_HASH_ALGO_NONE
+ }
+ };
+ input_bufs = mem->decomp_bufs;
+ output_bufs = mem->comp_bufs;
+ out_seg_sz = test_data->out_seg_sz;
+ } else {
+ xform = (struct rte_comp_xform) {
+ .type = RTE_COMP_DECOMPRESS,
+ .decompress = {
+ .algo = RTE_COMP_ALGO_DEFLATE,
+ .chksum = RTE_COMP_CHECKSUM_NONE,
+ .window_size = test_data->window_sz,
+ .hash_algo = RTE_COMP_HASH_ALGO_NONE
+ }
+ };
+ input_bufs = mem->comp_bufs;
+ output_bufs = mem->decomp_bufs;
+ out_seg_sz = test_data->seg_sz;
+ }
+
+ /* Create private xform */
+ if (rte_compressdev_private_xform_create(dev_id, &xform,
+ &priv_xform) < 0) {
+ RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+ res = -1;
+ goto end;
+ }
+
+ tsc_start = rte_rdtsc_precise();
+ ret = cperf_cyclecount_op_setup(ops,
+ ctx,
+ input_bufs,
+ output_bufs,
+ priv_xform,
+ out_seg_sz);
+
+ tsc_end = rte_rdtsc_precise();
+
+	/* The ret check is postponed until after the second timestamp so
+	 * the measured section does not pay for an extra branch.
+	 */
+ if (ret < 0) {
+ RTE_LOG(ERR, USER1, "Setup function failed\n");
+ res = -1;
+ goto end;
+ }
+
+ tsc_duration = tsc_end - tsc_start;
+ ctx->duration_op = tsc_duration;
+
+ num_iter = test_data->num_iter;
+ for (iter = 0; iter < num_iter; iter++) {
+ uint32_t total_ops = mem->total_bufs;
+ uint32_t remaining_ops = mem->total_bufs;
+ uint32_t total_deq_ops = 0;
+ uint32_t total_enq_ops = 0;
+ uint16_t ops_unused = 0;
+ uint16_t num_enq = 0;
+ uint16_t num_deq = 0;
+
+ while (remaining_ops > 0) {
+ uint16_t num_ops = RTE_MIN(remaining_ops,
+ test_data->burst_sz);
+ uint16_t ops_needed = num_ops - ops_unused;
+
+ /*
+ * Move the unused operations from the previous
+ * enqueue_burst call to the front, to maintain order
+ */
+ if ((ops_unused > 0) && (num_enq > 0)) {
+ size_t nb_b_to_mov =
+ ops_unused * sizeof(struct rte_comp_op *);
+
+ memmove(ops, &ops[num_enq], nb_b_to_mov);
+ }
+
+ /* Allocate compression operations */
+			if (ops_needed && rte_mempool_get_bulk(
+						mem->op_pool,
+						(void **)&ops[ops_unused],
+						ops_needed) != 0) {
+ RTE_LOG(ERR, USER1,
+ "Could not allocate enough operations\n");
+ res = -1;
+ goto end;
+ }
+ allocated += ops_needed;
+
+ for (i = 0; i < ops_needed; i++) {
+ /*
+ * Calculate next buffer to attach to operation
+ */
+ uint32_t buf_id = total_enq_ops + i +
+ ops_unused;
+ uint16_t op_id = ops_unused + i;
+ /* Reset all data in output buffers */
+ struct rte_mbuf *m = output_bufs[buf_id];
+
+ m->pkt_len = out_seg_sz * m->nb_segs;
+ while (m) {
+ m->data_len = m->buf_len - m->data_off;
+ m = m->next;
+ }
+ ops[op_id]->m_src = input_bufs[buf_id];
+ ops[op_id]->m_dst = output_bufs[buf_id];
+ ops[op_id]->src.offset = 0;
+ ops[op_id]->src.length =
+ rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+ ops[op_id]->dst.offset = 0;
+ ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+ ops[op_id]->input_chksum = buf_id;
+ ops[op_id]->private_xform = priv_xform;
+ }
+
+ if (unlikely(test_data->perf_comp_force_stop))
+ goto end;
+
+ tsc_start = rte_rdtsc_precise();
+ num_enq = rte_compressdev_enqueue_burst(dev_id,
+ mem->qp_id, ops,
+ num_ops);
+ tsc_end = rte_rdtsc_precise();
+ tsc_duration = tsc_end - tsc_start;
+ ctx->duration_enq += tsc_duration;
+
+ if (num_enq < num_ops)
+ ctx->ops_enq_retries++;
+
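+			/* Optional delay between enqueue and dequeue
+			 * (--cc-delay-us), giving the PMD time to process
+			 * the burst.
+			 */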
+ if (test_data->cyclecount_delay)
+ rte_delay_us_block(test_data->cyclecount_delay);
+
+ if (num_enq == 0) {
+ struct rte_compressdev_stats stats;
+
+ rte_compressdev_stats_get(dev_id, &stats);
+ if (stats.enqueue_err_count) {
+ res = -1;
+ goto end;
+ }
+ }
+
+ ops_unused = num_ops - num_enq;
+ remaining_ops -= num_enq;
+ total_enq_ops += num_enq;
+
+ tsc_start = rte_rdtsc_precise();
+ num_deq = rte_compressdev_dequeue_burst(dev_id,
+ mem->qp_id,
+ deq_ops,
+ allocated);
+ tsc_end = rte_rdtsc_precise();
+ tsc_duration = tsc_end - tsc_start;
+ ctx->duration_deq += tsc_duration;
+
+ if (num_deq < allocated)
+ ctx->ops_deq_retries++;
+
+ total_deq_ops += num_deq;
+
+ if (iter == num_iter - 1) {
+ for (i = 0; i < num_deq; i++) {
+ struct rte_comp_op *op = deq_ops[i];
+
+ if (op->status !=
+ RTE_COMP_OP_STATUS_SUCCESS) {
+ RTE_LOG(ERR, USER1, "Some operations were not successful\n");
+ goto end;
+ }
+
+ struct rte_mbuf *m = op->m_dst;
+
+ m->pkt_len = op->produced;
+ uint32_t remaining_data = op->produced;
+ uint16_t data_to_append;
+
+ while (remaining_data > 0) {
+ data_to_append =
+ RTE_MIN(remaining_data,
+ out_seg_sz);
+ m->data_len = data_to_append;
+ remaining_data -=
+ data_to_append;
+ m = m->next;
+ }
+ }
+ }
+ rte_mempool_put_bulk(mem->op_pool,
+ (void **)deq_ops, num_deq);
+ allocated -= num_deq;
+ }
+
+ /* Dequeue the last operations */
+ while (total_deq_ops < total_ops) {
+ if (unlikely(test_data->perf_comp_force_stop))
+ goto end;
+
+ tsc_start = rte_rdtsc_precise();
+ num_deq = rte_compressdev_dequeue_burst(dev_id,
+ mem->qp_id,
+ deq_ops,
+ test_data->burst_sz);
+ tsc_end = rte_rdtsc_precise();
+ tsc_duration = tsc_end - tsc_start;
+ ctx->duration_deq += tsc_duration;
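+			/* every dequeue attempt in the drain loop is
+			 * counted as a retry
+			 */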
+ ctx->ops_deq_retries++;
+
+ if (num_deq == 0) {
+ struct rte_compressdev_stats stats;
+
+ rte_compressdev_stats_get(dev_id, &stats);
+ if (stats.dequeue_err_count) {
+ res = -1;
+ goto end;
+ }
+ }
+ total_deq_ops += num_deq;
+
+ if (iter == num_iter - 1) {
+ for (i = 0; i < num_deq; i++) {
+ struct rte_comp_op *op = deq_ops[i];
+
+ if (op->status !=
+ RTE_COMP_OP_STATUS_SUCCESS) {
+ RTE_LOG(ERR, USER1, "Some operations were not successful\n");
+ goto end;
+ }
+
+ struct rte_mbuf *m = op->m_dst;
+
+ m->pkt_len = op->produced;
+ uint32_t remaining_data = op->produced;
+ uint16_t data_to_append;
+
+ while (remaining_data > 0) {
+ data_to_append =
+ RTE_MIN(remaining_data,
+ out_seg_sz);
+ m->data_len = data_to_append;
+ remaining_data -=
+ data_to_append;
+ m = m->next;
+ }
+ }
+ }
+ rte_mempool_put_bulk(mem->op_pool,
+ (void **)deq_ops, num_deq);
+ allocated -= num_deq;
+ }
+ }
+ allocated = 0;
+
+end:
+ if (allocated)
+ rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
+ rte_compressdev_private_xform_free(dev_id, priv_xform);
+ rte_free(ops);
+
+ if (test_data->perf_comp_force_stop) {
+ RTE_LOG(ERR, USER1,
+ "lcore: %d Perf. test has been aborted by user\n",
+ mem->lcore_id);
+ res = -1;
+ }
+ return res;
+}
+
+int
+cperf_cyclecount_test_runner(void *test_ctx)
+{
+ struct cperf_cyclecount_ctx *ctx = test_ctx;
+ struct comp_test_data *test_data = ctx->ver.options;
+ uint32_t lcore = rte_lcore_id();
+ static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+ static rte_spinlock_t print_spinlock;
+ int i;
+
+ uint32_t ops_enq_retries_comp;
+ uint32_t ops_deq_retries_comp;
+
+ uint32_t ops_enq_retries_decomp;
+ uint32_t ops_deq_retries_decomp;
+
+ uint32_t duration_setup_per_op;
+
+ uint32_t duration_enq_per_op_comp;
+ uint32_t duration_deq_per_op_comp;
+
+ uint32_t duration_enq_per_op_decomp;
+ uint32_t duration_deq_per_op_decomp;
+
+ ctx->ver.mem.lcore_id = lcore;
+
+ /*
+ * printing information about current compression thread
+ */
+ if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
+ printf(" lcore: %u,"
+ " driver name: %s,"
+ " device name: %s,"
+ " device id: %u,"
+ " socket id: %u,"
+ " queue pair id: %u\n",
+ lcore,
+ ctx->ver.options->driver_name,
+ rte_compressdev_name_get(ctx->ver.mem.dev_id),
+ ctx->ver.mem.dev_id,
+ rte_compressdev_socket_id(ctx->ver.mem.dev_id),
+ ctx->ver.mem.qp_id);
+
+ /*
+ * First the verification part is needed
+ */
+ if (cperf_verify_test_runner(&ctx->ver))
+ return EXIT_FAILURE;
+
+ /*
+ * Run the tests twice, discarding the first performance
+ * results, before the cache is warmed up
+ */
+
+ /* C O M P R E S S */
+ for (i = 0; i < 2; i++) {
+ if (main_loop(ctx, RTE_COMP_COMPRESS) < 0)
+ return EXIT_FAILURE;
+ }
+
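+	/* main_loop() zeroes the counters on entry, so the values below
+	 * reflect only the second, warmed-up run
+	 */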
+ ops_enq_retries_comp = ctx->ops_enq_retries;
+ ops_deq_retries_comp = ctx->ops_deq_retries;
+
+ duration_enq_per_op_comp = ctx->duration_enq /
+ (ctx->ver.mem.total_bufs * test_data->num_iter);
+ duration_deq_per_op_comp = ctx->duration_deq /
+ (ctx->ver.mem.total_bufs * test_data->num_iter);
+
+ /* D E C O M P R E S S */
+ for (i = 0; i < 2; i++) {
+ if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0)
+ return EXIT_FAILURE;
+ }
+
+ ops_enq_retries_decomp = ctx->ops_enq_retries;
+ ops_deq_retries_decomp = ctx->ops_deq_retries;
+
+ duration_enq_per_op_decomp = ctx->duration_enq /
+ (ctx->ver.mem.total_bufs * test_data->num_iter);
+ duration_deq_per_op_decomp = ctx->duration_deq /
+ (ctx->ver.mem.total_bufs * test_data->num_iter);
+
+ duration_setup_per_op = ctx->duration_op /
+ (ctx->ver.mem.total_bufs * test_data->num_iter);
+
+ /* R E P O R T processing */
+ if (rte_atomic16_test_and_set(&display_once)) {
+
+ rte_spinlock_lock(&print_spinlock);
+
+ printf("\nLegend for the table\n"
+ " - Retries section: number of retries for the following operations:\n"
+ " [C-e] - compression enqueue\n"
+ " [C-d] - compression dequeue\n"
+ " [D-e] - decompression enqueue\n"
+ " [D-d] - decompression dequeue\n"
+ " - Cycles section: number of cycles per 'op' for the following operations:\n"
+			"     setup/op - memory allocation, op configuration and memory deallocation\n"
+ " [C-e] - compression enqueue\n"
+ " [C-d] - compression dequeue\n"
+ " [D-e] - decompression enqueue\n"
+ " [D-d] - decompression dequeue\n\n");
+
+ printf("\n%12s%6s%12s%17s",
+ "lcore id", "Level", "Comp size", "Comp ratio [%]");
+
+ printf(" |%10s %6s %8s %6s %8s",
+ " Retries:",
+ "[C-e]", "[C-d]",
+ "[D-e]", "[D-d]");
+
+ printf(" |%9s %9s %9s %9s %9s %9s\n",
+ " Cycles:",
+ "setup/op",
+ "[C-e]", "[C-d]",
+ "[D-e]", "[D-d]");
+
+ rte_spinlock_unlock(&print_spinlock);
+ }
+
+ rte_spinlock_lock(&print_spinlock);
+
+ printf("%12u"
+ "%6u"
+ "%12zu"
+ "%17.2f",
+ ctx->ver.mem.lcore_id,
+ test_data->level,
+ ctx->ver.comp_data_sz,
+ ctx->ver.ratio);
+
+ printf(" |%10s %6u %8u %6u %8u",
+ " ",
+ ops_enq_retries_comp,
+ ops_deq_retries_comp,
+ ops_enq_retries_decomp,
+ ops_deq_retries_decomp);
+
+ printf(" |%9s %9u %9u %9u %9u %9u\n",
+ " ",
+ duration_setup_per_op,
+ duration_enq_per_op_comp,
+ duration_deq_per_op_comp,
+ duration_enq_per_op_decomp,
+ duration_deq_per_op_decomp);
+
+ rte_spinlock_unlock(&print_spinlock);
+
+ return EXIT_SUCCESS;
+}
--- /dev/null
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_CYCLECOUNT_
+#define _COMP_PERF_TEST_CYCLECOUNT_
+
+#include <stdint.h>
+
+#include "comp_perf_options.h"
+#include "comp_perf_test_common.h"
+#include "comp_perf_test_verify.h"
+
+void
+cperf_cyclecount_test_destructor(void *arg);
+
+int
+cperf_cyclecount_test_runner(void *test_ctx);
+
+void *
+cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
+ struct comp_test_data *options);
+
+#endif
--- /dev/null
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_test_throughput.h"
+
+void
+cperf_throughput_test_destructor(void *arg)
+{
+ if (arg) {
+ comp_perf_free_memory(
+ ((struct cperf_benchmark_ctx *)arg)->ver.options,
+ &((struct cperf_benchmark_ctx *)arg)->ver.mem);
+ rte_free(arg);
+ }
+}
+
+void *
+cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
+ struct comp_test_data *options)
+{
+ struct cperf_benchmark_ctx *ctx = NULL;
+
+ ctx = rte_malloc(NULL, sizeof(struct cperf_benchmark_ctx), 0);
+
+ if (ctx == NULL)
+ return NULL;
+
+ ctx->ver.mem.dev_id = dev_id;
+ ctx->ver.mem.qp_id = qp_id;
+ ctx->ver.options = options;
+ ctx->ver.silent = 1; /* ver. part will be silent */
+
+ if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
+ && !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
+ return ctx;
+
+ cperf_throughput_test_destructor(ctx);
+ return NULL;
+}
+
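+/*
+ * Times num_iter full compression or decompression passes over all
+ * buffers with rte_rdtsc_precise() and stores the average TSC duration
+ * per iteration for the current level.
+ */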
+static int
+main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type)
+{
+ struct comp_test_data *test_data = ctx->ver.options;
+ struct cperf_mem_resources *mem = &ctx->ver.mem;
+ uint8_t dev_id = mem->dev_id;
+ uint32_t i, iter, num_iter;
+ struct rte_comp_op **ops, **deq_ops;
+ void *priv_xform = NULL;
+ struct rte_comp_xform xform;
+ struct rte_mbuf **input_bufs, **output_bufs;
+ int res = 0;
+ int allocated = 0;
+ uint32_t out_seg_sz;
+
+ if (test_data == NULL || !test_data->burst_sz) {
+ RTE_LOG(ERR, USER1,
+ "Unknown burst size\n");
+ return -1;
+ }
+
+ ops = rte_zmalloc_socket(NULL,
+ 2 * mem->total_bufs * sizeof(struct rte_comp_op *),
+ 0, rte_socket_id());
+
+ if (ops == NULL) {
+ RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops structures\n");
+ return -1;
+ }
+
+ deq_ops = &ops[mem->total_bufs];
+
+ if (type == RTE_COMP_COMPRESS) {
+ xform = (struct rte_comp_xform) {
+ .type = RTE_COMP_COMPRESS,
+ .compress = {
+ .algo = RTE_COMP_ALGO_DEFLATE,
+ .deflate.huffman = test_data->huffman_enc,
+ .level = test_data->level,
+ .window_size = test_data->window_sz,
+ .chksum = RTE_COMP_CHECKSUM_NONE,
+ .hash_algo = RTE_COMP_HASH_ALGO_NONE
+ }
+ };
+ input_bufs = mem->decomp_bufs;
+ output_bufs = mem->comp_bufs;
+ out_seg_sz = test_data->out_seg_sz;
+ } else {
+ xform = (struct rte_comp_xform) {
+ .type = RTE_COMP_DECOMPRESS,
+ .decompress = {
+ .algo = RTE_COMP_ALGO_DEFLATE,
+ .chksum = RTE_COMP_CHECKSUM_NONE,
+ .window_size = test_data->window_sz,
+ .hash_algo = RTE_COMP_HASH_ALGO_NONE
+ }
+ };
+ input_bufs = mem->comp_bufs;
+ output_bufs = mem->decomp_bufs;
+ out_seg_sz = test_data->seg_sz;
+ }
+
+ /* Create private xform */
+ if (rte_compressdev_private_xform_create(dev_id, &xform,
+ &priv_xform) < 0) {
+ RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+ res = -1;
+ goto end;
+ }
+
+ uint64_t tsc_start, tsc_end, tsc_duration;
+
+ num_iter = test_data->num_iter;
+ tsc_start = tsc_end = tsc_duration = 0;
+ tsc_start = rte_rdtsc_precise();
+
+ for (iter = 0; iter < num_iter; iter++) {
+ uint32_t total_ops = mem->total_bufs;
+ uint32_t remaining_ops = mem->total_bufs;
+ uint32_t total_deq_ops = 0;
+ uint32_t total_enq_ops = 0;
+ uint16_t ops_unused = 0;
+ uint16_t num_enq = 0;
+ uint16_t num_deq = 0;
+
+ while (remaining_ops > 0) {
+ uint16_t num_ops = RTE_MIN(remaining_ops,
+ test_data->burst_sz);
+ uint16_t ops_needed = num_ops - ops_unused;
+
+ /*
+ * Move the unused operations from the previous
+ * enqueue_burst call to the front, to maintain order
+ */
+ if ((ops_unused > 0) && (num_enq > 0)) {
+ size_t nb_b_to_mov =
+ ops_unused * sizeof(struct rte_comp_op *);
+
+ memmove(ops, &ops[num_enq], nb_b_to_mov);
+ }
+
+ /* Allocate compression operations */
+ if (ops_needed && !rte_comp_op_bulk_alloc(
+ mem->op_pool,
+ &ops[ops_unused],
+ ops_needed)) {
+ RTE_LOG(ERR, USER1,
+ "Could not allocate enough operations\n");
+ res = -1;
+ goto end;
+ }
+ allocated += ops_needed;
+
+ for (i = 0; i < ops_needed; i++) {
+ /*
+ * Calculate next buffer to attach to operation
+ */
+ uint32_t buf_id = total_enq_ops + i +
+ ops_unused;
+ uint16_t op_id = ops_unused + i;
+ /* Reset all data in output buffers */
+ struct rte_mbuf *m = output_bufs[buf_id];
+
+ m->pkt_len = out_seg_sz * m->nb_segs;
+ while (m) {
+ m->data_len = m->buf_len - m->data_off;
+ m = m->next;
+ }
+ ops[op_id]->m_src = input_bufs[buf_id];
+ ops[op_id]->m_dst = output_bufs[buf_id];
+ ops[op_id]->src.offset = 0;
+ ops[op_id]->src.length =
+ rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+ ops[op_id]->dst.offset = 0;
+ ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+ ops[op_id]->input_chksum = buf_id;
+ ops[op_id]->private_xform = priv_xform;
+ }
+
+ if (unlikely(test_data->perf_comp_force_stop))
+ goto end;
+
+ num_enq = rte_compressdev_enqueue_burst(dev_id,
+ mem->qp_id, ops,
+ num_ops);
+ if (num_enq == 0) {
+ struct rte_compressdev_stats stats;
+
+ rte_compressdev_stats_get(dev_id, &stats);
+ if (stats.enqueue_err_count) {
+ res = -1;
+ goto end;
+ }
+ }
+
+ ops_unused = num_ops - num_enq;
+ remaining_ops -= num_enq;
+ total_enq_ops += num_enq;
+
+ num_deq = rte_compressdev_dequeue_burst(dev_id,
+ mem->qp_id,
+ deq_ops,
+ test_data->burst_sz);
+ total_deq_ops += num_deq;
+
+ if (iter == num_iter - 1) {
+ for (i = 0; i < num_deq; i++) {
+ struct rte_comp_op *op = deq_ops[i];
+
+ if (op->status !=
+ RTE_COMP_OP_STATUS_SUCCESS) {
+ RTE_LOG(ERR, USER1,
+ "Some operations were not successful\n");
+ goto end;
+ }
+
+ struct rte_mbuf *m = op->m_dst;
+
+ m->pkt_len = op->produced;
+ uint32_t remaining_data = op->produced;
+ uint16_t data_to_append;
+
+ while (remaining_data > 0) {
+ data_to_append =
+ RTE_MIN(remaining_data,
+ out_seg_sz);
+ m->data_len = data_to_append;
+ remaining_data -=
+ data_to_append;
+ m = m->next;
+ }
+ }
+ }
+ rte_mempool_put_bulk(mem->op_pool,
+ (void **)deq_ops, num_deq);
+ allocated -= num_deq;
+ }
+
+ /* Dequeue the last operations */
+ while (total_deq_ops < total_ops) {
+ if (unlikely(test_data->perf_comp_force_stop))
+ goto end;
+
+ num_deq = rte_compressdev_dequeue_burst(dev_id,
+ mem->qp_id,
+ deq_ops,
+ test_data->burst_sz);
+ if (num_deq == 0) {
+ struct rte_compressdev_stats stats;
+
+ rte_compressdev_stats_get(dev_id, &stats);
+ if (stats.dequeue_err_count) {
+ res = -1;
+ goto end;
+ }
+ }
+
+ total_deq_ops += num_deq;
+
+ if (iter == num_iter - 1) {
+ for (i = 0; i < num_deq; i++) {
+ struct rte_comp_op *op = deq_ops[i];
+
+ if (op->status !=
+ RTE_COMP_OP_STATUS_SUCCESS) {
+ RTE_LOG(ERR, USER1,
+ "Some operations were not successful\n");
+ goto end;
+ }
+
+ struct rte_mbuf *m = op->m_dst;
+
+ m->pkt_len = op->produced;
+ uint32_t remaining_data = op->produced;
+ uint16_t data_to_append;
+
+ while (remaining_data > 0) {
+ data_to_append =
+ RTE_MIN(remaining_data,
+ out_seg_sz);
+ m->data_len = data_to_append;
+ remaining_data -=
+ data_to_append;
+ m = m->next;
+ }
+ }
+ }
+ rte_mempool_put_bulk(mem->op_pool,
+ (void **)deq_ops, num_deq);
+ allocated -= num_deq;
+ }
+ }
+
+ tsc_end = rte_rdtsc_precise();
+ tsc_duration = tsc_end - tsc_start;
+
+ if (type == RTE_COMP_COMPRESS)
+ ctx->comp_tsc_duration[test_data->level] =
+ tsc_duration / num_iter;
+ else
+ ctx->decomp_tsc_duration[test_data->level] =
+ tsc_duration / num_iter;
+
+end:
+ rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
+ rte_compressdev_private_xform_free(dev_id, priv_xform);
+ rte_free(ops);
+
+ if (test_data->perf_comp_force_stop) {
+ RTE_LOG(ERR, USER1,
+ "lcore: %d Perf. test has been aborted by user\n",
+ mem->lcore_id);
+ res = -1;
+ }
+ return res;
+}
+
+int
+cperf_throughput_test_runner(void *test_ctx)
+{
+ struct cperf_benchmark_ctx *ctx = test_ctx;
+ struct comp_test_data *test_data = ctx->ver.options;
+ uint32_t lcore = rte_lcore_id();
+ static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+ int i, ret = EXIT_SUCCESS;
+
+ ctx->ver.mem.lcore_id = lcore;
+
+ /*
+ * printing information about current compression thread
+ */
+ if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
+ printf(" lcore: %u,"
+ " driver name: %s,"
+ " device name: %s,"
+ " device id: %u,"
+ " socket id: %u,"
+ " queue pair id: %u\n",
+ lcore,
+ ctx->ver.options->driver_name,
+ rte_compressdev_name_get(ctx->ver.mem.dev_id),
+ ctx->ver.mem.dev_id,
+ rte_compressdev_socket_id(ctx->ver.mem.dev_id),
+ ctx->ver.mem.qp_id);
+
+ /*
+ * First the verification part is needed
+ */
+ if (cperf_verify_test_runner(&ctx->ver)) {
+ ret = EXIT_FAILURE;
+ goto end;
+ }
+
+ /*
+ * Run the tests twice, discarding the first performance
+ * results, before the cache is warmed up
+ */
+ for (i = 0; i < 2; i++) {
+ if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) {
+ ret = EXIT_FAILURE;
+ goto end;
+ }
+ }
+
+ for (i = 0; i < 2; i++) {
+ if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) {
+ ret = EXIT_FAILURE;
+ goto end;
+ }
+ }
+
+ ctx->comp_tsc_byte =
+ (double)(ctx->comp_tsc_duration[test_data->level]) /
+ test_data->input_data_sz;
+
+ ctx->decomp_tsc_byte =
+ (double)(ctx->decomp_tsc_duration[test_data->level]) /
+ test_data->input_data_sz;
+
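+	/* tsc_hz [cycles/s] / tsc_byte [cycles/byte] = bytes/s;
+	 * the x8 / 1e9 factor converts bytes/s to Gbps
+	 */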
+ ctx->comp_gbps = rte_get_tsc_hz() / ctx->comp_tsc_byte * 8 /
+ 1000000000;
+
+ ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 /
+ 1000000000;
+
+ if (rte_atomic16_test_and_set(&display_once)) {
+ printf("\n%12s%6s%12s%17s%15s%16s\n",
+ "lcore id", "Level", "Comp size", "Comp ratio [%]",
+ "Comp [Gbps]", "Decomp [Gbps]");
+ }
+
+ printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n",
+ ctx->ver.mem.lcore_id,
+ test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio,
+ ctx->comp_gbps,
+ ctx->decomp_gbps);
+
+end:
+ return ret;
+}
--- /dev/null
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_THROUGHPUT_
+#define _COMP_PERF_TEST_THROUGHPUT_
+
+#include <stdint.h>
+
+#include "comp_perf_options.h"
+#include "comp_perf_test_common.h"
+#include "comp_perf_test_verify.h"
+
+struct cperf_benchmark_ctx {
+ struct cperf_verify_ctx ver;
+
+ /* Store TSC duration for all levels (including level 0) */
+ uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+ uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+ double comp_gbps;
+ double decomp_gbps;
+ double comp_tsc_byte;
+ double decomp_tsc_byte;
+};
+
+void
+cperf_throughput_test_destructor(void *arg);
+
+int
+cperf_throughput_test_runner(void *test_ctx);
+
+void *
+cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
+ struct comp_test_data *options);
+
+#endif
main_loop(struct cperf_verify_ctx *ctx, enum rte_comp_xform_type type)
{
struct comp_test_data *test_data = ctx->options;
- uint8_t *output_data_ptr;
- size_t *output_data_sz;
+ uint8_t *output_data_ptr = NULL;
+ size_t *output_data_sz = NULL;
struct cperf_mem_resources *mem = &ctx->mem;
uint8_t dev_id = mem->dev_id;
#include <rte_log.h>
#include <rte_compressdev.h>
-#include "comp_perf_options.h"
-#include "comp_perf_test_verify.h"
-#include "comp_perf_test_benchmark.h"
#include "comp_perf.h"
+#include "comp_perf_options.h"
#include "comp_perf_test_common.h"
+#include "comp_perf_test_cyclecount.h"
+#include "comp_perf_test_throughput.h"
+#include "comp_perf_test_verify.h"
#define NUM_MAX_XFORMS 16
#define NUM_MAX_INFLIGHT_OPS 512
__extension__
const char *comp_perf_test_type_strs[] = {
- [CPERF_TEST_TYPE_BENCHMARK] = "benchmark",
- [CPERF_TEST_TYPE_VERIFY] = "verify"
+ [CPERF_TEST_TYPE_THROUGHPUT] = "throughput",
+ [CPERF_TEST_TYPE_VERIFY] = "verify",
+ [CPERF_TEST_TYPE_PMDCC] = "pmd-cyclecount"
};
__extension__
static const struct cperf_test cperf_testmap[] = {
- [CPERF_TEST_TYPE_BENCHMARK] = {
- cperf_benchmark_test_constructor,
- cperf_benchmark_test_runner,
- cperf_benchmark_test_destructor
+ [CPERF_TEST_TYPE_THROUGHPUT] = {
+ cperf_throughput_test_constructor,
+ cperf_throughput_test_runner,
+ cperf_throughput_test_destructor
},
[CPERF_TEST_TYPE_VERIFY] = {
cperf_verify_test_constructor,
cperf_verify_test_runner,
cperf_verify_test_destructor
+ },
+
+ [CPERF_TEST_TYPE_PMDCC] = {
+ cperf_cyclecount_test_constructor,
+ cperf_cyclecount_test_runner,
+ cperf_cyclecount_test_destructor
}
};
enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
if (enabled_cdev_count == 0) {
- RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
+	RTE_LOG(ERR, USER1, "No compress devices of type %s available,"
+		" please check the device list specified in the EAL options\n",
test_data->driver_name);
return -EINVAL;
}
data += data_to_read;
}
+ printf("\n");
if (test_data->input_data_sz > actual_file_sz)
RTE_LOG(INFO, USER1,
"%zu bytes read from file %s, extending the file %.2f times\n",
else
test_data->level = test_data->level_lst.list[0];
- printf("App uses socket: %u\n", rte_socket_id());
+ printf("\nApp uses socket: %u\n", rte_socket_id());
printf("Burst size = %u\n", test_data->burst_sz);
printf("Input data size = %zu\n", test_data->input_data_sz);
+ if (test_data->test == CPERF_TEST_TYPE_PMDCC)
+ printf("Cycle-count delay = %u [us]\n",
+ test_data->cyclecount_delay);
test_data->cleanup = ST_DURING_TEST;
total_nb_qps = nb_compressdevs * test_data->nb_qps;
i++;
}
- print_test_dynamics(); /* constructors must be executed first */
+ print_test_dynamics(test_data);
while (test_data->level <= test_data->level_lst.max) {
}
__rte_weak void *
-cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
+cperf_cyclecount_test_constructor(uint8_t dev_id __rte_unused,
+ uint16_t qp_id __rte_unused,
+ struct comp_test_data *options __rte_unused)
+{
+ RTE_LOG(INFO, USER1, "Cycle count test is not supported yet\n");
+ return NULL;
+}
+
+__rte_weak void
+cperf_cyclecount_test_destructor(void *arg __rte_unused)
+{
+	RTE_LOG(INFO, USER1, "Cycle count test destructor called unexpectedly\n");
+}
+
+__rte_weak int
+cperf_cyclecount_test_runner(void *test_ctx __rte_unused)
+{
+ return 0;
+}
+
+__rte_weak void *
+cperf_throughput_test_constructor(uint8_t dev_id __rte_unused,
uint16_t qp_id __rte_unused,
struct comp_test_data *options __rte_unused)
{
}
__rte_weak void
-cperf_benchmark_test_destructor(void *arg __rte_unused)
+cperf_throughput_test_destructor(void *arg __rte_unused)
{
}
__rte_weak int
-cperf_benchmark_test_runner(void *test_ctx __rte_unused)
+cperf_throughput_test_runner(void *test_ctx __rte_unused)
{
return 0;
}
sources = files('comp_perf_options_parse.c',
'main.c',
'comp_perf_test_verify.c',
- 'comp_perf_test_benchmark.c',
+ 'comp_perf_test_throughput.c',
+ 'comp_perf_test_cyclecount.c',
'comp_perf_test_common.c')
deps = ['compressdev']