bbdev: enhance throughput test
authorKamil Chalupnik <kamilx.chalupnik@intel.com>
Fri, 7 Dec 2018 15:15:32 +0000 (16:15 +0100)
committerAkhil Goyal <akhil.goyal@nxp.com>
Wed, 19 Dec 2018 10:19:10 +0000 (11:19 +0100)
Improvements added to throughput test:
- test is run in loop (number of iterations is specified by
TEST_REPETITIONS define) which ensures more accurate results
- length of input data is calculated based on amount of CBs in TB
- maximum number of decoding iterations is gathered from results
- added new functions responsible for printing results
- small fixes for memory management

Signed-off-by: Kamil Chalupnik <kamilx.chalupnik@intel.com>
Acked-by: Amr Mokhtar <amr.mokhtar@intel.com>
app/test-bbdev/main.c
app/test-bbdev/test_bbdev_perf.c
app/test-bbdev/test_bbdev_vector.c
lib/librte_bbdev/rte_bbdev_op.h

index 41b54bb..7af2522 100644 (file)
@@ -316,8 +316,6 @@ main(int argc, char **argv)
                return 1;
        }
 
-       rte_log_set_global_level(RTE_LOG_INFO);
-
        /* If no argument provided - run all tests */
        if (test_params.num_tests == 0)
                return run_all_tests();
index bf97edb..a25e3a7 100644 (file)
@@ -23,6 +23,7 @@
 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
 
 #define MAX_QUEUES RTE_MAX_LCORE
+#define TEST_REPETITIONS 1000
 
 #define OPS_CACHE_SIZE 256U
 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
@@ -77,8 +78,9 @@ struct thread_params {
        uint8_t dev_id;
        uint16_t queue_id;
        uint64_t start_time;
-       double mops;
+       double ops_per_sec;
        double mbps;
+       uint8_t iter_count;
        rte_atomic16_t nb_dequeued;
        rte_atomic16_t processing_status;
        struct test_op_params *op_params;
@@ -757,6 +759,8 @@ copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
                                        turbo_dec->tb_params.c_neg;
                        ops[i]->turbo_dec.tb_params.cab =
                                        turbo_dec->tb_params.cab;
+                       ops[i]->turbo_dec.tb_params.r =
+                                       turbo_dec->tb_params.r;
                } else {
                        ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
                        ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
@@ -883,47 +887,6 @@ validate_op_chain(struct rte_bbdev_op_data *op,
        return TEST_SUCCESS;
 }
 
-static int
-validate_dec_buffers(struct rte_bbdev_dec_op *ref_op, struct test_buffers *bufs,
-               const uint16_t num_to_process)
-{
-       int i;
-
-       struct op_data_entries *hard_data_orig =
-                       &test_vector.entries[DATA_HARD_OUTPUT];
-       struct op_data_entries *soft_data_orig =
-                       &test_vector.entries[DATA_SOFT_OUTPUT];
-
-       for (i = 0; i < num_to_process; i++) {
-               TEST_ASSERT_SUCCESS(validate_op_chain(&bufs->hard_outputs[i],
-                               hard_data_orig),
-                               "Hard output buffers are not equal");
-               if (ref_op->turbo_dec.op_flags &
-                               RTE_BBDEV_TURBO_SOFT_OUTPUT)
-                       TEST_ASSERT_SUCCESS(validate_op_chain(
-                                       &bufs->soft_outputs[i],
-                                       soft_data_orig),
-                                       "Soft output buffers are not equal");
-       }
-
-       return TEST_SUCCESS;
-}
-
-static int
-validate_enc_buffers(struct test_buffers *bufs, const uint16_t num_to_process)
-{
-       int i;
-
-       struct op_data_entries *hard_data_orig =
-                       &test_vector.entries[DATA_HARD_OUTPUT];
-
-       for (i = 0; i < num_to_process; i++)
-               TEST_ASSERT_SUCCESS(validate_op_chain(&bufs->hard_outputs[i],
-                               hard_data_orig), "");
-
-       return TEST_SUCCESS;
-}
-
 static int
 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
                struct rte_bbdev_dec_op *ref_op, const int vector_mask)
@@ -1016,6 +979,44 @@ create_reference_enc_op(struct rte_bbdev_enc_op *op)
                                entry->segments[i].length;
 }
 
+static uint32_t
+calc_dec_TB_size(struct rte_bbdev_dec_op *op)
+{
+       uint8_t i;
+       uint32_t c, r, tb_size = 0;
+
+       if (op->turbo_dec.code_block_mode) {
+               tb_size = op->turbo_dec.tb_params.k_neg;
+       } else {
+               c = op->turbo_dec.tb_params.c;
+               r = op->turbo_dec.tb_params.r;
+               for (i = 0; i < c-r; i++)
+                       tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
+                               op->turbo_dec.tb_params.k_neg :
+                               op->turbo_dec.tb_params.k_pos;
+       }
+       return tb_size;
+}
+
+static uint32_t
+calc_enc_TB_size(struct rte_bbdev_enc_op *op)
+{
+       uint8_t i;
+       uint32_t c, r, tb_size = 0;
+
+       if (op->turbo_enc.code_block_mode) {
+               tb_size = op->turbo_enc.tb_params.k_neg;
+       } else {
+               c = op->turbo_enc.tb_params.c;
+               r = op->turbo_enc.tb_params.r;
+               for (i = 0; i < c-r; i++)
+                       tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
+                               op->turbo_enc.tb_params.k_neg :
+                               op->turbo_enc.tb_params.k_pos;
+       }
+       return tb_size;
+}
+
 static int
 init_test_op_params(struct test_op_params *op_params,
                enum rte_bbdev_op_type op_type, const int expected_status,
@@ -1163,17 +1164,13 @@ dequeue_event_callback(uint16_t dev_id,
        int ret;
        uint16_t i;
        uint64_t total_time;
-       uint16_t deq, burst_sz, num_to_process;
+       uint16_t deq, burst_sz, num_ops;
        uint16_t queue_id = INVALID_QUEUE_ID;
        struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
        struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
-       struct test_buffers *bufs;
        struct rte_bbdev_info info;
 
-       /* Input length in bytes, million operations per second,
-        * million bits per second.
-        */
-       double in_len;
+       double tb_len_bits;
 
        struct thread_params *tp = cb_arg;
        RTE_SET_USED(ret_param);
@@ -1198,7 +1195,7 @@ dequeue_event_callback(uint16_t dev_id,
        }
 
        burst_sz = tp->op_params->burst_sz;
-       num_to_process = tp->op_params->num_to_process;
+       num_ops = tp->op_params->num_to_process;
 
        if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
                deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id, dec_ops,
@@ -1218,7 +1215,7 @@ dequeue_event_callback(uint16_t dev_id,
                return;
        }
 
-       if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_to_process) {
+       if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
                rte_atomic16_add(&tp->nb_dequeued, deq);
                return;
        }
@@ -1227,14 +1224,18 @@ dequeue_event_callback(uint16_t dev_id,
 
        rte_bbdev_info_get(dev_id, &info);
 
-       bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
-
        ret = TEST_SUCCESS;
-       if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
-               ret = validate_dec_buffers(tp->op_params->ref_dec_op, bufs,
-                               num_to_process);
-       else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
-               ret = validate_enc_buffers(bufs, num_to_process);
+
+       if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
+               struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
+               ret = validate_dec_op(dec_ops, num_ops, ref_op,
+                               tp->op_params->vector_mask);
+               rte_bbdev_dec_op_free_bulk(dec_ops, deq);
+       } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
+               struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
+               ret = validate_enc_op(enc_ops, num_ops, ref_op);
+               rte_bbdev_enc_op_free_bulk(enc_ops, deq);
+       }
 
        if (ret) {
                printf("Buffers validation failed\n");
@@ -1243,13 +1244,13 @@ dequeue_event_callback(uint16_t dev_id,
 
        switch (test_vector.op_type) {
        case RTE_BBDEV_OP_TURBO_DEC:
-               in_len = tp->op_params->ref_dec_op->turbo_dec.input.length;
+               tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
                break;
        case RTE_BBDEV_OP_TURBO_ENC:
-               in_len = tp->op_params->ref_enc_op->turbo_enc.input.length;
+               tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
                break;
        case RTE_BBDEV_OP_NONE:
-               in_len = 0.0;
+               tb_len_bits = 0.0;
                break;
        default:
                printf("Unknown op type: %d\n", test_vector.op_type);
@@ -1257,9 +1258,9 @@ dequeue_event_callback(uint16_t dev_id,
                return;
        }
 
-       tp->mops = ((double)num_to_process / 1000000.0) /
+       tp->ops_per_sec = ((double)num_ops) /
                        ((double)total_time / (double)rte_get_tsc_hz());
-       tp->mbps = ((double)num_to_process * in_len * 8 / 1000000.0) /
+       tp->mbps = (((double)(num_ops * tb_len_bits)) / 1000000.0) /
                        ((double)total_time / (double)rte_get_tsc_hz());
 
        rte_atomic16_add(&tp->nb_dequeued, deq);
@@ -1270,14 +1271,14 @@ throughput_intr_lcore_dec(void *arg)
 {
        struct thread_params *tp = arg;
        unsigned int enqueued;
-       struct rte_bbdev_dec_op *ops[MAX_BURST];
        const uint16_t queue_id = tp->queue_id;
        const uint16_t burst_sz = tp->op_params->burst_sz;
        const uint16_t num_to_process = tp->op_params->num_to_process;
+       struct rte_bbdev_dec_op *ops[num_to_process];
        struct test_buffers *bufs = NULL;
-       unsigned int allocs_failed = 0;
        struct rte_bbdev_info info;
        int ret;
+       uint16_t num_to_enq;
 
        TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
                        "BURST_SIZE should be <= %u", MAX_BURST);
@@ -1287,6 +1288,11 @@ throughput_intr_lcore_dec(void *arg)
                        tp->dev_id, queue_id);
 
        rte_bbdev_info_get(tp->dev_id, &info);
+
+       TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
+                       "NUM_OPS cannot exceed %u for this device",
+                       info.drv.queue_size_lim);
+
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
        rte_atomic16_clear(&tp->processing_status);
@@ -1295,36 +1301,27 @@ throughput_intr_lcore_dec(void *arg)
        while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
                rte_pause();
 
+       ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
+                               num_to_process);
+       TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
+                       num_to_process);
+       if (test_vector.op_type != RTE_BBDEV_OP_NONE)
+               copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
+                               bufs->hard_outputs, bufs->soft_outputs,
+                               tp->op_params->ref_dec_op);
+
        tp->start_time = rte_rdtsc_precise();
        for (enqueued = 0; enqueued < num_to_process;) {
 
-               uint16_t num_to_enq = burst_sz;
+               num_to_enq = burst_sz;
 
                if (unlikely(num_to_process - enqueued < num_to_enq))
                        num_to_enq = num_to_process - enqueued;
 
-               ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
-                               num_to_enq);
-               if (ret != 0) {
-                       allocs_failed++;
-                       continue;
-               }
-
-               if (test_vector.op_type != RTE_BBDEV_OP_NONE)
-                       copy_reference_dec_op(ops, num_to_enq, enqueued,
-                                       bufs->inputs,
-                                       bufs->hard_outputs,
-                                       bufs->soft_outputs,
-                                       tp->op_params->ref_dec_op);
-
-               enqueued += rte_bbdev_enqueue_dec_ops(tp->dev_id, queue_id, ops,
-                               num_to_enq);
+               enqueued += rte_bbdev_enqueue_dec_ops(tp->dev_id, queue_id,
+                               &ops[enqueued], num_to_enq);
        }
 
-       if (allocs_failed > 0)
-               printf("WARNING: op allocations failed: %u times\n",
-                               allocs_failed);
-
        return TEST_SUCCESS;
 }
 
@@ -1333,14 +1330,14 @@ throughput_intr_lcore_enc(void *arg)
 {
        struct thread_params *tp = arg;
        unsigned int enqueued;
-       struct rte_bbdev_enc_op *ops[MAX_BURST];
        const uint16_t queue_id = tp->queue_id;
        const uint16_t burst_sz = tp->op_params->burst_sz;
        const uint16_t num_to_process = tp->op_params->num_to_process;
+       struct rte_bbdev_enc_op *ops[num_to_process];
        struct test_buffers *bufs = NULL;
-       unsigned int allocs_failed = 0;
        struct rte_bbdev_info info;
        int ret;
+       uint16_t num_to_enq;
 
        TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
                        "BURST_SIZE should be <= %u", MAX_BURST);
@@ -1350,6 +1347,11 @@ throughput_intr_lcore_enc(void *arg)
                        tp->dev_id, queue_id);
 
        rte_bbdev_info_get(tp->dev_id, &info);
+
+       TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
+                       "NUM_OPS cannot exceed %u for this device",
+                       info.drv.queue_size_lim);
+
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
        rte_atomic16_clear(&tp->processing_status);
@@ -1358,35 +1360,26 @@ throughput_intr_lcore_enc(void *arg)
        while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
                rte_pause();
 
+       ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
+                       num_to_process);
+       TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
+                       num_to_process);
+       if (test_vector.op_type != RTE_BBDEV_OP_NONE)
+               copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
+                               bufs->hard_outputs, tp->op_params->ref_enc_op);
+
        tp->start_time = rte_rdtsc_precise();
        for (enqueued = 0; enqueued < num_to_process;) {
 
-               uint16_t num_to_enq = burst_sz;
+               num_to_enq = burst_sz;
 
                if (unlikely(num_to_process - enqueued < num_to_enq))
                        num_to_enq = num_to_process - enqueued;
 
-               ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
-                               num_to_enq);
-               if (ret != 0) {
-                       allocs_failed++;
-                       continue;
-               }
-
-               if (test_vector.op_type != RTE_BBDEV_OP_NONE)
-                       copy_reference_enc_op(ops, num_to_enq, enqueued,
-                                       bufs->inputs,
-                                       bufs->hard_outputs,
-                                       tp->op_params->ref_enc_op);
-
-               enqueued += rte_bbdev_enqueue_enc_ops(tp->dev_id, queue_id, ops,
-                               num_to_enq);
+               enqueued += rte_bbdev_enqueue_enc_ops(tp->dev_id, queue_id,
+                               &ops[enqueued], num_to_enq);
        }
 
-       if (allocs_failed > 0)
-               printf("WARNING: op allocations failed: %u times\n",
-                               allocs_failed);
-
        return TEST_SUCCESS;
 }
 
@@ -1394,86 +1387,97 @@ static int
 throughput_pmd_lcore_dec(void *arg)
 {
        struct thread_params *tp = arg;
-       unsigned int enqueued, dequeued;
-       struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
-       uint64_t total_time, start_time;
+       uint16_t enq, deq;
+       uint64_t total_time = 0, start_time;
        const uint16_t queue_id = tp->queue_id;
        const uint16_t burst_sz = tp->op_params->burst_sz;
-       const uint16_t num_to_process = tp->op_params->num_to_process;
+       const uint16_t num_ops = tp->op_params->num_to_process;
+       struct rte_bbdev_dec_op *ops_enq[num_ops];
+       struct rte_bbdev_dec_op *ops_deq[num_ops];
        struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
        struct test_buffers *bufs = NULL;
-       unsigned int allocs_failed = 0;
-       int ret;
+       int i, j, ret;
        struct rte_bbdev_info info;
-
-       /* Input length in bytes, million operations per second, million bits
-        * per second.
-        */
-       double in_len;
+       uint16_t num_to_enq;
 
        TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
                        "BURST_SIZE should be <= %u", MAX_BURST);
 
        rte_bbdev_info_get(tp->dev_id, &info);
+
+       TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
+                       "NUM_OPS cannot exceed %u for this device",
+                       info.drv.queue_size_lim);
+
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
        while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
                rte_pause();
 
-       start_time = rte_rdtsc_precise();
-       for (enqueued = 0, dequeued = 0; dequeued < num_to_process;) {
-               uint16_t deq;
+       ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
+       TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
 
-               if (likely(enqueued < num_to_process)) {
+       if (test_vector.op_type != RTE_BBDEV_OP_NONE)
+               copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
+                               bufs->hard_outputs, bufs->soft_outputs, ref_op);
 
-                       uint16_t num_to_enq = burst_sz;
+       /* Set counter to validate the ordering */
+       for (j = 0; j < num_ops; ++j)
+               ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
 
-                       if (unlikely(num_to_process - enqueued < num_to_enq))
-                               num_to_enq = num_to_process - enqueued;
+       for (i = 0; i < TEST_REPETITIONS; ++i) {
 
-                       ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp,
-                                       ops_enq, num_to_enq);
-                       if (ret != 0) {
-                               allocs_failed++;
-                               goto do_dequeue;
-                       }
+               for (j = 0; j < num_ops; ++j) {
+                       struct rte_bbdev_dec_op *op = ops_enq[j];
+                       rte_pktmbuf_reset(op->turbo_dec.hard_output.data);
+               }
+
+               start_time = rte_rdtsc_precise();
+
+               for (enq = 0, deq = 0; enq < num_ops;) {
+                       num_to_enq = burst_sz;
+
+                       if (unlikely(num_ops - enq < num_to_enq))
+                               num_to_enq = num_ops - enq;
 
-                       if (test_vector.op_type != RTE_BBDEV_OP_NONE)
-                               copy_reference_dec_op(ops_enq, num_to_enq,
-                                               enqueued,
-                                               bufs->inputs,
-                                               bufs->hard_outputs,
-                                               bufs->soft_outputs,
-                                               ref_op);
+                       enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
+                                       queue_id, &ops_enq[enq], num_to_enq);
 
-                       enqueued += rte_bbdev_enqueue_dec_ops(tp->dev_id,
-                                       queue_id, ops_enq, num_to_enq);
+                       deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
+                                       queue_id, &ops_deq[deq], enq - deq);
                }
-do_dequeue:
-               deq = rte_bbdev_dequeue_dec_ops(tp->dev_id, queue_id, ops_deq,
-                               burst_sz);
-               dequeued += deq;
-               rte_bbdev_dec_op_free_bulk(ops_enq, deq);
-       }
-       total_time = rte_rdtsc_precise() - start_time;
 
-       if (allocs_failed > 0)
-               printf("WARNING: op allocations failed: %u times\n",
-                               allocs_failed);
+               /* dequeue the remaining */
+               while (deq < enq) {
+                       deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
+                                       queue_id, &ops_deq[deq], enq - deq);
+               }
 
-       TEST_ASSERT(enqueued == dequeued, "enqueued (%u) != dequeued (%u)",
-                       enqueued, dequeued);
+               total_time += rte_rdtsc_precise() - start_time;
+       }
+
+       tp->iter_count = 0;
+       /* get the max of iter_count for all dequeued ops */
+       for (i = 0; i < num_ops; ++i) {
+               tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
+                               tp->iter_count);
+       }
 
        if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
-               ret = validate_dec_buffers(ref_op, bufs, num_to_process);
-               TEST_ASSERT_SUCCESS(ret, "Buffers validation failed");
+               ret = validate_dec_op(ops_deq, num_ops, ref_op,
+                               tp->op_params->vector_mask);
+               TEST_ASSERT_SUCCESS(ret, "Validation failed!");
        }
 
-       in_len = ref_op->turbo_dec.input.length;
-       tp->mops = ((double)num_to_process / 1000000.0) /
-                       ((double)total_time / (double)rte_get_tsc_hz());
-       tp->mbps = ((double)num_to_process * in_len * 8 / 1000000.0) /
+       rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
+
+       double tb_len_bits = calc_dec_TB_size(ref_op);
+
+       tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
                        ((double)total_time / (double)rte_get_tsc_hz());
+       tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
+                       1000000.0) / ((double)total_time /
+                       (double)rte_get_tsc_hz());
 
        return TEST_SUCCESS;
 }
@@ -1482,91 +1486,94 @@ static int
 throughput_pmd_lcore_enc(void *arg)
 {
        struct thread_params *tp = arg;
-       unsigned int enqueued, dequeued;
-       struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
-       uint64_t total_time, start_time;
+       uint16_t enq, deq;
+       uint64_t total_time = 0, start_time;
        const uint16_t queue_id = tp->queue_id;
        const uint16_t burst_sz = tp->op_params->burst_sz;
-       const uint16_t num_to_process = tp->op_params->num_to_process;
+       const uint16_t num_ops = tp->op_params->num_to_process;
+       struct rte_bbdev_enc_op *ops_enq[num_ops];
+       struct rte_bbdev_enc_op *ops_deq[num_ops];
        struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
        struct test_buffers *bufs = NULL;
-       unsigned int allocs_failed = 0;
-       int ret;
+       int i, j, ret;
        struct rte_bbdev_info info;
-
-       /* Input length in bytes, million operations per second, million bits
-        * per second.
-        */
-       double in_len;
+       uint16_t num_to_enq;
 
        TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
                        "BURST_SIZE should be <= %u", MAX_BURST);
 
        rte_bbdev_info_get(tp->dev_id, &info);
+
+       TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
+                       "NUM_OPS cannot exceed %u for this device",
+                       info.drv.queue_size_lim);
+
        bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
        while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
                rte_pause();
 
-       start_time = rte_rdtsc_precise();
-       for (enqueued = 0, dequeued = 0; dequeued < num_to_process;) {
-               uint16_t deq;
+       ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
+                       num_ops);
+       TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
+                       num_ops);
+       if (test_vector.op_type != RTE_BBDEV_OP_NONE)
+               copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
+                               bufs->hard_outputs, ref_op);
 
-               if (likely(enqueued < num_to_process)) {
+       /* Set counter to validate the ordering */
+       for (j = 0; j < num_ops; ++j)
+               ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
 
-                       uint16_t num_to_enq = burst_sz;
+       for (i = 0; i < TEST_REPETITIONS; ++i) {
 
-                       if (unlikely(num_to_process - enqueued < num_to_enq))
-                               num_to_enq = num_to_process - enqueued;
+               if (test_vector.op_type != RTE_BBDEV_OP_NONE)
+                       for (j = 0; j < num_ops; ++j)
+                               rte_pktmbuf_reset(
+                                       ops_enq[j]->turbo_enc.output.data);
 
-                       ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp,
-                                       ops_enq, num_to_enq);
-                       if (ret != 0) {
-                               allocs_failed++;
-                               goto do_dequeue;
-                       }
+               start_time = rte_rdtsc_precise();
+
+               for (enq = 0, deq = 0; enq < num_ops;) {
+                       num_to_enq = burst_sz;
 
-                       if (test_vector.op_type != RTE_BBDEV_OP_NONE)
-                               copy_reference_enc_op(ops_enq, num_to_enq,
-                                               enqueued,
-                                               bufs->inputs,
-                                               bufs->hard_outputs,
-                                               ref_op);
+                       if (unlikely(num_ops - enq < num_to_enq))
+                               num_to_enq = num_ops - enq;
 
-                       enqueued += rte_bbdev_enqueue_enc_ops(tp->dev_id,
-                                       queue_id, ops_enq, num_to_enq);
+                       enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
+                                       queue_id, &ops_enq[enq], num_to_enq);
+
+                       deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
+                                       queue_id, &ops_deq[deq], enq - deq);
                }
-do_dequeue:
-               deq = rte_bbdev_dequeue_enc_ops(tp->dev_id, queue_id, ops_deq,
-                               burst_sz);
-               dequeued += deq;
-               rte_bbdev_enc_op_free_bulk(ops_enq, deq);
-       }
-       total_time = rte_rdtsc_precise() - start_time;
 
-       if (allocs_failed > 0)
-               printf("WARNING: op allocations failed: %u times\n",
-                               allocs_failed);
+               /* dequeue the remaining */
+               while (deq < enq) {
+                       deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
+                                       queue_id, &ops_deq[deq], enq - deq);
+               }
 
-       TEST_ASSERT(enqueued == dequeued, "enqueued (%u) != dequeued (%u)",
-                       enqueued, dequeued);
+               total_time += rte_rdtsc_precise() - start_time;
+       }
 
        if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
-               ret = validate_enc_buffers(bufs, num_to_process);
-               TEST_ASSERT_SUCCESS(ret, "Buffers validation failed");
+               ret = validate_enc_op(ops_deq, num_ops, ref_op);
+               TEST_ASSERT_SUCCESS(ret, "Validation failed!");
        }
 
-       in_len = ref_op->turbo_enc.input.length;
+       double tb_len_bits = calc_enc_TB_size(ref_op);
 
-       tp->mops = ((double)num_to_process / 1000000.0) /
-                       ((double)total_time / (double)rte_get_tsc_hz());
-       tp->mbps = ((double)num_to_process * in_len * 8 / 1000000.0) /
+       tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
                        ((double)total_time / (double)rte_get_tsc_hz());
+       tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
+                       / 1000000.0) / ((double)total_time /
+                       (double)rte_get_tsc_hz());
 
        return TEST_SUCCESS;
 }
+
 static void
-print_throughput(struct thread_params *t_params, unsigned int used_cores)
+print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
 {
        unsigned int lcore_id, iter = 0;
        double total_mops = 0, total_mbps = 0;
@@ -1574,10 +1581,11 @@ print_throughput(struct thread_params *t_params, unsigned int used_cores)
        RTE_LCORE_FOREACH(lcore_id) {
                if (iter++ >= used_cores)
                        break;
-               printf("Throughput for core (%u): %.8lg MOPS, %.8lg Mbps\n",
-                               lcore_id, t_params[lcore_id].mops,
+               printf(
+                               "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
+                               lcore_id, t_params[lcore_id].ops_per_sec,
                                t_params[lcore_id].mbps);
-               total_mops += t_params[lcore_id].mops;
+               total_mops += t_params[lcore_id].ops_per_sec;
                total_mbps += t_params[lcore_id].mbps;
        }
        printf(
@@ -1585,6 +1593,30 @@ print_throughput(struct thread_params *t_params, unsigned int used_cores)
                used_cores, total_mops, total_mbps);
 }
 
+static void
+print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
+{
+       unsigned int lcore_id, iter = 0;
+       double total_mops = 0, total_mbps = 0;
+       uint8_t iter_count = 0;
+
+       RTE_LCORE_FOREACH(lcore_id) {
+               if (iter++ >= used_cores)
+                       break;
+               printf(
+                               "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
+                               lcore_id, t_params[lcore_id].ops_per_sec,
+                               t_params[lcore_id].mbps,
+                               t_params[lcore_id].iter_count);
+               total_mops += t_params[lcore_id].ops_per_sec;
+               total_mbps += t_params[lcore_id].mbps;
+               iter_count = RTE_MAX(iter_count, t_params[lcore_id].iter_count);
+       }
+       printf(
+               "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
+               used_cores, total_mops, total_mbps, iter_count);
+}
+
 /*
  * Test function that determines how long an enqueue + dequeue of a burst
  * takes on available lcores.
@@ -1677,8 +1709,10 @@ throughput_test(struct active_device *ad,
 
        /* Print throughput if interrupts are disabled and test passed */
        if (!intr_enabled) {
-               if (test_vector.op_type != RTE_BBDEV_OP_NONE)
-                       print_throughput(t_params, num_lcores);
+               if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
+                       print_dec_throughput(t_params, num_lcores);
+               else
+                       print_enc_throughput(t_params, num_lcores);
                return ret;
        }
 
@@ -1713,9 +1747,12 @@ throughput_test(struct active_device *ad,
        }
 
        /* Print throughput if test passed */
-       if (!ret && test_vector.op_type != RTE_BBDEV_OP_NONE)
-               print_throughput(t_params, num_lcores);
-
+       if (!ret) {
+               if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
+                       print_dec_throughput(t_params, num_lcores);
+               else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
+                       print_enc_throughput(t_params, num_lcores);
+       }
        return ret;
 }
 
index 81b8ee7..45fe999 100644 (file)
@@ -412,6 +412,10 @@ parse_decoder_params(const char *key_token, char *token,
                vector->mask |= TEST_BBDEV_VF_NUM_MAPS;
                turbo_dec->num_maps = (uint8_t) strtoul(token, &err, 0);
                ret = ((err == NULL) || (*err != '\0')) ? -1 : 0;
+       } else if (!strcmp(key_token, "r")) {
+               vector->mask |= TEST_BBDEV_VF_R;
+               turbo_dec->tb_params.r = (uint8_t) strtoul(token, &err, 0);
+               ret = ((err == NULL) || (*err != '\0')) ? -1 : 0;
        } else if (!strcmp(key_token, "code_block_mode")) {
                vector->mask |= TEST_BBDEV_VF_CODE_BLOCK_MODE;
                turbo_dec->code_block_mode = (uint8_t) strtoul(token, &err, 0);
@@ -714,6 +718,9 @@ check_decoder(struct test_bbdev_vector *vector)
                if (!(mask & TEST_BBDEV_VF_CAB))
                        printf(
                                "WARNING: cab was not specified in vector file and will be set to 0\n");
+               if (!(mask & TEST_BBDEV_VF_R))
+                       printf(
+                               "WARNING: r was not specified in vector file and will be set to 0\n");
        } else {
                if (!(mask & TEST_BBDEV_VF_E))
                        printf(
index 83f62c2..962e2ed 100644 (file)
@@ -216,6 +216,8 @@ struct rte_bbdev_op_dec_tb_params {
         * operation when r >= cab
         */
        uint32_t eb;
+       /**< The index of the first CB in the inbound mbuf data, default is 0 */
+       uint8_t r;
 };
 
 /**< Operation structure for Turbo decode.