/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */
#include <rte_common.h>
#include <rte_launch.h>
#include <rte_bbdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_random.h>
#include <rte_hexdump.h>

#include "test_bbdev_vector.h"
#define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
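/* GET_SOCKET() folds SOCKET_ID_ANY into socket 0 so the result can be used
 * directly as an index into per-socket arrays such as q_bufs[] below:
 * e.g. GET_SOCKET(SOCKET_ID_ANY) == 0 and GET_SOCKET(1) == 1.
 */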
#define MAX_QUEUES RTE_MAX_LCORE
#define TEST_REPETITIONS 1000

#define OPS_CACHE_SIZE 256U
#define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */

#define SYNC_WAIT 0
#define SYNC_START 1

#define INVALID_QUEUE_ID -1
static struct test_bbdev_vector test_vector;

/* Switch between PMD and Interrupt for throughput TC */
static bool intr_enabled;
/* Represents tested active devices */
static struct active_device {
	const char *driver_name;
	uint8_t dev_id;
	uint16_t supported_ops;
	uint16_t queue_ids[MAX_QUEUES];
	uint16_t nb_queues;
	struct rte_mempool *ops_mempool;
	struct rte_mempool *in_mbuf_pool;
	struct rte_mempool *hard_out_mbuf_pool;
	struct rte_mempool *soft_out_mbuf_pool;
} active_devs[RTE_BBDEV_MAX_DEVS];

static uint8_t nb_active_devs;
/* Data buffers used by BBDEV ops */
struct test_buffers {
	struct rte_bbdev_op_data *inputs;
	struct rte_bbdev_op_data *hard_outputs;
	struct rte_bbdev_op_data *soft_outputs;
};
/* Operation parameters specific for given test case */
struct test_op_params {
	struct rte_mempool *mp;
	struct rte_bbdev_dec_op *ref_dec_op;
	struct rte_bbdev_enc_op *ref_enc_op;
	uint16_t burst_sz;
	uint16_t num_to_process;
	uint16_t num_lcores;
	int vector_mask;
	rte_atomic16_t sync;
	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
};
/* Contains per lcore params */
struct thread_params {
	uint8_t dev_id;
	uint16_t queue_id;
	unsigned int lcore_id;
	uint64_t start_time;
	double ops_per_sec;
	double mbps;
	uint8_t iter_count;
	rte_atomic16_t nb_dequeued;
	rte_atomic16_t processing_status;
	rte_atomic16_t burst_sz;
	struct test_op_params *op_params;
	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
};
#ifdef RTE_BBDEV_OFFLOAD_COST
/* Stores time statistics */
struct test_time_stats {
	/* Stores software enqueue total working time */
	uint64_t enq_sw_total_time;
	/* Stores minimum value of software enqueue working time */
	uint64_t enq_sw_min_time;
	/* Stores maximum value of software enqueue working time */
	uint64_t enq_sw_max_time;
	/* Stores accelerator enqueue total working time */
	uint64_t enq_acc_total_time;
	/* Stores minimum value of accelerator enqueue working time */
	uint64_t enq_acc_min_time;
	/* Stores maximum value of accelerator enqueue working time */
	uint64_t enq_acc_max_time;
	/* Stores dequeue total working time */
	uint64_t deq_total_time;
	/* Stores minimum value of dequeue working time */
	uint64_t deq_min_time;
	/* Stores maximum value of dequeue working time */
	uint64_t deq_max_time;
};
#endif
typedef int (test_case_function)(struct active_device *ad,
		struct test_op_params *op_params);
static void
mbuf_reset(struct rte_mbuf *m)
{
	m->pkt_len = 0;

	do {
		m->data_len = 0;
		m = m->next;
	} while (m != NULL);
}

static void
set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
{
	ad->supported_ops |= (1 << op_type);
}

static bool
is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
{
	return ad->supported_ops & (1 << op_type);
}
static bool
flags_match(uint32_t flags_req, uint32_t flags_present)
{
	return (flags_req & flags_present) == flags_req;
}
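/* Example: flags_match(0x5, 0x7) is true because every requested bit is
 * present; flags_match(0x5, 0x6) is false because requested bit 0 is missing.
 */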
static void
clear_soft_out_cap(uint32_t *op_flags)
{
	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
}
static int
check_dev_cap(const struct rte_bbdev_info *dev_info)
{
	unsigned int i;
	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs;
	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;

	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;

	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
		if (op_cap->type != test_vector.op_type)
			continue;

		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
			const struct rte_bbdev_op_cap_turbo_dec *cap =
					&op_cap->cap.turbo_dec;
			/* Ignore lack of soft output capability, just skip
			 * checking if soft output is valid.
			 */
			if ((test_vector.turbo_dec.op_flags &
					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
					!(cap->capability_flags &
					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
				printf(
					"WARNING: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
					dev_info->dev_name);
				clear_soft_out_cap(
					&test_vector.turbo_dec.op_flags);
			}

			if (!flags_match(test_vector.turbo_dec.op_flags,
					cap->capability_flags))
				return TEST_FAILED;
			if (nb_inputs > cap->num_buffers_src) {
				printf("Too many inputs defined: %u, max: %u\n",
					nb_inputs, cap->num_buffers_src);
				return TEST_FAILED;
			}
			if (nb_soft_outputs > cap->num_buffers_soft_out &&
					(test_vector.turbo_dec.op_flags &
					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
				printf(
					"Too many soft outputs defined: %u, max: %u\n",
					nb_soft_outputs,
					cap->num_buffers_soft_out);
				return TEST_FAILED;
			}
			if (nb_hard_outputs > cap->num_buffers_hard_out) {
				printf(
					"Too many hard outputs defined: %u, max: %u\n",
					nb_hard_outputs,
					cap->num_buffers_hard_out);
				return TEST_FAILED;
			}
			if (intr_enabled && !(cap->capability_flags &
					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
				printf(
					"Dequeue interrupts are not supported!\n");
				return TEST_FAILED;
			}

			return TEST_SUCCESS;
		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
			const struct rte_bbdev_op_cap_turbo_enc *cap =
					&op_cap->cap.turbo_enc;

			if (!flags_match(test_vector.turbo_enc.op_flags,
					cap->capability_flags))
				return TEST_FAILED;
			if (nb_inputs > cap->num_buffers_src) {
				printf("Too many inputs defined: %u, max: %u\n",
					nb_inputs, cap->num_buffers_src);
				return TEST_FAILED;
			}
			if (nb_hard_outputs > cap->num_buffers_dst) {
				printf(
					"Too many hard outputs defined: %u, max: %u\n",
					nb_hard_outputs, cap->num_buffers_dst);
				return TEST_FAILED;
			}
			if (intr_enabled && !(cap->capability_flags &
					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
				printf(
					"Dequeue interrupts are not supported!\n");
				return TEST_FAILED;
			}

			return TEST_SUCCESS;
		}
	}

	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
		return TEST_SUCCESS; /* Special case for NULL device */

	return TEST_FAILED;
}
/* calculates optimal mempool size not smaller than the val */
static unsigned int
optimal_mempool_size(unsigned int val)
{
	return rte_align32pow2(val + 1) - 1;
}
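/* Mempool sizes of the form 2^n - 1 give the best memory utilisation, hence
 * the rounding above: e.g. optimal_mempool_size(600) returns 1023 and
 * optimal_mempool_size(511) returns 511 (illustrative values).
 */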
/* allocates mbuf mempool for inputs and outputs */
static struct rte_mempool *
create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
		int socket_id, unsigned int mbuf_pool_size,
		const char *op_type_str)
{
	unsigned int i;
	uint32_t max_seg_sz = 0;
	char pool_name[RTE_MEMPOOL_NAMESIZE];

	/* find max input segment size */
	for (i = 0; i < entries->nb_segments; ++i)
		if (entries->segments[i].length > max_seg_sz)
			max_seg_sz = entries->segments[i].length;

	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
			dev_id);
	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM,
			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
}
static int
create_mempools(struct active_device *ad, int socket_id,
		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
{
	struct rte_mempool *mp;
	unsigned int ops_pool_size, mbuf_pool_size = 0;
	char pool_name[RTE_MEMPOOL_NAMESIZE];
	const char *op_type_str;
	enum rte_bbdev_op_type op_type = org_op_type;

	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
	struct op_data_entries *hard_out =
			&test_vector.entries[DATA_HARD_OUTPUT];
	struct op_data_entries *soft_out =
			&test_vector.entries[DATA_SOFT_OUTPUT];
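	/* Worked example of the ops pool sizing below (illustrative numbers):
	 * with 2 queues, num_ops = 2047 and 8 lcores the candidates are
	 * 2 * 2047 + 1 = 4095 in-flight ops and 1.5 * 8 * 256 + 1 = 3073
	 * cached ops; the larger value, no smaller than OPS_POOL_SIZE_MIN,
	 * yields optimal_mempool_size(4095) = 4095 elements.
	 */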
	/* allocate ops mempool */
	ops_pool_size = optimal_mempool_size(RTE_MAX(
			/* Ops used plus 1 reference op */
			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
			/* Minimal cache size plus 1 reference op */
			(unsigned int)(1.5 * rte_lcore_count() *
					OPS_CACHE_SIZE + 1)),
			OPS_POOL_SIZE_MIN));

	if (org_op_type == RTE_BBDEV_OP_NONE)
		op_type = RTE_BBDEV_OP_TURBO_ENC;

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
			ad->dev_id);
	mp = rte_bbdev_op_pool_create(pool_name, op_type,
			ops_pool_size, OPS_CACHE_SIZE, socket_id);
	TEST_ASSERT_NOT_NULL(mp,
			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
			ops_pool_size,
			ad->dev_id,
			socket_id);
	ad->ops_mempool = mp;
	/* Do not create inputs and outputs mbufs for BaseBand Null Device */
	if (org_op_type == RTE_BBDEV_OP_NONE)
		return TEST_SUCCESS;

	/* Inputs */
	mbuf_pool_size = optimal_mempool_size(ops_pool_size * in->nb_segments);
	mp = create_mbuf_pool(in, ad->dev_id, socket_id, mbuf_pool_size, "in");
	TEST_ASSERT_NOT_NULL(mp,
			"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
			mbuf_pool_size,
			ad->dev_id,
			socket_id);
	ad->in_mbuf_pool = mp;

	/* Hard outputs */
	mbuf_pool_size = optimal_mempool_size(ops_pool_size *
			hard_out->nb_segments);
	mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id, mbuf_pool_size,
			"hard_out");
	TEST_ASSERT_NOT_NULL(mp,
			"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
			mbuf_pool_size,
			ad->dev_id,
			socket_id);
	ad->hard_out_mbuf_pool = mp;

	if (soft_out->nb_segments == 0)
		return TEST_SUCCESS;

	/* Soft outputs */
	mbuf_pool_size = optimal_mempool_size(ops_pool_size *
			soft_out->nb_segments);
	mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id, mbuf_pool_size,
			"soft_out");
	TEST_ASSERT_NOT_NULL(mp,
			"ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
			mbuf_pool_size,
			ad->dev_id,
			socket_id);
	ad->soft_out_mbuf_pool = mp;

	return TEST_SUCCESS;
}
static int
add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
		struct test_bbdev_vector *vector)
{
	int ret;
	unsigned int queue_id;
	struct rte_bbdev_queue_conf qconf;
	struct active_device *ad = &active_devs[nb_active_devs];
	unsigned int nb_queues;
	enum rte_bbdev_op_type op_type = vector->op_type;

	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
	/* setup device */
	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
	if (ret < 0) {
		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
				dev_id, nb_queues, info->socket_id, ret);
		return TEST_FAILED;
	}

	/* configure interrupts if needed */
	if (intr_enabled) {
		ret = rte_bbdev_intr_enable(dev_id);
		if (ret < 0) {
			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
					ret);
			return TEST_FAILED;
		}
	}

	/* setup device queues */
	qconf.socket = info->socket_id;
	qconf.queue_size = info->drv.default_queue_conf.queue_size;
	qconf.priority = 0;
	qconf.deferred_start = 0;
	qconf.op_type = op_type;
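	/* Each queue is first configured at the default priority; if that
	 * fails, the loop below bumps qconf.priority and retries once before
	 * settling for however many queues the device actually accepted.
	 */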
	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
		if (ret != 0) {
			printf(
					"Allocated all queues (id=%u) at prio%u on dev%u\n",
					queue_id, qconf.priority, dev_id);
			qconf.priority++;
			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
					&qconf);
		}
		if (ret != 0) {
			printf("All queues on dev %u allocated: %u\n",
					dev_id, queue_id);
			break;
		}
		ad->queue_ids[queue_id] = queue_id;
	}
	TEST_ASSERT(queue_id != 0,
			"ERROR Failed to configure any queues on dev %u",
			dev_id);
	ad->nb_queues = queue_id;

	set_avail_op(ad, op_type);

	return TEST_SUCCESS;
}
static int
add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
		struct test_bbdev_vector *vector)
{
	int ret;

	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
	active_devs[nb_active_devs].dev_id = dev_id;

	ret = add_bbdev_dev(dev_id, info, vector);
	if (ret == TEST_SUCCESS)
		++nb_active_devs;
	return ret;
}
static uint8_t
populate_active_devices(void)
{
	int ret;
	uint8_t dev_id;
	uint8_t nb_devs_added = 0;
	struct rte_bbdev_info info;

	RTE_BBDEV_FOREACH(dev_id) {
		rte_bbdev_info_get(dev_id, &info);

		if (check_dev_cap(&info)) {
			printf(
				"Device %d (%s) does not support specified capabilities\n",
					dev_id, info.dev_name);
			continue;
		}

		ret = add_active_device(dev_id, &info, &test_vector);
		if (ret != 0) {
			printf("Adding active bbdev %s skipped\n",
					info.dev_name);
			continue;
		}
		nb_devs_added++;
	}

	return nb_devs_added;
}
static int
read_test_vector(void)
{
	int ret;

	memset(&test_vector, 0, sizeof(test_vector));
	printf("Test vector file = %s\n", get_vector_filename());
	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
			get_vector_filename());

	return TEST_SUCCESS;
}
static int
testsuite_setup(void)
{
	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");

	if (populate_active_devices() == 0) {
		printf("No suitable devices found!\n");
		return TEST_SKIPPED;
	}

	return TEST_SUCCESS;
}
static int
interrupt_testsuite_setup(void)
{
	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");

	/* Enable interrupts */
	intr_enabled = true;

	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
	if (populate_active_devices() == 0 ||
			test_vector.op_type == RTE_BBDEV_OP_NONE) {
		intr_enabled = false;
		printf("No suitable devices found!\n");
		return TEST_SKIPPED;
	}

	return TEST_SUCCESS;
}
static void
testsuite_teardown(void)
{
	uint8_t dev_id;

	/* Unconfigure devices */
	RTE_BBDEV_FOREACH(dev_id)
		rte_bbdev_close(dev_id);

	/* Clear active devices structs. */
	memset(active_devs, 0, sizeof(active_devs));
	nb_active_devs = 0;
}
static int
ut_setup(void)
{
	uint8_t i, dev_id;

	for (i = 0; i < nb_active_devs; i++) {
		dev_id = active_devs[i].dev_id;
		/* reset bbdev stats */
		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
				"Failed to reset stats of bbdev %u", dev_id);
		/* start the device */
		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
				"Failed to start bbdev %u", dev_id);
	}

	return TEST_SUCCESS;
}
static void
ut_teardown(void)
{
	uint8_t i, dev_id;
	struct rte_bbdev_stats stats;

	for (i = 0; i < nb_active_devs; i++) {
		dev_id = active_devs[i].dev_id;
		/* read stats and print */
		rte_bbdev_stats_get(dev_id, &stats);
		/* Stop the device */
		rte_bbdev_stop(dev_id);
	}
}
static int
init_op_data_objs(struct rte_bbdev_op_data *bufs,
		struct op_data_entries *ref_entries,
		struct rte_mempool *mbuf_pool, const uint16_t n,
		enum op_data_type op_type, uint16_t min_alignment)
{
	int ret;
	unsigned int i, j;

	for (i = 0; i < n; ++i) {
		char *data;
		struct op_data_buf *seg = &ref_entries->segments[0];
		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
		TEST_ASSERT_NOT_NULL(m_head,
				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
				op_type, n * ref_entries->nb_segments,
				mbuf_pool->size);

		TEST_ASSERT_SUCCESS(((seg->length + RTE_PKTMBUF_HEADROOM) >
				(uint32_t)UINT16_MAX),
				"Given data is bigger than allowed mbuf segment size");

		bufs[i].data = m_head;
		bufs[i].offset = 0;
		bufs[i].length = 0;

		if (op_type == DATA_INPUT) {
			data = rte_pktmbuf_append(m_head, seg->length);
			TEST_ASSERT_NOT_NULL(data,
					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
					seg->length, op_type);

			TEST_ASSERT(data == RTE_PTR_ALIGN(data, min_alignment),
					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
					data, min_alignment);
			rte_memcpy(data, seg->addr, seg->length);
			bufs[i].length += seg->length;

			for (j = 1; j < ref_entries->nb_segments; ++j) {
				struct rte_mbuf *m_tail =
						rte_pktmbuf_alloc(mbuf_pool);
				TEST_ASSERT_NOT_NULL(m_tail,
						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
						op_type,
						n * ref_entries->nb_segments,
						mbuf_pool->size);
				seg += 1;

				data = rte_pktmbuf_append(m_tail, seg->length);
				TEST_ASSERT_NOT_NULL(data,
						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
						seg->length, op_type);

				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
						min_alignment),
						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
						data, min_alignment);
				rte_memcpy(data, seg->addr, seg->length);
				bufs[i].length += seg->length;

				ret = rte_pktmbuf_chain(m_head, m_tail);
				TEST_ASSERT_SUCCESS(ret,
						"Couldn't chain mbufs from %d data type mbuf pool",
						op_type);
			}
		} else {
			/* allocate chained-mbuf for output buffer */
			for (j = 1; j < ref_entries->nb_segments; ++j) {
				struct rte_mbuf *m_tail =
						rte_pktmbuf_alloc(mbuf_pool);
				TEST_ASSERT_NOT_NULL(m_tail,
						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
						op_type,
						n * ref_entries->nb_segments,
						mbuf_pool->size);

				ret = rte_pktmbuf_chain(m_head, m_tail);
				TEST_ASSERT_SUCCESS(ret,
						"Couldn't chain mbufs from %d data type mbuf pool",
						op_type);
			}
		}
	}

	return 0;
}
static int
allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
		const int socket)
{
	int i;

	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
	if (*buffers == NULL) {
		printf("WARNING: Failed to allocate op_data on socket %d\n",
				socket);
		/* try to allocate memory on other detected sockets */
		for (i = 0; i < socket; i++) {
			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
			if (*buffers != NULL)
				break;
		}
	}

	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
}
static void
limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
		uint16_t n, int8_t max_llr_modulus)
{
	uint16_t i, byte_idx;

	for (i = 0; i < n; ++i) {
		struct rte_mbuf *m = input_ops[i].data;
		while (m != NULL) {
			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
					input_ops[i].offset);
			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
					++byte_idx)
				llr[byte_idx] = round((double)max_llr_modulus *
						llr[byte_idx] / INT8_MAX);

			m = m->next;
		}
	}
}
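/* Example of the LLR saturation above, with illustrative values: for
 * max_llr_modulus = 16 an input LLR of 127 becomes round(16 * 127 / 127) = 16
 * and -64 becomes round(16 * -64 / 127) = -8, so every LLR is scaled into
 * [-16, 16] as the decoder requires.
 */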
static int
fill_queue_buffers(struct test_op_params *op_params,
		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
		struct rte_mempool *soft_out_mp, uint16_t queue_id,
		const struct rte_bbdev_op_cap *capabilities,
		uint16_t min_alignment, const int socket_id)
{
	int ret;
	enum op_data_type type;
	const uint16_t n = op_params->num_to_process;

	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
		in_mp,
		soft_out_mp,
		hard_out_mp,
	};

	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
		&op_params->q_bufs[socket_id][queue_id].inputs,
		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
	};

	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
		struct op_data_entries *ref_entries =
				&test_vector.entries[type];
		if (ref_entries->nb_segments == 0)
			continue;

		ret = allocate_buffers_on_socket(queue_ops[type],
				n * sizeof(struct rte_bbdev_op_data),
				socket_id);
		TEST_ASSERT_SUCCESS(ret,
				"Couldn't allocate memory for rte_bbdev_op_data structs");

		ret = init_op_data_objs(*queue_ops[type], ref_entries,
				mbuf_pools[type], n, type, min_alignment);
		TEST_ASSERT_SUCCESS(ret,
				"Couldn't init rte_bbdev_op_data structs");
	}

	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
			capabilities->cap.turbo_dec.max_llr_modulus);

	return TEST_SUCCESS;
}
static void
free_buffers(struct active_device *ad, struct test_op_params *op_params)
{
	unsigned int i, j;

	rte_mempool_free(ad->ops_mempool);
	rte_mempool_free(ad->in_mbuf_pool);
	rte_mempool_free(ad->hard_out_mbuf_pool);
	rte_mempool_free(ad->soft_out_mbuf_pool);

	for (i = 0; i < rte_lcore_count(); ++i) {
		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
			rte_free(op_params->q_bufs[j][i].inputs);
			rte_free(op_params->q_bufs[j][i].hard_outputs);
			rte_free(op_params->q_bufs[j][i].soft_outputs);
		}
	}
}
static void
copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
		unsigned int start_idx,
		struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_op_data *hard_outputs,
		struct rte_bbdev_op_data *soft_outputs,
		struct rte_bbdev_dec_op *ref_op)
{
	unsigned int i;
	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;

	for (i = 0; i < n; ++i) {
		if (turbo_dec->code_block_mode == 0) {
			/* transport block mode */
			ops[i]->turbo_dec.tb_params.ea =
					turbo_dec->tb_params.ea;
			ops[i]->turbo_dec.tb_params.eb =
					turbo_dec->tb_params.eb;
			ops[i]->turbo_dec.tb_params.k_pos =
					turbo_dec->tb_params.k_pos;
			ops[i]->turbo_dec.tb_params.k_neg =
					turbo_dec->tb_params.k_neg;
			ops[i]->turbo_dec.tb_params.c =
					turbo_dec->tb_params.c;
			ops[i]->turbo_dec.tb_params.c_neg =
					turbo_dec->tb_params.c_neg;
			ops[i]->turbo_dec.tb_params.cab =
					turbo_dec->tb_params.cab;
			ops[i]->turbo_dec.tb_params.r =
					turbo_dec->tb_params.r;
		} else {
			/* code block mode */
			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
		}

		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;

		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
		ops[i]->turbo_dec.input = inputs[start_idx + i];
		if (soft_outputs != NULL)
			ops[i]->turbo_dec.soft_output =
					soft_outputs[start_idx + i];
	}
}
static void
copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
		unsigned int start_idx,
		struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_op_data *outputs,
		struct rte_bbdev_enc_op *ref_op)
{
	unsigned int i;
	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;

	for (i = 0; i < n; ++i) {
		if (turbo_enc->code_block_mode == 0) {
			/* transport block mode */
			ops[i]->turbo_enc.tb_params.ea =
					turbo_enc->tb_params.ea;
			ops[i]->turbo_enc.tb_params.eb =
					turbo_enc->tb_params.eb;
			ops[i]->turbo_enc.tb_params.k_pos =
					turbo_enc->tb_params.k_pos;
			ops[i]->turbo_enc.tb_params.k_neg =
					turbo_enc->tb_params.k_neg;
			ops[i]->turbo_enc.tb_params.c =
					turbo_enc->tb_params.c;
			ops[i]->turbo_enc.tb_params.c_neg =
					turbo_enc->tb_params.c_neg;
			ops[i]->turbo_enc.tb_params.cab =
					turbo_enc->tb_params.cab;
			ops[i]->turbo_enc.tb_params.ncb_pos =
					turbo_enc->tb_params.ncb_pos;
			ops[i]->turbo_enc.tb_params.ncb_neg =
					turbo_enc->tb_params.ncb_neg;
			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
		} else {
			/* code block mode */
			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
			ops[i]->turbo_enc.cb_params.ncb =
					turbo_enc->cb_params.ncb;
		}
		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;

		ops[i]->turbo_enc.output = outputs[start_idx + i];
		ops[i]->turbo_enc.input = inputs[start_idx + i];
	}
}
static int
check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
		unsigned int order_idx, const int expected_status)
{
	TEST_ASSERT(op->status == expected_status,
			"op_status (%d) != expected_status (%d)",
			op->status, expected_status);

	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
			"Ordering error, expected %p, got %p",
			(void *)(uintptr_t)order_idx, op->opaque_data);

	return TEST_SUCCESS;
}
static int
check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
		unsigned int order_idx, const int expected_status)
{
	TEST_ASSERT(op->status == expected_status,
			"op_status (%d) != expected_status (%d)",
			op->status, expected_status);

	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
			"Ordering error, expected %p, got %p",
			(void *)(uintptr_t)order_idx, op->opaque_data);

	return TEST_SUCCESS;
}
static int
validate_op_chain(struct rte_bbdev_op_data *op,
		struct op_data_entries *orig_op)
{
	uint8_t i;
	struct rte_mbuf *m = op->data;
	uint8_t nb_dst_segments = orig_op->nb_segments;
	uint32_t total_data_size = 0;

	TEST_ASSERT(nb_dst_segments == m->nb_segs,
			"Number of segments differ in original (%u) and filled (%u) op",
			nb_dst_segments, m->nb_segs);

	/* Validate each mbuf segment length */
	for (i = 0; i < nb_dst_segments; ++i) {
		/* Apply offset to the first mbuf segment */
		uint16_t offset = (i == 0) ? op->offset : 0;
		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
		total_data_size += orig_op->segments[i].length;

		TEST_ASSERT(orig_op->segments[i].length == data_len,
				"Length of segment differs in original (%u) and filled (%u) op",
				orig_op->segments[i].length, data_len);
		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
				data_len,
				"Output buffers (CB=%u) are not equal", i);
		m = m->next;
	}

	/* Validate total mbuf pkt length */
	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
	TEST_ASSERT(total_data_size == pkt_len,
			"Length of data differs in original (%u) and filled (%u) op",
			total_data_size, pkt_len);

	return TEST_SUCCESS;
}
static int
validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
{
	unsigned int i;
	int ret;
	struct op_data_entries *hard_data_orig =
			&test_vector.entries[DATA_HARD_OUTPUT];
	struct op_data_entries *soft_data_orig =
			&test_vector.entries[DATA_SOFT_OUTPUT];
	struct rte_bbdev_op_turbo_dec *ops_td;
	struct rte_bbdev_op_data *hard_output;
	struct rte_bbdev_op_data *soft_output;
	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;

	for (i = 0; i < n; ++i) {
		ops_td = &ops[i]->turbo_dec;
		hard_output = &ops_td->hard_output;
		soft_output = &ops_td->soft_output;

		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
					"Returned iter_count (%d) > expected iter_count (%d)",
					ops_td->iter_count, ref_td->iter_count);
		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
		TEST_ASSERT_SUCCESS(ret,
				"Checking status and ordering for decoder failed");

		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
				hard_data_orig),
				"Hard output buffers (CB=%u) are not equal",
				i);

		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
					soft_data_orig),
					"Soft output buffers (CB=%u) are not equal",
					i);
	}

	return TEST_SUCCESS;
}
static int
validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
		struct rte_bbdev_enc_op *ref_op)
{
	unsigned int i;
	int ret;
	struct op_data_entries *hard_data_orig =
			&test_vector.entries[DATA_HARD_OUTPUT];

	for (i = 0; i < n; ++i) {
		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
		TEST_ASSERT_SUCCESS(ret,
				"Checking status and ordering for encoder failed");
		TEST_ASSERT_SUCCESS(validate_op_chain(
				&ops[i]->turbo_enc.output,
				hard_data_orig),
				"Output buffers (CB=%u) are not equal",
				i);
	}

	return TEST_SUCCESS;
}
static void
create_reference_dec_op(struct rte_bbdev_dec_op *op)
{
	unsigned int i;
	struct op_data_entries *entry;

	op->turbo_dec = test_vector.turbo_dec;
	entry = &test_vector.entries[DATA_INPUT];
	for (i = 0; i < entry->nb_segments; ++i)
		op->turbo_dec.input.length +=
				entry->segments[i].length;
}
static void
create_reference_enc_op(struct rte_bbdev_enc_op *op)
{
	unsigned int i;
	struct op_data_entries *entry;

	op->turbo_enc = test_vector.turbo_enc;
	entry = &test_vector.entries[DATA_INPUT];
	for (i = 0; i < entry->nb_segments; ++i)
		op->turbo_enc.input.length +=
				entry->segments[i].length;
}
static uint32_t
calc_dec_TB_size(struct rte_bbdev_dec_op *op)
{
	uint8_t i;
	uint32_t c, r, tb_size = 0;

	if (op->turbo_dec.code_block_mode) {
		tb_size = op->turbo_dec.tb_params.k_neg;
	} else {
		c = op->turbo_dec.tb_params.c;
		r = op->turbo_dec.tb_params.r;
		for (i = 0; i < c - r; i++)
			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
					op->turbo_dec.tb_params.k_neg :
					op->turbo_dec.tb_params.k_pos;
	}

	return tb_size;
}
static uint32_t
calc_enc_TB_size(struct rte_bbdev_enc_op *op)
{
	uint8_t i;
	uint32_t c, r, tb_size = 0;

	if (op->turbo_enc.code_block_mode) {
		tb_size = op->turbo_enc.tb_params.k_neg;
	} else {
		c = op->turbo_enc.tb_params.c;
		r = op->turbo_enc.tb_params.r;
		for (i = 0; i < c - r; i++)
			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
					op->turbo_enc.tb_params.k_neg :
					op->turbo_enc.tb_params.k_pos;
	}

	return tb_size;
}
static int
init_test_op_params(struct test_op_params *op_params,
		enum rte_bbdev_op_type op_type, const int expected_status,
		const int vector_mask, struct rte_mempool *ops_mp,
		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
{
	int ret = 0;

	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
				&op_params->ref_dec_op, 1);
	else
		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
				&op_params->ref_enc_op, 1);

	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");

	op_params->mp = ops_mp;
	op_params->burst_sz = burst_sz;
	op_params->num_to_process = num_to_process;
	op_params->num_lcores = num_lcores;
	op_params->vector_mask = vector_mask;
	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
		op_params->ref_dec_op->status = expected_status;
	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
		op_params->ref_enc_op->status = expected_status;

	return 0;
}
static int
run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
		struct test_op_params *op_params)
{
	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
	unsigned int i;
	struct active_device *ad;
	unsigned int burst_sz = get_burst_sz();
	enum rte_bbdev_op_type op_type = test_vector.op_type;
	const struct rte_bbdev_op_cap *capabilities = NULL;

	ad = &active_devs[dev_id];

	/* Check if device supports op_type */
	if (!is_avail_op(ad, test_vector.op_type))
		return TEST_SUCCESS;

	struct rte_bbdev_info info;
	rte_bbdev_info_get(ad->dev_id, &info);
	socket_id = GET_SOCKET(info.socket_id);

	f_ret = create_mempools(ad, socket_id, op_type,
			get_num_ops());
	if (f_ret != TEST_SUCCESS) {
		printf("Couldn't create mempools");
		goto fail;
	}
	if (op_type == RTE_BBDEV_OP_NONE)
		op_type = RTE_BBDEV_OP_TURBO_ENC;
	f_ret = init_test_op_params(op_params, test_vector.op_type,
			test_vector.expected_status,
			test_vector.mask,
			ad->ops_mempool,
			burst_sz,
			get_num_ops(),
			get_num_lcores());
	if (f_ret != TEST_SUCCESS) {
		printf("Couldn't init test op params");
		goto fail;
	}

	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
		/* Find Decoder capabilities */
		const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
		while (cap->type != RTE_BBDEV_OP_NONE) {
			if (cap->type == RTE_BBDEV_OP_TURBO_DEC) {
				capabilities = cap;
				break;
			}
			cap++;
		}
		TEST_ASSERT_NOT_NULL(capabilities,
				"Couldn't find Decoder capabilities");

		create_reference_dec_op(op_params->ref_dec_op);
	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
		create_reference_enc_op(op_params->ref_enc_op);

	for (i = 0; i < ad->nb_queues; ++i) {
		f_ret = fill_queue_buffers(op_params,
				ad->in_mbuf_pool,
				ad->hard_out_mbuf_pool,
				ad->soft_out_mbuf_pool,
				ad->queue_ids[i],
				capabilities,
				info.drv.min_alignment,
				socket_id);
		if (f_ret != TEST_SUCCESS) {
			printf("Couldn't init queue buffers");
			goto fail;
		}
	}

	/* Run test case function */
	t_ret = test_case_func(ad, op_params);

	/* Free active device resources and return */
	free_buffers(ad, op_params);
	return t_ret;

fail:
	free_buffers(ad, op_params);
	return TEST_FAILED;
}
/* Run given test function per active device per supported op type
 * per burst size.
 */
static int
run_test_case(test_case_function *test_case_func)
{
	int ret = 0;
	uint8_t dev;

	/* Alloc op_params */
	struct test_op_params *op_params = rte_zmalloc(NULL,
			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
			RTE_ALIGN(sizeof(struct test_op_params),
				RTE_CACHE_LINE_SIZE));

	/* For each device run test case function */
	for (dev = 0; dev < nb_active_devs; ++dev)
		ret |= run_test_case_on_device(test_case_func, dev, op_params);

	rte_free(op_params);

	return ret;
}
static void
dequeue_event_callback(uint16_t dev_id,
		enum rte_bbdev_event_type event, void *cb_arg,
		void *ret_param)
{
	int ret;
	uint16_t i;
	uint64_t total_time;
	uint16_t deq, burst_sz, num_ops;
	uint16_t queue_id = *(uint16_t *) ret_param;
	struct rte_bbdev_info info;
	double tb_len_bits;

	struct thread_params *tp = cb_arg;

	/* Find matching thread params using queue_id */
	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
		if (tp->queue_id == queue_id)
			break;

	if (i == MAX_QUEUES) {
		printf("%s: Queue_id from interrupt details was not found!\n",
				__func__);
		return;
	}

	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
		printf(
			"Dequeue interrupt handler called for incorrect event!\n");
		return;
	}

	burst_sz = rte_atomic16_read(&tp->burst_sz);
	num_ops = tp->op_params->num_to_process;

	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
				&tp->dec_ops[
					rte_atomic16_read(&tp->nb_dequeued)],
				burst_sz);
	else
		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
				&tp->enc_ops[
					rte_atomic16_read(&tp->nb_dequeued)],
				burst_sz);

	if (deq < burst_sz) {
		printf(
			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
			burst_sz, deq);
		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
		return;
	}

	if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
		rte_atomic16_add(&tp->nb_dequeued, deq);
		return;
	}

	total_time = rte_rdtsc_precise() - tp->start_time;

	rte_bbdev_info_get(dev_id, &info);

	ret = TEST_SUCCESS;

	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
				tp->op_params->vector_mask);
		/* get the max of iter_count for all dequeued ops */
		for (i = 0; i < num_ops; ++i)
			tp->iter_count = RTE_MAX(
					tp->dec_ops[i]->turbo_dec.iter_count,
					tp->iter_count);
		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
	}

	if (ret) {
		printf("Buffers validation failed\n");
		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
	}

	switch (test_vector.op_type) {
	case RTE_BBDEV_OP_TURBO_DEC:
		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
		break;
	case RTE_BBDEV_OP_TURBO_ENC:
		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
		break;
	case RTE_BBDEV_OP_NONE:
		tb_len_bits = 0.0;
		break;
	default:
		printf("Unknown op type: %d\n", test_vector.op_type);
		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
		return;
	}

	tp->ops_per_sec += ((double)num_ops) /
			((double)total_time / (double)rte_get_tsc_hz());
	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
			((double)total_time / (double)rte_get_tsc_hz());

	rte_atomic16_add(&tp->nb_dequeued, deq);
}
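/* Note: ops_per_sec and mbps accumulate once per repetition in the callback
 * above; throughput_test() divides both by TEST_REPETITIONS afterwards to
 * report the average rate per repetition.
 */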
static int
throughput_intr_lcore_dec(void *arg)
{
	struct thread_params *tp = arg;
	unsigned int enqueued;
	const uint16_t queue_id = tp->queue_id;
	const uint16_t burst_sz = tp->op_params->burst_sz;
	const uint16_t num_to_process = tp->op_params->num_to_process;
	struct rte_bbdev_dec_op *ops[num_to_process];
	struct test_buffers *bufs = NULL;
	struct rte_bbdev_info info;
	int ret, i, j;
	uint16_t num_to_enq, enq;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
			"Failed to enable interrupts for dev: %u, queue_id: %u",
			tp->dev_id, queue_id);

	rte_bbdev_info_get(tp->dev_id, &info);

	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
			"NUM_OPS cannot exceed %u for this device",
			info.drv.queue_size_lim);

	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	rte_atomic16_clear(&tp->processing_status);
	rte_atomic16_clear(&tp->nb_dequeued);

	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
		rte_pause();

	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
			num_to_process);
	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
			num_to_process);
	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
				bufs->hard_outputs, bufs->soft_outputs,
				tp->op_params->ref_dec_op);

	/* Set counter to validate the ordering */
	for (j = 0; j < num_to_process; ++j)
		ops[j]->opaque_data = (void *)(uintptr_t)j;

	for (j = 0; j < TEST_REPETITIONS; ++j) {
		for (i = 0; i < num_to_process; ++i)
			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);

		tp->start_time = rte_rdtsc_precise();
		for (enqueued = 0; enqueued < num_to_process;) {
			num_to_enq = burst_sz;

			if (unlikely(num_to_process - enqueued < num_to_enq))
				num_to_enq = num_to_process - enqueued;

			enq = 0;
			do {
				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
						queue_id, &ops[enqueued],
						num_to_enq);
			} while (unlikely(num_to_enq != enq));

			enqueued += num_to_enq;

			/* Write to thread burst_sz current number of enqueued
			 * descriptors. It ensures that proper number of
			 * descriptors will be dequeued in callback
			 * function - needed for last batch in case where
			 * the number of operations is not a multiple of
			 * burst size.
			 */
			rte_atomic16_set(&tp->burst_sz, num_to_enq);

			/* Wait until processing of previous batch is
			 * completed
			 */
			while (rte_atomic16_read(&tp->nb_dequeued) !=
					(int16_t) enqueued)
				rte_pause();
		}
		if (j != TEST_REPETITIONS - 1)
			rte_atomic16_clear(&tp->nb_dequeued);
	}

	return TEST_SUCCESS;
}
static int
throughput_intr_lcore_enc(void *arg)
{
	struct thread_params *tp = arg;
	unsigned int enqueued;
	const uint16_t queue_id = tp->queue_id;
	const uint16_t burst_sz = tp->op_params->burst_sz;
	const uint16_t num_to_process = tp->op_params->num_to_process;
	struct rte_bbdev_enc_op *ops[num_to_process];
	struct test_buffers *bufs = NULL;
	struct rte_bbdev_info info;
	int ret, i, j;
	uint16_t num_to_enq, enq;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
			"Failed to enable interrupts for dev: %u, queue_id: %u",
			tp->dev_id, queue_id);

	rte_bbdev_info_get(tp->dev_id, &info);

	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
			"NUM_OPS cannot exceed %u for this device",
			info.drv.queue_size_lim);

	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	rte_atomic16_clear(&tp->processing_status);
	rte_atomic16_clear(&tp->nb_dequeued);

	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
		rte_pause();

	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
			num_to_process);
	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
			num_to_process);
	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
				bufs->hard_outputs, tp->op_params->ref_enc_op);

	/* Set counter to validate the ordering */
	for (j = 0; j < num_to_process; ++j)
		ops[j]->opaque_data = (void *)(uintptr_t)j;

	for (j = 0; j < TEST_REPETITIONS; ++j) {
		for (i = 0; i < num_to_process; ++i)
			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);

		tp->start_time = rte_rdtsc_precise();
		for (enqueued = 0; enqueued < num_to_process;) {
			num_to_enq = burst_sz;

			if (unlikely(num_to_process - enqueued < num_to_enq))
				num_to_enq = num_to_process - enqueued;

			enq = 0;
			do {
				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
						queue_id, &ops[enqueued],
						num_to_enq);
			} while (unlikely(enq != num_to_enq));

			enqueued += num_to_enq;

			/* Write to thread burst_sz current number of enqueued
			 * descriptors. It ensures that proper number of
			 * descriptors will be dequeued in callback
			 * function - needed for last batch in case where
			 * the number of operations is not a multiple of
			 * burst size.
			 */
			rte_atomic16_set(&tp->burst_sz, num_to_enq);

			/* Wait until processing of previous batch is
			 * completed
			 */
			while (rte_atomic16_read(&tp->nb_dequeued) !=
					(int16_t) enqueued)
				rte_pause();
		}
		if (j != TEST_REPETITIONS - 1)
			rte_atomic16_clear(&tp->nb_dequeued);
	}

	return TEST_SUCCESS;
}
static int
throughput_pmd_lcore_dec(void *arg)
{
	struct thread_params *tp = arg;
	uint16_t enq, deq;
	uint64_t total_time = 0, start_time;
	const uint16_t queue_id = tp->queue_id;
	const uint16_t burst_sz = tp->op_params->burst_sz;
	const uint16_t num_ops = tp->op_params->num_to_process;
	struct rte_bbdev_dec_op *ops_enq[num_ops];
	struct rte_bbdev_dec_op *ops_deq[num_ops];
	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
	struct test_buffers *bufs = NULL;
	int i, j, ret;
	struct rte_bbdev_info info;
	uint16_t num_to_enq;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(tp->dev_id, &info);

	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
			"NUM_OPS cannot exceed %u for this device",
			info.drv.queue_size_lim);

	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
		rte_pause();

	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
				bufs->hard_outputs, bufs->soft_outputs, ref_op);

	/* Set counter to validate the ordering */
	for (j = 0; j < num_ops; ++j)
		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

	for (i = 0; i < TEST_REPETITIONS; ++i) {

		for (j = 0; j < num_ops; ++j)
			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);

		start_time = rte_rdtsc_precise();

		for (enq = 0, deq = 0; enq < num_ops;) {
			num_to_enq = burst_sz;

			if (unlikely(num_ops - enq < num_to_enq))
				num_to_enq = num_ops - enq;

			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
					queue_id, &ops_enq[enq], num_to_enq);

			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
					queue_id, &ops_deq[deq], enq - deq);
		}

		/* dequeue the remaining */
		while (deq < enq)
			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
					queue_id, &ops_deq[deq], enq - deq);

		total_time += rte_rdtsc_precise() - start_time;
	}

	tp->iter_count = 0;
	/* get the max of iter_count for all dequeued ops */
	for (i = 0; i < num_ops; ++i) {
		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
				tp->iter_count);
	}

	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
		ret = validate_dec_op(ops_deq, num_ops, ref_op,
				tp->op_params->vector_mask);
		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
	}

	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);

	double tb_len_bits = calc_dec_TB_size(ref_op);

	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
			((double)total_time / (double)rte_get_tsc_hz());
	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
			1000000.0) / ((double)total_time /
			(double)rte_get_tsc_hz());
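	/* The rates above convert TSC cycles to wall time:
	 * ops/s = (num_ops * TEST_REPETITIONS) / (total_time / tsc_hz), and
	 * Mbps scales by the transport block size in bits; e.g. (illustrative)
	 * 1000 repetitions of 512 ops of 5114 bits in 1.3e9 cycles at 1.3 GHz
	 * is one second of work, i.e. ~2618 Mbps.
	 */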
	return TEST_SUCCESS;
}
static int
throughput_pmd_lcore_enc(void *arg)
{
	struct thread_params *tp = arg;
	uint16_t enq, deq;
	uint64_t total_time = 0, start_time;
	const uint16_t queue_id = tp->queue_id;
	const uint16_t burst_sz = tp->op_params->burst_sz;
	const uint16_t num_ops = tp->op_params->num_to_process;
	struct rte_bbdev_enc_op *ops_enq[num_ops];
	struct rte_bbdev_enc_op *ops_deq[num_ops];
	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
	struct test_buffers *bufs = NULL;
	int i, j, ret;
	struct rte_bbdev_info info;
	uint16_t num_to_enq;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(tp->dev_id, &info);

	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
			"NUM_OPS cannot exceed %u for this device",
			info.drv.queue_size_lim);

	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
		rte_pause();

	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
			num_ops);
	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
			num_ops);
	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
				bufs->hard_outputs, ref_op);

	/* Set counter to validate the ordering */
	for (j = 0; j < num_ops; ++j)
		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

	for (i = 0; i < TEST_REPETITIONS; ++i) {

		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			for (j = 0; j < num_ops; ++j)
				mbuf_reset(ops_enq[j]->turbo_enc.output.data);

		start_time = rte_rdtsc_precise();

		for (enq = 0, deq = 0; enq < num_ops;) {
			num_to_enq = burst_sz;

			if (unlikely(num_ops - enq < num_to_enq))
				num_to_enq = num_ops - enq;

			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
					queue_id, &ops_enq[enq], num_to_enq);

			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
					queue_id, &ops_deq[deq], enq - deq);
		}

		/* dequeue the remaining */
		while (deq < enq)
			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
					queue_id, &ops_deq[deq], enq - deq);

		total_time += rte_rdtsc_precise() - start_time;
	}

	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
		ret = validate_enc_op(ops_deq, num_ops, ref_op);
		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
	}

	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);

	double tb_len_bits = calc_enc_TB_size(ref_op);

	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
			((double)total_time / (double)rte_get_tsc_hz());
	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
			/ 1000000.0) / ((double)total_time /
			(double)rte_get_tsc_hz());

	return TEST_SUCCESS;
}
static void
print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
{
	unsigned int iter = 0;
	double total_mops = 0, total_mbps = 0;

	for (iter = 0; iter < used_cores; iter++) {
		printf(
			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
			t_params[iter].mbps);
		total_mops += t_params[iter].ops_per_sec;
		total_mbps += t_params[iter].mbps;
	}
	printf(
		"\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps\n",
		used_cores, total_mops, total_mbps);
}
static void
print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
{
	unsigned int iter = 0;
	double total_mops = 0, total_mbps = 0;
	uint8_t iter_count = 0;

	for (iter = 0; iter < used_cores; iter++) {
		printf(
			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
			t_params[iter].mbps, t_params[iter].iter_count);
		total_mops += t_params[iter].ops_per_sec;
		total_mbps += t_params[iter].mbps;
		iter_count = RTE_MAX(iter_count, t_params[iter].iter_count);
	}
	printf(
		"\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
		used_cores, total_mops, total_mbps, iter_count);
}
/*
 * Test function that determines how long an enqueue + dequeue of a burst
 * takes on available lcores.
 */
static int
throughput_test(struct active_device *ad,
		struct test_op_params *op_params)
{
	int ret;
	unsigned int lcore_id, used_cores = 0;
	struct thread_params *t_params, *tp;
	struct rte_bbdev_info info;
	lcore_function_t *throughput_function;
	uint16_t num_lcores;
	const char *op_type_str;

	rte_bbdev_info_get(ad->dev_id, &info);

	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
			test_vector.op_type);

	printf(
		"Throughput test: dev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, int mode: %s, GHz: %lg\n",
			info.dev_name, ad->nb_queues, op_params->burst_sz,
			op_params->num_to_process, op_params->num_lcores,
			op_type_str,
			intr_enabled ? "Interrupt mode" : "PMD mode",
			(double)rte_get_tsc_hz() / 1000000000.0);
	/* Set number of lcores */
	num_lcores = (ad->nb_queues < (op_params->num_lcores))
			? ad->nb_queues
			: op_params->num_lcores;

	/* Allocate memory for thread parameters structure */
	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
			RTE_CACHE_LINE_SIZE);
	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
				RTE_CACHE_LINE_SIZE));

	if (intr_enabled) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
			throughput_function = throughput_intr_lcore_dec;
		else
			throughput_function = throughput_intr_lcore_enc;

		/* Dequeue interrupt callback registration */
		ret = rte_bbdev_callback_register(ad->dev_id,
				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
				t_params);
		if (ret < 0) {
			rte_free(t_params);
			return TEST_FAILED;
		}
	} else {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
			throughput_function = throughput_pmd_lcore_dec;
		else
			throughput_function = throughput_pmd_lcore_enc;
	}

	rte_atomic16_set(&op_params->sync, SYNC_WAIT);

	/* Master core is set at first entry */
	t_params[0].dev_id = ad->dev_id;
	t_params[0].lcore_id = rte_lcore_id();
	t_params[0].op_params = op_params;
	t_params[0].queue_id = ad->queue_ids[used_cores++];
	t_params[0].iter_count = 0;
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (used_cores >= num_lcores)
			break;

		t_params[used_cores].dev_id = ad->dev_id;
		t_params[used_cores].lcore_id = lcore_id;
		t_params[used_cores].op_params = op_params;
		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
		t_params[used_cores].iter_count = 0;

		rte_eal_remote_launch(throughput_function,
				&t_params[used_cores++], lcore_id);
	}

	rte_atomic16_set(&op_params->sync, SYNC_START);
	ret = throughput_function(&t_params[0]);

	/* Master core is always used */
	for (used_cores = 1; used_cores < num_lcores; used_cores++)
		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
	/* Return if test failed */
	if (ret) {
		rte_free(t_params);
		return TEST_FAILED;
	}

	/* Print throughput if interrupts are disabled and test passed */
	if (!intr_enabled) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
			print_dec_throughput(t_params, num_lcores);
		else
			print_enc_throughput(t_params, num_lcores);
		rte_free(t_params);
		return ret;
	}
	/* In interrupt TC we need to wait for the interrupt callback to dequeue
	 * all pending operations. Skip waiting for queues which reported an
	 * error using processing_status variable.
	 * Wait for master lcore operations.
	 */
	tp = &t_params[0];
	while ((rte_atomic16_read(&tp->nb_dequeued) <
			op_params->num_to_process) &&
			(rte_atomic16_read(&tp->processing_status) !=
			TEST_FAILED))
		rte_pause();

	tp->ops_per_sec /= TEST_REPETITIONS;
	tp->mbps /= TEST_REPETITIONS;
	ret |= rte_atomic16_read(&tp->processing_status);
	/* Wait for slave lcores operations */
	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
		tp = &t_params[used_cores];

		while ((rte_atomic16_read(&tp->nb_dequeued) <
				op_params->num_to_process) &&
				(rte_atomic16_read(&tp->processing_status) !=
				TEST_FAILED))
			rte_pause();

		tp->ops_per_sec /= TEST_REPETITIONS;
		tp->mbps /= TEST_REPETITIONS;
		ret |= rte_atomic16_read(&tp->processing_status);
	}
	/* Print throughput if test passed */
	if (!ret) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
			print_dec_throughput(t_params, num_lcores);
		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
			print_enc_throughput(t_params, num_lcores);
	}

	rte_free(t_params);
	return ret;
}
static int
latency_test_dec(struct rte_mempool *mempool,
		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
		int vector_mask, uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
	int ret = TEST_SUCCESS;
	uint16_t i, j, dequeued;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t start_time = 0, last_time = 0;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;
		bool first_time = true;
		last_time = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_dec_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					ref_op);

		/* Set counter to validate the ordering */
		for (j = 0; j < burst_sz; ++j)
			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

		start_time = rte_rdtsc_precise();

		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
				burst_sz);
		TEST_ASSERT(enq == burst_sz,
				"Error enqueueing burst, expected %u, got %u",
				burst_sz, enq);

		/* Dequeue */
		do {
			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);
			if (likely(first_time && (deq > 0))) {
				last_time = rte_rdtsc_precise() - start_time;
				first_time = false;
			}
		} while (unlikely(burst_sz != deq));
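		/* last_time now holds the burst round-trip latency: the cycles
		 * from enqueueing the whole burst until the first poll that
		 * returned at least one dequeued op.
		 */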
		*max_time = RTE_MAX(*max_time, last_time);
		*min_time = RTE_MIN(*min_time, last_time);
		*total_time += last_time;

		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
					vector_mask);
			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
		}

		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}
static int
latency_test_enc(struct rte_mempool *mempool,
		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
		uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
	int ret = TEST_SUCCESS;
	uint16_t i, j, dequeued;
	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t start_time = 0, last_time = 0;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;
		bool first_time = true;
		last_time = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_enc_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					ref_op);

		/* Set counter to validate the ordering */
		for (j = 0; j < burst_sz; ++j)
			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

		start_time = rte_rdtsc_precise();

		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
				burst_sz);
		TEST_ASSERT(enq == burst_sz,
				"Error enqueueing burst, expected %u, got %u",
				burst_sz, enq);

		/* Dequeue */
		do {
			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);
			if (likely(first_time && (deq > 0))) {
				last_time = rte_rdtsc_precise() - start_time;
				first_time = false;
			}
		} while (unlikely(burst_sz != deq));
		*max_time = RTE_MAX(*max_time, last_time);
		*min_time = RTE_MIN(*min_time, last_time);
		*total_time += last_time;

		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
		}

		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}
static int
latency_test(struct active_device *ad,
		struct test_op_params *op_params)
{
	int iter;
	uint16_t burst_sz = op_params->burst_sz;
	const uint16_t num_to_process = op_params->num_to_process;
	const enum rte_bbdev_op_type op_type = test_vector.op_type;
	const uint16_t queue_id = ad->queue_ids[0];
	struct test_buffers *bufs = NULL;
	struct rte_bbdev_info info;
	uint64_t total_time, min_time, max_time;
	const char *op_type_str;

	total_time = max_time = 0;
	min_time = UINT64_MAX;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(ad->dev_id, &info);
	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	printf(
		"\nValidation/Latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
			info.dev_name, burst_sz, num_to_process, op_type_str);
	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
		iter = latency_test_dec(op_params->mp, bufs,
				op_params->ref_dec_op, op_params->vector_mask,
				ad->dev_id, queue_id, num_to_process,
				burst_sz, &total_time, &min_time, &max_time);
	else
		iter = latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &total_time,
				&min_time, &max_time);

	if (iter <= 0)
		return TEST_FAILED;
	printf("Operation latency:\n"
			"\tavg latency: %lg cycles, %lg us\n"
			"\tmin latency: %lg cycles, %lg us\n"
			"\tmax latency: %lg cycles, %lg us\n",
			(double)total_time / (double)iter,
			(double)(total_time * 1000000) / (double)iter /
			(double)rte_get_tsc_hz(), (double)min_time,
			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
			(double)max_time, (double)(max_time * 1000000) /
			(double)rte_get_tsc_hz());
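	/* Cycle counts convert to wall time as us = cycles * 1e6 / tsc_hz;
	 * e.g. 5000 cycles on a 2.0 GHz TSC is 2.5 us (illustrative values).
	 */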
	return TEST_SUCCESS;
}
#ifdef RTE_BBDEV_OFFLOAD_COST
static int
get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
		struct rte_bbdev_stats *stats)
{
	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
	struct rte_bbdev_stats *q_stats;

	if (queue_id >= dev->data->num_queues)
		return -1;

	q_stats = &dev->data->queues[queue_id].queue_stats;

	stats->enqueued_count = q_stats->enqueued_count;
	stats->dequeued_count = q_stats->dequeued_count;
	stats->enqueue_err_count = q_stats->enqueue_err_count;
	stats->dequeue_err_count = q_stats->dequeue_err_count;
	stats->acc_offload_cycles = q_stats->acc_offload_cycles;

	return 0;
}
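/* The helper above reads stats straight from the per-queue data of the bbdev
 * struct rather than through rte_bbdev_stats_get(), which aggregates all
 * queues of a device; the offload-cost test needs per-queue
 * acc_offload_cycles to split enqueue time into software and accelerator
 * parts.
 */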
static int
offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
				burst_sz);

		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					ref_op);

		/* Start time meas for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));
		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
				stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;
		/* ensure enqueue has been completed */
		rte_delay_ms(10);

		/* Start time meas for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
					&ops_deq[deq], 1);
		} while (unlikely(deq != 1));
		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		/* Dequeue remaining operations if needed */
		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}
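/*
 * Encoder counterpart of offload_latency_test_dec(); the measurement
 * methodology is identical, only the operation type differs.
 */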
static int
offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;
	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
				burst_sz);

		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					ref_op);
		/* Start time meas for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));
		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
				stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;
		/* ensure enqueue has been completed */
		rte_delay_ms(10);

		/* Start time meas for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
					&ops_deq[deq], 1);
		} while (unlikely(deq != 1));
		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		/* Dequeue remaining operations if needed */
		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}
#endif
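/*
 * Offload cost test case: runs the decoder or encoder helper on the first
 * configured queue and prints the driver vs. accelerator enqueue cost plus
 * the cost of dequeueing a single ready operation.
 */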
static int
offload_cost_test(struct active_device *ad,
		struct test_op_params *op_params)
{
#ifndef RTE_BBDEV_OFFLOAD_COST
	RTE_SET_USED(ad);
	RTE_SET_USED(op_params);
	printf("Offload latency test is disabled.\n");
	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
	return TEST_SKIPPED;
#else
	int iter;
	uint16_t burst_sz = op_params->burst_sz;
	const uint16_t num_to_process = op_params->num_to_process;
	const enum rte_bbdev_op_type op_type = test_vector.op_type;
	const uint16_t queue_id = ad->queue_ids[0];
	struct test_buffers *bufs = NULL;
	struct rte_bbdev_info info;
	const char *op_type_str;
	struct test_time_stats time_st;

	memset(&time_st, 0, sizeof(struct test_time_stats));
	time_st.enq_sw_min_time = UINT64_MAX;
	time_st.enq_acc_min_time = UINT64_MAX;
	time_st.deq_min_time = UINT64_MAX;
	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(ad->dev_id, &info);
	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	printf(
		"\nOffload latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
			info.dev_name, burst_sz, num_to_process, op_type_str);
	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
		iter = offload_latency_test_dec(op_params->mp, bufs,
				op_params->ref_dec_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else
		iter = offload_latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);

	if (iter <= 0)
		return TEST_FAILED;
	printf("Enqueue offload cost latency:\n"
			"\tDriver offload avg %lg cycles, %lg us\n"
			"\tDriver offload min %lg cycles, %lg us\n"
			"\tDriver offload max %lg cycles, %lg us\n"
			"\tAccelerator offload avg %lg cycles, %lg us\n"
			"\tAccelerator offload min %lg cycles, %lg us\n"
			"\tAccelerator offload max %lg cycles, %lg us\n",
			(double)time_st.enq_sw_total_time / (double)iter,
			(double)(time_st.enq_sw_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.enq_sw_min_time,
			(double)(time_st.enq_sw_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
			(double)(time_st.enq_sw_max_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
			(double)iter,
			(double)(time_st.enq_acc_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.enq_acc_min_time,
			(double)(time_st.enq_acc_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
			(double)(time_st.enq_acc_max_time * 1000000) /
			rte_get_tsc_hz());
	printf("Dequeue offload cost latency - one op:\n"
			"\tavg %lg cycles, %lg us\n"
			"\tmin %lg cycles, %lg us\n"
			"\tmax %lg cycles, %lg us\n",
			(double)time_st.deq_total_time / (double)iter,
			(double)(time_st.deq_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.deq_min_time,
			(double)(time_st.deq_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.deq_max_time,
			(double)(time_st.deq_max_time * 1000000) /
			rte_get_tsc_hz());

	return TEST_SUCCESS;
#endif
}
#ifdef RTE_BBDEV_OFFLOAD_COST
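/*
 * Measure the cost of a dequeue call on an empty queue: no operations are
 * ever enqueued, so each timed call exercises only the driver poll path.
 */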
static int
offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *deq_total_time, uint64_t *deq_min_time,
		uint64_t *deq_max_time)
{
	int i, deq_total;
	struct rte_bbdev_dec_op *ops[MAX_BURST];
	uint64_t deq_start_time, deq_last_time;

	/* Test deq offload latency from an empty queue */

	for (i = 0, deq_total = 0; deq_total < num_to_process;
			++i, deq_total += burst_sz) {
		deq_start_time = rte_rdtsc_precise();

		if (unlikely(num_to_process - deq_total < burst_sz))
			burst_sz = num_to_process - deq_total;
		rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz);

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
		*deq_total_time += deq_last_time;
	}

	return i;
}
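/* Encoder counterpart of offload_latency_empty_q_test_dec() */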
static int
offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *deq_total_time, uint64_t *deq_min_time,
		uint64_t *deq_max_time)
{
	int i, deq_total;
	struct rte_bbdev_enc_op *ops[MAX_BURST];
	uint64_t deq_start_time, deq_last_time;

	/* Test deq offload latency from an empty queue */
	for (i = 0, deq_total = 0; deq_total < num_to_process;
			++i, deq_total += burst_sz) {
		deq_start_time = rte_rdtsc_precise();

		if (unlikely(num_to_process - deq_total < burst_sz))
			burst_sz = num_to_process - deq_total;
		rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz);

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
		*deq_total_time += deq_last_time;
	}

	return i;
}
#endif
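/*
 * Empty dequeue test case: reports the min/max/average cost of polling an
 * empty queue for the operation type under test.
 */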
static int
offload_latency_empty_q_test(struct active_device *ad,
		struct test_op_params *op_params)
{
#ifndef RTE_BBDEV_OFFLOAD_COST
	RTE_SET_USED(ad);
	RTE_SET_USED(op_params);
	printf("Offload latency empty dequeue test is disabled.\n");
	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
	return TEST_SKIPPED;
#else
	int iter;
	uint64_t deq_total_time, deq_min_time, deq_max_time;
	uint16_t burst_sz = op_params->burst_sz;
	const uint16_t num_to_process = op_params->num_to_process;
	const enum rte_bbdev_op_type op_type = test_vector.op_type;
	const uint16_t queue_id = ad->queue_ids[0];
	struct rte_bbdev_info info;
	const char *op_type_str;

	deq_total_time = deq_max_time = 0;
	deq_min_time = UINT64_MAX;
	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(ad->dev_id, &info);

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	printf(
		"\nOffload latency empty dequeue test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
			info.dev_name, burst_sz, num_to_process, op_type_str);
	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
				num_to_process, burst_sz, &deq_total_time,
				&deq_min_time, &deq_max_time);
	else
		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
				num_to_process, burst_sz, &deq_total_time,
				&deq_min_time, &deq_max_time);

	if (iter <= 0)
		return TEST_FAILED;
	printf("Empty dequeue offload\n"
			"\tavg. latency: %lg cycles, %lg us\n"
			"\tmin. latency: %lg cycles, %lg us\n"
			"\tmax. latency: %lg cycles, %lg us\n",
			(double)deq_total_time / (double)iter,
			(double)(deq_total_time * 1000000) / (double)iter /
			(double)rte_get_tsc_hz(), (double)deq_min_time,
			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
			(double)deq_max_time, (double)(deq_max_time * 1000000) /
			rte_get_tsc_hz());

	return TEST_SUCCESS;
#endif
}
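/* Thin wrappers binding each test function to the unit test framework */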
static int
throughput_tc(void)
{
	return run_test_case(throughput_test);
}

static int
offload_cost_tc(void)
{
	return run_test_case(offload_cost_test);
}

static int
offload_latency_empty_q_tc(void)
{
	return run_test_case(offload_latency_empty_q_test);
}

static int
latency_tc(void)
{
	return run_test_case(latency_test);
}

static int
interrupt_tc(void)
{
	return run_test_case(throughput_test);
}
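/* Test suite definitions mapping setup/teardown hooks to their test cases */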
static struct unit_test_suite bbdev_throughput_testsuite = {
	.suite_name = "BBdev Throughput Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_validation_testsuite = {
	.suite_name = "BBdev Validation Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_latency_testsuite = {
	.suite_name = "BBdev Latency Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_offload_cost_testsuite = {
	.suite_name = "BBdev Offload Cost Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_interrupt_testsuite = {
	.suite_name = "BBdev Interrupt Tests",
	.setup = interrupt_testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};
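/* Register the suites with the test application under their command names */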
REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);