/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <math.h>

#include <rte_common.h>
#include <rte_launch.h>
#include <rte_bbdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_random.h>
#include <rte_hexdump.h>
#include <rte_interrupts.h>

#include "main.h"
#include "test_bbdev_vector.h"
#define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))

#define MAX_QUEUES RTE_MAX_LCORE
#define TEST_REPETITIONS 1000

#ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
#include <fpga_lte_fec.h>
#define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
#define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
#define VF_UL_4G_QUEUE_VALUE 4
#define VF_DL_4G_QUEUE_VALUE 4
#define UL_4G_BANDWIDTH 3
#define DL_4G_BANDWIDTH 3
#define UL_4G_LOAD_BALANCE 128
#define DL_4G_LOAD_BALANCE 128
#define FLR_4G_TIMEOUT 610
#endif

#define OPS_CACHE_SIZE 256U
#define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */

#define INVALID_OPAQUE -1

#define INVALID_QUEUE_ID -1
/* Increment for the next code block in external HARQ memory */
#define HARQ_INCR 32768
/* Headroom for filler LLRs insertion in HARQ buffer */
#define FILLER_HEADROOM 1024
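/*
 * Example layout (illustrative): with HARQ_INCR = 32768, each queue gets a
 * 32 MB window in external HARQ memory (base = queue_id * HARQ_INCR * 1024)
 * and consecutive in-flight code blocks within a queue are spaced HARQ_INCR
 * bytes apart, as used by preload_harq_ddr()/retrieve_harq_ddr() below.
 */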
/* Constants for K0 computation as per 3GPP 38.212 Table 5.4.2.1-2 */
#define N_ZC_1 66 /* N = 66 Zc for BG 1 */
#define N_ZC_2 50 /* N = 50 Zc for BG 2 */
#define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
#define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
#define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
#define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
#define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
#define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
static struct test_bbdev_vector test_vector;

/* Switch between PMD and Interrupt for throughput TC */
static bool intr_enabled;

/* LLR arithmetic representation for numerical conversion */
static int ldpc_llr_decimals;
static int ldpc_llr_size;
/* Keep track of the LDPC decoder device capability flags */
static uint32_t ldpc_cap_flags;
/* Represents tested active devices */
static struct active_device {
	const char *driver_name;
	uint8_t dev_id;
	uint16_t supported_ops;
	uint16_t queue_ids[MAX_QUEUES];
	uint16_t nb_queues;
	struct rte_mempool *ops_mempool;
	struct rte_mempool *in_mbuf_pool;
	struct rte_mempool *hard_out_mbuf_pool;
	struct rte_mempool *soft_out_mbuf_pool;
	struct rte_mempool *harq_in_mbuf_pool;
	struct rte_mempool *harq_out_mbuf_pool;
} active_devs[RTE_BBDEV_MAX_DEVS];

static uint8_t nb_active_devs;
/* Data buffers used by BBDEV ops */
struct test_buffers {
	struct rte_bbdev_op_data *inputs;
	struct rte_bbdev_op_data *hard_outputs;
	struct rte_bbdev_op_data *soft_outputs;
	struct rte_bbdev_op_data *harq_inputs;
	struct rte_bbdev_op_data *harq_outputs;
};

/* Operation parameters specific for given test case */
struct test_op_params {
	struct rte_mempool *mp;
	struct rte_bbdev_dec_op *ref_dec_op;
	struct rte_bbdev_enc_op *ref_enc_op;
	uint16_t burst_sz;
	uint16_t num_to_process;
	uint16_t num_lcores;
	int vector_mask;
	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
};
/* Contains per lcore params */
struct thread_params {
	uint8_t dev_id;
	uint16_t queue_id;
	uint32_t lcore_id;
	uint64_t start_time;
	double ops_per_sec;
	double mbps;
	uint8_t iter_count;
	rte_atomic16_t nb_dequeued;
	rte_atomic16_t processing_status;
	rte_atomic16_t burst_sz;
	struct test_op_params *op_params;
	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
};
#ifdef RTE_BBDEV_OFFLOAD_COST
/* Stores time statistics */
struct test_time_stats {
	/* Stores software enqueue total working time */
	uint64_t enq_sw_total_time;
	/* Stores minimum value of software enqueue working time */
	uint64_t enq_sw_min_time;
	/* Stores maximum value of software enqueue working time */
	uint64_t enq_sw_max_time;
	/* Stores accelerator enqueue total working time */
	uint64_t enq_acc_total_time;
	/* Stores minimum value of accelerator enqueue working time */
	uint64_t enq_acc_min_time;
	/* Stores maximum value of accelerator enqueue working time */
	uint64_t enq_acc_max_time;
	/* Stores dequeue total working time */
	uint64_t deq_total_time;
	/* Stores minimum value of dequeue working time */
	uint64_t deq_min_time;
	/* Stores maximum value of dequeue working time */
	uint64_t deq_max_time;
};
#endif
typedef int (test_case_function)(struct active_device *ad,
		struct test_op_params *op_params);

/* Zero the length of every segment in a (possibly chained) mbuf */
static inline void
mbuf_reset(struct rte_mbuf *m)
{
	m->pkt_len = 0;

	do {
		m->data_len = 0;
		m = m->next;
	} while (m != NULL);
}
/* Read flag value 0/1 from bitmap */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
	return bitmap & bitmask;
}

static void
set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
{
	ad->supported_ops |= (1 << op_type);
}

static bool
is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
{
	return ad->supported_ops & (1 << op_type);
}

static int
flags_match(uint32_t flags_req, uint32_t flags_present)
{
	return (flags_req & flags_present) == flags_req;
}

static void
clear_soft_out_cap(uint32_t *op_flags)
{
	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
}
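/*
 * Example (illustrative): flags_match(0x5, 0x7) is true because every
 * requested flag bit (0x1 | 0x4) is present in the capability mask, while
 * flags_match(0x5, 0x6) is false since requested bit 0x1 is missing.
 */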
static int
check_dev_cap(const struct rte_bbdev_info *dev_info)
{
	unsigned int i;
	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
		nb_harq_inputs, nb_harq_outputs;
	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;

	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
	nb_harq_inputs = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;

	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
		if (op_cap->type != test_vector.op_type)
			continue;

		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
			const struct rte_bbdev_op_cap_turbo_dec *cap =
					&op_cap->cap.turbo_dec;
			/* Ignore lack of soft output capability, just skip
			 * checking if soft output is valid.
			 */
			if ((test_vector.turbo_dec.op_flags &
					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
					!(cap->capability_flags &
					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
				printf(
					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
					dev_info->dev_name);
				clear_soft_out_cap(
					&test_vector.turbo_dec.op_flags);
			}

			if (!flags_match(test_vector.turbo_dec.op_flags,
					cap->capability_flags))
				return TEST_FAILED;
			if (nb_inputs > cap->num_buffers_src) {
				printf("Too many inputs defined: %u, max: %u\n",
					nb_inputs, cap->num_buffers_src);
				return TEST_FAILED;
			}
			if (nb_soft_outputs > cap->num_buffers_soft_out &&
					(test_vector.turbo_dec.op_flags &
					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
				printf(
					"Too many soft outputs defined: %u, max: %u\n",
					nb_soft_outputs,
					cap->num_buffers_soft_out);
				return TEST_FAILED;
			}
			if (nb_hard_outputs > cap->num_buffers_hard_out) {
				printf(
					"Too many hard outputs defined: %u, max: %u\n",
					nb_hard_outputs,
					cap->num_buffers_hard_out);
				return TEST_FAILED;
			}
			if (intr_enabled && !(cap->capability_flags &
					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
				printf(
					"Dequeue interrupts are not supported!\n");
				return TEST_FAILED;
			}

			return TEST_SUCCESS;
		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
			const struct rte_bbdev_op_cap_turbo_enc *cap =
					&op_cap->cap.turbo_enc;

			if (!flags_match(test_vector.turbo_enc.op_flags,
					cap->capability_flags))
				return TEST_FAILED;
			if (nb_inputs > cap->num_buffers_src) {
				printf("Too many inputs defined: %u, max: %u\n",
					nb_inputs, cap->num_buffers_src);
				return TEST_FAILED;
			}
			if (nb_hard_outputs > cap->num_buffers_dst) {
				printf(
					"Too many hard outputs defined: %u, max: %u\n",
					nb_hard_outputs, cap->num_buffers_dst);
				return TEST_FAILED;
			}
			if (intr_enabled && !(cap->capability_flags &
					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
				printf(
					"Dequeue interrupts are not supported!\n");
				return TEST_FAILED;
			}

			return TEST_SUCCESS;
		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
			const struct rte_bbdev_op_cap_ldpc_enc *cap =
					&op_cap->cap.ldpc_enc;

			if (!flags_match(test_vector.ldpc_enc.op_flags,
					cap->capability_flags)) {
				printf("Flag mismatch\n");
				return TEST_FAILED;
			}
			if (nb_inputs > cap->num_buffers_src) {
				printf("Too many inputs defined: %u, max: %u\n",
					nb_inputs, cap->num_buffers_src);
				return TEST_FAILED;
			}
			if (nb_hard_outputs > cap->num_buffers_dst) {
				printf(
					"Too many hard outputs defined: %u, max: %u\n",
					nb_hard_outputs, cap->num_buffers_dst);
				return TEST_FAILED;
			}
			if (intr_enabled && !(cap->capability_flags &
					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
				printf(
					"Dequeue interrupts are not supported!\n");
				return TEST_FAILED;
			}

			return TEST_SUCCESS;
		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
			const struct rte_bbdev_op_cap_ldpc_dec *cap =
					&op_cap->cap.ldpc_dec;

			if (!flags_match(test_vector.ldpc_dec.op_flags,
					cap->capability_flags)) {
				printf("Flag mismatch\n");
				return TEST_FAILED;
			}
			if (nb_inputs > cap->num_buffers_src) {
				printf("Too many inputs defined: %u, max: %u\n",
					nb_inputs, cap->num_buffers_src);
				return TEST_FAILED;
			}
			if (nb_hard_outputs > cap->num_buffers_hard_out) {
				printf(
					"Too many hard outputs defined: %u, max: %u\n",
					nb_hard_outputs,
					cap->num_buffers_hard_out);
				return TEST_FAILED;
			}
			if (nb_harq_inputs > cap->num_buffers_hard_out) {
				printf(
					"Too many HARQ inputs defined: %u, max: %u\n",
					nb_harq_inputs,
					cap->num_buffers_hard_out);
				return TEST_FAILED;
			}
			if (nb_harq_outputs > cap->num_buffers_hard_out) {
				printf(
					"Too many HARQ outputs defined: %u, max: %u\n",
					nb_harq_outputs,
					cap->num_buffers_hard_out);
				return TEST_FAILED;
			}
			if (intr_enabled && !(cap->capability_flags &
					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
				printf(
					"Dequeue interrupts are not supported!\n");
				return TEST_FAILED;
			}
			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
					(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
					RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
					RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK))) {
				printf("Skip loop-back with interrupt\n");
				return TEST_FAILED;
			}

			return TEST_SUCCESS;
		}
	}

	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
		return TEST_SUCCESS; /* Special case for NULL device */

	return TEST_FAILED;
}
/* Calculates an optimal mempool size not smaller than val */
static unsigned int
optimal_mempool_size(unsigned int val)
{
	return rte_align32pow2(val + 1) - 1;
}
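/*
 * Worked example (illustrative): optimal_mempool_size(1000) returns
 * rte_align32pow2(1001) - 1 = 1023. Sizes of the form 2^q - 1 give the best
 * memory usage for an rte_mempool, hence the rounding above.
 */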
/* Allocates an mbuf mempool for inputs and outputs */
static struct rte_mempool *
create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
		int socket_id, unsigned int mbuf_pool_size,
		const char *op_type_str)
{
	unsigned int i;
	uint32_t max_seg_sz = 0;
	char pool_name[RTE_MEMPOOL_NAMESIZE];

	/* find max input segment size */
	for (i = 0; i < entries->nb_segments; ++i)
		if (entries->segments[i].length > max_seg_sz)
			max_seg_sz = entries->segments[i].length;

	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
			dev_id);
	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
					+ FILLER_HEADROOM,
			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
}
static int
create_mempools(struct active_device *ad, int socket_id,
		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
{
	struct rte_mempool *mp;
	unsigned int ops_pool_size, mbuf_pool_size = 0;
	char pool_name[RTE_MEMPOOL_NAMESIZE];
	const char *op_type_str;
	enum rte_bbdev_op_type op_type = org_op_type;

	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
	struct op_data_entries *hard_out =
			&test_vector.entries[DATA_HARD_OUTPUT];
	struct op_data_entries *soft_out =
			&test_vector.entries[DATA_SOFT_OUTPUT];
	struct op_data_entries *harq_in =
			&test_vector.entries[DATA_HARQ_INPUT];
	struct op_data_entries *harq_out =
			&test_vector.entries[DATA_HARQ_OUTPUT];

	/* allocate ops mempool */
	ops_pool_size = optimal_mempool_size(RTE_MAX(
			/* Ops used plus 1 reference op */
			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
			/* Minimal cache size plus 1 reference op */
			(unsigned int)(1.5 * rte_lcore_count() *
					OPS_CACHE_SIZE + 1)),
			OPS_POOL_SIZE_MIN));

	if (org_op_type == RTE_BBDEV_OP_NONE)
		op_type = RTE_BBDEV_OP_TURBO_ENC;

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
			ad->dev_id);
	mp = rte_bbdev_op_pool_create(pool_name, op_type,
			ops_pool_size, OPS_CACHE_SIZE, socket_id);
	TEST_ASSERT_NOT_NULL(mp,
			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
			ops_pool_size,
			ad->dev_id,
			socket_id);
	ad->ops_mempool = mp;

	/* Do not create inputs and outputs mbufs for BaseBand Null Device */
	if (org_op_type == RTE_BBDEV_OP_NONE)
		return TEST_SUCCESS;

	/* Inputs */
	if (in->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				in->nb_segments);
		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
				mbuf_pool_size, "in");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->in_mbuf_pool = mp;
	}

	/* Hard outputs */
	if (hard_out->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				hard_out->nb_segments);
		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
				mbuf_pool_size, "hard_out");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->hard_out_mbuf_pool = mp;
	}

	/* Soft outputs */
	if (soft_out->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				soft_out->nb_segments);
		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
				mbuf_pool_size, "soft_out");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->soft_out_mbuf_pool = mp;
	}

	/* HARQ inputs */
	if (harq_in->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				harq_in->nb_segments);
		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
				mbuf_pool_size, "harq_in");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->harq_in_mbuf_pool = mp;
	}

	/* HARQ outputs */
	if (harq_out->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				harq_out->nb_segments);
		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
				mbuf_pool_size, "harq_out");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->harq_out_mbuf_pool = mp;
	}

	return TEST_SUCCESS;
}
static int
add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
		struct test_bbdev_vector *vector)
{
	int ret;
	unsigned int queue_id;
	struct rte_bbdev_queue_conf qconf;
	struct active_device *ad = &active_devs[nb_active_devs];
	unsigned int nb_queues;
	enum rte_bbdev_op_type op_type = vector->op_type;

/* Configure fpga lte fec with PF & VF values
 * if '-i' flag is set and using fpga device
 */
#ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
	if ((get_init_device() == true) &&
		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
		struct fpga_lte_fec_conf conf;
		unsigned int i;

		printf("Configure FPGA LTE FEC Driver %s with default values\n",
				info->drv.driver_name);

		/* clear default configuration before initialization */
		memset(&conf, 0, sizeof(struct fpga_lte_fec_conf));

		/* Set PF mode:
		 * true if PF is used for data plane,
		 * false for VFs
		 */
		conf.pf_mode_en = true;

		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
			/* Number of UL queues per VF (fpga supports 8 VFs) */
			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
			/* Number of DL queues per VF (fpga supports 8 VFs) */
			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
		}

		/* UL bandwidth. Needed for schedule algorithm */
		conf.ul_bandwidth = UL_4G_BANDWIDTH;
		/* DL bandwidth */
		conf.dl_bandwidth = DL_4G_BANDWIDTH;

		/* UL & DL load balance factor */
		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
		conf.dl_load_balance = DL_4G_LOAD_BALANCE;

		/* FLR timeout value */
		conf.flr_time_out = FLR_4G_TIMEOUT;

		/* setup FPGA PF with configuration information */
		ret = fpga_lte_fec_configure(info->dev_name, &conf);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to configure 4G FPGA PF for bbdev %s",
				info->dev_name);
	}
#endif
	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);

	/* setup device */
	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
	if (ret < 0) {
		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
				dev_id, nb_queues, info->socket_id, ret);
		return TEST_FAILED;
	}

	/* configure interrupts if needed */
	if (intr_enabled) {
		ret = rte_bbdev_intr_enable(dev_id);
		if (ret < 0) {
			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
					ret);
			return TEST_FAILED;
		}
	}

	/* setup device queues */
	qconf.socket = info->socket_id;
	qconf.queue_size = info->drv.default_queue_conf.queue_size;
	qconf.priority = 0;
	qconf.deferred_start = 0;
	qconf.op_type = op_type;

	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
		if (ret != 0) {
			printf(
					"Allocated all queues (id=%u) at prio%u on dev%u\n",
					queue_id, qconf.priority, dev_id);
			qconf.priority++;
			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
					&qconf);
		}
		if (ret != 0) {
			printf("All queues on dev %u allocated: %u\n",
					dev_id, queue_id);
			break;
		}
		ad->queue_ids[queue_id] = queue_id;
	}
	TEST_ASSERT(queue_id != 0,
			"ERROR Failed to configure any queues on dev %u",
			dev_id);
	ad->nb_queues = queue_id;

	set_avail_op(ad, op_type);

	return TEST_SUCCESS;
}
static int
add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
		struct test_bbdev_vector *vector)
{
	int ret;

	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
	active_devs[nb_active_devs].dev_id = dev_id;

	ret = add_bbdev_dev(dev_id, info, vector);
	if (ret == TEST_SUCCESS)
		++nb_active_devs;
	return ret;
}

static uint8_t
populate_active_devices(void)
{
	int ret;
	uint8_t dev_id;
	uint8_t nb_devs_added = 0;
	struct rte_bbdev_info info;

	RTE_BBDEV_FOREACH(dev_id) {
		rte_bbdev_info_get(dev_id, &info);

		if (check_dev_cap(&info)) {
			printf(
				"Device %d (%s) does not support specified capabilities\n",
					dev_id, info.dev_name);
			continue;
		}

		ret = add_active_device(dev_id, &info, &test_vector);
		if (ret != 0) {
			printf("Adding active bbdev %s skipped\n",
					info.dev_name);
			continue;
		}
		nb_devs_added++;
	}

	return nb_devs_added;
}
static int
read_test_vector(void)
{
	int ret;

	memset(&test_vector, 0, sizeof(test_vector));
	printf("Test vector file = %s\n", get_vector_filename());
	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
			get_vector_filename());

	return TEST_SUCCESS;
}

static int
testsuite_setup(void)
{
	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");

	if (populate_active_devices() == 0) {
		printf("No suitable devices found!\n");
		return TEST_SKIPPED;
	}

	return TEST_SUCCESS;
}

static int
interrupt_testsuite_setup(void)
{
	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");

	/* Enable interrupts */
	intr_enabled = true;

	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
	if (populate_active_devices() == 0 ||
			test_vector.op_type == RTE_BBDEV_OP_NONE) {
		intr_enabled = false;
		printf("No suitable devices found!\n");
		return TEST_SKIPPED;
	}

	return TEST_SUCCESS;
}
static void
testsuite_teardown(void)
{
	uint8_t dev_id;

	/* Unconfigure devices */
	RTE_BBDEV_FOREACH(dev_id)
		rte_bbdev_close(dev_id);

	/* Clear active devices structs. */
	memset(active_devs, 0, sizeof(active_devs));
	nb_active_devs = 0;
}

static int
ut_setup(void)
{
	uint8_t i, dev_id;

	for (i = 0; i < nb_active_devs; i++) {
		dev_id = active_devs[i].dev_id;
		/* reset bbdev stats */
		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
				"Failed to reset stats of bbdev %u", dev_id);
		/* start the device */
		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
				"Failed to start bbdev %u", dev_id);
	}

	return TEST_SUCCESS;
}

static void
ut_teardown(void)
{
	uint8_t i, dev_id;
	struct rte_bbdev_stats stats;

	for (i = 0; i < nb_active_devs; i++) {
		dev_id = active_devs[i].dev_id;
		/* read stats */
		rte_bbdev_stats_get(dev_id, &stats);
		/* Stop the device */
		rte_bbdev_stop(dev_id);
	}
}
static int
init_op_data_objs(struct rte_bbdev_op_data *bufs,
		struct op_data_entries *ref_entries,
		struct rte_mempool *mbuf_pool, const uint16_t n,
		enum op_data_type op_type, uint16_t min_alignment)
{
	int ret;
	unsigned int i, j;
	bool large_input = false;

	for (i = 0; i < n; ++i) {
		char *data;
		struct op_data_buf *seg = &ref_entries->segments[0];
		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
		TEST_ASSERT_NOT_NULL(m_head,
				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
				op_type, n * ref_entries->nb_segments,
				mbuf_pool->size);

		if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
			/*
			 * Special case when DPDK mbuf cannot handle
			 * the required input size
			 */
			printf("Warning: Larger input size than DPDK mbuf %d\n",
					seg->length);
			large_input = true;
		}
		bufs[i].data = m_head;
		bufs[i].offset = 0;
		bufs[i].length = 0;

		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
			if ((op_type == DATA_INPUT) && large_input) {
				/* Allocate a fake overused mbuf */
				data = rte_malloc(NULL, seg->length, 0);
				memcpy(data, seg->addr, seg->length);
				m_head->buf_addr = data;
				m_head->buf_iova = rte_malloc_virt2iova(data);
				m_head->data_off = 0;
				m_head->data_len = seg->length;
			} else {
				data = rte_pktmbuf_append(m_head, seg->length);
				TEST_ASSERT_NOT_NULL(data,
					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
					seg->length, op_type);

				TEST_ASSERT(data == RTE_PTR_ALIGN(
						data, min_alignment),
					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
					data, min_alignment);
				rte_memcpy(data, seg->addr, seg->length);
			}

			bufs[i].length += seg->length;

			for (j = 1; j < ref_entries->nb_segments; ++j) {
				struct rte_mbuf *m_tail =
						rte_pktmbuf_alloc(mbuf_pool);
				TEST_ASSERT_NOT_NULL(m_tail,
						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
						op_type,
						n * ref_entries->nb_segments,
						mbuf_pool->size);
				seg += 1;

				data = rte_pktmbuf_append(m_tail, seg->length);
				TEST_ASSERT_NOT_NULL(data,
						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
						seg->length, op_type);

				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
						min_alignment),
						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
						data, min_alignment);
				rte_memcpy(data, seg->addr, seg->length);
				bufs[i].length += seg->length;

				ret = rte_pktmbuf_chain(m_head, m_tail);
				TEST_ASSERT_SUCCESS(ret,
						"Couldn't chain mbufs from %d data type mbuf pool",
						op_type);
			}
		} else {
			/* allocate chained-mbuf for output buffer */
			for (j = 1; j < ref_entries->nb_segments; ++j) {
				struct rte_mbuf *m_tail =
						rte_pktmbuf_alloc(mbuf_pool);
				TEST_ASSERT_NOT_NULL(m_tail,
						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
						op_type,
						n * ref_entries->nb_segments,
						mbuf_pool->size);

				ret = rte_pktmbuf_chain(m_head, m_tail);
				TEST_ASSERT_SUCCESS(ret,
						"Couldn't chain mbufs from %d data type mbuf pool",
						op_type);
			}
		}
	}

	return 0;
}
static int
allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
		const int socket)
{
	int i;

	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
	if (*buffers == NULL) {
		printf("WARNING: Failed to allocate op_data on socket %d\n",
				socket);
		/* try to allocate memory on other detected sockets */
		for (i = 0; i < socket; i++) {
			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
			if (*buffers != NULL)
				break;
		}
	}

	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
}
static void
limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
		const uint16_t n, const int8_t max_llr_modulus)
{
	uint16_t i, byte_idx;

	for (i = 0; i < n; ++i) {
		struct rte_mbuf *m = input_ops[i].data;
		while (m != NULL) {
			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
					input_ops[i].offset);
			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
					++byte_idx)
				llr[byte_idx] = round((double)max_llr_modulus *
						llr[byte_idx] / INT8_MAX);

			m = m->next;
		}
	}
}
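/*
 * Example (illustrative): with max_llr_modulus = 16, an input LLR of 127
 * becomes round(16 * 127 / 127) = 16 and -64 becomes round(16 * -64 / 127)
 * = -8, i.e. the full int8_t range is compressed into [-16, 16].
 */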
/*
 * We may have to insert filler bits
 * when they are required by the HARQ assumption
 */
static void
ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
		const uint16_t n, struct test_op_params *op_params)
{
	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;

	if (input_ops == NULL)
		return;
	/* No need to add filler if not required by device */
	if (!(ldpc_cap_flags &
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
		return;
	/* No need to add filler for loopback operation */
	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
		return;

	uint16_t i, j, parity_offset;
	for (i = 0; i < n; ++i) {
		struct rte_mbuf *m = input_ops[i].data;
		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
				input_ops[i].offset);
		parity_offset = (dec.basegraph == 1 ? 20 : 8)
				* dec.z_c - dec.n_filler;
		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
		m->data_len = new_hin_size;
		input_ops[i].length = new_hin_size;
		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
				j--)
			llr[j] = llr[j - dec.n_filler];
		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
		for (j = 0; j < dec.n_filler; j++)
			llr[parity_offset + j] = llr_max_pre_scaling;
	}
}
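/*
 * Example (illustrative): for BG 1, z_c = 128 and n_filler = 100, the filler
 * LLRs are re-inserted at parity_offset = 20 * 128 - 100 = 2460; the tail of
 * the buffer is shifted up by n_filler bytes and the hole is filled with the
 * maximum pre-scaling LLR magnitude, (1 << (ldpc_llr_size - 1)) - 1.
 */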
static void
ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
		const uint16_t n, const int8_t llr_size,
		const int8_t llr_decimals)
{
	if (input_ops == NULL)
		return;

	uint16_t i, byte_idx;

	int16_t llr_max, llr_min, llr_tmp;
	llr_max = (1 << (llr_size - 1)) - 1;
	llr_min = -llr_max;
	for (i = 0; i < n; ++i) {
		struct rte_mbuf *m = input_ops[i].data;
		while (m != NULL) {
			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
					input_ops[i].offset);
			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
					++byte_idx) {
				llr_tmp = llr[byte_idx];
				if (llr_decimals == 4)
					llr_tmp *= 8;
				else if (llr_decimals == 2)
					llr_tmp *= 2;
				else if (llr_decimals == 0)
					llr_tmp /= 2;
				llr_tmp = RTE_MIN(llr_max,
						RTE_MAX(llr_min, llr_tmp));
				llr[byte_idx] = (int8_t) llr_tmp;
			}

			m = m->next;
		}
	}
}
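/*
 * Example (illustrative): the multiplier is effectively 2^(llr_decimals - 1),
 * i.e. the vector LLRs are assumed to carry one fractional bit. For a device
 * with llr_size = 8 and llr_decimals = 4, an input LLR of 5 is scaled to
 * 5 * 8 = 40, then saturated to the [-127, 127] range.
 */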
static int
fill_queue_buffers(struct test_op_params *op_params,
		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
		struct rte_mempool *soft_out_mp,
		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
		uint16_t queue_id,
		const struct rte_bbdev_op_cap *capabilities,
		uint16_t min_alignment, const int socket_id)
{
	int ret;
	enum op_data_type type;
	const uint16_t n = op_params->num_to_process;

	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
		in_mp,
		soft_out_mp,
		hard_out_mp,
		harq_in_mp,
		harq_out_mp,
	};

	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
		&op_params->q_bufs[socket_id][queue_id].inputs,
		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
	};

	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
		struct op_data_entries *ref_entries =
				&test_vector.entries[type];
		if (ref_entries->nb_segments == 0)
			continue;

		ret = allocate_buffers_on_socket(queue_ops[type],
				n * sizeof(struct rte_bbdev_op_data),
				socket_id);
		TEST_ASSERT_SUCCESS(ret,
				"Couldn't allocate memory for rte_bbdev_op_data structs");

		ret = init_op_data_objs(*queue_ops[type], ref_entries,
				mbuf_pools[type], n, type, min_alignment);
		TEST_ASSERT_SUCCESS(ret,
				"Couldn't init rte_bbdev_op_data structs");
	}

	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
				capabilities->cap.turbo_dec.max_llr_modulus);

	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
				RTE_BBDEV_LDPC_LLR_COMPRESSION;
		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
		if (!loopback && !llr_comp)
			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
					ldpc_llr_size, ldpc_llr_decimals);
		if (!loopback && !harq_comp)
			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
					ldpc_llr_size, ldpc_llr_decimals);
		if (!loopback)
			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
					op_params);
	}

	return 0;
}
static void
free_buffers(struct active_device *ad, struct test_op_params *op_params)
{
	unsigned int i, j;

	rte_mempool_free(ad->ops_mempool);
	rte_mempool_free(ad->in_mbuf_pool);
	rte_mempool_free(ad->hard_out_mbuf_pool);
	rte_mempool_free(ad->soft_out_mbuf_pool);
	rte_mempool_free(ad->harq_in_mbuf_pool);
	rte_mempool_free(ad->harq_out_mbuf_pool);

	for (i = 0; i < rte_lcore_count(); ++i) {
		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
			rte_free(op_params->q_bufs[j][i].inputs);
			rte_free(op_params->q_bufs[j][i].hard_outputs);
			rte_free(op_params->q_bufs[j][i].soft_outputs);
			rte_free(op_params->q_bufs[j][i].harq_inputs);
			rte_free(op_params->q_bufs[j][i].harq_outputs);
		}
	}
}
static void
copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
		unsigned int start_idx,
		struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_op_data *hard_outputs,
		struct rte_bbdev_op_data *soft_outputs,
		struct rte_bbdev_dec_op *ref_op)
{
	unsigned int i;
	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;

	for (i = 0; i < n; ++i) {
		if (turbo_dec->code_block_mode == 0) {
			ops[i]->turbo_dec.tb_params.ea =
					turbo_dec->tb_params.ea;
			ops[i]->turbo_dec.tb_params.eb =
					turbo_dec->tb_params.eb;
			ops[i]->turbo_dec.tb_params.k_pos =
					turbo_dec->tb_params.k_pos;
			ops[i]->turbo_dec.tb_params.k_neg =
					turbo_dec->tb_params.k_neg;
			ops[i]->turbo_dec.tb_params.c =
					turbo_dec->tb_params.c;
			ops[i]->turbo_dec.tb_params.c_neg =
					turbo_dec->tb_params.c_neg;
			ops[i]->turbo_dec.tb_params.cab =
					turbo_dec->tb_params.cab;
			ops[i]->turbo_dec.tb_params.r =
					turbo_dec->tb_params.r;
		} else {
			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
		}

		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;

		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
		ops[i]->turbo_dec.input = inputs[start_idx + i];
		if (soft_outputs != NULL)
			ops[i]->turbo_dec.soft_output =
					soft_outputs[start_idx + i];
	}
}
static void
copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
		unsigned int start_idx,
		struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_op_data *outputs,
		struct rte_bbdev_enc_op *ref_op)
{
	unsigned int i;
	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
	for (i = 0; i < n; ++i) {
		if (turbo_enc->code_block_mode == 0) {
			ops[i]->turbo_enc.tb_params.ea =
					turbo_enc->tb_params.ea;
			ops[i]->turbo_enc.tb_params.eb =
					turbo_enc->tb_params.eb;
			ops[i]->turbo_enc.tb_params.k_pos =
					turbo_enc->tb_params.k_pos;
			ops[i]->turbo_enc.tb_params.k_neg =
					turbo_enc->tb_params.k_neg;
			ops[i]->turbo_enc.tb_params.c =
					turbo_enc->tb_params.c;
			ops[i]->turbo_enc.tb_params.c_neg =
					turbo_enc->tb_params.c_neg;
			ops[i]->turbo_enc.tb_params.cab =
					turbo_enc->tb_params.cab;
			ops[i]->turbo_enc.tb_params.ncb_pos =
					turbo_enc->tb_params.ncb_pos;
			ops[i]->turbo_enc.tb_params.ncb_neg =
					turbo_enc->tb_params.ncb_neg;
			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
		} else {
			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
			ops[i]->turbo_enc.cb_params.ncb =
					turbo_enc->cb_params.ncb;
		}
		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;

		ops[i]->turbo_enc.output = outputs[start_idx + i];
		ops[i]->turbo_enc.input = inputs[start_idx + i];
	}
}
static void
copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
		unsigned int start_idx,
		struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_op_data *hard_outputs,
		struct rte_bbdev_op_data *soft_outputs,
		struct rte_bbdev_op_data *harq_inputs,
		struct rte_bbdev_op_data *harq_outputs,
		struct rte_bbdev_dec_op *ref_op)
{
	unsigned int i;
	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;

	for (i = 0; i < n; ++i) {
		if (ldpc_dec->code_block_mode == 0) {
			ops[i]->ldpc_dec.tb_params.ea =
					ldpc_dec->tb_params.ea;
			ops[i]->ldpc_dec.tb_params.eb =
					ldpc_dec->tb_params.eb;
			ops[i]->ldpc_dec.tb_params.c =
					ldpc_dec->tb_params.c;
			ops[i]->ldpc_dec.tb_params.cab =
					ldpc_dec->tb_params.cab;
			ops[i]->ldpc_dec.tb_params.r =
					ldpc_dec->tb_params.r;
		} else {
			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
		}

		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;

		if (hard_outputs != NULL)
			ops[i]->ldpc_dec.hard_output =
					hard_outputs[start_idx + i];
		if (inputs != NULL)
			ops[i]->ldpc_dec.input =
					inputs[start_idx + i];
		if (soft_outputs != NULL)
			ops[i]->ldpc_dec.soft_output =
					soft_outputs[start_idx + i];
		if (harq_inputs != NULL)
			ops[i]->ldpc_dec.harq_combined_input =
					harq_inputs[start_idx + i];
		if (harq_outputs != NULL)
			ops[i]->ldpc_dec.harq_combined_output =
					harq_outputs[start_idx + i];
	}
}
static void
copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
		unsigned int start_idx,
		struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_op_data *outputs,
		struct rte_bbdev_enc_op *ref_op)
{
	unsigned int i;
	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
	for (i = 0; i < n; ++i) {
		if (ldpc_enc->code_block_mode == 0) {
			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
			ops[i]->ldpc_enc.tb_params.cab =
					ldpc_enc->tb_params.cab;
			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
		} else {
			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
		}
		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
		ops[i]->ldpc_enc.output = outputs[start_idx + i];
		ops[i]->ldpc_enc.input = inputs[start_idx + i];
	}
}
static int
check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
		unsigned int order_idx, const int expected_status)
{
	int status = op->status;
	/* ignore parity mismatch false alarms for long iterations */
	if (get_iter_max() >= 10) {
		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
			printf("WARNING: Ignore Syndrome Check mismatch\n");
			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
		}
		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
			printf("WARNING: Ignore Syndrome Check mismatch\n");
			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
		}
	}

	TEST_ASSERT(status == expected_status,
			"op_status (%d) != expected_status (%d)",
			op->status, expected_status);

	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
			"Ordering error, expected %p, got %p",
			(void *)(uintptr_t)order_idx, op->opaque_data);

	return TEST_SUCCESS;
}

static int
check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
		unsigned int order_idx, const int expected_status)
{
	TEST_ASSERT(op->status == expected_status,
			"op_status (%d) != expected_status (%d)",
			op->status, expected_status);

	if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
		TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
				"Ordering error, expected %p, got %p",
				(void *)(uintptr_t)order_idx, op->opaque_data);

	return TEST_SUCCESS;
}
static inline int
validate_op_chain(struct rte_bbdev_op_data *op,
		struct op_data_entries *orig_op)
{
	uint8_t i;
	struct rte_mbuf *m = op->data;
	uint8_t nb_dst_segments = orig_op->nb_segments;
	uint32_t total_data_size = 0;

	TEST_ASSERT(nb_dst_segments == m->nb_segs,
			"Number of segments differ in original (%u) and filled (%u) op",
			nb_dst_segments, m->nb_segs);

	/* Validate each mbuf segment length */
	for (i = 0; i < nb_dst_segments; ++i) {
		/* Apply offset to the first mbuf segment */
		uint16_t offset = (i == 0) ? op->offset : 0;
		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
		total_data_size += orig_op->segments[i].length;

		TEST_ASSERT(orig_op->segments[i].length == data_len,
				"Length of segment differ in original (%u) and filled (%u) op",
				orig_op->segments[i].length, data_len);
		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
				orig_op->segments[i].length,
				"Output buffers (CB=%u) are not equal", i);
		m = m->next;
	}

	/* Validate total mbuf pkt length */
	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
	TEST_ASSERT(total_data_size == pkt_len,
			"Length of data differ in original (%u) and filled (%u) op",
			total_data_size, pkt_len);

	return TEST_SUCCESS;
}
/*
 * Compute K0 for a given configuration for HARQ output length computation
 * As per definition in 3GPP 38.212 Table 5.4.2.1-2
 */
static inline uint16_t
get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
{
	if (rv_index == 0)
		return 0;
	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
	if (n_cb == n) {
		if (rv_index == 1)
			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
		else if (rv_index == 2)
			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
		else
			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
	}
	/* LBRM case - includes a division by N */
	if (rv_index == 1)
		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
				/ n) * z_c;
	else if (rv_index == 2)
		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
				/ n) * z_c;
	else
		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
				/ n) * z_c;
}
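/*
 * Worked example (illustrative): BG 1, z_c = 128 and a full circular buffer
 * (n_cb = N = 66 * 128 = 8448). For rv_index = 2 the starting position is
 * K0_2_1 * z_c = 33 * 128 = 4224, matching floor(33 * Ncb / (66 * Zc)) * Zc
 * from 3GPP 38.212 Table 5.4.2.1-2 when Ncb == N.
 */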
/* HARQ output length including the Filler bits */
static inline uint16_t
compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
{
	uint16_t k0 = 0;
	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
	/* Compute RM out size and number of rows */
	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
			* ops_ld->z_c - ops_ld->n_filler;
	uint16_t deRmOutSize = RTE_MIN(
			k0 + ops_ld->cb_params.e +
			((k0 > parity_offset) ?
					0 : ops_ld->n_filler),
			ops_ld->n_cb);
	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
			/ ops_ld->z_c);
	uint16_t harq_output_len = numRows * ops_ld->z_c;
	return harq_output_len;
}
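/*
 * Worked example (illustrative): BG 1, z_c = 128, n_cb = 8448, n_filler =
 * 100, rv_index = 0 (so k0 = 0) and e = 5000 gives parity_offset = 2460,
 * deRmOutSize = min(0 + 5000 + 100, 8448) = 5100 and numRows =
 * ceil(5100 / 128) = 40, so the HARQ output spans 40 * 128 = 5120 LLRs
 * including the filler positions.
 */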
static int
validate_op_harq_chain(struct rte_bbdev_op_data *op,
		struct op_data_entries *orig_op,
		struct rte_bbdev_op_ldpc_dec *ops_ld)
{
	uint8_t i;
	uint32_t j, jj, k;
	struct rte_mbuf *m = op->data;
	uint8_t nb_dst_segments = orig_op->nb_segments;
	uint32_t total_data_size = 0;
	int8_t *harq_orig, *harq_out, abs_harq_origin;
	uint32_t byte_error = 0, cum_error = 0, error;
	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
	uint16_t parity_offset;

	TEST_ASSERT(nb_dst_segments == m->nb_segs,
			"Number of segments differ in original (%u) and filled (%u) op",
			nb_dst_segments, m->nb_segs);

	/* Validate each mbuf segment length */
	for (i = 0; i < nb_dst_segments; ++i) {
		/* Apply offset to the first mbuf segment */
		uint16_t offset = (i == 0) ? op->offset : 0;
		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
		total_data_size += orig_op->segments[i].length;

		TEST_ASSERT(orig_op->segments[i].length <
				(uint32_t)(data_len + 64),
				"Length of segment differ in original (%u) and filled (%u) op",
				orig_op->segments[i].length, data_len);
		harq_orig = (int8_t *) orig_op->segments[i].addr;
		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);

		if (!(ldpc_cap_flags &
				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
				) || (ops_ld->op_flags &
				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
			data_len -= ops_ld->z_c;
			parity_offset = data_len;
		} else {
			/* Compute RM out size and number of rows */
			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
					* ops_ld->z_c - ops_ld->n_filler;
			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
					ops_ld->n_filler;
			if (data_len > deRmOutSize)
				data_len = deRmOutSize;
			if (data_len > orig_op->segments[i].length)
				data_len = orig_op->segments[i].length;
		}
		/*
		 * HARQ output can have minor differences
		 * due to integer representation and related scaling
		 */
		for (j = 0, jj = 0; j < data_len; j++, jj++) {
			if (j == parity_offset) {
				/* Special Handling of the filler bits */
				for (k = 0; k < ops_ld->n_filler; k++) {
					if (harq_out[jj] !=
							llr_max_pre_scaling) {
						printf("HARQ Filler issue %d: %d %d\n",
							jj, harq_out[jj],
							llr_max);
						byte_error++;
					}
					jj++;
				}
			}
			if (!(ops_ld->op_flags &
				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
				if (ldpc_llr_decimals > 1)
					harq_out[jj] = (harq_out[jj] + 1)
						>> (ldpc_llr_decimals - 1);
				/* Saturated to S7 */
				if (harq_orig[j] > llr_max)
					harq_orig[j] = llr_max;
				if (harq_orig[j] < -llr_max)
					harq_orig[j] = -llr_max;
			}
			if (harq_orig[j] != harq_out[jj]) {
				error = (harq_orig[j] > harq_out[jj]) ?
						harq_orig[j] - harq_out[jj] :
						harq_out[jj] - harq_orig[j];
				abs_harq_origin = harq_orig[j] > 0 ?
						harq_orig[j] :
						-harq_orig[j];
				/* Residual quantization error */
				if ((error > 8 && (abs_harq_origin <
						(llr_max - 16))) ||
						(error > 16)) {
					printf("HARQ mismatch %d: exp %d act %d => %d\n",
							j, harq_orig[j],
							harq_out[jj], error);
					byte_error++;
					cum_error += error;
				}
			}
		}
		m = m->next;
	}

	if (byte_error)
		TEST_ASSERT(byte_error <= 1,
				"HARQ output mismatch (%d) %d",
				byte_error, cum_error);

	/* Validate total mbuf pkt length */
	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
	TEST_ASSERT(total_data_size < pkt_len + 64,
			"Length of data differ in original (%u) and filled (%u) op",
			total_data_size, pkt_len);

	return TEST_SUCCESS;
}
static int
validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
{
	unsigned int i;
	int ret;
	struct op_data_entries *hard_data_orig =
			&test_vector.entries[DATA_HARD_OUTPUT];
	struct op_data_entries *soft_data_orig =
			&test_vector.entries[DATA_SOFT_OUTPUT];
	struct rte_bbdev_op_turbo_dec *ops_td;
	struct rte_bbdev_op_data *hard_output;
	struct rte_bbdev_op_data *soft_output;
	struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;

	for (i = 0; i < n; ++i) {
		ops_td = &ops[i]->turbo_dec;
		hard_output = &ops_td->hard_output;
		soft_output = &ops_td->soft_output;

		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
					"Returned iter_count (%d) > expected iter_count (%d)",
					ops_td->iter_count, ref_td->iter_count);
		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
		TEST_ASSERT_SUCCESS(ret,
				"Checking status and ordering for decoder failed");

		TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
				hard_data_orig),
				"Hard output buffers (CB=%u) are not equal",
				i);

		if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
					soft_data_orig),
					"Soft output buffers (CB=%u) are not equal",
					i);
	}

	return TEST_SUCCESS;
}
static int
validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
		struct rte_bbdev_dec_op *ref_op, const int vector_mask)
{
	unsigned int i;
	int ret;
	struct op_data_entries *hard_data_orig =
			&test_vector.entries[DATA_HARD_OUTPUT];
	struct op_data_entries *soft_data_orig =
			&test_vector.entries[DATA_SOFT_OUTPUT];
	struct op_data_entries *harq_data_orig =
			&test_vector.entries[DATA_HARQ_OUTPUT];
	struct rte_bbdev_op_ldpc_dec *ops_td;
	struct rte_bbdev_op_data *hard_output;
	struct rte_bbdev_op_data *harq_output;
	struct rte_bbdev_op_data *soft_output;
	struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;

	for (i = 0; i < n; ++i) {
		ops_td = &ops[i]->ldpc_dec;
		hard_output = &ops_td->hard_output;
		harq_output = &ops_td->harq_combined_output;
		soft_output = &ops_td->soft_output;

		ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
		TEST_ASSERT_SUCCESS(ret,
				"Checking status and ordering for decoder failed");
		if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
			TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
					"Returned iter_count (%d) > expected iter_count (%d)",
					ops_td->iter_count, ref_td->iter_count);
		/*
		 * We can ignore output data when the decoding failed to
		 * converge or for loop-back cases
		 */
		if (!check_bit(ops[i]->ldpc_dec.op_flags,
				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) &&
				!(ops[i]->status &
					(1 << RTE_BBDEV_SYNDROME_ERROR)))
			TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
					hard_data_orig),
					"Hard output buffers (CB=%u) are not equal",
					i);

		if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
			TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
					soft_data_orig),
					"Soft output buffers (CB=%u) are not equal",
					i);
		if (ref_op->ldpc_dec.op_flags &
				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
					harq_data_orig, ops_td),
					"HARQ output buffers (CB=%u) are not equal",
					i);
		}
		if (ref_op->ldpc_dec.op_flags &
				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
			TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
					harq_data_orig, ops_td),
					"HARQ output buffers (CB=%u) are not equal",
					i);
	}

	return TEST_SUCCESS;
}
static int
validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
		struct rte_bbdev_enc_op *ref_op)
{
	unsigned int i;
	int ret;
	struct op_data_entries *hard_data_orig =
			&test_vector.entries[DATA_HARD_OUTPUT];

	for (i = 0; i < n; ++i) {
		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
		TEST_ASSERT_SUCCESS(ret,
				"Checking status and ordering for encoder failed");
		TEST_ASSERT_SUCCESS(validate_op_chain(
				&ops[i]->turbo_enc.output,
				hard_data_orig),
				"Output buffers (CB=%u) are not equal",
				i);
	}

	return TEST_SUCCESS;
}

static int
validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
		struct rte_bbdev_enc_op *ref_op)
{
	unsigned int i;
	int ret;
	struct op_data_entries *hard_data_orig =
			&test_vector.entries[DATA_HARD_OUTPUT];

	for (i = 0; i < n; ++i) {
		ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
		TEST_ASSERT_SUCCESS(ret,
				"Checking status and ordering for encoder failed");
		TEST_ASSERT_SUCCESS(validate_op_chain(
				&ops[i]->ldpc_enc.output,
				hard_data_orig),
				"Output buffers (CB=%u) are not equal",
				i);
	}

	return TEST_SUCCESS;
}
static void
create_reference_dec_op(struct rte_bbdev_dec_op *op)
{
	unsigned int i;
	struct op_data_entries *entry;

	op->turbo_dec = test_vector.turbo_dec;
	entry = &test_vector.entries[DATA_INPUT];
	for (i = 0; i < entry->nb_segments; ++i)
		op->turbo_dec.input.length +=
				entry->segments[i].length;
}

static void
create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
{
	unsigned int i;
	struct op_data_entries *entry;

	op->ldpc_dec = test_vector.ldpc_dec;
	entry = &test_vector.entries[DATA_INPUT];
	for (i = 0; i < entry->nb_segments; ++i)
		op->ldpc_dec.input.length +=
				entry->segments[i].length;
	if (test_vector.ldpc_dec.op_flags &
			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
		entry = &test_vector.entries[DATA_HARQ_INPUT];
		for (i = 0; i < entry->nb_segments; ++i)
			op->ldpc_dec.harq_combined_input.length +=
					entry->segments[i].length;
	}
}

static void
create_reference_enc_op(struct rte_bbdev_enc_op *op)
{
	unsigned int i;
	struct op_data_entries *entry;

	op->turbo_enc = test_vector.turbo_enc;
	entry = &test_vector.entries[DATA_INPUT];
	for (i = 0; i < entry->nb_segments; ++i)
		op->turbo_enc.input.length +=
				entry->segments[i].length;
}

static void
create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
{
	unsigned int i;
	struct op_data_entries *entry;

	op->ldpc_enc = test_vector.ldpc_enc;
	entry = &test_vector.entries[DATA_INPUT];
	for (i = 0; i < entry->nb_segments; ++i)
		op->ldpc_enc.input.length +=
				entry->segments[i].length;
}
static uint32_t
calc_dec_TB_size(struct rte_bbdev_dec_op *op)
{
	uint8_t i;
	uint32_t c, r, tb_size = 0;

	if (op->turbo_dec.code_block_mode) {
		tb_size = op->turbo_dec.tb_params.k_neg;
	} else {
		c = op->turbo_dec.tb_params.c;
		r = op->turbo_dec.tb_params.r;
		for (i = 0; i < c - r; i++)
			tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
					op->turbo_dec.tb_params.k_neg :
					op->turbo_dec.tb_params.k_pos;
	}
	return tb_size;
}

static uint32_t
calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
{
	uint8_t i;
	uint32_t c, r, tb_size = 0;
	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;

	if (op->ldpc_dec.code_block_mode) {
		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
	} else {
		c = op->ldpc_dec.tb_params.c;
		r = op->ldpc_dec.tb_params.r;
		for (i = 0; i < c - r; i++)
			tb_size += sys_cols * op->ldpc_dec.z_c
					- op->ldpc_dec.n_filler;
	}
	return tb_size;
}

static uint32_t
calc_enc_TB_size(struct rte_bbdev_enc_op *op)
{
	uint8_t i;
	uint32_t c, r, tb_size = 0;

	if (op->turbo_enc.code_block_mode) {
		tb_size = op->turbo_enc.tb_params.k_neg;
	} else {
		c = op->turbo_enc.tb_params.c;
		r = op->turbo_enc.tb_params.r;
		for (i = 0; i < c - r; i++)
			tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
					op->turbo_enc.tb_params.k_neg :
					op->turbo_enc.tb_params.k_pos;
	}
	return tb_size;
}

static uint32_t
calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
{
	uint8_t i;
	uint32_t c, r, tb_size = 0;
	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;

	if (op->ldpc_enc.code_block_mode) {
		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
	} else {
		c = op->ldpc_enc.tb_params.c;
		r = op->ldpc_enc.tb_params.r;
		for (i = 0; i < c - r; i++)
			tb_size += sys_cols * op->ldpc_enc.z_c
					- op->ldpc_enc.n_filler;
	}
	return tb_size;
}
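/*
 * Example (illustrative): for an LDPC code block with basegraph 1
 * (22 systematic columns), z_c = 384 and n_filler = 144, the TB size used
 * for throughput reporting is 22 * 384 - 144 = 8304 bits.
 */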
static int
init_test_op_params(struct test_op_params *op_params,
		enum rte_bbdev_op_type op_type, const int expected_status,
		const int vector_mask, struct rte_mempool *ops_mp,
		uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
{
	int ret = 0;
	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
			op_type == RTE_BBDEV_OP_LDPC_DEC)
		ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
				&op_params->ref_dec_op, 1);
	else
		ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
				&op_params->ref_enc_op, 1);

	TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");

	op_params->mp = ops_mp;
	op_params->burst_sz = burst_sz;
	op_params->num_to_process = num_to_process;
	op_params->num_lcores = num_lcores;
	op_params->vector_mask = vector_mask;
	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
			op_type == RTE_BBDEV_OP_LDPC_DEC)
		op_params->ref_dec_op->status = expected_status;
	else if (op_type == RTE_BBDEV_OP_TURBO_ENC
			|| op_type == RTE_BBDEV_OP_LDPC_ENC)
		op_params->ref_enc_op->status = expected_status;

	return 0;
}
static int
run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
		struct test_op_params *op_params)
{
	int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
	unsigned int i;
	struct active_device *ad;
	unsigned int burst_sz = get_burst_sz();
	enum rte_bbdev_op_type op_type = test_vector.op_type;
	const struct rte_bbdev_op_cap *capabilities = NULL;

	ad = &active_devs[dev_id];

	/* Check if device supports op_type */
	if (!is_avail_op(ad, test_vector.op_type))
		return TEST_SUCCESS;

	struct rte_bbdev_info info;
	rte_bbdev_info_get(ad->dev_id, &info);
	socket_id = GET_SOCKET(info.socket_id);

	f_ret = create_mempools(ad, socket_id, op_type,
			get_num_ops());
	if (f_ret != TEST_SUCCESS) {
		printf("Couldn't create mempools");
		goto fail;
	}
	if (op_type == RTE_BBDEV_OP_NONE)
		op_type = RTE_BBDEV_OP_TURBO_ENC;

	f_ret = init_test_op_params(op_params, test_vector.op_type,
			test_vector.expected_status,
			test_vector.mask,
			ad->ops_mempool,
			burst_sz,
			get_num_ops(),
			get_num_lcores());
	if (f_ret != TEST_SUCCESS) {
		printf("Couldn't init test op params");
		goto fail;
	}

	/* Find capabilities */
	const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
	for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
		if (cap->type == test_vector.op_type) {
			capabilities = cap;
			break;
		}
		cap++;
	}
	TEST_ASSERT_NOT_NULL(capabilities,
			"Couldn't find capabilities");

	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
		create_reference_dec_op(op_params->ref_dec_op);
	else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
		create_reference_enc_op(op_params->ref_enc_op);
	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
		create_reference_ldpc_enc_op(op_params->ref_enc_op);
	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
		create_reference_ldpc_dec_op(op_params->ref_dec_op);

	for (i = 0; i < ad->nb_queues; ++i) {
		f_ret = fill_queue_buffers(op_params,
				ad->in_mbuf_pool,
				ad->hard_out_mbuf_pool,
				ad->soft_out_mbuf_pool,
				ad->harq_in_mbuf_pool,
				ad->harq_out_mbuf_pool,
				ad->queue_ids[i],
				capabilities,
				info.drv.min_alignment,
				socket_id);
		if (f_ret != TEST_SUCCESS) {
			printf("Couldn't init queue buffers");
			goto fail;
		}
	}

	/* Run test case function */
	t_ret = test_case_func(ad, op_params);

	/* Free active device resources and return */
	free_buffers(ad, op_params);
	return t_ret;

fail:
	free_buffers(ad, op_params);
	return TEST_FAILED;
}
/* Run given test function per active device per supported op type */
static int
run_test_case(test_case_function *test_case_func)
{
	int ret = 0;
	uint8_t dev;

	/* Alloc op_params */
	struct test_op_params *op_params = rte_zmalloc(NULL,
			sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
	TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
			RTE_ALIGN(sizeof(struct test_op_params),
				RTE_CACHE_LINE_SIZE));

	/* For each device run test case function */
	for (dev = 0; dev < nb_active_devs; ++dev)
		ret |= run_test_case_on_device(test_case_func, dev, op_params);

	rte_free(op_params);

	return ret;
}
/* Push back the HARQ output from DDR to host */
static void
retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
		struct rte_bbdev_dec_op **ops,
		const uint16_t n)
{
	uint16_t j;
	int save_status, ret;
	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
	uint32_t flags = ops[0]->ldpc_dec.op_flags;
	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
	for (j = 0; j < n; ++j) {
		if ((loopback && mem_out) || hc_out) {
			save_status = ops[j]->status;
			ops[j]->ldpc_dec.op_flags =
				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
			if (h_comp)
				ops[j]->ldpc_dec.op_flags +=
					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
			ops[j]->ldpc_dec.harq_combined_input.offset =
					harq_offset;
			ops[j]->ldpc_dec.harq_combined_output.offset = 0;
			harq_offset += HARQ_INCR;
			if (!loopback)
				ops[j]->ldpc_dec.harq_combined_input.length =
				ops[j]->ldpc_dec.harq_combined_output.length;
			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
					&ops[j], 1);
			ret = 0;
			while (ret == 0)
				ret = rte_bbdev_dequeue_ldpc_dec_ops(
						dev_id, queue_id,
						&ops_deq[j], 1);
			ops[j]->ldpc_dec.op_flags = flags;
			ops[j]->status = save_status;
		}
	}
}
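/*
 * Note: retrieve_harq_ddr() above reuses the loopback + HARQ-input path as a
 * DMA read: each op is re-enqueued once with temporary flags, polled until it
 * dequeues, and then has its original flags and status restored.
 */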
/*
 * Push back the HARQ output from HW DDR to Host
 * Preload HARQ memory input and adjust HARQ offset
 */
static void
preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
		struct rte_bbdev_dec_op **ops, const uint16_t n,
		bool preload)
{
	uint16_t j;
	int ret;
	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
	struct rte_bbdev_op_data save_hc_in, save_hc_out;
	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
	uint32_t flags = ops[0]->ldpc_dec.op_flags;
	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
	for (j = 0; j < n; ++j) {
		if ((mem_in || hc_in) && preload) {
			save_hc_in = ops[j]->ldpc_dec.harq_combined_input;
			save_hc_out = ops[j]->ldpc_dec.harq_combined_output;
			ops[j]->ldpc_dec.op_flags =
				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
			if (h_comp)
				ops[j]->ldpc_dec.op_flags +=
					RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
			ops[j]->ldpc_dec.harq_combined_output.offset =
					harq_offset;
			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
					&ops[j], 1);
			ret = 0;
			while (ret == 0)
				ret = rte_bbdev_dequeue_ldpc_dec_ops(
						dev_id, queue_id, &ops_deq[j], 1);
			ops[j]->ldpc_dec.op_flags = flags;
			ops[j]->ldpc_dec.harq_combined_input = save_hc_in;
			ops[j]->ldpc_dec.harq_combined_output = save_hc_out;
		}
		/* Adjust HARQ offset when we reach external DDR */
		if (mem_in || hc_in)
			ops[j]->ldpc_dec.harq_combined_input.offset
				= harq_offset;
		if (mem_out || hc_out)
			ops[j]->ldpc_dec.harq_combined_output.offset
				= harq_offset;
		harq_offset += HARQ_INCR;
	}
}
static void
dequeue_event_callback(uint16_t dev_id,
		enum rte_bbdev_event_type event, void *cb_arg,
		void *ret_param)
{
	int ret;
	uint16_t i;
	uint64_t total_time;
	uint16_t deq, burst_sz, num_ops;
	uint16_t queue_id = *(uint16_t *) ret_param;
	struct rte_bbdev_info info;
	double tb_len_bits;
	struct thread_params *tp = cb_arg;

	/* Find matching thread params using queue_id */
	for (i = 0; i < MAX_QUEUES; ++i, ++tp)
		if (tp->queue_id == queue_id)
			break;

	if (i == MAX_QUEUES) {
		printf("%s: Queue_id from interrupt details was not found!\n",
				__func__);
		return;
	}

	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
		printf(
			"Dequeue interrupt handler called for incorrect event!\n");
		return;
	}

	burst_sz = rte_atomic16_read(&tp->burst_sz);
	num_ops = tp->op_params->num_to_process;

	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
				&tp->dec_ops[
					rte_atomic16_read(&tp->nb_dequeued)],
				burst_sz);
	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
				&tp->dec_ops[
					rte_atomic16_read(&tp->nb_dequeued)],
				burst_sz);
	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
				&tp->enc_ops[
					rte_atomic16_read(&tp->nb_dequeued)],
				burst_sz);
	else /*RTE_BBDEV_OP_TURBO_ENC*/
		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
				&tp->enc_ops[
					rte_atomic16_read(&tp->nb_dequeued)],
				burst_sz);

	if (deq < burst_sz) {
		printf(
			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
			burst_sz, deq);
		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
		return;
	}

	if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
		rte_atomic16_add(&tp->nb_dequeued, deq);
		return;
	}

	total_time = rte_rdtsc_precise() - tp->start_time;

	rte_bbdev_info_get(dev_id, &info);

	ret = TEST_SUCCESS;

	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
		ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
				tp->op_params->vector_mask);
		/* get the max of iter_count for all dequeued ops */
		for (i = 0; i < num_ops; ++i)
			tp->iter_count = RTE_MAX(
					tp->dec_ops[i]->turbo_dec.iter_count,
					tp->iter_count);
		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
	} else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
		ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
		struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
		ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
		rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
	} else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
		struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
		ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
				tp->op_params->vector_mask);
		rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
	}

	if (ret) {
		printf("Buffers validation failed\n");
		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
	}

	switch (test_vector.op_type) {
	case RTE_BBDEV_OP_TURBO_DEC:
		tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
		break;
	case RTE_BBDEV_OP_TURBO_ENC:
		tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
		break;
	case RTE_BBDEV_OP_LDPC_DEC:
		tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
		break;
	case RTE_BBDEV_OP_LDPC_ENC:
		tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
		break;
	case RTE_BBDEV_OP_NONE:
		tb_len_bits = 0.0;
		break;
	default:
		printf("Unknown op type: %d\n", test_vector.op_type);
		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
		return;
	}

	tp->ops_per_sec += ((double)num_ops) /
			((double)total_time / (double)rte_get_tsc_hz());
	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
			((double)total_time / (double)rte_get_tsc_hz());

	rte_atomic16_add(&tp->nb_dequeued, deq);
}
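
/*
 * Illustrative arithmetic for the accumulation above (hypothetical
 * numbers): with num_ops = 512, tb_len_bits = 8448 and
 * total_time = 2e6 cycles on a 2 GHz TSC, one callback adds
 * 512 / (2e6 / 2e9) = 512000 ops/s and
 * (512 * 8448 / 1e6) / (2e6 / 2e9) ~= 4325 Mbps. throughput_test()
 * later divides these sums by TEST_REPETITIONS to get the average.
 */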
static int
throughput_intr_lcore_dec(void *arg)
{
	struct thread_params *tp = arg;
	unsigned int enqueued;
	const uint16_t queue_id = tp->queue_id;
	const uint16_t burst_sz = tp->op_params->burst_sz;
	const uint16_t num_to_process = tp->op_params->num_to_process;
	struct rte_bbdev_dec_op *ops[num_to_process];
	struct test_buffers *bufs = NULL;
	struct rte_bbdev_info info;
	int ret, i, j;
	uint16_t num_to_enq, enq;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
			"Failed to enable interrupts for dev: %u, queue_id: %u",
			tp->dev_id, queue_id);

	rte_bbdev_info_get(tp->dev_id, &info);

	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
			"NUM_OPS cannot exceed %u for this device",
			info.drv.queue_size_lim);

	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	rte_atomic16_clear(&tp->processing_status);
	rte_atomic16_clear(&tp->nb_dequeued);

	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
		rte_pause();

	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
			num_to_process);
	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
			num_to_process);
	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
		copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
				bufs->hard_outputs, bufs->soft_outputs,
				tp->op_params->ref_dec_op);

	/* Set counter to validate the ordering */
	for (j = 0; j < num_to_process; ++j)
		ops[j]->opaque_data = (void *)(uintptr_t)j;

	for (j = 0; j < TEST_REPETITIONS; ++j) {
		for (i = 0; i < num_to_process; ++i)
			rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);

		tp->start_time = rte_rdtsc_precise();
		for (enqueued = 0; enqueued < num_to_process;) {
			num_to_enq = burst_sz;

			if (unlikely(num_to_process - enqueued < num_to_enq))
				num_to_enq = num_to_process - enqueued;

			enq = 0;
			do {
				enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
						queue_id, &ops[enqueued],
						num_to_enq);
			} while (unlikely(num_to_enq != enq));
			enqueued += enq;

			/* Write to thread burst_sz current number of enqueued
			 * descriptors. It ensures that proper number of
			 * descriptors will be dequeued in callback
			 * function - needed for last batch in case where
			 * the number of operations is not a multiple of
			 * burst size.
			 */
			rte_atomic16_set(&tp->burst_sz, num_to_enq);

			/* Wait until processing of previous batch is
			 * completed
			 */
			while (rte_atomic16_read(&tp->nb_dequeued) !=
					(int16_t) enqueued)
				rte_pause();
		}
		if (j != TEST_REPETITIONS - 1)
			rte_atomic16_clear(&tp->nb_dequeued);
	}

	return TEST_SUCCESS;
}
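
/*
 * Interrupt-mode producer/consumer handshake used above: the lcore
 * enqueues one burst, publishes its size via tp->burst_sz, then spins on
 * tp->nb_dequeued until dequeue_event_callback() (interrupt context) has
 * drained that burst. Only the enqueue side runs here; all dequeuing,
 * validation and throughput accounting happen in the callback.
 */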
static int
throughput_intr_lcore_enc(void *arg)
{
	struct thread_params *tp = arg;
	unsigned int enqueued;
	const uint16_t queue_id = tp->queue_id;
	const uint16_t burst_sz = tp->op_params->burst_sz;
	const uint16_t num_to_process = tp->op_params->num_to_process;
	struct rte_bbdev_enc_op *ops[num_to_process];
	struct test_buffers *bufs = NULL;
	struct rte_bbdev_info info;
	int ret, i, j;
	uint16_t num_to_enq, enq;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
			"Failed to enable interrupts for dev: %u, queue_id: %u",
			tp->dev_id, queue_id);

	rte_bbdev_info_get(tp->dev_id, &info);

	TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
			"NUM_OPS cannot exceed %u for this device",
			info.drv.queue_size_lim);

	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	rte_atomic16_clear(&tp->processing_status);
	rte_atomic16_clear(&tp->nb_dequeued);

	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
		rte_pause();

	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
			num_to_process);
	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
			num_to_process);
	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
		copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
				bufs->hard_outputs, tp->op_params->ref_enc_op);

	/* Set counter to validate the ordering */
	for (j = 0; j < num_to_process; ++j)
		ops[j]->opaque_data = (void *)(uintptr_t)j;

	for (j = 0; j < TEST_REPETITIONS; ++j) {
		for (i = 0; i < num_to_process; ++i)
			rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);

		tp->start_time = rte_rdtsc_precise();
		for (enqueued = 0; enqueued < num_to_process;) {
			num_to_enq = burst_sz;

			if (unlikely(num_to_process - enqueued < num_to_enq))
				num_to_enq = num_to_process - enqueued;

			enq = 0;
			do {
				enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
						queue_id, &ops[enqueued],
						num_to_enq);
			} while (unlikely(enq != num_to_enq));
			enqueued += enq;

			/* Write to thread burst_sz current number of enqueued
			 * descriptors. It ensures that proper number of
			 * descriptors will be dequeued in callback
			 * function - needed for last batch in case where
			 * the number of operations is not a multiple of
			 * burst size.
			 */
			rte_atomic16_set(&tp->burst_sz, num_to_enq);

			/* Wait until processing of previous batch is
			 * completed
			 */
			while (rte_atomic16_read(&tp->nb_dequeued) !=
					(int16_t) enqueued)
				rte_pause();
		}
		if (j != TEST_REPETITIONS - 1)
			rte_atomic16_clear(&tp->nb_dequeued);
	}

	return TEST_SUCCESS;
}
static int
throughput_pmd_lcore_dec(void *arg)
{
	struct thread_params *tp = arg;
	uint16_t enq, deq;
	uint64_t total_time = 0, start_time;
	const uint16_t queue_id = tp->queue_id;
	const uint16_t burst_sz = tp->op_params->burst_sz;
	const uint16_t num_ops = tp->op_params->num_to_process;
	struct rte_bbdev_dec_op *ops_enq[num_ops];
	struct rte_bbdev_dec_op *ops_deq[num_ops];
	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
	struct test_buffers *bufs = NULL;
	int i, j, ret;
	struct rte_bbdev_info info;
	uint16_t num_to_enq;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(tp->dev_id, &info);

	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
			"NUM_OPS cannot exceed %u for this device",
			info.drv.queue_size_lim);

	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
		rte_pause();

	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
		copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
				bufs->hard_outputs, bufs->soft_outputs, ref_op);

	/* Set counter to validate the ordering */
	for (j = 0; j < num_ops; ++j)
		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

	for (i = 0; i < TEST_REPETITIONS; ++i) {

		for (j = 0; j < num_ops; ++j)
			mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);

		start_time = rte_rdtsc_precise();

		for (enq = 0, deq = 0; enq < num_ops;) {
			num_to_enq = burst_sz;

			if (unlikely(num_ops - enq < num_to_enq))
				num_to_enq = num_ops - enq;

			enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
					queue_id, &ops_enq[enq], num_to_enq);

			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
					queue_id, &ops_deq[deq], enq - deq);
		}

		/* dequeue the remaining */
		while (deq < enq)
			deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
					queue_id, &ops_deq[deq], enq - deq);

		total_time += rte_rdtsc_precise() - start_time;
	}

	tp->iter_count = 0;
	/* get the max of iter_count for all dequeued ops */
	for (i = 0; i < num_ops; ++i) {
		tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
				tp->iter_count);
	}

	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
		ret = validate_dec_op(ops_deq, num_ops, ref_op,
				tp->op_params->vector_mask);
		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
	}

	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);

	double tb_len_bits = calc_dec_TB_size(ref_op);

	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
			((double)total_time / (double)rte_get_tsc_hz());
	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
			1000000.0) / ((double)total_time /
			(double)rte_get_tsc_hz());

	return TEST_SUCCESS;
}
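
/*
 * PMD-mode measurement pattern used by the throughput lcores: enqueue
 * and dequeue are interleaved in one loop so the device queue stays
 * full, then the tail of in-flight ops is drained before the stop
 * timestamp. The mbuf resets run before start_time is sampled, so their
 * cost is deliberately excluded from the measured window.
 */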
static int
throughput_pmd_lcore_ldpc_dec(void *arg)
{
	struct thread_params *tp = arg;
	uint16_t enq, deq;
	uint64_t total_time = 0, start_time;
	const uint16_t queue_id = tp->queue_id;
	const uint16_t burst_sz = tp->op_params->burst_sz;
	const uint16_t num_ops = tp->op_params->num_to_process;
	struct rte_bbdev_dec_op *ops_enq[num_ops];
	struct rte_bbdev_dec_op *ops_deq[num_ops];
	struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
	struct test_buffers *bufs = NULL;
	int i, j, ret;
	struct rte_bbdev_info info;
	uint16_t num_to_enq;
	bool extDdr = check_bit(ldpc_cap_flags,
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
	bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
	bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
			RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(tp->dev_id, &info);

	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
			"NUM_OPS cannot exceed %u for this device",
			info.drv.queue_size_lim);

	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
		rte_pause();

	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);

	/* For throughput tests we need to disable early termination */
	if (check_bit(ref_op->ldpc_dec.op_flags,
			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
		ref_op->ldpc_dec.op_flags -=
				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
	ref_op->ldpc_dec.iter_max = 6;
	ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;

	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
		copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
				bufs->hard_outputs, bufs->soft_outputs,
				bufs->harq_inputs, bufs->harq_outputs, ref_op);

	/* Set counter to validate the ordering */
	for (j = 0; j < num_ops; ++j)
		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

	for (i = 0; i < TEST_REPETITIONS; ++i) {
		for (j = 0; j < num_ops; ++j) {
			if (test_vector.op_type != RTE_BBDEV_OP_NONE)
				mbuf_reset(
				ops_enq[j]->ldpc_dec.hard_output.data);
			if (hc_out || loopback)
				mbuf_reset(
				ops_enq[j]->ldpc_dec.harq_combined_output.data);
		}
		if (extDdr) {
			bool preload = i == (TEST_REPETITIONS - 1);
			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
					num_ops, preload);
		}
		start_time = rte_rdtsc_precise();

		for (enq = 0, deq = 0; enq < num_ops;) {
			num_to_enq = burst_sz;

			if (unlikely(num_ops - enq < num_to_enq))
				num_to_enq = num_ops - enq;

			enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
					queue_id, &ops_enq[enq], num_to_enq);

			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
					queue_id, &ops_deq[deq], enq - deq);
		}

		/* dequeue the remaining */
		while (deq < enq)
			deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
					queue_id, &ops_deq[deq], enq - deq);

		total_time += rte_rdtsc_precise() - start_time;
	}

	tp->iter_count = 0;
	/* get the max of iter_count for all dequeued ops */
	for (i = 0; i < num_ops; ++i) {
		tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
				tp->iter_count);
	}

	if (extDdr) {
		/* Read loopback is not thread safe */
		retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
	}

	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
		ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
				tp->op_params->vector_mask);
		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
	}

	rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);

	double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);

	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
			((double)total_time / (double)rte_get_tsc_hz());
	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
			1000000.0) / ((double)total_time /
			(double)rte_get_tsc_hz());

	return TEST_SUCCESS;
}
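
/*
 * Rationale (inferred from the flag handling above): early termination
 * is cleared and iter_max pinned so every op runs the same number of
 * decoder iterations. Otherwise ops decoded from clean vectors would
 * converge early and the reported throughput would reflect a variable,
 * best-case load rather than a deterministic one.
 */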
static int
throughput_pmd_lcore_enc(void *arg)
{
	struct thread_params *tp = arg;
	uint16_t enq, deq;
	uint64_t total_time = 0, start_time;
	const uint16_t queue_id = tp->queue_id;
	const uint16_t burst_sz = tp->op_params->burst_sz;
	const uint16_t num_ops = tp->op_params->num_to_process;
	struct rte_bbdev_enc_op *ops_enq[num_ops];
	struct rte_bbdev_enc_op *ops_deq[num_ops];
	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
	struct test_buffers *bufs = NULL;
	int i, j, ret;
	struct rte_bbdev_info info;
	uint16_t num_to_enq;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(tp->dev_id, &info);

	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
			"NUM_OPS cannot exceed %u for this device",
			info.drv.queue_size_lim);

	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
		rte_pause();

	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
			num_ops);
	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
			num_ops);
	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
		copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
				bufs->hard_outputs, ref_op);

	/* Set counter to validate the ordering */
	for (j = 0; j < num_ops; ++j)
		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

	for (i = 0; i < TEST_REPETITIONS; ++i) {

		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			for (j = 0; j < num_ops; ++j)
				mbuf_reset(ops_enq[j]->turbo_enc.output.data);

		start_time = rte_rdtsc_precise();

		for (enq = 0, deq = 0; enq < num_ops;) {
			num_to_enq = burst_sz;

			if (unlikely(num_ops - enq < num_to_enq))
				num_to_enq = num_ops - enq;

			enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
					queue_id, &ops_enq[enq], num_to_enq);

			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
					queue_id, &ops_deq[deq], enq - deq);
		}

		/* dequeue the remaining */
		while (deq < enq)
			deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
					queue_id, &ops_deq[deq], enq - deq);

		total_time += rte_rdtsc_precise() - start_time;
	}

	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
		ret = validate_enc_op(ops_deq, num_ops, ref_op);
		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
	}

	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);

	double tb_len_bits = calc_enc_TB_size(ref_op);

	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
			((double)total_time / (double)rte_get_tsc_hz());
	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
			/ 1000000.0) / ((double)total_time /
			(double)rte_get_tsc_hz());

	return TEST_SUCCESS;
}
static int
throughput_pmd_lcore_ldpc_enc(void *arg)
{
	struct thread_params *tp = arg;
	uint16_t enq, deq;
	uint64_t total_time = 0, start_time;
	const uint16_t queue_id = tp->queue_id;
	const uint16_t burst_sz = tp->op_params->burst_sz;
	const uint16_t num_ops = tp->op_params->num_to_process;
	struct rte_bbdev_enc_op *ops_enq[num_ops];
	struct rte_bbdev_enc_op *ops_deq[num_ops];
	struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
	struct test_buffers *bufs = NULL;
	int i, j, ret;
	struct rte_bbdev_info info;
	uint16_t num_to_enq;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(tp->dev_id, &info);

	TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
			"NUM_OPS cannot exceed %u for this device",
			info.drv.queue_size_lim);

	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
		rte_pause();

	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
			num_ops);
	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
			num_ops);
	if (test_vector.op_type != RTE_BBDEV_OP_NONE)
		copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
				bufs->hard_outputs, ref_op);

	/* Set counter to validate the ordering */
	for (j = 0; j < num_ops; ++j)
		ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

	for (i = 0; i < TEST_REPETITIONS; ++i) {

		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			for (j = 0; j < num_ops; ++j)
				mbuf_reset(ops_enq[j]->turbo_enc.output.data);

		start_time = rte_rdtsc_precise();

		for (enq = 0, deq = 0; enq < num_ops;) {
			num_to_enq = burst_sz;

			if (unlikely(num_ops - enq < num_to_enq))
				num_to_enq = num_ops - enq;

			enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
					queue_id, &ops_enq[enq], num_to_enq);

			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
					queue_id, &ops_deq[deq], enq - deq);
		}

		/* dequeue the remaining */
		while (deq < enq)
			deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
					queue_id, &ops_deq[deq], enq - deq);

		total_time += rte_rdtsc_precise() - start_time;
	}

	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
	}

	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);

	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);

	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
			((double)total_time / (double)rte_get_tsc_hz());
	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
			/ 1000000.0) / ((double)total_time /
			(double)rte_get_tsc_hz());

	return TEST_SUCCESS;
}
static void
print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
{
	unsigned int iter = 0;
	double total_mops = 0, total_mbps = 0;

	for (iter = 0; iter < used_cores; iter++) {
		printf(
			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
			t_params[iter].mbps);
		total_mops += t_params[iter].ops_per_sec;
		total_mbps += t_params[iter].mbps;
	}
	printf(
		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
		used_cores, total_mops, total_mbps);
}
static void
print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
{
	unsigned int iter = 0;
	double total_mops = 0, total_mbps = 0;
	uint8_t iter_count = 0;

	for (iter = 0; iter < used_cores; iter++) {
		printf(
			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
			t_params[iter].mbps, t_params[iter].iter_count);
		total_mops += t_params[iter].ops_per_sec;
		total_mbps += t_params[iter].mbps;
		iter_count = RTE_MAX(iter_count, t_params[iter].iter_count);
	}
	printf(
		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
		used_cores, total_mops, total_mbps, iter_count);
}
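
/*
 * Example of the aggregation above (hypothetical numbers): two cores
 * reporting 1800.5 and 1750.3 Mbps produce a combined total of
 * 3550.8 Mbps. Note the decoder iteration count printed on the total
 * line is the maximum across cores, not a sum.
 */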
/*
 * Test function that determines how long an enqueue + dequeue of a burst
 * takes on available lcores.
 */
static int
throughput_test(struct active_device *ad,
		struct test_op_params *op_params)
{
	int ret;
	unsigned int lcore_id, used_cores = 0;
	struct thread_params *t_params, *tp;
	struct rte_bbdev_info info;
	lcore_function_t *throughput_function;
	uint16_t num_lcores;
	const char *op_type_str;

	rte_bbdev_info_get(ad->dev_id, &info);

	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
			test_vector.op_type);

	printf("+ ------------------------------------------------------- +\n");
	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
			info.dev_name, ad->nb_queues, op_params->burst_sz,
			op_params->num_to_process, op_params->num_lcores,
			op_type_str,
			intr_enabled ? "Interrupt mode" : "PMD mode",
			(double)rte_get_tsc_hz() / 1000000000.0);

	/* Set number of lcores */
	num_lcores = (ad->nb_queues < (op_params->num_lcores))
			? ad->nb_queues
			: op_params->num_lcores;

	/* Allocate memory for thread parameters structure */
	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
			RTE_CACHE_LINE_SIZE);
	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
				RTE_CACHE_LINE_SIZE));

	if (intr_enabled) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
			throughput_function = throughput_intr_lcore_dec;
		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
			throughput_function = throughput_intr_lcore_dec;
		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
			throughput_function = throughput_intr_lcore_enc;
		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
			throughput_function = throughput_intr_lcore_enc;
		else
			throughput_function = throughput_intr_lcore_enc;

		/* Dequeue interrupt callback registration */
		ret = rte_bbdev_callback_register(ad->dev_id,
				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
				t_params);
		if (ret < 0) {
			rte_free(t_params);
			return ret;
		}
	} else {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
			throughput_function = throughput_pmd_lcore_dec;
		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
			throughput_function = throughput_pmd_lcore_ldpc_dec;
		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
			throughput_function = throughput_pmd_lcore_enc;
		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
			throughput_function = throughput_pmd_lcore_ldpc_enc;
		else
			throughput_function = throughput_pmd_lcore_enc;
	}

	rte_atomic16_set(&op_params->sync, SYNC_WAIT);

	/* Master core is set at first entry */
	t_params[0].dev_id = ad->dev_id;
	t_params[0].lcore_id = rte_lcore_id();
	t_params[0].op_params = op_params;
	t_params[0].queue_id = ad->queue_ids[used_cores++];
	t_params[0].iter_count = 0;

	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (used_cores >= num_lcores)
			break;

		t_params[used_cores].dev_id = ad->dev_id;
		t_params[used_cores].lcore_id = lcore_id;
		t_params[used_cores].op_params = op_params;
		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
		t_params[used_cores].iter_count = 0;

		rte_eal_remote_launch(throughput_function,
				&t_params[used_cores++], lcore_id);
	}

	rte_atomic16_set(&op_params->sync, SYNC_START);
	ret = throughput_function(&t_params[0]);

	/* Master core is always used */
	for (used_cores = 1; used_cores < num_lcores; used_cores++)
		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);

	/* Return if test failed */
	if (ret) {
		rte_free(t_params);
		return ret;
	}

	/* Print throughput if interrupts are disabled and test passed */
	if (!intr_enabled) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
			print_dec_throughput(t_params, num_lcores);
		else
			print_enc_throughput(t_params, num_lcores);
		rte_free(t_params);
		return ret;
	}

	/* In interrupt TC we need to wait for the interrupt callback to dequeue
	 * all pending operations. Skip waiting for queues which reported an
	 * error using processing_status variable.
	 * Wait for master lcore operations.
	 */
	tp = &t_params[0];
	while ((rte_atomic16_read(&tp->nb_dequeued) <
			op_params->num_to_process) &&
			(rte_atomic16_read(&tp->processing_status) !=
			TEST_FAILED))
		rte_pause();

	tp->ops_per_sec /= TEST_REPETITIONS;
	tp->mbps /= TEST_REPETITIONS;
	ret |= (int)rte_atomic16_read(&tp->processing_status);

	/* Wait for slave lcores operations */
	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
		tp = &t_params[used_cores];

		while ((rte_atomic16_read(&tp->nb_dequeued) <
				op_params->num_to_process) &&
				(rte_atomic16_read(&tp->processing_status) !=
				TEST_FAILED))
			rte_pause();

		tp->ops_per_sec /= TEST_REPETITIONS;
		tp->mbps /= TEST_REPETITIONS;
		ret |= (int)rte_atomic16_read(&tp->processing_status);
	}

	/* Print throughput if test passed */
	if (!ret) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
			print_dec_throughput(t_params, num_lcores);
		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
			print_enc_throughput(t_params, num_lcores);
	}

	rte_free(t_params);
	return ret;
}
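
/*
 * Launch pattern used by throughput_test(): the master lcore takes
 * t_params[0] and runs the selected per-lcore function inline, slave
 * lcores are started via rte_eal_remote_launch(), and every worker spins
 * on op_params->sync until it flips from SYNC_WAIT to SYNC_START, so the
 * timed sections begin approximately together on all cores.
 */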
static int
latency_test_dec(struct rte_mempool *mempool,
		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
		int vector_mask, uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
	int ret = TEST_SUCCESS;
	uint16_t i, j, dequeued;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t start_time = 0, last_time = 0;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;
		bool first_time = true;
		last_time = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_dec_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					ref_op);

		/* Set counter to validate the ordering */
		for (j = 0; j < burst_sz; ++j)
			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

		start_time = rte_rdtsc_precise();

		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
				burst_sz);
		TEST_ASSERT(enq == burst_sz,
				"Error enqueueing burst, expected %u, got %u",
				burst_sz, enq);

		/* Dequeue */
		do {
			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);
			if (likely(first_time && (deq > 0))) {
				last_time = rte_rdtsc_precise() - start_time;
				first_time = false;
			}
		} while (unlikely(burst_sz != deq));

		*max_time = RTE_MAX(*max_time, last_time);
		*min_time = RTE_MIN(*min_time, last_time);
		*total_time += last_time;

		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
					vector_mask);
			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
		}

		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}
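
/*
 * The latency recorded per burst above is the time from the enqueue call
 * until the first successful dequeue (first_time flips exactly once),
 * i.e. burst round-trip latency rather than per-op completion time.
 * min/max/total are then reduced across bursts through the caller's
 * pointers, and the function returns the number of bursts measured.
 */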
static int
latency_test_ldpc_dec(struct rte_mempool *mempool,
		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
		int vector_mask, uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
	int ret = TEST_SUCCESS;
	uint16_t i, j, dequeued;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t start_time = 0, last_time = 0;
	bool extDdr = ldpc_cap_flags &
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;
		bool first_time = true;
		last_time = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_dec_op_alloc_bulk() failed");

		/* For latency tests we need to disable early termination */
		if (check_bit(ref_op->ldpc_dec.op_flags,
				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
			ref_op->ldpc_dec.op_flags -=
					RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
		ref_op->ldpc_dec.iter_max = 6;
		ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;

		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					bufs->harq_inputs,
					bufs->harq_outputs,
					ref_op);

		if (extDdr)
			preload_harq_ddr(dev_id, queue_id, ops_enq,
					burst_sz, true);

		/* Set counter to validate the ordering */
		for (j = 0; j < burst_sz; ++j)
			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

		start_time = rte_rdtsc_precise();

		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
				&ops_enq[enq], burst_sz);
		TEST_ASSERT(enq == burst_sz,
				"Error enqueueing burst, expected %u, got %u",
				burst_sz, enq);

		/* Dequeue */
		do {
			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);
			if (likely(first_time && (deq > 0))) {
				last_time = rte_rdtsc_precise() - start_time;
				first_time = false;
			}
		} while (unlikely(burst_sz != deq));

		*max_time = RTE_MAX(*max_time, last_time);
		*min_time = RTE_MIN(*min_time, last_time);
		*total_time += last_time;

		if (extDdr)
			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);

		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
					vector_mask);
			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
		}

		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}
static int
latency_test_enc(struct rte_mempool *mempool,
		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
		uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
	int ret = TEST_SUCCESS;
	uint16_t i, j, dequeued;
	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t start_time = 0, last_time = 0;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;
		bool first_time = true;
		last_time = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_enc_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					ref_op);

		/* Set counter to validate the ordering */
		for (j = 0; j < burst_sz; ++j)
			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

		start_time = rte_rdtsc_precise();

		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
				burst_sz);
		TEST_ASSERT(enq == burst_sz,
				"Error enqueueing burst, expected %u, got %u",
				burst_sz, enq);

		/* Dequeue */
		do {
			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);
			if (likely(first_time && (deq > 0))) {
				last_time += rte_rdtsc_precise() - start_time;
				first_time = false;
			}
		} while (unlikely(burst_sz != deq));

		*max_time = RTE_MAX(*max_time, last_time);
		*min_time = RTE_MIN(*min_time, last_time);
		*total_time += last_time;

		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
		}

		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}
static int
latency_test_ldpc_enc(struct rte_mempool *mempool,
		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
		uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
	int ret = TEST_SUCCESS;
	uint16_t i, j, dequeued;
	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t start_time = 0, last_time = 0;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;
		bool first_time = true;
		last_time = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_enc_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					ref_op);

		/* Set counter to validate the ordering */
		for (j = 0; j < burst_sz; ++j)
			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

		start_time = rte_rdtsc_precise();

		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
				&ops_enq[enq], burst_sz);
		TEST_ASSERT(enq == burst_sz,
				"Error enqueueing burst, expected %u, got %u",
				burst_sz, enq);

		/* Dequeue */
		do {
			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);
			if (likely(first_time && (deq > 0))) {
				last_time += rte_rdtsc_precise() - start_time;
				first_time = false;
			}
		} while (unlikely(burst_sz != deq));

		*max_time = RTE_MAX(*max_time, last_time);
		*min_time = RTE_MIN(*min_time, last_time);
		*total_time += last_time;

		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
			ret = validate_ldpc_enc_op(ops_deq, burst_sz, ref_op);
			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
		}

		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}
static int
latency_test(struct active_device *ad,
		struct test_op_params *op_params)
{
	int iter;
	uint16_t burst_sz = op_params->burst_sz;
	const uint16_t num_to_process = op_params->num_to_process;
	const enum rte_bbdev_op_type op_type = test_vector.op_type;
	const uint16_t queue_id = ad->queue_ids[0];
	struct test_buffers *bufs = NULL;
	struct rte_bbdev_info info;
	uint64_t total_time, min_time, max_time;
	const char *op_type_str;

	total_time = max_time = 0;
	min_time = UINT64_MAX;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(ad->dev_id, &info);
	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	printf("+ ------------------------------------------------------- +\n");
	printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
			info.dev_name, burst_sz, num_to_process, op_type_str);

	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
		iter = latency_test_dec(op_params->mp, bufs,
				op_params->ref_dec_op, op_params->vector_mask,
				ad->dev_id, queue_id, num_to_process,
				burst_sz, &total_time, &min_time, &max_time);
	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
		iter = latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &total_time,
				&min_time, &max_time);
	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
		iter = latency_test_ldpc_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &total_time,
				&min_time, &max_time);
	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
		iter = latency_test_ldpc_dec(op_params->mp, bufs,
				op_params->ref_dec_op, op_params->vector_mask,
				ad->dev_id, queue_id, num_to_process,
				burst_sz, &total_time, &min_time, &max_time);
	else
		iter = latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op,
				ad->dev_id, queue_id,
				num_to_process, burst_sz, &total_time,
				&min_time, &max_time);

	if (iter <= 0)
		return TEST_FAILED;

	printf("Operation latency:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)total_time / (double)iter,
			(double)(total_time * 1000000) / (double)iter /
			(double)rte_get_tsc_hz(), (double)min_time,
			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
			(double)max_time, (double)(max_time * 1000000) /
			(double)rte_get_tsc_hz());

	return TEST_SUCCESS;
}
#ifdef RTE_BBDEV_OFFLOAD_COST
static int
get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
		struct rte_bbdev_stats *stats)
{
	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
	struct rte_bbdev_stats *q_stats;

	if (queue_id >= dev->data->num_queues)
		return -1;

	q_stats = &dev->data->queues[queue_id].queue_stats;

	stats->enqueued_count = q_stats->enqueued_count;
	stats->dequeued_count = q_stats->dequeued_count;
	stats->enqueue_err_count = q_stats->enqueue_err_count;
	stats->dequeue_err_count = q_stats->dequeue_err_count;
	stats->acc_offload_cycles = q_stats->acc_offload_cycles;

	return 0;
}
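
/*
 * This helper reads the per-queue counters straight out of the device's
 * queue data rather than going through the device-level stats API, which
 * aggregates over all queues. The offload-cost math below needs the
 * acc_offload_cycles value accumulated by the PMD for exactly the queue
 * under test.
 */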
static int
offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					ref_op);

		/* Start time meas for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
				stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* give time for device to process ops */
		rte_delay_us(200);

		/* Start time meas for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
					&ops_deq[deq], 1);
		} while (unlikely(deq != 1));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		/* Dequeue remaining operations if needed*/
		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}
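
/*
 * Decomposition used above: one wall-clock enqueue window is split into
 * driver SW time (enq_sw = wall - acc_offload_cycles) and accelerator
 * time (acc_offload_cycles as reported by the PMD). Illustrative numbers
 * (hypothetical): a 5000-cycle window with acc_offload_cycles = 3200
 * books 1800 cycles against the driver and 3200 against the accelerator.
 */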
static int
offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
		struct test_buffers *bufs,
		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;
	bool extDdr = ldpc_cap_flags &
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					bufs->harq_inputs,
					bufs->harq_outputs,
					ref_op);

		if (extDdr)
			preload_harq_ddr(dev_id, queue_id, ops_enq,
					burst_sz, true);

		/* Start time meas for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time -= stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* give time for device to process ops */
		rte_delay_us(200);

		/* Start time meas for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
					&ops_deq[deq], 1);
		} while (unlikely(deq != 1));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		/* Dequeue remaining operations if needed*/
		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		if (extDdr) {
			/* Read loopback is not thread safe */
			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
		}

		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}
static int
offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_enc_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					ref_op);

		/* Start time meas for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;

		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);
		enq_sw_last_time -= stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* give time for device to process ops */
		rte_delay_us(200);

		/* Start time meas for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
					&ops_deq[deq], 1);
		} while (unlikely(deq != 1));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}
static int
offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
		struct test_buffers *bufs,
		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_enc_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					ref_op);

		/* Start time meas for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time -= stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* give time for device to process ops */
		rte_delay_us(200);

		/* Start time meas for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
					&ops_deq[deq], 1);
		} while (unlikely(deq != 1));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}
#endif
static int
offload_cost_test(struct active_device *ad,
		struct test_op_params *op_params)
{
#ifndef RTE_BBDEV_OFFLOAD_COST
	RTE_SET_USED(ad);
	RTE_SET_USED(op_params);
	printf("Offload latency test is disabled.\n");
	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
	return TEST_SKIPPED;
#else
	int iter;
	uint16_t burst_sz = op_params->burst_sz;
	const uint16_t num_to_process = op_params->num_to_process;
	const enum rte_bbdev_op_type op_type = test_vector.op_type;
	const uint16_t queue_id = ad->queue_ids[0];
	struct test_buffers *bufs = NULL;
	struct rte_bbdev_info info;
	const char *op_type_str;
	struct test_time_stats time_st;

	memset(&time_st, 0, sizeof(struct test_time_stats));
	time_st.enq_sw_min_time = UINT64_MAX;
	time_st.enq_acc_min_time = UINT64_MAX;
	time_st.deq_min_time = UINT64_MAX;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(ad->dev_id, &info);
	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	printf("+ ------------------------------------------------------- +\n");
	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
			info.dev_name, burst_sz, num_to_process, op_type_str);

	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
		iter = offload_latency_test_dec(op_params->mp, bufs,
				op_params->ref_dec_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
		iter = offload_latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
				op_params->ref_dec_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else
		iter = offload_latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);

	if (iter <= 0)
		return TEST_FAILED;

	printf("Enqueue driver offload cost latency:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n"
			"Enqueue accelerator offload cost latency:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)time_st.enq_sw_total_time / (double)iter,
			(double)(time_st.enq_sw_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.enq_sw_min_time,
			(double)(time_st.enq_sw_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
			(double)(time_st.enq_sw_max_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
			(double)iter,
			(double)(time_st.enq_acc_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.enq_acc_min_time,
			(double)(time_st.enq_acc_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
			(double)(time_st.enq_acc_max_time * 1000000) /
			rte_get_tsc_hz());

	printf("Dequeue offload cost latency - one op:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)time_st.deq_total_time / (double)iter,
			(double)(time_st.deq_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.deq_min_time,
			(double)(time_st.deq_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.deq_max_time,
			(double)(time_st.deq_max_time * 1000000) /
			rte_get_tsc_hz());

	return TEST_SUCCESS;
#endif
}
#ifdef RTE_BBDEV_OFFLOAD_COST
static int
offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *deq_total_time, uint64_t *deq_min_time,
		uint64_t *deq_max_time)
{
	int i, deq_total;
	struct rte_bbdev_dec_op *ops[MAX_BURST];
	uint64_t deq_start_time, deq_last_time;

	/* Test deq offload latency from an empty queue */

	for (i = 0, deq_total = 0; deq_total < num_to_process;
			++i, deq_total += burst_sz) {
		deq_start_time = rte_rdtsc_precise();

		if (unlikely(num_to_process - deq_total < burst_sz))
			burst_sz = num_to_process - deq_total;
		rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz);

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
		*deq_total_time += deq_last_time;
	}

	return i;
}
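
/*
 * Dequeuing from a queue with nothing in flight isolates the pure driver
 * polling cost: no allocation, enqueue or validation happens in the loop
 * above, so the deq_*_time statistics approximate the floor that the
 * dequeue API adds on every poll.
 */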
static int
offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *deq_total_time, uint64_t *deq_min_time,
		uint64_t *deq_max_time)
{
	int i, deq_total;
	struct rte_bbdev_enc_op *ops[MAX_BURST];
	uint64_t deq_start_time, deq_last_time;

	/* Test deq offload latency from an empty queue */
	for (i = 0, deq_total = 0; deq_total < num_to_process;
			++i, deq_total += burst_sz) {
		deq_start_time = rte_rdtsc_precise();

		if (unlikely(num_to_process - deq_total < burst_sz))
			burst_sz = num_to_process - deq_total;
		rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz);

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
		*deq_total_time += deq_last_time;
	}

	return i;
}
#endif
static int
offload_latency_empty_q_test(struct active_device *ad,
		struct test_op_params *op_params)
{
#ifndef RTE_BBDEV_OFFLOAD_COST
	RTE_SET_USED(ad);
	RTE_SET_USED(op_params);
	printf("Offload latency empty dequeue test is disabled.\n");
	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
	return TEST_SKIPPED;
#else
	int iter;
	uint64_t deq_total_time, deq_min_time, deq_max_time;
	uint16_t burst_sz = op_params->burst_sz;
	const uint16_t num_to_process = op_params->num_to_process;
	const enum rte_bbdev_op_type op_type = test_vector.op_type;
	const uint16_t queue_id = ad->queue_ids[0];
	struct rte_bbdev_info info;
	const char *op_type_str;

	deq_total_time = deq_max_time = 0;
	deq_min_time = UINT64_MAX;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(ad->dev_id, &info);

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	printf("+ ------------------------------------------------------- +\n");
	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
			info.dev_name, burst_sz, num_to_process, op_type_str);

	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
				num_to_process, burst_sz, &deq_total_time,
				&deq_min_time, &deq_max_time);
	else
		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
				num_to_process, burst_sz, &deq_total_time,
				&deq_min_time, &deq_max_time);

	if (iter <= 0)
		return TEST_FAILED;

	printf("Empty dequeue offload:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)deq_total_time / (double)iter,
			(double)(deq_total_time * 1000000) / (double)iter /
			(double)rte_get_tsc_hz(), (double)deq_min_time,
			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
			(double)deq_max_time, (double)(deq_max_time * 1000000) /
			rte_get_tsc_hz());

	return TEST_SUCCESS;
#endif
}

static int
throughput_tc(void)
{
	return run_test_case(throughput_test);
}

static int
offload_cost_tc(void)
{
	return run_test_case(offload_cost_test);
}

static int
offload_latency_empty_q_tc(void)
{
	return run_test_case(offload_latency_empty_q_test);
}

static int
latency_tc(void)
{
	return run_test_case(latency_test);
}

static int
interrupt_tc(void)
{
	return run_test_case(throughput_test);
}
static struct unit_test_suite bbdev_throughput_testsuite = {
	.suite_name = "BBdev Throughput Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_validation_testsuite = {
	.suite_name = "BBdev Validation Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_latency_testsuite = {
	.suite_name = "BBdev Latency Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_offload_cost_testsuite = {
	.suite_name = "BBdev Offload Cost Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_interrupt_testsuite = {
	.suite_name = "BBdev Interrupt Tests",
	.setup = interrupt_testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
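
/*
 * Each REGISTER_TEST_COMMAND() entry above exposes its suite under the
 * given command name in the bbdev test application, so a single binary
 * can run e.g. only the "latency" suite or only the "offload" suite
 * (which chains the offload cost and empty-queue cases). See the
 * test-bbdev documentation for the exact command-line syntax.
 */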