app/bbdev: add performance tests
[dpdk.git] app/test-bbdev/test_bbdev_perf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20
21 #include "main.h"
22 #include "test_bbdev_vector.h"
23
24 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
25
26 #define MAX_QUEUES RTE_MAX_LCORE
27 #define TEST_REPETITIONS 1000
28
29 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
30 #include <fpga_lte_fec.h>
31 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
32 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
33 #define VF_UL_4G_QUEUE_VALUE 4
34 #define VF_DL_4G_QUEUE_VALUE 4
35 #define UL_4G_BANDWIDTH 3
36 #define DL_4G_BANDWIDTH 3
37 #define UL_4G_LOAD_BALANCE 128
38 #define DL_4G_LOAD_BALANCE 128
39 #define FLR_4G_TIMEOUT 610
40 #endif
41
42 #define OPS_CACHE_SIZE 256U
43 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
44
45 #define SYNC_WAIT 0
46 #define SYNC_START 1
47 #define INVALID_OPAQUE -1
48
49 #define INVALID_QUEUE_ID -1
50 /* Increment for next code block in external HARQ memory */
51 #define HARQ_INCR 32768
52 /* Headroom for filler LLRs insertion in HARQ buffer */
53 #define FILLER_HEADROOM 1024
54 /* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
55 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
56 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
57 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
58 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
59 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
60 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
61 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
62 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
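
/*
 * For reference, a rough sketch of the 3GPP 38.212 (Section 5.4.2.1) mapping
 * these fractions correspond to:
 *
 *   K0 = floor(numerator * Ncb / (N_ZC_x * Zc)) * Zc
 *
 * which, for the full circular buffer case (Ncb == N_ZC_x * Zc), reduces to
 * e.g. K0 = K0_1_1 * Zc for rv_index 1 on BG 1, and K0 = 0 for rv_index 0.
 */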
63
64 static struct test_bbdev_vector test_vector;
65
66 /* Switch between PMD and Interrupt for throughput TC */
67 static bool intr_enabled;
68
69 /* LLR arithmetic representation for numerical conversion */
70 static int ldpc_llr_decimals;
71 static int ldpc_llr_size;
72 /* Keep track of the LDPC decoder device capability flag */
73 static uint32_t ldpc_cap_flags;
74
75 /* Represents tested active devices */
76 static struct active_device {
77         const char *driver_name;
78         uint8_t dev_id;
79         uint16_t supported_ops;
80         uint16_t queue_ids[MAX_QUEUES];
81         uint16_t nb_queues;
82         struct rte_mempool *ops_mempool;
83         struct rte_mempool *in_mbuf_pool;
84         struct rte_mempool *hard_out_mbuf_pool;
85         struct rte_mempool *soft_out_mbuf_pool;
86         struct rte_mempool *harq_in_mbuf_pool;
87         struct rte_mempool *harq_out_mbuf_pool;
88 } active_devs[RTE_BBDEV_MAX_DEVS];
89
90 static uint8_t nb_active_devs;
91
92 /* Data buffers used by BBDEV ops */
93 struct test_buffers {
94         struct rte_bbdev_op_data *inputs;
95         struct rte_bbdev_op_data *hard_outputs;
96         struct rte_bbdev_op_data *soft_outputs;
97         struct rte_bbdev_op_data *harq_inputs;
98         struct rte_bbdev_op_data *harq_outputs;
99 };
100
101 /* Operation parameters specific for given test case */
102 struct test_op_params {
103         struct rte_mempool *mp;
104         struct rte_bbdev_dec_op *ref_dec_op;
105         struct rte_bbdev_enc_op *ref_enc_op;
106         uint16_t burst_sz;
107         uint16_t num_to_process;
108         uint16_t num_lcores;
109         int vector_mask;
110         rte_atomic16_t sync;
111         struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
112 };
113
114 /* Contains per lcore params */
115 struct thread_params {
116         uint8_t dev_id;
117         uint16_t queue_id;
118         uint32_t lcore_id;
119         uint64_t start_time;
120         double ops_per_sec;
121         double mbps;
122         uint8_t iter_count;
123         double iter_average;
124         double bler;
125         rte_atomic16_t nb_dequeued;
126         rte_atomic16_t processing_status;
127         rte_atomic16_t burst_sz;
128         struct test_op_params *op_params;
129         struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
130         struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
131 };
132
133 #ifdef RTE_BBDEV_OFFLOAD_COST
134 /* Stores time statistics */
135 struct test_time_stats {
136         /* Stores software enqueue total working time */
137         uint64_t enq_sw_total_time;
138         /* Stores minimum value of software enqueue working time */
139         uint64_t enq_sw_min_time;
140         /* Stores maximum value of software enqueue working time */
141         uint64_t enq_sw_max_time;
142         /* Stores turbo enqueue total working time */
143         uint64_t enq_acc_total_time;
144         /* Stores minimum value of accelerator enqueue working time */
145         uint64_t enq_acc_min_time;
146         /* Stores maximum value of accelerator enqueue working time */
147         uint64_t enq_acc_max_time;
148         /* Stores dequeue total working time */
149         uint64_t deq_total_time;
150         /* Stores minimum value of dequeue working time */
151         uint64_t deq_min_time;
152         /* Stores maximum value of dequeue working time */
153         uint64_t deq_max_time;
154 };
155 #endif
156
157 typedef int (test_case_function)(struct active_device *ad,
158                 struct test_op_params *op_params);
159
160 static inline void
161 mbuf_reset(struct rte_mbuf *m)
162 {
163         m->pkt_len = 0;
164
165         do {
166                 m->data_len = 0;
167                 m = m->next;
168         } while (m != NULL);
169 }
170
171 /* Read flag value 0/1 from bitmap */
172 static inline bool
173 check_bit(uint32_t bitmap, uint32_t bitmask)
174 {
175         return bitmap & bitmask;
176 }
177
178 static inline void
179 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
180 {
181         ad->supported_ops |= (1 << op_type);
182 }
183
184 static inline bool
185 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
186 {
187         return ad->supported_ops & (1 << op_type);
188 }
189
190 static inline bool
191 flags_match(uint32_t flags_req, uint32_t flags_present)
192 {
193         return (flags_req & flags_present) == flags_req;
194 }
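
/*
 * Example: flags_match(0x5, 0x7) is true (every requested flag is present),
 * while flags_match(0x5, 0x4) is false (bit 0 is requested but missing).
 */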
195
196 static void
197 clear_soft_out_cap(uint32_t *op_flags)
198 {
199         *op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
200         *op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
201         *op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
202 }
203
204 static int
205 check_dev_cap(const struct rte_bbdev_info *dev_info)
206 {
207         unsigned int i;
208         unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
209                 nb_harq_inputs, nb_harq_outputs;
210         const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
211
212         nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
213         nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
214         nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
215         nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
216         nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
217
218         for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
219                 if (op_cap->type != test_vector.op_type)
220                         continue;
221
222                 if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
223                         const struct rte_bbdev_op_cap_turbo_dec *cap =
224                                         &op_cap->cap.turbo_dec;
225                         /* Ignore lack of soft output capability, just skip
226                          * checking if soft output is valid.
227                          */
228                         if ((test_vector.turbo_dec.op_flags &
229                                         RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
230                                         !(cap->capability_flags &
231                                         RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
232                                 printf(
233                                         "INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
234                                         dev_info->dev_name);
235                                 clear_soft_out_cap(
236                                         &test_vector.turbo_dec.op_flags);
237                         }
238
239                         if (!flags_match(test_vector.turbo_dec.op_flags,
240                                         cap->capability_flags))
241                                 return TEST_FAILED;
242                         if (nb_inputs > cap->num_buffers_src) {
243                                 printf("Too many inputs defined: %u, max: %u\n",
244                                         nb_inputs, cap->num_buffers_src);
245                                 return TEST_FAILED;
246                         }
247                         if (nb_soft_outputs > cap->num_buffers_soft_out &&
248                                         (test_vector.turbo_dec.op_flags &
249                                         RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
250                                 printf(
251                                         "Too many soft outputs defined: %u, max: %u\n",
252                                                 nb_soft_outputs,
253                                                 cap->num_buffers_soft_out);
254                                 return TEST_FAILED;
255                         }
256                         if (nb_hard_outputs > cap->num_buffers_hard_out) {
257                                 printf(
258                                         "Too many hard outputs defined: %u, max: %u\n",
259                                                 nb_hard_outputs,
260                                                 cap->num_buffers_hard_out);
261                                 return TEST_FAILED;
262                         }
263                         if (intr_enabled && !(cap->capability_flags &
264                                         RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
265                                 printf(
266                                         "Dequeue interrupts are not supported!\n");
267                                 return TEST_FAILED;
268                         }
269
270                         return TEST_SUCCESS;
271                 } else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
272                         const struct rte_bbdev_op_cap_turbo_enc *cap =
273                                         &op_cap->cap.turbo_enc;
274
275                         if (!flags_match(test_vector.turbo_enc.op_flags,
276                                         cap->capability_flags))
277                                 return TEST_FAILED;
278                         if (nb_inputs > cap->num_buffers_src) {
279                                 printf("Too many inputs defined: %u, max: %u\n",
280                                         nb_inputs, cap->num_buffers_src);
281                                 return TEST_FAILED;
282                         }
283                         if (nb_hard_outputs > cap->num_buffers_dst) {
284                                 printf(
285                                         "Too many hard outputs defined: %u, max: %u\n",
286                                         nb_hard_outputs, cap->num_buffers_dst);
287                                 return TEST_FAILED;
288                         }
289                         if (intr_enabled && !(cap->capability_flags &
290                                         RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
291                                 printf(
292                                         "Dequeue interrupts are not supported!\n");
293                                 return TEST_FAILED;
294                         }
295
296                         return TEST_SUCCESS;
297                 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
298                         const struct rte_bbdev_op_cap_ldpc_enc *cap =
299                                         &op_cap->cap.ldpc_enc;
300
301                         if (!flags_match(test_vector.ldpc_enc.op_flags,
302                                         cap->capability_flags)) {
303                                 printf("Flag Mismatch\n");
304                                 return TEST_FAILED;
305                         }
306                         if (nb_inputs > cap->num_buffers_src) {
307                                 printf("Too many inputs defined: %u, max: %u\n",
308                                         nb_inputs, cap->num_buffers_src);
309                                 return TEST_FAILED;
310                         }
311                         if (nb_hard_outputs > cap->num_buffers_dst) {
312                                 printf(
313                                         "Too many hard outputs defined: %u, max: %u\n",
314                                         nb_hard_outputs, cap->num_buffers_dst);
315                                 return TEST_FAILED;
316                         }
317                         if (intr_enabled && !(cap->capability_flags &
318                                         RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
319                                 printf(
320                                         "Dequeue interrupts are not supported!\n");
321                                 return TEST_FAILED;
322                         }
323
324                         return TEST_SUCCESS;
325                 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
326                         const struct rte_bbdev_op_cap_ldpc_dec *cap =
327                                         &op_cap->cap.ldpc_dec;
328
329                         if (!flags_match(test_vector.ldpc_dec.op_flags,
330                                         cap->capability_flags)) {
331                                 printf("Flag Mismatch\n");
332                                 return TEST_FAILED;
333                         }
334                         if (nb_inputs > cap->num_buffers_src) {
335                                 printf("Too many inputs defined: %u, max: %u\n",
336                                         nb_inputs, cap->num_buffers_src);
337                                 return TEST_FAILED;
338                         }
339                         if (nb_hard_outputs > cap->num_buffers_hard_out) {
340                                 printf(
341                                         "Too many hard outputs defined: %u, max: %u\n",
342                                         nb_hard_outputs,
343                                         cap->num_buffers_hard_out);
344                                 return TEST_FAILED;
345                         }
346                         if (nb_harq_inputs > cap->num_buffers_hard_out) {
347                                 printf(
348                                         "Too many HARQ inputs defined: %u, max: %u\n",
349                                         nb_harq_inputs,
350                                         cap->num_buffers_hard_out);
351                                 return TEST_FAILED;
352                         }
353                         if (nb_harq_outputs > cap->num_buffers_hard_out) {
354                                 printf(
355                                         "Too many HARQ outputs defined: %u, max: %u\n",
356                                         nb_harq_outputs,
357                                         cap->num_buffers_hard_out);
358                                 return TEST_FAILED;
359                         }
360                         if (intr_enabled && !(cap->capability_flags &
361                                         RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
362                                 printf(
363                                         "Dequeue interrupts are not supported!\n");
364                                 return TEST_FAILED;
365                         }
366                         if (intr_enabled && (test_vector.ldpc_dec.op_flags &
367                                 (RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
368                                 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
369                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
370                                         ))) {
371                                 printf("Skip loop-back with interrupt\n");
372                                 return TEST_FAILED;
373                         }
374                         return TEST_SUCCESS;
375                 }
376         }
377
378         if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
379                 return TEST_SUCCESS; /* Special case for NULL device */
380
381         return TEST_FAILED;
382 }
383
384 /* calculates optimal mempool size not smaller than the val */
385 static unsigned int
386 optimal_mempool_size(unsigned int val)
387 {
388         return rte_align32pow2(val + 1) - 1;
389 }
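
/*
 * The 2^n - 1 shape suits the default ring-backed mempool: the ring is
 * rounded up to a power of two and can hold at most (ring size - 1) objects,
 * so requesting 2^n - 1 wastes no space. For example,
 * optimal_mempool_size(1000) and optimal_mempool_size(1023) both return 1023.
 */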
390
391 /* allocates mbuf mempool for inputs and outputs */
392 static struct rte_mempool *
393 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
394                 int socket_id, unsigned int mbuf_pool_size,
395                 const char *op_type_str)
396 {
397         unsigned int i;
398         uint32_t max_seg_sz = 0;
399         char pool_name[RTE_MEMPOOL_NAMESIZE];
400
401         /* find max input segment size */
402         for (i = 0; i < entries->nb_segments; ++i)
403                 if (entries->segments[i].length > max_seg_sz)
404                         max_seg_sz = entries->segments[i].length;
405
406         snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
407                         dev_id);
408         return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
409                         RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
410                                         + FILLER_HEADROOM,
411                         (unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
412 }
413
414 static int
415 create_mempools(struct active_device *ad, int socket_id,
416                 enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
417 {
418         struct rte_mempool *mp;
419         unsigned int ops_pool_size, mbuf_pool_size = 0;
420         char pool_name[RTE_MEMPOOL_NAMESIZE];
421         const char *op_type_str;
422         enum rte_bbdev_op_type op_type = org_op_type;
423
424         struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
425         struct op_data_entries *hard_out =
426                         &test_vector.entries[DATA_HARD_OUTPUT];
427         struct op_data_entries *soft_out =
428                         &test_vector.entries[DATA_SOFT_OUTPUT];
429         struct op_data_entries *harq_in =
430                         &test_vector.entries[DATA_HARQ_INPUT];
431         struct op_data_entries *harq_out =
432                         &test_vector.entries[DATA_HARQ_OUTPUT];
433
434         /* allocate ops mempool */
435         ops_pool_size = optimal_mempool_size(RTE_MAX(
436                         /* Ops used plus 1 reference op */
437                         RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
438                         /* Minimal cache size plus 1 reference op */
439                         (unsigned int)(1.5 * rte_lcore_count() *
440                                         OPS_CACHE_SIZE + 1)),
441                         OPS_POOL_SIZE_MIN));
442
443         if (org_op_type == RTE_BBDEV_OP_NONE)
444                 op_type = RTE_BBDEV_OP_TURBO_ENC;
445
446         op_type_str = rte_bbdev_op_type_str(op_type);
447         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
448
449         snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
450                         ad->dev_id);
451         mp = rte_bbdev_op_pool_create(pool_name, op_type,
452                         ops_pool_size, OPS_CACHE_SIZE, socket_id);
453         TEST_ASSERT_NOT_NULL(mp,
454                         "ERROR Failed to create %u items ops pool for dev %u on socket %u.",
455                         ops_pool_size,
456                         ad->dev_id,
457                         socket_id);
458         ad->ops_mempool = mp;
459
460         /* Do not create inputs and outputs mbufs for BaseBand Null Device */
461         if (org_op_type == RTE_BBDEV_OP_NONE)
462                 return TEST_SUCCESS;
463
464         /* Inputs */
465         if (in->nb_segments > 0) {
466                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
467                                 in->nb_segments);
468                 mp = create_mbuf_pool(in, ad->dev_id, socket_id,
469                                 mbuf_pool_size, "in");
470                 TEST_ASSERT_NOT_NULL(mp,
471                                 "ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
472                                 mbuf_pool_size,
473                                 ad->dev_id,
474                                 socket_id);
475                 ad->in_mbuf_pool = mp;
476         }
477
478         /* Hard outputs */
479         if (hard_out->nb_segments > 0) {
480                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
481                                 hard_out->nb_segments);
482                 mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
483                                 mbuf_pool_size,
484                                 "hard_out");
485                 TEST_ASSERT_NOT_NULL(mp,
486                                 "ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
487                                 mbuf_pool_size,
488                                 ad->dev_id,
489                                 socket_id);
490                 ad->hard_out_mbuf_pool = mp;
491         }
492
493         /* Soft outputs */
494         if (soft_out->nb_segments > 0) {
495                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
496                                 soft_out->nb_segments);
497                 mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
498                                 mbuf_pool_size,
499                                 "soft_out");
500                 TEST_ASSERT_NOT_NULL(mp,
501                                 "ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.",
502                                 mbuf_pool_size,
503                                 ad->dev_id,
504                                 socket_id);
505                 ad->soft_out_mbuf_pool = mp;
506         }
507
508         /* HARQ inputs */
509         if (harq_in->nb_segments > 0) {
510                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
511                                 harq_in->nb_segments);
512                 mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
513                                 mbuf_pool_size,
514                                 "harq_in");
515                 TEST_ASSERT_NOT_NULL(mp,
516                                 "ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.",
517                                 mbuf_pool_size,
518                                 ad->dev_id,
519                                 socket_id);
520                 ad->harq_in_mbuf_pool = mp;
521         }
522
523         /* HARQ outputs */
524         if (harq_out->nb_segments > 0) {
525                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
526                                 harq_out->nb_segments);
527                 mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
528                                 mbuf_pool_size,
529                                 "harq_out");
530                 TEST_ASSERT_NOT_NULL(mp,
531                                 "ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.",
532                                 mbuf_pool_size,
533                                 ad->dev_id,
534                                 socket_id);
535                 ad->harq_out_mbuf_pool = mp;
536         }
537
538         return TEST_SUCCESS;
539 }
540
541 static int
542 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
543                 struct test_bbdev_vector *vector)
544 {
545         int ret;
546         unsigned int queue_id;
547         struct rte_bbdev_queue_conf qconf;
548         struct active_device *ad = &active_devs[nb_active_devs];
549         unsigned int nb_queues;
550         enum rte_bbdev_op_type op_type = vector->op_type;
551
552 /* Configure fpga lte fec with PF & VF values
553  * if '-i' flag is set and using fpga device
554  */
555 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
556         if ((get_init_device() == true) &&
557                 (!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
558                 struct fpga_lte_fec_conf conf;
559                 unsigned int i;
560
561                 printf("Configure FPGA LTE FEC Driver %s with default values\n",
562                                 info->drv.driver_name);
563
564                 /* clear default configuration before initialization */
565                 memset(&conf, 0, sizeof(struct fpga_lte_fec_conf));
566
567                 /* Set PF mode :
568                  * true if PF is used for data plane
569                  * false for VFs
570                  */
571                 conf.pf_mode_en = true;
572
573                 for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
574                         /* Number of UL queues per VF (fpga supports 8 VFs) */
575                         conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
576                         /* Number of DL queues per VF (fpga supports 8 VFs) */
577                         conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
578                 }
579
580                 /* UL bandwidth. Needed for schedule algorithm */
581                 conf.ul_bandwidth = UL_4G_BANDWIDTH;
582                 /* DL bandwidth */
583                 conf.dl_bandwidth = DL_4G_BANDWIDTH;
584
585                 /* UL & DL load balance factor set to 128 */
586                 conf.ul_load_balance = UL_4G_LOAD_BALANCE;
587                 conf.dl_load_balance = DL_4G_LOAD_BALANCE;
588
589                 /* FLR timeout value */
590                 conf.flr_time_out = FLR_4G_TIMEOUT;
591
592                 /* setup FPGA PF with configuration information */
593                 ret = fpga_lte_fec_configure(info->dev_name, &conf);
594                 TEST_ASSERT_SUCCESS(ret,
595                                 "Failed to configure 4G FPGA PF for bbdev %s",
596                                 info->dev_name);
597         }
598 #endif
599         nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
600         nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
601
602         /* setup device */
603         ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
604         if (ret < 0) {
605                 printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
606                                 dev_id, nb_queues, info->socket_id, ret);
607                 return TEST_FAILED;
608         }
609
610         /* configure interrupts if needed */
611         if (intr_enabled) {
612                 ret = rte_bbdev_intr_enable(dev_id);
613                 if (ret < 0) {
614                         printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
615                                         ret);
616                         return TEST_FAILED;
617                 }
618         }
619
620         /* setup device queues */
621         qconf.socket = info->socket_id;
622         qconf.queue_size = info->drv.default_queue_conf.queue_size;
623         qconf.priority = 0;
624         qconf.deferred_start = 0;
625         qconf.op_type = op_type;
626
627         for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
628                 ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
629                 if (ret != 0) {
630                         printf(
631                                         "Allocated all queues (id=%u) at prio%u on dev%u\n",
632                                         queue_id, qconf.priority, dev_id);
633                         qconf.priority++;
634                         ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
635                                         &qconf);
636                 }
637                 if (ret != 0) {
638                         printf("All queues on dev %u allocated: %u\n",
639                                         dev_id, queue_id);
640                         break;
641                 }
642                 ad->queue_ids[queue_id] = queue_id;
643         }
644         TEST_ASSERT(queue_id != 0,
645                         "ERROR Failed to configure any queues on dev %u",
646                         dev_id);
647         ad->nb_queues = queue_id;
648
649         set_avail_op(ad, op_type);
650
651         return TEST_SUCCESS;
652 }
653
654 static int
655 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
656                 struct test_bbdev_vector *vector)
657 {
658         int ret;
659
660         active_devs[nb_active_devs].driver_name = info->drv.driver_name;
661         active_devs[nb_active_devs].dev_id = dev_id;
662
663         ret = add_bbdev_dev(dev_id, info, vector);
664         if (ret == TEST_SUCCESS)
665                 ++nb_active_devs;
666         return ret;
667 }
668
669 static uint8_t
670 populate_active_devices(void)
671 {
672         int ret;
673         uint8_t dev_id;
674         uint8_t nb_devs_added = 0;
675         struct rte_bbdev_info info;
676
677         RTE_BBDEV_FOREACH(dev_id) {
678                 rte_bbdev_info_get(dev_id, &info);
679
680                 if (check_dev_cap(&info)) {
681                         printf(
682                                 "Device %d (%s) does not support specified capabilities\n",
683                                         dev_id, info.dev_name);
684                         continue;
685                 }
686
687                 ret = add_active_device(dev_id, &info, &test_vector);
688                 if (ret != 0) {
689                         printf("Adding active bbdev %s skipped\n",
690                                         info.dev_name);
691                         continue;
692                 }
693                 nb_devs_added++;
694         }
695
696         return nb_devs_added;
697 }
698
699 static int
700 read_test_vector(void)
701 {
702         int ret;
703
704         memset(&test_vector, 0, sizeof(test_vector));
705         printf("Test vector file = %s\n", get_vector_filename());
706         ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
707         TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
708                         get_vector_filename());
709
710         return TEST_SUCCESS;
711 }
712
713 static int
714 testsuite_setup(void)
715 {
716         TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
717
718         if (populate_active_devices() == 0) {
719                 printf("No suitable devices found!\n");
720                 return TEST_SKIPPED;
721         }
722
723         return TEST_SUCCESS;
724 }
725
726 static int
727 interrupt_testsuite_setup(void)
728 {
729         TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
730
731         /* Enable interrupts */
732         intr_enabled = true;
733
734         /* Special case for NULL device (RTE_BBDEV_OP_NONE) */
735         if (populate_active_devices() == 0 ||
736                         test_vector.op_type == RTE_BBDEV_OP_NONE) {
737                 intr_enabled = false;
738                 printf("No suitable devices found!\n");
739                 return TEST_SKIPPED;
740         }
741
742         return TEST_SUCCESS;
743 }
744
745 static void
746 testsuite_teardown(void)
747 {
748         uint8_t dev_id;
749
750         /* Unconfigure devices */
751         RTE_BBDEV_FOREACH(dev_id)
752                 rte_bbdev_close(dev_id);
753
754         /* Clear active devices structs. */
755         memset(active_devs, 0, sizeof(active_devs));
756         nb_active_devs = 0;
757 }
758
759 static int
760 ut_setup(void)
761 {
762         uint8_t i, dev_id;
763
764         for (i = 0; i < nb_active_devs; i++) {
765                 dev_id = active_devs[i].dev_id;
766                 /* reset bbdev stats */
767                 TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
768                                 "Failed to reset stats of bbdev %u", dev_id);
769                 /* start the device */
770                 TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
771                                 "Failed to start bbdev %u", dev_id);
772         }
773
774         return TEST_SUCCESS;
775 }
776
777 static void
778 ut_teardown(void)
779 {
780         uint8_t i, dev_id;
781         struct rte_bbdev_stats stats;
782
783         for (i = 0; i < nb_active_devs; i++) {
784                 dev_id = active_devs[i].dev_id;
785                 /* read stats and print */
786                 rte_bbdev_stats_get(dev_id, &stats);
787                 /* Stop the device */
788                 rte_bbdev_stop(dev_id);
789         }
790 }
791
792 static int
793 init_op_data_objs(struct rte_bbdev_op_data *bufs,
794                 struct op_data_entries *ref_entries,
795                 struct rte_mempool *mbuf_pool, const uint16_t n,
796                 enum op_data_type op_type, uint16_t min_alignment)
797 {
798         int ret;
799         unsigned int i, j;
800         bool large_input = false;
801
802         for (i = 0; i < n; ++i) {
803                 char *data;
804                 struct op_data_buf *seg = &ref_entries->segments[0];
805                 struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
806                 TEST_ASSERT_NOT_NULL(m_head,
807                                 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
808                                 op_type, n * ref_entries->nb_segments,
809                                 mbuf_pool->size);
810
811                 if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
812                         /*
813                          * Special case when DPDK mbuf cannot handle
814                          * the required input size
815                          */
816                         printf("Warning: Larger input size than DPDK mbuf %d\n",
817                                         seg->length);
818                         large_input = true;
819                 }
820                 bufs[i].data = m_head;
821                 bufs[i].offset = 0;
822                 bufs[i].length = 0;
823
824                 if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
825                         if ((op_type == DATA_INPUT) && large_input) {
826                                 /* Allocate a fake overused mbuf */
827                                 data = rte_malloc(NULL, seg->length, 0);
                                TEST_ASSERT_NOT_NULL(data,
                                        "Failed to allocate %u bytes for large input",
                                        seg->length);
828                                 memcpy(data, seg->addr, seg->length);
829                                 m_head->buf_addr = data;
830                                 m_head->buf_iova = rte_malloc_virt2iova(data);
831                                 m_head->data_off = 0;
832                                 m_head->data_len = seg->length;
833                         } else {
834                                 data = rte_pktmbuf_append(m_head, seg->length);
835                                 TEST_ASSERT_NOT_NULL(data,
836                                         "Couldn't append %u bytes to mbuf from %d data type mbuf pool",
837                                         seg->length, op_type);
838
839                                 TEST_ASSERT(data == RTE_PTR_ALIGN(
840                                                 data, min_alignment),
841                                         "Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
842                                         data, min_alignment);
843                                 rte_memcpy(data, seg->addr, seg->length);
844                         }
845
846                         bufs[i].length += seg->length;
847
848                         for (j = 1; j < ref_entries->nb_segments; ++j) {
849                                 struct rte_mbuf *m_tail =
850                                                 rte_pktmbuf_alloc(mbuf_pool);
851                                 TEST_ASSERT_NOT_NULL(m_tail,
852                                                 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
853                                                 op_type,
854                                                 n * ref_entries->nb_segments,
855                                                 mbuf_pool->size);
856                                 seg += 1;
857
858                                 data = rte_pktmbuf_append(m_tail, seg->length);
859                                 TEST_ASSERT_NOT_NULL(data,
860                                                 "Couldn't append %u bytes to mbuf from %d data type mbuf pool",
861                                                 seg->length, op_type);
862
863                                 TEST_ASSERT(data == RTE_PTR_ALIGN(data,
864                                                 min_alignment),
865                                                 "Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
866                                                 data, min_alignment);
867                                 rte_memcpy(data, seg->addr, seg->length);
868                                 bufs[i].length += seg->length;
869
870                                 ret = rte_pktmbuf_chain(m_head, m_tail);
871                                 TEST_ASSERT_SUCCESS(ret,
872                                                 "Couldn't chain mbufs from %d data type mbuf pool",
873                                                 op_type);
874                         }
875                 } else {
876
877                         /* allocate chained-mbuf for output buffer */
878                         for (j = 1; j < ref_entries->nb_segments; ++j) {
879                                 struct rte_mbuf *m_tail =
880                                                 rte_pktmbuf_alloc(mbuf_pool);
881                                 TEST_ASSERT_NOT_NULL(m_tail,
882                                                 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
883                                                 op_type,
884                                                 n * ref_entries->nb_segments,
885                                                 mbuf_pool->size);
886
887                                 ret = rte_pktmbuf_chain(m_head, m_tail);
888                                 TEST_ASSERT_SUCCESS(ret,
889                                                 "Couldn't chain mbufs from %d data type mbuf pool",
890                                                 op_type);
891                         }
892                 }
893         }
894
895         return 0;
896 }
897
898 static int
899 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
900                 const int socket)
901 {
902         int i;
903
904         *buffers = rte_zmalloc_socket(NULL, len, 0, socket);
905         if (*buffers == NULL) {
906                 printf("WARNING: Failed to allocate op_data on socket %d\n",
907                                 socket);
908                 /* try to allocate memory on other detected sockets */
909                 for (i = 0; i < socket; i++) {
910                         *buffers = rte_zmalloc_socket(NULL, len, 0, i);
911                         if (*buffers != NULL)
912                                 break;
913                 }
914         }
915
916         return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
917 }
918
919 static void
920 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
921                 const uint16_t n, const int8_t max_llr_modulus)
922 {
923         uint16_t i, byte_idx;
924
925         for (i = 0; i < n; ++i) {
926                 struct rte_mbuf *m = input_ops[i].data;
927                 while (m != NULL) {
928                         int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
929                                         input_ops[i].offset);
930                         for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
931                                         ++byte_idx)
932                                 llr[byte_idx] = round((double)max_llr_modulus *
933                                                 llr[byte_idx] / INT8_MAX);
934
935                         m = m->next;
936                 }
937         }
938 }
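
/*
 * Example: with max_llr_modulus = 16, an input LLR of 127 (INT8_MAX) is
 * rescaled to round(16 * 127 / 127) = 16 and an LLR of -64 to
 * round(16 * -64 / 127) = -8, compressing the vector's LLR range into
 * roughly [-max_llr_modulus, max_llr_modulus].
 */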
939
940 /*
941  * We may have to insert filler bits
942  * when they are required by the HARQ assumption
943  */
944 static void
945 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
946                 const uint16_t n, struct test_op_params *op_params)
947 {
948         struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
949
950         if (input_ops == NULL)
951                 return;
952         /* No need to add filler if not required by device */
953         if (!(ldpc_cap_flags &
954                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
955                 return;
956         /* No need to add filler for loopback operation */
957         if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
958                 return;
959
960         uint16_t i, j, parity_offset;
961         for (i = 0; i < n; ++i) {
962                 struct rte_mbuf *m = input_ops[i].data;
963                 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
964                                 input_ops[i].offset);
965                 parity_offset = (dec.basegraph == 1 ? 20 : 8)
966                                 * dec.z_c - dec.n_filler;
967                 uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
968                 m->data_len = new_hin_size;
969                 input_ops[i].length = new_hin_size;
970                 for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
971                                 j--)
972                         llr[j] = llr[j - dec.n_filler];
973                 uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
974                 for (j = 0; j < dec.n_filler; j++)
975                         llr[parity_offset + j] = llr_max_pre_scaling;
976         }
977 }
978
979 static void
980 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
981                 const uint16_t n, const int8_t llr_size,
982                 const int8_t llr_decimals)
983 {
984         if (input_ops == NULL)
985                 return;
986
987         uint16_t i, byte_idx;
988
989         int16_t llr_max, llr_min, llr_tmp;
990         llr_max = (1 << (llr_size - 1)) - 1;
991         llr_min = -llr_max;
992         for (i = 0; i < n; ++i) {
993                 struct rte_mbuf *m = input_ops[i].data;
994                 while (m != NULL) {
995                         int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
996                                         input_ops[i].offset);
997                         for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
998                                         ++byte_idx) {
999
1000                                 llr_tmp = llr[byte_idx];
1001                                 if (llr_decimals == 4)
1002                                         llr_tmp *= 8;
1003                                 else if (llr_decimals == 2)
1004                                         llr_tmp *= 2;
1005                                 else if (llr_decimals == 0)
1006                                         llr_tmp /= 2;
1007                                 llr_tmp = RTE_MIN(llr_max,
1008                                                 RTE_MAX(llr_min, llr_tmp));
1009                                 llr[byte_idx] = (int8_t) llr_tmp;
1010                         }
1011
1012                         m = m->next;
1013                 }
1014         }
1015 }
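
/*
 * The scaling above appears to assume test-vector LLRs carry 1 fractional
 * bit: multiplying by 8 or 2, or dividing by 2, converts them to the
 * device's llr_decimals of 4, 2 or 0 respectively, before saturating to the
 * signed llr_size range.
 */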
1016
1017
1018
1019 static int
1020 fill_queue_buffers(struct test_op_params *op_params,
1021                 struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1022                 struct rte_mempool *soft_out_mp,
1023                 struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1024                 uint16_t queue_id,
1025                 const struct rte_bbdev_op_cap *capabilities,
1026                 uint16_t min_alignment, const int socket_id)
1027 {
1028         int ret;
1029         enum op_data_type type;
1030         const uint16_t n = op_params->num_to_process;
1031
1032         struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1033                 in_mp,
1034                 soft_out_mp,
1035                 hard_out_mp,
1036                 harq_in_mp,
1037                 harq_out_mp,
1038         };
1039
1040         struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1041                 &op_params->q_bufs[socket_id][queue_id].inputs,
1042                 &op_params->q_bufs[socket_id][queue_id].soft_outputs,
1043                 &op_params->q_bufs[socket_id][queue_id].hard_outputs,
1044                 &op_params->q_bufs[socket_id][queue_id].harq_inputs,
1045                 &op_params->q_bufs[socket_id][queue_id].harq_outputs,
1046         };
1047
1048         for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1049                 struct op_data_entries *ref_entries =
1050                                 &test_vector.entries[type];
1051                 if (ref_entries->nb_segments == 0)
1052                         continue;
1053
1054                 ret = allocate_buffers_on_socket(queue_ops[type],
1055                                 n * sizeof(struct rte_bbdev_op_data),
1056                                 socket_id);
1057                 TEST_ASSERT_SUCCESS(ret,
1058                                 "Couldn't allocate memory for rte_bbdev_op_data structs");
1059
1060                 ret = init_op_data_objs(*queue_ops[type], ref_entries,
1061                                 mbuf_pools[type], n, type, min_alignment);
1062                 TEST_ASSERT_SUCCESS(ret,
1063                                 "Couldn't init rte_bbdev_op_data structs");
1064         }
1065
1066         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1067                 limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1068                         capabilities->cap.turbo_dec.max_llr_modulus);
1069
1070         if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1071                 bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1072                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1073                 bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1074                                 RTE_BBDEV_LDPC_LLR_COMPRESSION;
1075                 bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1076                                 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1077                 ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1078                 ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1079                 ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1080                 if (!loopback && !llr_comp)
1081                         ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1082                                         ldpc_llr_size, ldpc_llr_decimals);
1083                 if (!loopback && !harq_comp)
1084                         ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1085                                         ldpc_llr_size, ldpc_llr_decimals);
1086                 if (!loopback)
1087                         ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1088                                         op_params);
1089         }
1090
1091         return 0;
1092 }
1093
1094 static void
1095 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1096 {
1097         unsigned int i, j;
1098
1099         rte_mempool_free(ad->ops_mempool);
1100         rte_mempool_free(ad->in_mbuf_pool);
1101         rte_mempool_free(ad->hard_out_mbuf_pool);
1102         rte_mempool_free(ad->soft_out_mbuf_pool);
1103         rte_mempool_free(ad->harq_in_mbuf_pool);
1104         rte_mempool_free(ad->harq_out_mbuf_pool);
1105
1106         for (i = 0; i < rte_lcore_count(); ++i) {
1107                 for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1108                         rte_free(op_params->q_bufs[j][i].inputs);
1109                         rte_free(op_params->q_bufs[j][i].hard_outputs);
1110                         rte_free(op_params->q_bufs[j][i].soft_outputs);
1111                         rte_free(op_params->q_bufs[j][i].harq_inputs);
1112                         rte_free(op_params->q_bufs[j][i].harq_outputs);
1113                 }
1114         }
1115 }
1116
1117 static void
1118 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1119                 unsigned int start_idx,
1120                 struct rte_bbdev_op_data *inputs,
1121                 struct rte_bbdev_op_data *hard_outputs,
1122                 struct rte_bbdev_op_data *soft_outputs,
1123                 struct rte_bbdev_dec_op *ref_op)
1124 {
1125         unsigned int i;
1126         struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1127
1128         for (i = 0; i < n; ++i) {
1129                 if (turbo_dec->code_block_mode == 0) {
1130                         ops[i]->turbo_dec.tb_params.ea =
1131                                         turbo_dec->tb_params.ea;
1132                         ops[i]->turbo_dec.tb_params.eb =
1133                                         turbo_dec->tb_params.eb;
1134                         ops[i]->turbo_dec.tb_params.k_pos =
1135                                         turbo_dec->tb_params.k_pos;
1136                         ops[i]->turbo_dec.tb_params.k_neg =
1137                                         turbo_dec->tb_params.k_neg;
1138                         ops[i]->turbo_dec.tb_params.c =
1139                                         turbo_dec->tb_params.c;
1140                         ops[i]->turbo_dec.tb_params.c_neg =
1141                                         turbo_dec->tb_params.c_neg;
1142                         ops[i]->turbo_dec.tb_params.cab =
1143                                         turbo_dec->tb_params.cab;
1144                         ops[i]->turbo_dec.tb_params.r =
1145                                         turbo_dec->tb_params.r;
1146                 } else {
1147                         ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1148                         ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1149                 }
1150
1151                 ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1152                 ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1153                 ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1154                 ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1155                 ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1156                 ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1157                 ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1158
1159                 ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1160                 ops[i]->turbo_dec.input = inputs[start_idx + i];
1161                 if (soft_outputs != NULL)
1162                         ops[i]->turbo_dec.soft_output =
1163                                 soft_outputs[start_idx + i];
1164         }
1165 }
1166
1167 static void
1168 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1169                 unsigned int start_idx,
1170                 struct rte_bbdev_op_data *inputs,
1171                 struct rte_bbdev_op_data *outputs,
1172                 struct rte_bbdev_enc_op *ref_op)
1173 {
1174         unsigned int i;
1175         struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1176         for (i = 0; i < n; ++i) {
1177                 if (turbo_enc->code_block_mode == 0) {
1178                         ops[i]->turbo_enc.tb_params.ea =
1179                                         turbo_enc->tb_params.ea;
1180                         ops[i]->turbo_enc.tb_params.eb =
1181                                         turbo_enc->tb_params.eb;
1182                         ops[i]->turbo_enc.tb_params.k_pos =
1183                                         turbo_enc->tb_params.k_pos;
1184                         ops[i]->turbo_enc.tb_params.k_neg =
1185                                         turbo_enc->tb_params.k_neg;
1186                         ops[i]->turbo_enc.tb_params.c =
1187                                         turbo_enc->tb_params.c;
1188                         ops[i]->turbo_enc.tb_params.c_neg =
1189                                         turbo_enc->tb_params.c_neg;
1190                         ops[i]->turbo_enc.tb_params.cab =
1191                                         turbo_enc->tb_params.cab;
1192                         ops[i]->turbo_enc.tb_params.ncb_pos =
1193                                         turbo_enc->tb_params.ncb_pos;
1194                         ops[i]->turbo_enc.tb_params.ncb_neg =
1195                                         turbo_enc->tb_params.ncb_neg;
1196                         ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1197                 } else {
1198                         ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1199                         ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1200                         ops[i]->turbo_enc.cb_params.ncb =
1201                                         turbo_enc->cb_params.ncb;
1202                 }
1203                 ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1204                 ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1205                 ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1206
1207                 ops[i]->turbo_enc.output = outputs[start_idx + i];
1208                 ops[i]->turbo_enc.input = inputs[start_idx + i];
1209         }
1210 }
1211
1212
1213 /* Returns a random number drawn from a normal distribution
1214  * with mean of 0 and variance of 1
1215  * Marsaglia polar method
1216  */
1217 static double
1218 randn(int n)
1219 {
1220         double S, Z, U1, U2, u, v, fac;
1221
1222         do {
1223                 U1 = (double)rand() / RAND_MAX;
1224                 U2 = (double)rand() / RAND_MAX;
1225                 u = 2. * U1 - 1.;
1226                 v = 2. * U2 - 1.;
1227                 S = u * u + v * v;
1228         } while (S >= 1 || S == 0);
1229         fac = sqrt(-2. * log(S) / S);
1230         Z = (n % 2) ? u * fac : v * fac;
1231         return Z;
1232 }
1233
1234 static inline double
1235 maxstar(double A, double B)
1236 {
1237         if (fabs(A - B) > 5)
1238                 return RTE_MAX(A, B);
1239         else
1240                 return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
1241 }
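
/*
 * maxstar() is the Jacobian logarithm: max*(A, B) = log(exp(A) + exp(B))
 * = max(A, B) + log(1 + exp(-|A - B|)). The correction term is dropped for
 * |A - B| > 5, where it is already below ~0.007.
 */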
1242
1243 /*
1244  * Generate Qm LLRS for Qm==8
1245  * Modulation, AWGN and LLR estimation from max log development
1246  */
1247 static void
1248 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1249 {
1250         int qm = 8;
1251         int qam = 256;
1252         int m, k;
1253         double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1254         /* 5.1.4 of TS38.211 */
1255         const double symbols_I[256] = {
1256                         5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
1257                         5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
1258                         11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
1259                         15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
1260                         15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
1261                         1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
1262                         1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
1263                         15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
1264                         13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
1265                         -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
1266                         -7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
1267                         -1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
1268                         -13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
1269                         -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1270                         -13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
1271                         -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
1272                         -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
1273                         -9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1274                         -13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
1275                         -13, -13, -15, -15, -13, -13, -15, -15};
1276         const double symbols_Q[256] = {
1277                         5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1278                         9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
1279                         15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1280                         11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
1281                         15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
1282                         -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
1283                         -15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
1284                         -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
1285                         -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1286                         -13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
1287                         7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1288                         9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
1289                         13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
1290                         3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
1291                         13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
1292                         -5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
1293                         -13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
1294                         -13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
1295                         -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1296                         -13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
1297         /* Average constellation point energy: 2 * (1 + 9 + ... + 225) / 8 = 170 */
1298         N0 *= 170.0;
1299         for (k = 0; k < qm; k++)
1300                 b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1301         /* 5.1.4 of TS38.211 */
1302         I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
1303                         (4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
1304         Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
1305                         (4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
1306         /* AWGN channel */
1307         I += sqrt(N0 / 2) * randn(0);
1308         Q += sqrt(N0 / 2) * randn(1);
1309         /*
1310          * Calculate the log of the probability that each of
1311          * the constellation points was transmitted
1312          */
1313         for (m = 0; m < qam; m++)
1314                 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1315                                 + pow(Q - symbols_Q[m], 2.0)) / N0;
1316         /* Calculate an LLR for each of the qm bits in the set */
1317         for (k = 0; k < qm; k++) {
1318                 p0 = -999999;
1319                 p1 = -999999;
1320                 /* For each constellation point */
1321                 for (m = 0; m < qam; m++) {
1322                         if ((m >> (qm - k - 1)) & 1)
1323                                 p1 = maxstar(p1, log_syml_prob[m]);
1324                         else
1325                                 p0 = maxstar(p0, log_syml_prob[m]);
1326                 }
1327                 /* Calculate the LLR */
1328                 llr_ = p0 - p1;
1329                 llr_ *= (1 << ldpc_llr_decimals);
1330                 llr_ = round(llr_);
1331                 if (llr_ > llr_max)
1332                         llr_ = llr_max;
1333                 if (llr_ < -llr_max)
1334                         llr_ = -llr_max;
1335                 llrs[qm * i + k] = (int8_t) llr_;
1336         }
1337 }
1338
1339
1340 /*
1341  * Generate Qm LLRs for Qm==6 (64QAM)
1342  * Modulation, AWGN channel and LLR estimation using the max-log approximation
1343  */
1344 static void
1345 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1346 {
1347         int qm = 6;
1348         int qam = 64;
1349         int m, k;
1350         double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1351         /* 5.1.4 of TS38.211 */
1352         const double symbols_I[64] = {
1353                         3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1354                         3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1355                         -3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
1356                         -5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
1357                         -5, -5, -7, -7, -5, -5, -7, -7};
1358         const double symbols_Q[64] = {
1359                         3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
1360                         -3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
1361                         -5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1362                         5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
1363                         -3, -1, -3, -1, -5, -7, -5, -7};
1364         /* Average constellation point energy: 2 * (1 + 9 + 25 + 49) / 4 = 42 */
1365         N0 *= 42.0;
1366         for (k = 0; k < qm; k++)
1367                 b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1368         /* 5.1.4 of TS38.211 */
1369         I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
1370         Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
1371         /* AWGN channel */
1372         I += sqrt(N0 / 2) * randn(0);
1373         Q += sqrt(N0 / 2) * randn(1);
1374         /*
1375          * Calculate the log of the probability that each of
1376          * the constellation points was transmitted
1377          */
1378         for (m = 0; m < qam; m++)
1379                 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1380                                 + pow(Q - symbols_Q[m], 2.0)) / N0;
1381         /* Calculate an LLR for each of the qm bits in the set */
1382         for (k = 0; k < qm; k++) {
1383                 p0 = -999999;
1384                 p1 = -999999;
1385                 /* For each constellation point */
1386                 for (m = 0; m < qam; m++) {
1387                         if ((m >> (qm - k - 1)) & 1)
1388                                 p1 = maxstar(p1, log_syml_prob[m]);
1389                         else
1390                                 p0 = maxstar(p0, log_syml_prob[m]);
1391                 }
1392                 /* Calculate the LLR */
1393                 llr_ = p0 - p1;
1394                 llr_ *= (1 << ldpc_llr_decimals);
1395                 llr_ = round(llr_);
1396                 if (llr_ > llr_max)
1397                         llr_ = llr_max;
1398                 if (llr_ < -llr_max)
1399                         llr_ = -llr_max;
1400                 llrs[qm * i + k] = (int8_t) llr_;
1401         }
1402 }
1403
1404 /*
1405  * Generate Qm LLRs for Qm==4 (16QAM)
1406  * Modulation, AWGN channel and LLR estimation using the max-log approximation
1407  */
1408 static void
1409 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1410 {
1411         int qm = 4;
1412         int qam = 16;
1413         int m, k;
1414         double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1415         /* 5.1.4 of TS38.211 */
1416         const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
1417                         -1, -1, -3, -3, -1, -1, -3, -3};
1418         const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
1419                         1, 3, 1, 3, -1, -3, -1, -3};
1420         /* Average constellation point energy: 2 * (1 + 9) / 2 = 10 */
1421         N0 *= 10.0;
1422         for (k = 0; k < qm; k++)
1423                 b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1424         /* 5.1.4 of TS38.211 */
1425         I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
1426         Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
1427         /* AWGN channel */
1428         I += sqrt(N0 / 2) * randn(0);
1429         Q += sqrt(N0 / 2) * randn(1);
1430         /*
1431          * Calculate the log of the probability that each of
1432          * the constellation points was transmitted
1433          */
1434         for (m = 0; m < qam; m++)
1435                 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1436                                 + pow(Q - symbols_Q[m], 2.0)) / N0;
1437         /* Calculate an LLR for each of the qm bits in the set */
1438         for (k = 0; k < qm; k++) {
1439                 p0 = -999999;
1440                 p1 = -999999;
1441                 /* For each constellation point */
1442                 for (m = 0; m < qam; m++) {
1443                         if ((m >> (qm - k - 1)) & 1)
1444                                 p1 = maxstar(p1, log_syml_prob[m]);
1445                         else
1446                                 p0 = maxstar(p0, log_syml_prob[m]);
1447                 }
1448                 /* Calculate the LLR */
1449                 llr_ = p0 - p1;
1450                 llr_ *= (1 << ldpc_llr_decimals);
1451                 llr_ = round(llr_);
1452                 if (llr_ > llr_max)
1453                         llr_ = llr_max;
1454                 if (llr_ < -llr_max)
1455                         llr_ = -llr_max;
1456                 llrs[qm * i + k] = (int8_t) llr_;
1457         }
1458 }
1459
1460 static void
1461 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1462 {
1463         double b, b1, n;
1464         double coeff = 2.0 * sqrt(N0);
1465
1466         /* Ignore the rare quasi-null LLRs present in vectors so they do not get saturated */
1467         if (llrs[j] < 8 && llrs[j] > -8)
1468                 return;
1469
1470         /* Note: do not change the sign here */
1471         n = randn(j % 2);
1472         b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1473                         + coeff * n) / N0;
1474         b = b1 * (1 << ldpc_llr_decimals);
1475         b = round(b);
1476         if (b > llr_max)
1477                 b = llr_max;
1478         if (b < -llr_max)
1479                 b = -llr_max;
1480         llrs[j] = (int8_t) b;
1481 }
1482
1483 /* Generate LLR for a given SNR */
1484 static void
1485 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1486                 struct rte_bbdev_dec_op *ref_op)
1487 {
1488         struct rte_mbuf *m;
1489         uint16_t qm;
1490         uint32_t i, j, e, range;
1491         double N0, llr_max;
1492
1493         e = ref_op->ldpc_dec.cb_params.e;
1494         qm = ref_op->ldpc_dec.q_m;
1495         llr_max = (1 << (ldpc_llr_size - 1)) - 1;
1496         range = e / qm;
1497         N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1498
1499         for (i = 0; i < n; ++i) {
1500                 m = inputs[i].data;
1501                 int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1502                 if (qm == 8) {
1503                         for (j = 0; j < range; ++j)
1504                                 gen_qm8_llr(llrs, j, N0, llr_max);
1505                 } else if (qm == 6) {
1506                         for (j = 0; j < range; ++j)
1507                                 gen_qm6_llr(llrs, j, N0, llr_max);
1508                 } else if (qm == 4) {
1509                         for (j = 0; j < range; ++j)
1510                                 gen_qm4_llr(llrs, j, N0, llr_max);
1511                 } else {
1512                         for (j = 0; j < e; ++j)
1513                                 gen_qm2_llr(llrs, j, N0, llr_max);
1514                 }
1515         }
1516 }
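
/*
 * Worked example for generate_llr_input() above (illustrative values only):
 * for an SNR of 3 dB, N0 = 1 / 10^(3/10) ~ 0.501; with ldpc_llr_size = 8 the
 * saturation level is llr_max = (1 << 7) - 1 = 127. For q_m = 6 and e = 6000,
 * range = e / qm = 1000 symbols are noise-loaded per code block.
 */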
1517
1518 static void
1519 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1520                 unsigned int start_idx,
1521                 struct rte_bbdev_op_data *inputs,
1522                 struct rte_bbdev_op_data *hard_outputs,
1523                 struct rte_bbdev_op_data *soft_outputs,
1524                 struct rte_bbdev_op_data *harq_inputs,
1525                 struct rte_bbdev_op_data *harq_outputs,
1526                 struct rte_bbdev_dec_op *ref_op)
1527 {
1528         unsigned int i;
1529         struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1530
1531         for (i = 0; i < n; ++i) {
1532                 if (ldpc_dec->code_block_mode == 0) {
1533                         ops[i]->ldpc_dec.tb_params.ea =
1534                                         ldpc_dec->tb_params.ea;
1535                         ops[i]->ldpc_dec.tb_params.eb =
1536                                         ldpc_dec->tb_params.eb;
1537                         ops[i]->ldpc_dec.tb_params.c =
1538                                         ldpc_dec->tb_params.c;
1539                         ops[i]->ldpc_dec.tb_params.cab =
1540                                         ldpc_dec->tb_params.cab;
1541                         ops[i]->ldpc_dec.tb_params.r =
1542                                         ldpc_dec->tb_params.r;
1543                 } else {
1544                         ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1545                 }
1546
1547                 ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1548                 ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1549                 ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1550                 ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1551                 ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1552                 ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1553                 ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1554                 ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1555                 ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1556
1557                 if (hard_outputs != NULL)
1558                         ops[i]->ldpc_dec.hard_output =
1559                                         hard_outputs[start_idx + i];
1560                 if (inputs != NULL)
1561                         ops[i]->ldpc_dec.input =
1562                                         inputs[start_idx + i];
1563                 if (soft_outputs != NULL)
1564                         ops[i]->ldpc_dec.soft_output =
1565                                         soft_outputs[start_idx + i];
1566                 if (harq_inputs != NULL)
1567                         ops[i]->ldpc_dec.harq_combined_input =
1568                                         harq_inputs[start_idx + i];
1569                 if (harq_outputs != NULL)
1570                         ops[i]->ldpc_dec.harq_combined_output =
1571                                         harq_outputs[start_idx + i];
1572         }
1573 }
1574
1575
1576 static void
1577 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1578                 unsigned int start_idx,
1579                 struct rte_bbdev_op_data *inputs,
1580                 struct rte_bbdev_op_data *outputs,
1581                 struct rte_bbdev_enc_op *ref_op)
1582 {
1583         unsigned int i;
1584         struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1585         for (i = 0; i < n; ++i) {
1586                 if (ldpc_enc->code_block_mode == 0) {
1587                         ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1588                         ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1589                         ops[i]->ldpc_enc.tb_params.cab =
1590                                         ldpc_enc->tb_params.cab;
1591                         ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1592                         ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1593                 } else {
1594                         ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1595                 }
1596                 ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1597                 ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1598                 ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1599                 ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1600                 ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1601                 ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1602                 ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1603                 ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1604                 ops[i]->ldpc_enc.output = outputs[start_idx + i];
1605                 ops[i]->ldpc_enc.input = inputs[start_idx + i];
1606         }
1607 }
1608
1609 static int
1610 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1611                 unsigned int order_idx, const int expected_status)
1612 {
1613         int status = op->status;
1614         /* ignore parity mismatch false alarms for long iterations */
1615         if (get_iter_max() >= 10) {
1616                 if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1617                                 (status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1618                         printf("WARNING: Ignore Syndrome Check mismatch\n");
1619                         status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1620                 }
1621                 if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1622                                 !(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1623                         printf("WARNING: Ignore Syndrome Check mismatch\n");
1624                         status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1625                 }
1626         }
1627
1628         TEST_ASSERT(status == expected_status,
1629                         "op_status (%d) != expected_status (%d)",
1630                         op->status, expected_status);
1631
1632         TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1633                         "Ordering error, expected %p, got %p",
1634                         (void *)(uintptr_t)order_idx, op->opaque_data);
1635
1636         return TEST_SUCCESS;
1637 }
1638
1639 static int
1640 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1641                 unsigned int order_idx, const int expected_status)
1642 {
1643         TEST_ASSERT(op->status == expected_status,
1644                         "op_status (%d) != expected_status (%d)",
1645                         op->status, expected_status);
1646
1647         if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
1648                 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1649                                 "Ordering error, expected %p, got %p",
1650                                 (void *)(uintptr_t)order_idx, op->opaque_data);
1651
1652         return TEST_SUCCESS;
1653 }
1654
1655 static inline int
1656 validate_op_chain(struct rte_bbdev_op_data *op,
1657                 struct op_data_entries *orig_op)
1658 {
1659         uint8_t i;
1660         struct rte_mbuf *m = op->data;
1661         uint8_t nb_dst_segments = orig_op->nb_segments;
1662         uint32_t total_data_size = 0;
1663
1664         TEST_ASSERT(nb_dst_segments == m->nb_segs,
1665                         "Number of segments differs between original (%u) and filled (%u) op",
1666                         nb_dst_segments, m->nb_segs);
1667
1668         /* Validate each mbuf segment length */
1669         for (i = 0; i < nb_dst_segments; ++i) {
1670                 /* Apply offset to the first mbuf segment */
1671                 uint16_t offset = (i == 0) ? op->offset : 0;
1672                 uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1673                 total_data_size += orig_op->segments[i].length;
1674
1675                 TEST_ASSERT(orig_op->segments[i].length == data_len,
1676                                 "Length of segment differs between original (%u) and filled (%u) op",
1677                                 orig_op->segments[i].length, data_len);
1678                 TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1679                                 rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1680                                 data_len,
1681                                 "Output buffers (CB=%u) are not equal", i);
1682                 m = m->next;
1683         }
1684
1685         /* Validate total mbuf pkt length */
1686         uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1687         TEST_ASSERT(total_data_size == pkt_len,
1688                         "Length of data differs between original (%u) and filled (%u) op",
1689                         total_data_size, pkt_len);
1690
1691         return TEST_SUCCESS;
1692 }
1693
1694 /*
1695  * Compute K0 for a given configuration, used for the HARQ output length
1696  * computation, as defined in 3GPP TS 38.212 Table 5.4.2.1-2
1697  */
1698 static inline uint16_t
1699 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
1700 {
1701         if (rv_index == 0)
1702                 return 0;
1703         uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
1704         if (n_cb == n) {
1705                 if (rv_index == 1)
1706                         return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
1707                 else if (rv_index == 2)
1708                         return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
1709                 else
1710                         return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
1711         }
1712         /* LBRM case - includes a division by N */
1713         if (rv_index == 1)
1714                 return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
1715                                 / n) * z_c;
1716         else if (rv_index == 2)
1717                 return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
1718                                 / n) * z_c;
1719         else
1720                 return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
1721                                 / n) * z_c;
1722 }
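
/*
 * Worked example for get_k0() above (illustrative values only): for BG 1 and
 * Zc = 384, N = 66 * 384 = 25344. With n_cb = N and rv_index = 2,
 * K0 = 33 * 384 = 12672. In the LBRM case with n_cb = 12672,
 * K0 = ((33 * 12672) / 25344) * 384 = 16 * 384 = 6144 (integer division by N,
 * as in the code above).
 */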
1723
1724 /* HARQ output length including the Filler bits */
1725 static inline uint16_t
1726 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
1727 {
1728         uint16_t k0 = 0;
1729         uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
1730         k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
1731         /* Compute RM out size and number of rows */
1732         uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1733                         * ops_ld->z_c - ops_ld->n_filler;
1734         uint16_t deRmOutSize = RTE_MIN(
1735                         k0 + ops_ld->cb_params.e +
1736                         ((k0 > parity_offset) ?
1737                                         0 : ops_ld->n_filler),
1738                                         ops_ld->n_cb);
1739         uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
1740                         / ops_ld->z_c);
1741         uint16_t harq_output_len = numRows * ops_ld->z_c;
1742         return harq_output_len;
1743 }
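
/*
 * Worked example for compute_harq_len() above (illustrative values only):
 * BG 1, z_c = 128, n_cb = 66 * 128 = 8448, n_filler = 80, cb_params.e = 4000
 * and rv_index = 0 give k0 = 0, parity_offset = 20 * 128 - 80 = 2480,
 * deRmOutSize = min(0 + 4000 + 80, 8448) = 4080,
 * numRows = ceil(4080 / 128) = 32, hence a HARQ output length of
 * 32 * 128 = 4096.
 */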
1744
1745 static inline int
1746 validate_op_harq_chain(struct rte_bbdev_op_data *op,
1747                 struct op_data_entries *orig_op,
1748                 struct rte_bbdev_op_ldpc_dec *ops_ld)
1749 {
1750         uint8_t i;
1751         uint32_t j, jj, k;
1752         struct rte_mbuf *m = op->data;
1753         uint8_t nb_dst_segments = orig_op->nb_segments;
1754         uint32_t total_data_size = 0;
1755         int8_t *harq_orig, *harq_out, abs_harq_origin;
1756         uint32_t byte_error = 0, cum_error = 0, error;
1757         int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
1758         int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1759         uint16_t parity_offset;
1760
1761         TEST_ASSERT(nb_dst_segments == m->nb_segs,
1762                         "Number of segments differs between original (%u) and filled (%u) op",
1763                         nb_dst_segments, m->nb_segs);
1764
1765         /* Validate each mbuf segment length */
1766         for (i = 0; i < nb_dst_segments; ++i) {
1767                 /* Apply offset to the first mbuf segment */
1768                 uint16_t offset = (i == 0) ? op->offset : 0;
1769                 uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1770                 total_data_size += orig_op->segments[i].length;
1771
1772                 TEST_ASSERT(orig_op->segments[i].length <
1773                                 (uint32_t)(data_len + 64),
1774                                 "Length of segment differs between original (%u) and filled (%u) op",
1775                                 orig_op->segments[i].length, data_len);
1776                 harq_orig = (int8_t *) orig_op->segments[i].addr;
1777                 harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
1778
1779                 if (!(ldpc_cap_flags &
1780                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
1781                                 ) || (ops_ld->op_flags &
1782                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1783                         data_len -= ops_ld->z_c;
1784                         parity_offset = data_len;
1785                 } else {
1786                         /* Compute RM out size and number of rows */
1787                         parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1788                                         * ops_ld->z_c - ops_ld->n_filler;
1789                         uint16_t deRmOutSize = compute_harq_len(ops_ld) -
1790                                         ops_ld->n_filler;
1791                         if (data_len > deRmOutSize)
1792                                 data_len = deRmOutSize;
1793                         if (data_len > orig_op->segments[i].length)
1794                                 data_len = orig_op->segments[i].length;
1795                 }
1796                 /*
1797                  * HARQ output can have minor differences
1798                  * due to integer representation and related scaling
1799                  */
1800                 for (j = 0, jj = 0; j < data_len; j++, jj++) {
1801                         if (j == parity_offset) {
1802                                 /* Special Handling of the filler bits */
1803                                 for (k = 0; k < ops_ld->n_filler; k++) {
1804                                         if (harq_out[jj] !=
1805                                                         llr_max_pre_scaling) {
1806                                                 printf("HARQ Filler issue %d: %d %d\n",
1807                                                         jj, harq_out[jj],
1808                                                         llr_max_pre_scaling);
1809                                                 byte_error++;
1810                                         }
1811                                         jj++;
1812                                 }
1813                         }
1814                         if (!(ops_ld->op_flags &
1815                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1816                                 if (ldpc_llr_decimals > 1)
1817                                         harq_out[jj] = (harq_out[jj] + 1)
1818                                                 >> (ldpc_llr_decimals - 1);
1819                                 /* Saturated to S7 */
1820                                 if (harq_orig[j] > llr_max)
1821                                         harq_orig[j] = llr_max;
1822                                 if (harq_orig[j] < -llr_max)
1823                                         harq_orig[j] = -llr_max;
1824                         }
1825                         if (harq_orig[j] != harq_out[jj]) {
1826                                 error = (harq_orig[j] > harq_out[jj]) ?
1827                                                 harq_orig[j] - harq_out[jj] :
1828                                                 harq_out[jj] - harq_orig[j];
1829                                 abs_harq_origin = harq_orig[j] > 0 ?
1830                                                         harq_orig[j] :
1831                                                         -harq_orig[j];
1832                                 /* Residual quantization error */
1833                                 if ((error > 8 && (abs_harq_origin <
1834                                                 (llr_max - 16))) ||
1835                                                 (error > 16)) {
1836                                         printf("HARQ mismatch %d: exp %d act %d => %d\n",
1837                                                         j, harq_orig[j],
1838                                                         harq_out[jj], error);
1839                                         byte_error++;
1840                                         cum_error += error;
1841                                 }
1842                         }
1843                 }
1844                 m = m->next;
1845         }
1846
1847         if (byte_error)
1848                 TEST_ASSERT(byte_error <= 1,
1849                                 "HARQ output mismatch (%d) %d",
1850                                 byte_error, cum_error);
1851
1852         /* Validate total mbuf pkt length */
1853         uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1854         TEST_ASSERT(total_data_size < pkt_len + 64,
1855                         "Length of data differs between original (%u) and filled (%u) op",
1856                         total_data_size, pkt_len);
1857
1858         return TEST_SUCCESS;
1859 }
1860
1861 static int
1862 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1863                 struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1864 {
1865         unsigned int i;
1866         int ret;
1867         struct op_data_entries *hard_data_orig =
1868                         &test_vector.entries[DATA_HARD_OUTPUT];
1869         struct op_data_entries *soft_data_orig =
1870                         &test_vector.entries[DATA_SOFT_OUTPUT];
1871         struct rte_bbdev_op_turbo_dec *ops_td;
1872         struct rte_bbdev_op_data *hard_output;
1873         struct rte_bbdev_op_data *soft_output;
1874         struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
1875
1876         for (i = 0; i < n; ++i) {
1877                 ops_td = &ops[i]->turbo_dec;
1878                 hard_output = &ops_td->hard_output;
1879                 soft_output = &ops_td->soft_output;
1880
1881                 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1882                         TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1883                                         "Returned iter_count (%d) > expected iter_count (%d)",
1884                                         ops_td->iter_count, ref_td->iter_count);
1885                 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1886                 TEST_ASSERT_SUCCESS(ret,
1887                                 "Checking status and ordering for decoder failed");
1888
1889                 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1890                                 hard_data_orig),
1891                                 "Hard output buffers (CB=%u) are not equal",
1892                                 i);
1893
1894                 if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
1895                         TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1896                                         soft_data_orig),
1897                                         "Soft output buffers (CB=%u) are not equal",
1898                                         i);
1899         }
1900
1901         return TEST_SUCCESS;
1902 }
1903
1904 /* Check the number of code block errors */
1905 static int
1906 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
1907 {
1908         unsigned int i;
1909         struct op_data_entries *hard_data_orig =
1910                         &test_vector.entries[DATA_HARD_OUTPUT];
1911         struct rte_bbdev_op_ldpc_dec *ops_td;
1912         struct rte_bbdev_op_data *hard_output;
1913         int errors = 0;
1914         struct rte_mbuf *m;
1915
1916         for (i = 0; i < n; ++i) {
1917                 ops_td = &ops[i]->ldpc_dec;
1918                 hard_output = &ops_td->hard_output;
1919                 m = hard_output->data;
1920                 if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
1921                                 hard_data_orig->segments[0].addr,
1922                                 hard_data_orig->segments[0].length))
1923                         errors++;
1924         }
1925         return errors;
1926 }
1927
1928 static int
1929 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1930                 struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1931 {
1932         unsigned int i;
1933         int ret;
1934         struct op_data_entries *hard_data_orig =
1935                         &test_vector.entries[DATA_HARD_OUTPUT];
1936         struct op_data_entries *soft_data_orig =
1937                         &test_vector.entries[DATA_SOFT_OUTPUT];
1938         struct op_data_entries *harq_data_orig =
1939                                 &test_vector.entries[DATA_HARQ_OUTPUT];
1940         struct rte_bbdev_op_ldpc_dec *ops_td;
1941         struct rte_bbdev_op_data *hard_output;
1942         struct rte_bbdev_op_data *harq_output;
1943         struct rte_bbdev_op_data *soft_output;
1944         struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
1945
1946         for (i = 0; i < n; ++i) {
1947                 ops_td = &ops[i]->ldpc_dec;
1948                 hard_output = &ops_td->hard_output;
1949                 harq_output = &ops_td->harq_combined_output;
1950                 soft_output = &ops_td->soft_output;
1951
1952                 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1953                 TEST_ASSERT_SUCCESS(ret,
1954                                 "Checking status and ordering for decoder failed");
1955                 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1956                         TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1957                                         "Returned iter_count (%d) > expected iter_count (%d)",
1958                                         ops_td->iter_count, ref_td->iter_count);
1959                 /*
1960                  * We can ignore output data when the decoding failed to
1961                  * converge or for loop-back cases
1962                  */
1963                 if (!check_bit(ops[i]->ldpc_dec.op_flags,
1964                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
1965                                 ) && (
1966                                 ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
1967                                                 )) == 0)
1968                         TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1969                                         hard_data_orig),
1970                                         "Hard output buffers (CB=%u) are not equal",
1971                                         i);
1972
1973                 if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
1974                         TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1975                                         soft_data_orig),
1976                                         "Soft output buffers (CB=%u) are not equal",
1977                                         i);
1978                 if (ref_op->ldpc_dec.op_flags &
1979                                 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
1980                         TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
1981                                         harq_data_orig, ops_td),
1982                                         "HARQ output buffers (CB=%u) are not equal",
1983                                         i);
1984                 }
1985                 if (ref_op->ldpc_dec.op_flags &
1986                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1987                         TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
1988                                         harq_data_orig, ops_td),
1989                                         "HARQ output buffers (CB=%u) are not equal",
1990                                         i);
1991
1992         }
1993
1994         return TEST_SUCCESS;
1995 }
1996
1997
1998 static int
1999 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2000                 struct rte_bbdev_enc_op *ref_op)
2001 {
2002         unsigned int i;
2003         int ret;
2004         struct op_data_entries *hard_data_orig =
2005                         &test_vector.entries[DATA_HARD_OUTPUT];
2006
2007         for (i = 0; i < n; ++i) {
2008                 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2009                 TEST_ASSERT_SUCCESS(ret,
2010                                 "Checking status and ordering for encoder failed");
2011                 TEST_ASSERT_SUCCESS(validate_op_chain(
2012                                 &ops[i]->turbo_enc.output,
2013                                 hard_data_orig),
2014                                 "Output buffers (CB=%u) are not equal",
2015                                 i);
2016         }
2017
2018         return TEST_SUCCESS;
2019 }
2020
2021 static int
2022 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2023                 struct rte_bbdev_enc_op *ref_op)
2024 {
2025         unsigned int i;
2026         int ret;
2027         struct op_data_entries *hard_data_orig =
2028                         &test_vector.entries[DATA_HARD_OUTPUT];
2029
2030         for (i = 0; i < n; ++i) {
2031                 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2032                 TEST_ASSERT_SUCCESS(ret,
2033                                 "Checking status and ordering for encoder failed");
2034                 TEST_ASSERT_SUCCESS(validate_op_chain(
2035                                 &ops[i]->ldpc_enc.output,
2036                                 hard_data_orig),
2037                                 "Output buffers (CB=%u) are not equal",
2038                                 i);
2039         }
2040
2041         return TEST_SUCCESS;
2042 }
2043
2044 static void
2045 create_reference_dec_op(struct rte_bbdev_dec_op *op)
2046 {
2047         unsigned int i;
2048         struct op_data_entries *entry;
2049
2050         op->turbo_dec = test_vector.turbo_dec;
2051         entry = &test_vector.entries[DATA_INPUT];
2052         for (i = 0; i < entry->nb_segments; ++i)
2053                 op->turbo_dec.input.length +=
2054                                 entry->segments[i].length;
2055 }
2056
2057 static void
2058 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2059 {
2060         unsigned int i;
2061         struct op_data_entries *entry;
2062
2063         op->ldpc_dec = test_vector.ldpc_dec;
2064         entry = &test_vector.entries[DATA_INPUT];
2065         for (i = 0; i < entry->nb_segments; ++i)
2066                 op->ldpc_dec.input.length +=
2067                                 entry->segments[i].length;
2068         if (test_vector.ldpc_dec.op_flags &
2069                         RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
2070                 entry = &test_vector.entries[DATA_HARQ_INPUT];
2071                 for (i = 0; i < entry->nb_segments; ++i)
2072                         op->ldpc_dec.harq_combined_input.length +=
2073                                 entry->segments[i].length;
2074         }
2075 }
2076
2077
2078 static void
2079 create_reference_enc_op(struct rte_bbdev_enc_op *op)
2080 {
2081         unsigned int i;
2082         struct op_data_entries *entry;
2083
2084         op->turbo_enc = test_vector.turbo_enc;
2085         entry = &test_vector.entries[DATA_INPUT];
2086         for (i = 0; i < entry->nb_segments; ++i)
2087                 op->turbo_enc.input.length +=
2088                                 entry->segments[i].length;
2089 }
2090
2091 static void
2092 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2093 {
2094         unsigned int i;
2095         struct op_data_entries *entry;
2096
2097         op->ldpc_enc = test_vector.ldpc_enc;
2098         entry = &test_vector.entries[DATA_INPUT];
2099         for (i = 0; i < entry->nb_segments; ++i)
2100                 op->ldpc_enc.input.length +=
2101                                 entry->segments[i].length;
2102 }
2103
2104 static uint32_t
2105 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
2106 {
2107         uint8_t i;
2108         uint32_t c, r, tb_size = 0;
2109
2110         if (op->turbo_dec.code_block_mode) {
2111                 tb_size = op->turbo_dec.tb_params.k_neg;
2112         } else {
2113                 c = op->turbo_dec.tb_params.c;
2114                 r = op->turbo_dec.tb_params.r;
2115                 for (i = 0; i < c-r; i++)
2116                         tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
2117                                 op->turbo_dec.tb_params.k_neg :
2118                                 op->turbo_dec.tb_params.k_pos;
2119         }
2120         return tb_size;
2121 }
2122
2123 static uint32_t
2124 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
2125 {
2126         uint8_t i;
2127         uint32_t c, r, tb_size = 0;
2128         uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
2129
2130         if (op->ldpc_dec.code_block_mode) {
2131                 tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
2132         } else {
2133                 c = op->ldpc_dec.tb_params.c;
2134                 r = op->ldpc_dec.tb_params.r;
2135                 for (i = 0; i < c-r; i++)
2136                         tb_size += sys_cols * op->ldpc_dec.z_c
2137                                         - op->ldpc_dec.n_filler;
2138         }
2139         return tb_size;
2140 }
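
/*
 * Worked example for calc_ldpc_dec_TB_size() above (illustrative values only):
 * BG 1 (22 systematic columns), z_c = 384 and n_filler = 144 give
 * 22 * 384 - 144 = 8304 bits per code block; with tb_params.c = 2 and
 * tb_params.r = 0 the transport block size is 2 * 8304 = 16608 bits.
 */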
2141
2142 static uint32_t
2143 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
2144 {
2145         uint8_t i;
2146         uint32_t c, r, tb_size = 0;
2147
2148         if (op->turbo_enc.code_block_mode) {
2149                 tb_size = op->turbo_enc.tb_params.k_neg;
2150         } else {
2151                 c = op->turbo_enc.tb_params.c;
2152                 r = op->turbo_enc.tb_params.r;
2153                 for (i = 0; i < c-r; i++)
2154                         tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
2155                                 op->turbo_enc.tb_params.k_neg :
2156                                 op->turbo_enc.tb_params.k_pos;
2157         }
2158         return tb_size;
2159 }
2160
2161 static uint32_t
2162 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
2163 {
2164         uint8_t i;
2165         uint32_t c, r, tb_size = 0;
2166         uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
2167
2168         if (op->ldpc_enc.code_block_mode) {
2169                 tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
2170         } else {
2171                 c = op->ldpc_enc.tb_params.c;
2172                 r = op->ldpc_enc.tb_params.r;
2173                 for (i = 0; i < c-r; i++)
2174                         tb_size += sys_cols * op->ldpc_enc.z_c
2175                                         - op->ldpc_enc.n_filler;
2176         }
2177         return tb_size;
2178 }
2179
2180
2181 static int
2182 init_test_op_params(struct test_op_params *op_params,
2183                 enum rte_bbdev_op_type op_type, const int expected_status,
2184                 const int vector_mask, struct rte_mempool *ops_mp,
2185                 uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2186 {
2187         int ret = 0;
2188         if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2189                         op_type == RTE_BBDEV_OP_LDPC_DEC)
2190                 ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2191                                 &op_params->ref_dec_op, 1);
2192         else
2193                 ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2194                                 &op_params->ref_enc_op, 1);
2195
2196         TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2197
2198         op_params->mp = ops_mp;
2199         op_params->burst_sz = burst_sz;
2200         op_params->num_to_process = num_to_process;
2201         op_params->num_lcores = num_lcores;
2202         op_params->vector_mask = vector_mask;
2203         if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2204                         op_type == RTE_BBDEV_OP_LDPC_DEC)
2205                 op_params->ref_dec_op->status = expected_status;
2206         else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2207                         || op_type == RTE_BBDEV_OP_LDPC_ENC)
2208                 op_params->ref_enc_op->status = expected_status;
2209         return 0;
2210 }
2211
2212 static int
2213 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2214                 struct test_op_params *op_params)
2215 {
2216         int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2217         unsigned int i;
2218         struct active_device *ad;
2219         unsigned int burst_sz = get_burst_sz();
2220         enum rte_bbdev_op_type op_type = test_vector.op_type;
2221         const struct rte_bbdev_op_cap *capabilities = NULL;
2222
2223         ad = &active_devs[dev_id];
2224
2225         /* Check if device supports op_type */
2226         if (!is_avail_op(ad, test_vector.op_type))
2227                 return TEST_SUCCESS;
2228
2229         struct rte_bbdev_info info;
2230         rte_bbdev_info_get(ad->dev_id, &info);
2231         socket_id = GET_SOCKET(info.socket_id);
2232
2233         f_ret = create_mempools(ad, socket_id, op_type,
2234                         get_num_ops());
2235         if (f_ret != TEST_SUCCESS) {
2236                 printf("Couldn't create mempools");
2237                 goto fail;
2238         }
2239         if (op_type == RTE_BBDEV_OP_NONE)
2240                 op_type = RTE_BBDEV_OP_TURBO_ENC;
2241
2242         f_ret = init_test_op_params(op_params, test_vector.op_type,
2243                         test_vector.expected_status,
2244                         test_vector.mask,
2245                         ad->ops_mempool,
2246                         burst_sz,
2247                         get_num_ops(),
2248                         get_num_lcores());
2249         if (f_ret != TEST_SUCCESS) {
2250                 printf("Couldn't init test op params");
2251                 goto fail;
2252         }
2253
2254
2255         /* Find capabilities */
2256         const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2257         for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
2258                 if (cap->type == test_vector.op_type) {
2259                         capabilities = cap;
2260                         break;
2261                 }
2262                 cap++;
2263         }
2264         TEST_ASSERT_NOT_NULL(capabilities,
2265                         "Couldn't find capabilities");
2266
2267         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2268                 create_reference_dec_op(op_params->ref_dec_op);
2269         } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2270                 create_reference_enc_op(op_params->ref_enc_op);
2271         else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2272                 create_reference_ldpc_enc_op(op_params->ref_enc_op);
2273         else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2274                 create_reference_ldpc_dec_op(op_params->ref_dec_op);
2275
2276         for (i = 0; i < ad->nb_queues; ++i) {
2277                 f_ret = fill_queue_buffers(op_params,
2278                                 ad->in_mbuf_pool,
2279                                 ad->hard_out_mbuf_pool,
2280                                 ad->soft_out_mbuf_pool,
2281                                 ad->harq_in_mbuf_pool,
2282                                 ad->harq_out_mbuf_pool,
2283                                 ad->queue_ids[i],
2284                                 capabilities,
2285                                 info.drv.min_alignment,
2286                                 socket_id);
2287                 if (f_ret != TEST_SUCCESS) {
2288                         printf("Couldn't init queue buffers");
2289                         goto fail;
2290                 }
2291         }
2292
2293         /* Run test case function */
2294         t_ret = test_case_func(ad, op_params);
2295
2296         /* Free active device resources and return */
2297         free_buffers(ad, op_params);
2298         return t_ret;
2299
2300 fail:
2301         free_buffers(ad, op_params);
2302         return TEST_FAILED;
2303 }
2304
2305 /* Run given test function per active device per supported op type
2306  * per burst size.
2307  */
2308 static int
2309 run_test_case(test_case_function *test_case_func)
2310 {
2311         int ret = 0;
2312         uint8_t dev;
2313
2314         /* Alloc op_params */
2315         struct test_op_params *op_params = rte_zmalloc(NULL,
2316                         sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
2317         TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
2318                         RTE_ALIGN(sizeof(struct test_op_params),
2319                                 RTE_CACHE_LINE_SIZE));
2320
2321         /* For each device run test case function */
2322         for (dev = 0; dev < nb_active_devs; ++dev)
2323                 ret |= run_test_case_on_device(test_case_func, dev, op_params);
2324
2325         rte_free(op_params);
2326
2327         return ret;
2328 }
2329
2330
2331 /* Push back the HARQ output from DDR to host */
2332 static void
2333 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2334                 struct rte_bbdev_dec_op **ops,
2335                 const uint16_t n)
2336 {
2337         uint16_t j;
2338         int save_status, ret;
2339         uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
2340         struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2341         uint32_t flags = ops[0]->ldpc_dec.op_flags;
2342         bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2343         bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2344         bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2345         bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2346         for (j = 0; j < n; ++j) {
2347                 if ((loopback && mem_out) || hc_out) {
2348                         save_status = ops[j]->status;
2349                         ops[j]->ldpc_dec.op_flags =
2350                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2351                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2352                         if (h_comp)
2353                                 ops[j]->ldpc_dec.op_flags +=
2354                                         RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2355                         ops[j]->ldpc_dec.harq_combined_input.offset =
2356                                         harq_offset;
2357                         ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2358                         harq_offset += HARQ_INCR;
2359                         if (!loopback)
2360                                 ops[j]->ldpc_dec.harq_combined_input.length =
2361                                 ops[j]->ldpc_dec.harq_combined_output.length;
2362                         rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2363                                         &ops[j], 1);
2364                         ret = 0;
2365                         while (ret == 0)
2366                                 ret = rte_bbdev_dequeue_ldpc_dec_ops(
2367                                                 dev_id, queue_id,
2368                                                 &ops_deq[j], 1);
2369                         ops[j]->ldpc_dec.op_flags = flags;
2370                         ops[j]->status = save_status;
2371                 }
2372         }
2373 }
2374
2375 /*
2376  * Preload the HARQ memory input into HW DDR when requested
2377  * and adjust the HARQ offsets accordingly
2378  */
2379 static void
2380 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2381                 struct rte_bbdev_dec_op **ops, const uint16_t n,
2382                 bool preload)
2383 {
2384         uint16_t j;
2385         int ret;
2386         uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
2387         struct rte_bbdev_op_data save_hc_in, save_hc_out;
2388         struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2389         uint32_t flags = ops[0]->ldpc_dec.op_flags;
2390         bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2391         bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2392         bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2393         bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2394         bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2395         for (j = 0; j < n; ++j) {
2396                 if ((mem_in || hc_in) && preload) {
2397                         save_hc_in = ops[j]->ldpc_dec.harq_combined_input;
2398                         save_hc_out = ops[j]->ldpc_dec.harq_combined_output;
2399                         ops[j]->ldpc_dec.op_flags =
2400                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2401                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2402                         if (h_comp)
2403                                 ops[j]->ldpc_dec.op_flags +=
2404                                         RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2405                         ops[j]->ldpc_dec.harq_combined_output.offset =
2406                                         harq_offset;
2407                         ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2408                         rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2409                                         &ops[j], 1);
2410                         ret = 0;
2411                         while (ret == 0)
2412                                 ret = rte_bbdev_dequeue_ldpc_dec_ops(
2413                                         dev_id, queue_id, &ops_deq[j], 1);
2414                         ops[j]->ldpc_dec.op_flags = flags;
2415                         ops[j]->ldpc_dec.harq_combined_input = save_hc_in;
2416                         ops[j]->ldpc_dec.harq_combined_output = save_hc_out;
2417                 }
2418                 /* Adjust HARQ offset when we reach external DDR */
2419                 if (mem_in || hc_in)
2420                         ops[j]->ldpc_dec.harq_combined_input.offset
2421                                 = harq_offset;
2422                 if (mem_out || hc_out)
2423                         ops[j]->ldpc_dec.harq_combined_output.offset
2424                                 = harq_offset;
2425                 harq_offset += HARQ_INCR;
2426         }
2427 }
2428
2429 static void
2430 dequeue_event_callback(uint16_t dev_id,
2431                 enum rte_bbdev_event_type event, void *cb_arg,
2432                 void *ret_param)
2433 {
2434         int ret;
2435         uint16_t i;
2436         uint64_t total_time;
2437         uint16_t deq, burst_sz, num_ops;
2438         uint16_t queue_id = *(uint16_t *) ret_param;
2439         struct rte_bbdev_info info;
2440         double tb_len_bits;
2441         struct thread_params *tp = cb_arg;
2442
2443         /* Find matching thread params using queue_id */
2444         for (i = 0; i < MAX_QUEUES; ++i, ++tp)
2445                 if (tp->queue_id == queue_id)
2446                         break;
2447
2448         if (i == MAX_QUEUES) {
2449                 printf("%s: Queue_id from interrupt details was not found!\n",
2450                                 __func__);
2451                 return;
2452         }
2453
2454         if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
2455                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2456                 printf(
2457                         "Dequeue interrupt handler called for incorrect event!\n");
2458                 return;
2459         }
2460
2461         burst_sz = rte_atomic16_read(&tp->burst_sz);
2462         num_ops = tp->op_params->num_to_process;
2463
2464         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2465                 deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2466                                 &tp->dec_ops[
2467                                         rte_atomic16_read(&tp->nb_dequeued)],
2468                                 burst_sz);
2469         else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2470                 deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2471                                 &tp->dec_ops[
2472                                         rte_atomic16_read(&tp->nb_dequeued)],
2473                                 burst_sz);
2474         else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2475                 deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2476                                 &tp->enc_ops[
2477                                         rte_atomic16_read(&tp->nb_dequeued)],
2478                                 burst_sz);
2479         else /*RTE_BBDEV_OP_TURBO_ENC*/
2480                 deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2481                                 &tp->enc_ops[
2482                                         rte_atomic16_read(&tp->nb_dequeued)],
2483                                 burst_sz);
2484
2485         if (deq < burst_sz) {
2486                 printf(
2487                         "After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
2488                         burst_sz, deq);
2489                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2490                 return;
2491         }
2492
2493         if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
2494                 rte_atomic16_add(&tp->nb_dequeued, deq);
2495                 return;
2496         }
2497
2498         total_time = rte_rdtsc_precise() - tp->start_time;
2499
2500         rte_bbdev_info_get(dev_id, &info);
2501
2502         ret = TEST_SUCCESS;
2503
2504         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2505                 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2506                 ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
2507                                 tp->op_params->vector_mask);
2508                 /* get the max of iter_count for all dequeued ops */
2509                 for (i = 0; i < num_ops; ++i)
2510                         tp->iter_count = RTE_MAX(
2511                                         tp->dec_ops[i]->turbo_dec.iter_count,
2512                                         tp->iter_count);
2513                 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2514         } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
2515                 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2516                 ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
2517                 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2518         } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
2519                 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2520                 ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
2521                 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2522         } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
2523                 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2524                 ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
2525                                 tp->op_params->vector_mask);
2526                 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2527         }
2528
2529         if (ret) {
2530                 printf("Buffers validation failed\n");
2531                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2532         }
2533
2534         switch (test_vector.op_type) {
2535         case RTE_BBDEV_OP_TURBO_DEC:
2536                 tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
2537                 break;
2538         case RTE_BBDEV_OP_TURBO_ENC:
2539                 tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
2540                 break;
2541         case RTE_BBDEV_OP_LDPC_DEC:
2542                 tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
2543                 break;
2544         case RTE_BBDEV_OP_LDPC_ENC:
2545                 tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
2546                 break;
2547         case RTE_BBDEV_OP_NONE:
2548                 tb_len_bits = 0.0;
2549                 break;
2550         default:
2551                 printf("Unknown op type: %d\n", test_vector.op_type);
2552                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2553                 return;
2554         }
2555
2556         tp->ops_per_sec += ((double)num_ops) /
2557                         ((double)total_time / (double)rte_get_tsc_hz());
2558         tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
2559                         ((double)total_time / (double)rte_get_tsc_hz());
2560
2561         rte_atomic16_add(&tp->nb_dequeued, deq);
2562 }
2563
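/* Interrupt-mode throughput worker for decode operations; bursts are enqueued
 * here and drained/timed by dequeue_event_callback().
 */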
2564 static int
2565 throughput_intr_lcore_dec(void *arg)
2566 {
2567         struct thread_params *tp = arg;
2568         unsigned int enqueued;
2569         const uint16_t queue_id = tp->queue_id;
2570         const uint16_t burst_sz = tp->op_params->burst_sz;
2571         const uint16_t num_to_process = tp->op_params->num_to_process;
2572         struct rte_bbdev_dec_op *ops[num_to_process];
2573         struct test_buffers *bufs = NULL;
2574         struct rte_bbdev_info info;
2575         int ret, i, j;
2576         uint16_t num_to_enq, enq;
2577
2578         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2579                         "BURST_SIZE should be <= %u", MAX_BURST);
2580
2581         TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2582                         "Failed to enable interrupts for dev: %u, queue_id: %u",
2583                         tp->dev_id, queue_id);
2584
2585         rte_bbdev_info_get(tp->dev_id, &info);
2586
2587         TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2588                         "NUM_OPS cannot exceed %u for this device",
2589                         info.drv.queue_size_lim);
2590
2591         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2592
2593         rte_atomic16_clear(&tp->processing_status);
2594         rte_atomic16_clear(&tp->nb_dequeued);
2595
2596         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2597                 rte_pause();
2598
2599         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2600                                 num_to_process);
2601         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2602                         num_to_process);
2603         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2604                 copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
2605                                 bufs->hard_outputs, bufs->soft_outputs,
2606                                 tp->op_params->ref_dec_op);
2607
2608         /* Set counter to validate the ordering */
2609         for (j = 0; j < num_to_process; ++j)
2610                 ops[j]->opaque_data = (void *)(uintptr_t)j;
2611
2612         for (j = 0; j < TEST_REPETITIONS; ++j) {
2613                 for (i = 0; i < num_to_process; ++i)
2614                         rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
2615
2616                 tp->start_time = rte_rdtsc_precise();
2617                 for (enqueued = 0; enqueued < num_to_process;) {
2618                         num_to_enq = burst_sz;
2619
2620                         if (unlikely(num_to_process - enqueued < num_to_enq))
2621                                 num_to_enq = num_to_process - enqueued;
2622
2623                         enq = 0;
2624                         do {
2625                                 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2626                                                 queue_id, &ops[enqueued],
2627                                                 num_to_enq);
2628                         } while (unlikely(num_to_enq != enq));
2629                         enqueued += enq;
2630
2631                         /* Store the number of descriptors enqueued in this
2632                          * burst in the thread's burst_sz so that the
2633                          * callback dequeues exactly that many descriptors.
2634                          * This is needed for the last batch when the number
2635                          * of operations is not a multiple of the burst
2636                          * size.
2637                          */
2638                         rte_atomic16_set(&tp->burst_sz, num_to_enq);
2639
2640                         /* Wait until processing of previous batch is
2641                          * completed
2642                          */
2643                         while (rte_atomic16_read(&tp->nb_dequeued) !=
2644                                         (int16_t) enqueued)
2645                                 rte_pause();
2646                 }
2647                 if (j != TEST_REPETITIONS - 1)
2648                         rte_atomic16_clear(&tp->nb_dequeued);
2649         }
2650
2651         return TEST_SUCCESS;
2652 }
2653
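/* Interrupt-mode throughput worker for encode operations, mirroring the
 * decode variant above.
 */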
2654 static int
2655 throughput_intr_lcore_enc(void *arg)
2656 {
2657         struct thread_params *tp = arg;
2658         unsigned int enqueued;
2659         const uint16_t queue_id = tp->queue_id;
2660         const uint16_t burst_sz = tp->op_params->burst_sz;
2661         const uint16_t num_to_process = tp->op_params->num_to_process;
2662         struct rte_bbdev_enc_op *ops[num_to_process];
2663         struct test_buffers *bufs = NULL;
2664         struct rte_bbdev_info info;
2665         int ret, i, j;
2666         uint16_t num_to_enq, enq;
2667
2668         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2669                         "BURST_SIZE should be <= %u", MAX_BURST);
2670
2671         TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2672                         "Failed to enable interrupts for dev: %u, queue_id: %u",
2673                         tp->dev_id, queue_id);
2674
2675         rte_bbdev_info_get(tp->dev_id, &info);
2676
2677         TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2678                         "NUM_OPS cannot exceed %u for this device",
2679                         info.drv.queue_size_lim);
2680
2681         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2682
2683         rte_atomic16_clear(&tp->processing_status);
2684         rte_atomic16_clear(&tp->nb_dequeued);
2685
2686         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2687                 rte_pause();
2688
2689         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2690                         num_to_process);
2691         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2692                         num_to_process);
2693         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2694                 copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
2695                                 bufs->hard_outputs, tp->op_params->ref_enc_op);
2696
2697         /* Set counter to validate the ordering */
2698         for (j = 0; j < num_to_process; ++j)
2699                 ops[j]->opaque_data = (void *)(uintptr_t)j;
2700
2701         for (j = 0; j < TEST_REPETITIONS; ++j) {
2702                 for (i = 0; i < num_to_process; ++i)
2703                         rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
2704
2705                 tp->start_time = rte_rdtsc_precise();
2706                 for (enqueued = 0; enqueued < num_to_process;) {
2707                         num_to_enq = burst_sz;
2708
2709                         if (unlikely(num_to_process - enqueued < num_to_enq))
2710                                 num_to_enq = num_to_process - enqueued;
2711
2712                         enq = 0;
2713                         do {
2714                                 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2715                                                 queue_id, &ops[enqueued],
2716                                                 num_to_enq);
2717                         } while (unlikely(enq != num_to_enq));
2718                         enqueued += enq;
2719
2720                         /* Store the number of descriptors enqueued in this
2721                          * burst in the thread's burst_sz so that the
2722                          * callback dequeues exactly that many descriptors.
2723                          * This is needed for the last batch when the number
2724                          * of operations is not a multiple of the burst
2725                          * size.
2726                          */
2727                         rte_atomic16_set(&tp->burst_sz, num_to_enq);
2728
2729                         /* Wait until processing of previous batch is
2730                          * completed
2731                          */
2732                         while (rte_atomic16_read(&tp->nb_dequeued) !=
2733                                         (int16_t) enqueued)
2734                                 rte_pause();
2735                 }
2736                 if (j != TEST_REPETITIONS - 1)
2737                         rte_atomic16_clear(&tp->nb_dequeued);
2738         }
2739
2740         return TEST_SUCCESS;
2741 }
2742
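/* Polling-mode (PMD) throughput worker for Turbo decode: enqueues and dequeues
 * num_ops operations TEST_REPETITIONS times and derives Ops/s and Mbps from
 * the accumulated TSC time.
 */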
2743 static int
2744 throughput_pmd_lcore_dec(void *arg)
2745 {
2746         struct thread_params *tp = arg;
2747         uint16_t enq, deq;
2748         uint64_t total_time = 0, start_time;
2749         const uint16_t queue_id = tp->queue_id;
2750         const uint16_t burst_sz = tp->op_params->burst_sz;
2751         const uint16_t num_ops = tp->op_params->num_to_process;
2752         struct rte_bbdev_dec_op *ops_enq[num_ops];
2753         struct rte_bbdev_dec_op *ops_deq[num_ops];
2754         struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2755         struct test_buffers *bufs = NULL;
2756         int i, j, ret;
2757         struct rte_bbdev_info info;
2758         uint16_t num_to_enq;
2759
2760         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2761                         "BURST_SIZE should be <= %u", MAX_BURST);
2762
2763         rte_bbdev_info_get(tp->dev_id, &info);
2764
2765         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2766                         "NUM_OPS cannot exceed %u for this device",
2767                         info.drv.queue_size_lim);
2768
2769         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2770
2771         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2772                 rte_pause();
2773
2774         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
2775         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
2776
2777         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2778                 copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
2779                                 bufs->hard_outputs, bufs->soft_outputs, ref_op);
2780
2781         /* Set counter to validate the ordering */
2782         for (j = 0; j < num_ops; ++j)
2783                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2784
2785         for (i = 0; i < TEST_REPETITIONS; ++i) {
2786
2787                 for (j = 0; j < num_ops; ++j)
2788                         mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
2789
2790                 start_time = rte_rdtsc_precise();
2791
2792                 for (enq = 0, deq = 0; enq < num_ops;) {
2793                         num_to_enq = burst_sz;
2794
2795                         if (unlikely(num_ops - enq < num_to_enq))
2796                                 num_to_enq = num_ops - enq;
2797
2798                         enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2799                                         queue_id, &ops_enq[enq], num_to_enq);
2800
2801                         deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
2802                                         queue_id, &ops_deq[deq], enq - deq);
2803                 }
2804
2805                 /* dequeue the remaining */
2806                 while (deq < enq) {
2807                         deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
2808                                         queue_id, &ops_deq[deq], enq - deq);
2809                 }
2810
2811                 total_time += rte_rdtsc_precise() - start_time;
2812         }
2813
2814         tp->iter_count = 0;
2815         /* get the max of iter_count for all dequeued ops */
2816         for (i = 0; i < num_ops; ++i) {
2817                 tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
2818                                 tp->iter_count);
2819         }
2820
2821         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2822                 ret = validate_dec_op(ops_deq, num_ops, ref_op,
2823                                 tp->op_params->vector_mask);
2824                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2825         }
2826
2827         rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
2828
2829         double tb_len_bits = calc_dec_TB_size(ref_op);
2830
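        /*
         * Throughput is averaged over all repetitions:
         *   Ops/s = (num_ops * TEST_REPETITIONS) / (total_time / tsc_hz)
         *   Mbps  = Ops/s * tb_len_bits / 1e6
         * e.g. 512 ops x 1000 repetitions over 0.1 s of TSC time with a
         * 6144-bit TB give ~5.12 MOps/s and ~31457 Mbps (illustrative
         * numbers only).
         */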
2831         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2832                         ((double)total_time / (double)rte_get_tsc_hz());
2833         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
2834                         1000000.0) / ((double)total_time /
2835                         (double)rte_get_tsc_hz());
2836
2837         return TEST_SUCCESS;
2838 }
2839
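/* BLER worker for LDPC decode: runs a single pass over noisy LLR input from
 * generate_llr_input() and derives the block error rate and average iteration
 * count from the dequeued operations.
 */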
2840 static int
2841 bler_pmd_lcore_ldpc_dec(void *arg)
2842 {
2843         struct thread_params *tp = arg;
2844         uint16_t enq, deq;
2845         uint64_t total_time = 0, start_time;
2846         const uint16_t queue_id = tp->queue_id;
2847         const uint16_t burst_sz = tp->op_params->burst_sz;
2848         const uint16_t num_ops = tp->op_params->num_to_process;
2849         struct rte_bbdev_dec_op *ops_enq[num_ops];
2850         struct rte_bbdev_dec_op *ops_deq[num_ops];
2851         struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2852         struct test_buffers *bufs = NULL;
2853         int i, j, ret;
2854         float parity_bler = 0;
2855         struct rte_bbdev_info info;
2856         uint16_t num_to_enq;
2857         bool extDdr = check_bit(ldpc_cap_flags,
2858                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
2859         bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
2860                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
2861         bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
2862                         RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
2863
2864         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2865                         "BURST_SIZE should be <= %u", MAX_BURST);
2866
2867         rte_bbdev_info_get(tp->dev_id, &info);
2868
2869         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2870                         "NUM_OPS cannot exceed %u for this device",
2871                         info.drv.queue_size_lim);
2872
2873         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2874
2875         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2876                 rte_pause();
2877
2878         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
2879         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
2880
2881         /* For BLER tests we need to enable early termination */
2882         if (!check_bit(ref_op->ldpc_dec.op_flags,
2883                         RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
2884                 ref_op->ldpc_dec.op_flags +=
2885                                 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
2886         ref_op->ldpc_dec.iter_max = get_iter_max();
2887         ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
2888
2889         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2890                 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
2891                                 bufs->hard_outputs, bufs->soft_outputs,
2892                                 bufs->harq_inputs, bufs->harq_outputs, ref_op);
2893         generate_llr_input(num_ops, bufs->inputs, ref_op);
2894
2895         /* Set counter to validate the ordering */
2896         for (j = 0; j < num_ops; ++j)
2897                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2898
2899         for (i = 0; i < 1; ++i) { /* Single pass; more iterations could be added */
2900                 for (j = 0; j < num_ops; ++j) {
2901                         if (!loopback)
2902                                 mbuf_reset(
2903                                 ops_enq[j]->ldpc_dec.hard_output.data);
2904                         if (hc_out || loopback)
2905                                 mbuf_reset(
2906                                 ops_enq[j]->ldpc_dec.harq_combined_output.data);
2907                 }
2908                 if (extDdr) {
2909                         bool preload = i == (TEST_REPETITIONS - 1);
2910                         preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
2911                                         num_ops, preload);
2912                 }
2913                 start_time = rte_rdtsc_precise();
2914
2915                 for (enq = 0, deq = 0; enq < num_ops;) {
2916                         num_to_enq = burst_sz;
2917
2918                         if (unlikely(num_ops - enq < num_to_enq))
2919                                 num_to_enq = num_ops - enq;
2920
2921                         enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
2922                                         queue_id, &ops_enq[enq], num_to_enq);
2923
2924                         deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
2925                                         queue_id, &ops_deq[deq], enq - deq);
2926                 }
2927
2928                 /* dequeue the remaining */
2929                 while (deq < enq) {
2930                         deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
2931                                         queue_id, &ops_deq[deq], enq - deq);
2932                 }
2933
2934                 total_time += rte_rdtsc_precise() - start_time;
2935         }
2936
2937         tp->iter_count = 0;
2938         tp->iter_average = 0;
2939         /* get the max of iter_count for all dequeued ops */
2940         for (i = 0; i < num_ops; ++i) {
2941                 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
2942                                 tp->iter_count);
2943                 tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
2944                 if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
2945                         parity_bler += 1.0;
2946         }
2947
2948         parity_bler /= num_ops; /* BLER based on the syndrome error status */
2949         tp->iter_average /= num_ops;
2950         tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
2951
2952         if (test_vector.op_type != RTE_BBDEV_OP_NONE
2953                         && tp->bler == 0
2954                         && parity_bler == 0
2955                         && !hc_out) {
2956                 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
2957                                 tp->op_params->vector_mask);
2958                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2959         }
2960
2961         rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
2962
2963         double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
2964         tp->ops_per_sec = ((double)num_ops * 1) /
2965                         ((double)total_time / (double)rte_get_tsc_hz());
2966         tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
2967                         1000000.0) / ((double)total_time /
2968                         (double)rte_get_tsc_hz());
2969
2970         return TEST_SUCCESS;
2971 }
2972
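/* Polling-mode throughput worker for LDPC decode, with optional preloading and
 * read-back of HARQ data when the device uses internal HARQ memory.
 */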
2973 static int
2974 throughput_pmd_lcore_ldpc_dec(void *arg)
2975 {
2976         struct thread_params *tp = arg;
2977         uint16_t enq, deq;
2978         uint64_t total_time = 0, start_time;
2979         const uint16_t queue_id = tp->queue_id;
2980         const uint16_t burst_sz = tp->op_params->burst_sz;
2981         const uint16_t num_ops = tp->op_params->num_to_process;
2982         struct rte_bbdev_dec_op *ops_enq[num_ops];
2983         struct rte_bbdev_dec_op *ops_deq[num_ops];
2984         struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2985         struct test_buffers *bufs = NULL;
2986         int i, j, ret;
2987         struct rte_bbdev_info info;
2988         uint16_t num_to_enq;
2989         bool extDdr = check_bit(ldpc_cap_flags,
2990                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
2991         bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
2992                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
2993         bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
2994                         RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
2995
2996         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2997                         "BURST_SIZE should be <= %u", MAX_BURST);
2998
2999         rte_bbdev_info_get(tp->dev_id, &info);
3000
3001         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3002                         "NUM_OPS cannot exceed %u for this device",
3003                         info.drv.queue_size_lim);
3004
3005         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3006
3007         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3008                 rte_pause();
3009
3010         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3011         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3012
3013         /* For throughput tests we need to disable early termination */
3014         if (check_bit(ref_op->ldpc_dec.op_flags,
3015                         RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3016                 ref_op->ldpc_dec.op_flags -=
3017                                 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3018         ref_op->ldpc_dec.iter_max = get_iter_max();
3019         ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3020
3021         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3022                 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3023                                 bufs->hard_outputs, bufs->soft_outputs,
3024                                 bufs->harq_inputs, bufs->harq_outputs, ref_op);
3025
3026         /* Set counter to validate the ordering */
3027         for (j = 0; j < num_ops; ++j)
3028                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3029
3030         for (i = 0; i < TEST_REPETITIONS; ++i) {
3031                 for (j = 0; j < num_ops; ++j) {
3032                         if (!loopback)
3033                                 mbuf_reset(
3034                                 ops_enq[j]->ldpc_dec.hard_output.data);
3035                         if (hc_out || loopback)
3036                                 mbuf_reset(
3037                                 ops_enq[j]->ldpc_dec.harq_combined_output.data);
3038                 }
3039                 if (extDdr) {
3040                         bool preload = i == (TEST_REPETITIONS - 1);
3041                         preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3042                                         num_ops, preload);
3043                 }
3044                 start_time = rte_rdtsc_precise();
3045
3046                 for (enq = 0, deq = 0; enq < num_ops;) {
3047                         num_to_enq = burst_sz;
3048
3049                         if (unlikely(num_ops - enq < num_to_enq))
3050                                 num_to_enq = num_ops - enq;
3051
3052                         enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3053                                         queue_id, &ops_enq[enq], num_to_enq);
3054
3055                         deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3056                                         queue_id, &ops_deq[deq], enq - deq);
3057                 }
3058
3059                 /* dequeue the remaining */
3060                 while (deq < enq) {
3061                         deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3062                                         queue_id, &ops_deq[deq], enq - deq);
3063                 }
3064
3065                 total_time += rte_rdtsc_precise() - start_time;
3066         }
3067
3068         tp->iter_count = 0;
3069         /* get the max of iter_count for all dequeued ops */
3070         for (i = 0; i < num_ops; ++i) {
3071                 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3072                                 tp->iter_count);
3073         }
3074         if (extDdr) {
3075                 /* Read loopback is not thread safe */
3076                 retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
3077         }
3078
3079         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3080                 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3081                                 tp->op_params->vector_mask);
3082                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3083         }
3084
3085         rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3086
3087         double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3088
3089         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3090                         ((double)total_time / (double)rte_get_tsc_hz());
3091         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3092                         1000000.0) / ((double)total_time /
3093                         (double)rte_get_tsc_hz());
3094
3095         return TEST_SUCCESS;
3096 }
3097
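/* Polling-mode throughput worker for Turbo encode. */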
3098 static int
3099 throughput_pmd_lcore_enc(void *arg)
3100 {
3101         struct thread_params *tp = arg;
3102         uint16_t enq, deq;
3103         uint64_t total_time = 0, start_time;
3104         const uint16_t queue_id = tp->queue_id;
3105         const uint16_t burst_sz = tp->op_params->burst_sz;
3106         const uint16_t num_ops = tp->op_params->num_to_process;
3107         struct rte_bbdev_enc_op *ops_enq[num_ops];
3108         struct rte_bbdev_enc_op *ops_deq[num_ops];
3109         struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3110         struct test_buffers *bufs = NULL;
3111         int i, j, ret;
3112         struct rte_bbdev_info info;
3113         uint16_t num_to_enq;
3114
3115         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3116                         "BURST_SIZE should be <= %u", MAX_BURST);
3117
3118         rte_bbdev_info_get(tp->dev_id, &info);
3119
3120         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3121                         "NUM_OPS cannot exceed %u for this device",
3122                         info.drv.queue_size_lim);
3123
3124         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3125
3126         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3127                 rte_pause();
3128
3129         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3130                         num_ops);
3131         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3132                         num_ops);
3133         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3134                 copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3135                                 bufs->hard_outputs, ref_op);
3136
3137         /* Set counter to validate the ordering */
3138         for (j = 0; j < num_ops; ++j)
3139                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3140
3141         for (i = 0; i < TEST_REPETITIONS; ++i) {
3142
3143                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3144                         for (j = 0; j < num_ops; ++j)
3145                                 mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3146
3147                 start_time = rte_rdtsc_precise();
3148
3149                 for (enq = 0, deq = 0; enq < num_ops;) {
3150                         num_to_enq = burst_sz;
3151
3152                         if (unlikely(num_ops - enq < num_to_enq))
3153                                 num_to_enq = num_ops - enq;
3154
3155                         enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3156                                         queue_id, &ops_enq[enq], num_to_enq);
3157
3158                         deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3159                                         queue_id, &ops_deq[deq], enq - deq);
3160                 }
3161
3162                 /* dequeue the remaining */
3163                 while (deq < enq) {
3164                         deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3165                                         queue_id, &ops_deq[deq], enq - deq);
3166                 }
3167
3168                 total_time += rte_rdtsc_precise() - start_time;
3169         }
3170
3171         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3172                 ret = validate_enc_op(ops_deq, num_ops, ref_op);
3173                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3174         }
3175
3176         rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3177
3178         double tb_len_bits = calc_enc_TB_size(ref_op);
3179
3180         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3181                         ((double)total_time / (double)rte_get_tsc_hz());
3182         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3183                         / 1000000.0) / ((double)total_time /
3184                         (double)rte_get_tsc_hz());
3185
3186         return TEST_SUCCESS;
3187 }
3188
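/* Polling-mode throughput worker for LDPC encode. */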
3189 static int
3190 throughput_pmd_lcore_ldpc_enc(void *arg)
3191 {
3192         struct thread_params *tp = arg;
3193         uint16_t enq, deq;
3194         uint64_t total_time = 0, start_time;
3195         const uint16_t queue_id = tp->queue_id;
3196         const uint16_t burst_sz = tp->op_params->burst_sz;
3197         const uint16_t num_ops = tp->op_params->num_to_process;
3198         struct rte_bbdev_enc_op *ops_enq[num_ops];
3199         struct rte_bbdev_enc_op *ops_deq[num_ops];
3200         struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3201         struct test_buffers *bufs = NULL;
3202         int i, j, ret;
3203         struct rte_bbdev_info info;
3204         uint16_t num_to_enq;
3205
3206         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3207                         "BURST_SIZE should be <= %u", MAX_BURST);
3208
3209         rte_bbdev_info_get(tp->dev_id, &info);
3210
3211         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3212                         "NUM_OPS cannot exceed %u for this device",
3213                         info.drv.queue_size_lim);
3214
3215         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3216
3217         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3218                 rte_pause();
3219
3220         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3221                         num_ops);
3222         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3223                         num_ops);
3224         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3225                 copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3226                                 bufs->hard_outputs, ref_op);
3227
3228         /* Set counter to validate the ordering */
3229         for (j = 0; j < num_ops; ++j)
3230                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3231
3232         for (i = 0; i < TEST_REPETITIONS; ++i) {
3233
3234                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3235                         for (j = 0; j < num_ops; ++j)
3236                                 mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3237
3238                 start_time = rte_rdtsc_precise();
3239
3240                 for (enq = 0, deq = 0; enq < num_ops;) {
3241                         num_to_enq = burst_sz;
3242
3243                         if (unlikely(num_ops - enq < num_to_enq))
3244                                 num_to_enq = num_ops - enq;
3245
3246                         enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
3247                                         queue_id, &ops_enq[enq], num_to_enq);
3248
3249                         deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3250                                         queue_id, &ops_deq[deq], enq - deq);
3251                 }
3252
3253                 /* dequeue the remaining */
3254                 while (deq < enq) {
3255                         deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3256                                         queue_id, &ops_deq[deq], enq - deq);
3257                 }
3258
3259                 total_time += rte_rdtsc_precise() - start_time;
3260         }
3261
3262         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3263                 ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
3264                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3265         }
3266
3267         rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3268
3269         double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
3270
3271         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3272                         ((double)total_time / (double)rte_get_tsc_hz());
3273         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3274                         / 1000000.0) / ((double)total_time /
3275                         (double)rte_get_tsc_hz());
3276
3277         return TEST_SUCCESS;
3278 }
3279
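/* Aggregate the performance results over the number of cores used */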
3280 static void
3281 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
3282 {
3283         unsigned int iter = 0;
3284         double total_mops = 0, total_mbps = 0;
3285
3286         for (iter = 0; iter < used_cores; iter++) {
3287                 printf(
3288                         "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
3289                         t_params[iter].lcore_id, t_params[iter].ops_per_sec,
3290                         t_params[iter].mbps);
3291                 total_mops += t_params[iter].ops_per_sec;
3292                 total_mbps += t_params[iter].mbps;
3293         }
3294         printf(
3295                 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
3296                 used_cores, total_mops, total_mbps);
3297 }
3298
3299 /* Aggregate the performance results over the number of cores used */
3300 static void
3301 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
3302 {
3303         unsigned int core_idx = 0;
3304         double total_mops = 0, total_mbps = 0;
3305         uint8_t iter_count = 0;
3306
3307         for (core_idx = 0; core_idx < used_cores; core_idx++) {
3308                 printf(
3309                         "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
3310                         t_params[core_idx].lcore_id,
3311                         t_params[core_idx].ops_per_sec,
3312                         t_params[core_idx].mbps,
3313                         t_params[core_idx].iter_count);
3314                 total_mops += t_params[core_idx].ops_per_sec;
3315                 total_mbps += t_params[core_idx].mbps;
3316                 iter_count = RTE_MAX(iter_count,
3317                                 t_params[core_idx].iter_count);
3318         }
3319         printf(
3320                 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
3321                 used_cores, total_mops, total_mbps, iter_count);
3322 }
3323
3324 /* Aggregate the performance results over the number of cores used */
3325 static void
3326 print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
3327 {
3328         unsigned int core_idx = 0;
3329         double total_mbps = 0, total_bler = 0, total_iter = 0;
3330         double snr = get_snr();
3331
3332         for (core_idx = 0; core_idx < used_cores; core_idx++) {
3333                 printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
3334                                 t_params[core_idx].lcore_id,
3335                                 t_params[core_idx].bler * 100,
3336                                 t_params[core_idx].iter_average,
3337                                 t_params[core_idx].mbps,
3338                                 get_vector_filename());
3339                 total_mbps += t_params[core_idx].mbps;
3340                 total_bler += t_params[core_idx].bler;
3341                 total_iter += t_params[core_idx].iter_average;
3342         }
3343         total_bler /= used_cores;
3344         total_iter /= used_cores;
3345
3346         printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
3347                         snr, total_bler * 100, total_iter, get_iter_max(),
3348                         total_mbps, get_vector_filename());
3349 }
3350
3351 /*
3352  * Test function that determines BLER wireless performance
3353  */
3354 static int
3355 bler_test(struct active_device *ad,
3356                 struct test_op_params *op_params)
3357 {
3358         int ret;
3359         unsigned int lcore_id, used_cores = 0;
3360         struct thread_params *t_params;
3361         struct rte_bbdev_info info;
3362         lcore_function_t *bler_function;
3363         uint16_t num_lcores;
3364         const char *op_type_str;
3365
3366         rte_bbdev_info_get(ad->dev_id, &info);
3367
3368         op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3369         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3370                         test_vector.op_type);
3371
3372         printf("+ ------------------------------------------------------- +\n");
3373         printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3374                         info.dev_name, ad->nb_queues, op_params->burst_sz,
3375                         op_params->num_to_process, op_params->num_lcores,
3376                         op_type_str,
3377                         intr_enabled ? "Interrupt mode" : "PMD mode",
3378                         (double)rte_get_tsc_hz() / 1000000000.0);
3379
3380         /* Set number of lcores */
3381         num_lcores = (ad->nb_queues < (op_params->num_lcores))
3382                         ? ad->nb_queues
3383                         : op_params->num_lcores;
3384
3385         /* Allocate memory for thread parameters structure */
3386         t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3387                         RTE_CACHE_LINE_SIZE);
3388         TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3389                         RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3390                                 RTE_CACHE_LINE_SIZE));
3391
3392         if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3393                 bler_function = bler_pmd_lcore_ldpc_dec;
3394         else
3395                 return TEST_SKIPPED;
3396
3397         rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3398
3399         /* Master core is set at first entry */
3400         t_params[0].dev_id = ad->dev_id;
3401         t_params[0].lcore_id = rte_lcore_id();
3402         t_params[0].op_params = op_params;
3403         t_params[0].queue_id = ad->queue_ids[used_cores++];
3404         t_params[0].iter_count = 0;
3405
3406         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
3407                 if (used_cores >= num_lcores)
3408                         break;
3409
3410                 t_params[used_cores].dev_id = ad->dev_id;
3411                 t_params[used_cores].lcore_id = lcore_id;
3412                 t_params[used_cores].op_params = op_params;
3413                 t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3414                 t_params[used_cores].iter_count = 0;
3415
3416                 rte_eal_remote_launch(bler_function,
3417                                 &t_params[used_cores++], lcore_id);
3418         }
3419
3420         rte_atomic16_set(&op_params->sync, SYNC_START);
3421         ret = bler_function(&t_params[0]);
3422
3423         /* Master core is always used */
3424         for (used_cores = 1; used_cores < num_lcores; used_cores++)
3425                 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3426
3427         print_dec_bler(t_params, num_lcores);
3428
3429         /* Return if test failed */
3430         if (ret) {
3431                 rte_free(t_params);
3432                 return ret;
3433         }
3434
3436         rte_free(t_params);
3437         return ret;
3438 }
3439
3440 /*
3441  * Test function that measures sustained enqueue + dequeue throughput of
3442  * bursts on the available lcores.
3443  */
3444 static int
3445 throughput_test(struct active_device *ad,
3446                 struct test_op_params *op_params)
3447 {
3448         int ret;
3449         unsigned int lcore_id, used_cores = 0;
3450         struct thread_params *t_params, *tp;
3451         struct rte_bbdev_info info;
3452         lcore_function_t *throughput_function;
3453         uint16_t num_lcores;
3454         const char *op_type_str;
3455
3456         rte_bbdev_info_get(ad->dev_id, &info);
3457
3458         op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3459         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3460                         test_vector.op_type);
3461
3462         printf("+ ------------------------------------------------------- +\n");
3463         printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3464                         info.dev_name, ad->nb_queues, op_params->burst_sz,
3465                         op_params->num_to_process, op_params->num_lcores,
3466                         op_type_str,
3467                         intr_enabled ? "Interrupt mode" : "PMD mode",
3468                         (double)rte_get_tsc_hz() / 1000000000.0);
3469
3470         /* Set number of lcores */
3471         num_lcores = (ad->nb_queues < (op_params->num_lcores))
3472                         ? ad->nb_queues
3473                         : op_params->num_lcores;
3474
3475         /* Allocate memory for thread parameters structure */
3476         t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3477                         RTE_CACHE_LINE_SIZE);
3478         TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3479                         RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3480                                 RTE_CACHE_LINE_SIZE));
3481
3482         if (intr_enabled) {
3483                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3484                         throughput_function = throughput_intr_lcore_dec;
3485                 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3486                         throughput_function = throughput_intr_lcore_dec;
3487                 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3488                         throughput_function = throughput_intr_lcore_enc;
3489                 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3490                         throughput_function = throughput_intr_lcore_enc;
3491                 else
3492                         throughput_function = throughput_intr_lcore_enc;
3493
3494                 /* Dequeue interrupt callback registration */
3495                 ret = rte_bbdev_callback_register(ad->dev_id,
3496                                 RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
3497                                 t_params);
3498                 if (ret < 0) {
3499                         rte_free(t_params);
3500                         return ret;
3501                 }
3502         } else {
3503                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3504                         throughput_function = throughput_pmd_lcore_dec;
3505                 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3506                         throughput_function = throughput_pmd_lcore_ldpc_dec;
3507                 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3508                         throughput_function = throughput_pmd_lcore_enc;
3509                 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3510                         throughput_function = throughput_pmd_lcore_ldpc_enc;
3511                 else
3512                         throughput_function = throughput_pmd_lcore_enc;
3513         }
3514
3515         rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3516
3517         /* Master core is set at first entry */
3518         t_params[0].dev_id = ad->dev_id;
3519         t_params[0].lcore_id = rte_lcore_id();
3520         t_params[0].op_params = op_params;
3521         t_params[0].queue_id = ad->queue_ids[used_cores++];
3522         t_params[0].iter_count = 0;
3523
3524         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
3525                 if (used_cores >= num_lcores)
3526                         break;
3527
3528                 t_params[used_cores].dev_id = ad->dev_id;
3529                 t_params[used_cores].lcore_id = lcore_id;
3530                 t_params[used_cores].op_params = op_params;
3531                 t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3532                 t_params[used_cores].iter_count = 0;
3533
3534                 rte_eal_remote_launch(throughput_function,
3535                                 &t_params[used_cores++], lcore_id);
3536         }
3537
3538         rte_atomic16_set(&op_params->sync, SYNC_START);
3539         ret = throughput_function(&t_params[0]);
3540
3541         /* Master core is always used */
3542         for (used_cores = 1; used_cores < num_lcores; used_cores++)
3543                 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3544
3545         /* Return if test failed */
3546         if (ret) {
3547                 rte_free(t_params);
3548                 return ret;
3549         }
3550
3551         /* Print throughput if interrupts are disabled and test passed */
3552         if (!intr_enabled) {
3553                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3554                                 test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3555                         print_dec_throughput(t_params, num_lcores);
3556                 else
3557                         print_enc_throughput(t_params, num_lcores);
3558                 rte_free(t_params);
3559                 return ret;
3560         }
3561
3562         /* In the interrupt TC we need to wait for the interrupt callback to
3563          * dequeue all pending operations. Skip waiting for queues that
3564          * reported an error via the processing_status variable.
3565          * Wait for master lcore operations first.
3566          */
3567         tp = &t_params[0];
3568         while ((rte_atomic16_read(&tp->nb_dequeued) <
3569                         op_params->num_to_process) &&
3570                         (rte_atomic16_read(&tp->processing_status) !=
3571                         TEST_FAILED))
3572                 rte_pause();
3573
3574         tp->ops_per_sec /= TEST_REPETITIONS;
3575         tp->mbps /= TEST_REPETITIONS;
3576         ret |= (int)rte_atomic16_read(&tp->processing_status);
3577
3578         /* Wait for slave lcores operations */
3579         for (used_cores = 1; used_cores < num_lcores; used_cores++) {
3580                 tp = &t_params[used_cores];
3581
3582                 while ((rte_atomic16_read(&tp->nb_dequeued) <
3583                                 op_params->num_to_process) &&
3584                                 (rte_atomic16_read(&tp->processing_status) !=
3585                                 TEST_FAILED))
3586                         rte_pause();
3587
3588                 tp->ops_per_sec /= TEST_REPETITIONS;
3589                 tp->mbps /= TEST_REPETITIONS;
3590                 ret |= (int)rte_atomic16_read(&tp->processing_status);
3591         }
3592
3593         /* Print throughput if test passed */
3594         if (!ret) {
3595                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3596                                 test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3597                         print_dec_throughput(t_params, num_lcores);
3598                 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
3599                                 test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3600                         print_enc_throughput(t_params, num_lcores);
3601         }
3602
3603         rte_free(t_params);
3604         return ret;
3605 }
3606
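/* Measure per-burst latency for Turbo decode: each burst is timed from enqueue
 * until the first successful dequeue, and min/max/total latency are updated.
 */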
3607 static int
3608 latency_test_dec(struct rte_mempool *mempool,
3609                 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3610                 int vector_mask, uint16_t dev_id, uint16_t queue_id,
3611                 const uint16_t num_to_process, uint16_t burst_sz,
3612                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3613 {
3614         int ret = TEST_SUCCESS;
3615         uint16_t i, j, dequeued;
3616         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3617         uint64_t start_time = 0, last_time = 0;
3618
3619         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3620                 uint16_t enq = 0, deq = 0;
3621                 bool first_time = true;
3622                 last_time = 0;
3623
3624                 if (unlikely(num_to_process - dequeued < burst_sz))
3625                         burst_sz = num_to_process - dequeued;
3626
3627                 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3628                 TEST_ASSERT_SUCCESS(ret,
3629                                 "rte_bbdev_dec_op_alloc_bulk() failed");
3630                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3631                         copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3632                                         bufs->inputs,
3633                                         bufs->hard_outputs,
3634                                         bufs->soft_outputs,
3635                                         ref_op);
3636
3637                 /* Set counter to validate the ordering */
3638                 for (j = 0; j < burst_sz; ++j)
3639                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3640
3641                 start_time = rte_rdtsc_precise();
3642
3643                 enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
3644                                 burst_sz);
3645                 TEST_ASSERT(enq == burst_sz,
3646                                 "Error enqueueing burst, expected %u, got %u",
3647                                 burst_sz, enq);
3648
3649                 /* Dequeue */
3650                 do {
3651                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3652                                         &ops_deq[deq], burst_sz - deq);
3653                         if (likely(first_time && (deq > 0))) {
3654                                 last_time = rte_rdtsc_precise() - start_time;
3655                                 first_time = false;
3656                         }
3657                 } while (unlikely(burst_sz != deq));
3658
3659                 *max_time = RTE_MAX(*max_time, last_time);
3660                 *min_time = RTE_MIN(*min_time, last_time);
3661                 *total_time += last_time;
3662
3663                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3664                         ret = validate_dec_op(ops_deq, burst_sz, ref_op,
3665                                         vector_mask);
3666                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3667                 }
3668
3669                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3670                 dequeued += deq;
3671         }
3672
3673         return i;
3674 }
3675
3676 static int
3677 latency_test_ldpc_dec(struct rte_mempool *mempool,
3678                 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3679                 int vector_mask, uint16_t dev_id, uint16_t queue_id,
3680                 const uint16_t num_to_process, uint16_t burst_sz,
3681                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3682 {
3683         int ret = TEST_SUCCESS;
3684         uint16_t i, j, dequeued;
3685         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3686         uint64_t start_time = 0, last_time = 0;
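             /* HARQ data held in device memory must be preloaded before enqueue and read back after dequeue */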
3687         bool extDdr = ldpc_cap_flags &
3688                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
3689
3690         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3691                 uint16_t enq = 0, deq = 0;
3692                 bool first_time = true;
3693                 last_time = 0;
3694
3695                 if (unlikely(num_to_process - dequeued < burst_sz))
3696                         burst_sz = num_to_process - dequeued;
3697
3698                 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3699                 TEST_ASSERT_SUCCESS(ret,
3700                                 "rte_bbdev_dec_op_alloc_bulk() failed");
3701
3702                 /* For latency tests we need to disable early termination */
3703                 if (check_bit(ref_op->ldpc_dec.op_flags,
3704                                 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3705                         ref_op->ldpc_dec.op_flags &=
3706                                         ~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3707                 ref_op->ldpc_dec.iter_max = get_iter_max();
3708                 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3709
3710                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3711                         copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
3712                                         bufs->inputs,
3713                                         bufs->hard_outputs,
3714                                         bufs->soft_outputs,
3715                                         bufs->harq_inputs,
3716                                         bufs->harq_outputs,
3717                                         ref_op);
3718
3719                 if (extDdr)
3720                         preload_harq_ddr(dev_id, queue_id, ops_enq,
3721                                         burst_sz, true);
3722
3723                 /* Set counter to validate the ordering */
3724                 for (j = 0; j < burst_sz; ++j)
3725                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3726
3727                 start_time = rte_rdtsc_precise();
3728
3729                 enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
3730                                 &ops_enq[enq], burst_sz);
3731                 TEST_ASSERT(enq == burst_sz,
3732                                 "Error enqueueing burst, expected %u, got %u",
3733                                 burst_sz, enq);
3734
3735                 /* Dequeue */
3736                 do {
3737                         deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3738                                         &ops_deq[deq], burst_sz - deq);
3739                         if (likely(first_time && (deq > 0))) {
3740                                 last_time = rte_rdtsc_precise() - start_time;
3741                                 first_time = false;
3742                         }
3743                 } while (unlikely(burst_sz != deq));
3744
3745                 *max_time = RTE_MAX(*max_time, last_time);
3746                 *min_time = RTE_MIN(*min_time, last_time);
3747                 *total_time += last_time;
3748
3749                 if (extDdr)
3750                         retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
3751
3752                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3753                         ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
3754                                         vector_mask);
3755                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3756                 }
3757
3758                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3759                 dequeued += deq;
3760         }
3761         return i;
3762 }
3763
3764 static int
3765 latency_test_enc(struct rte_mempool *mempool,
3766                 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
3767                 uint16_t dev_id, uint16_t queue_id,
3768                 const uint16_t num_to_process, uint16_t burst_sz,
3769                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3770 {
3771         int ret = TEST_SUCCESS;
3772         uint16_t i, j, dequeued;
3773         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3774         uint64_t start_time = 0, last_time = 0;
3775
3776         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3777                 uint16_t enq = 0, deq = 0;
3778                 bool first_time = true;
3779                 last_time = 0;
3780
3781                 if (unlikely(num_to_process - dequeued < burst_sz))
3782                         burst_sz = num_to_process - dequeued;
3783
3784                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3785                 TEST_ASSERT_SUCCESS(ret,
3786                                 "rte_bbdev_enc_op_alloc_bulk() failed");
3787                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3788                         copy_reference_enc_op(ops_enq, burst_sz, dequeued,
3789                                         bufs->inputs,
3790                                         bufs->hard_outputs,
3791                                         ref_op);
3792
3793                 /* Set counter to validate the ordering */
3794                 for (j = 0; j < burst_sz; ++j)
3795                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3796
3797                 start_time = rte_rdtsc_precise();
3798
3799                 enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
3800                                 burst_sz);
3801                 TEST_ASSERT(enq == burst_sz,
3802                                 "Error enqueueing burst, expected %u, got %u",
3803                                 burst_sz, enq);
3804
3805                 /* Dequeue */
3806                 do {
3807                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3808                                         &ops_deq[deq], burst_sz - deq);
3809                         if (likely(first_time && (deq > 0))) {
3810                                 last_time = rte_rdtsc_precise() - start_time;
3811                                 first_time = false;
3812                         }
3813                 } while (unlikely(burst_sz != deq));
3814
3815                 *max_time = RTE_MAX(*max_time, last_time);
3816                 *min_time = RTE_MIN(*min_time, last_time);
3817                 *total_time += last_time;
3818
3819                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3820                         ret = validate_enc_op(ops_deq, burst_sz, ref_op);
3821                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3822                 }
3823
3824                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3825                 dequeued += deq;
3826         }
3827
3828         return i;
3829 }
3830
3831 static int
3832 latency_test_ldpc_enc(struct rte_mempool *mempool,
3833                 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
3834                 uint16_t dev_id, uint16_t queue_id,
3835                 const uint16_t num_to_process, uint16_t burst_sz,
3836                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3837 {
3838         int ret = TEST_SUCCESS;
3839         uint16_t i, j, dequeued;
3840         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3841         uint64_t start_time = 0, last_time = 0;
3842
3843         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3844                 uint16_t enq = 0, deq = 0;
3845                 bool first_time = true;
3846                 last_time = 0;
3847
3848                 if (unlikely(num_to_process - dequeued < burst_sz))
3849                         burst_sz = num_to_process - dequeued;
3850
3851                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3852                 TEST_ASSERT_SUCCESS(ret,
3853                                 "rte_bbdev_enc_op_alloc_bulk() failed");
3854                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3855                         copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
3856                                         bufs->inputs,
3857                                         bufs->hard_outputs,
3858                                         ref_op);
3859
3860                 /* Set counter to validate the ordering */
3861                 for (j = 0; j < burst_sz; ++j)
3862                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3863
3864                 start_time = rte_rdtsc_precise();
3865
3866                 enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
3867                                 &ops_enq[enq], burst_sz);
3868                 TEST_ASSERT(enq == burst_sz,
3869                                 "Error enqueueing burst, expected %u, got %u",
3870                                 burst_sz, enq);
3871
3872                 /* Dequeue */
3873                 do {
3874                         deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3875                                         &ops_deq[deq], burst_sz - deq);
3876                         if (likely(first_time && (deq > 0))) {
3877                                 last_time = rte_rdtsc_precise() - start_time;
3878                                 first_time = false;
3879                         }
3880                 } while (unlikely(burst_sz != deq));
3881
3882                 *max_time = RTE_MAX(*max_time, last_time);
3883                 *min_time = RTE_MIN(*min_time, last_time);
3884                 *total_time += last_time;
3885
3886                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3887                         ret = validate_enc_op(ops_deq, burst_sz, ref_op);
3888                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3889                 }
3890
3891                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3892                 dequeued += deq;
3893         }
3894
3895         return i;
3896 }
3897
3898 static int
3899 latency_test(struct active_device *ad,
3900                 struct test_op_params *op_params)
3901 {
3902         int iter;
3903         uint16_t burst_sz = op_params->burst_sz;
3904         const uint16_t num_to_process = op_params->num_to_process;
3905         const enum rte_bbdev_op_type op_type = test_vector.op_type;
3906         const uint16_t queue_id = ad->queue_ids[0];
3907         struct test_buffers *bufs = NULL;
3908         struct rte_bbdev_info info;
3909         uint64_t total_time, min_time, max_time;
3910         const char *op_type_str;
3911
3912         total_time = max_time = 0;
3913         min_time = UINT64_MAX;
3914
3915         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3916                         "BURST_SIZE should be <= %u", MAX_BURST);
3917
3918         rte_bbdev_info_get(ad->dev_id, &info);
3919         bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3920
3921         op_type_str = rte_bbdev_op_type_str(op_type);
3922         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
3923
3924         printf("+ ------------------------------------------------------- +\n");
3925         printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
3926                         info.dev_name, burst_sz, num_to_process, op_type_str);
3927
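             /* Run the latency loop matching the vector op type; any other type (e.g. RTE_BBDEV_OP_NONE) falls back to the plain encoder path */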
3928         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
3929                 iter = latency_test_dec(op_params->mp, bufs,
3930                                 op_params->ref_dec_op, op_params->vector_mask,
3931                                 ad->dev_id, queue_id, num_to_process,
3932                                 burst_sz, &total_time, &min_time, &max_time);
3933         else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
3934                 iter = latency_test_enc(op_params->mp, bufs,
3935                                 op_params->ref_enc_op, ad->dev_id, queue_id,
3936                                 num_to_process, burst_sz, &total_time,
3937                                 &min_time, &max_time);
3938         else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
3939                 iter = latency_test_ldpc_enc(op_params->mp, bufs,
3940                                 op_params->ref_enc_op, ad->dev_id, queue_id,
3941                                 num_to_process, burst_sz, &total_time,
3942                                 &min_time, &max_time);
3943         else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
3944                 iter = latency_test_ldpc_dec(op_params->mp, bufs,
3945                                 op_params->ref_dec_op, op_params->vector_mask,
3946                                 ad->dev_id, queue_id, num_to_process,
3947                                 burst_sz, &total_time, &min_time, &max_time);
3948         else
3949                 iter = latency_test_enc(op_params->mp, bufs,
3950                                         op_params->ref_enc_op,
3951                                         ad->dev_id, queue_id,
3952                                         num_to_process, burst_sz, &total_time,
3953                                         &min_time, &max_time);
3954
3955         if (iter <= 0)
3956                 return TEST_FAILED;
3957
3958         printf("Operation latency:\n"
3959                         "\tavg: %lg cycles, %lg us\n"
3960                         "\tmin: %lg cycles, %lg us\n"
3961                         "\tmax: %lg cycles, %lg us\n",
3962                         (double)total_time / (double)iter,
3963                         (double)(total_time * 1000000) / (double)iter /
3964                         (double)rte_get_tsc_hz(), (double)min_time,
3965                         (double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
3966                         (double)max_time, (double)(max_time * 1000000) /
3967                         (double)rte_get_tsc_hz());
3968
3969         return TEST_SUCCESS;
3970 }
3971
3972 #ifdef RTE_BBDEV_OFFLOAD_COST
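     /* Snapshot the raw per-queue stats (including acc_offload_cycles) maintained by the driver for the queue under test */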
3973 static int
3974 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
3975                 struct rte_bbdev_stats *stats)
3976 {
3977         struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
3978         struct rte_bbdev_stats *q_stats;
3979
3980         if (queue_id >= dev->data->num_queues)
3981                 return -1;
3982
3983         q_stats = &dev->data->queues[queue_id].queue_stats;
3984
3985         stats->enqueued_count = q_stats->enqueued_count;
3986         stats->dequeued_count = q_stats->dequeued_count;
3987         stats->enqueue_err_count = q_stats->enqueue_err_count;
3988         stats->dequeue_err_count = q_stats->dequeue_err_count;
3989         stats->acc_offload_cycles = q_stats->acc_offload_cycles;
3990
3991         return 0;
3992 }
3993
3994 static int
3995 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
3996                 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
3997                 uint16_t queue_id, const uint16_t num_to_process,
3998                 uint16_t burst_sz, struct test_time_stats *time_st)
3999 {
4000         int i, dequeued, ret;
4001         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4002         uint64_t enq_start_time, deq_start_time;
4003         uint64_t enq_sw_last_time, deq_last_time;
4004         struct rte_bbdev_stats stats;
4005
4006         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4007                 uint16_t enq = 0, deq = 0;
4008
4009                 if (unlikely(num_to_process - dequeued < burst_sz))
4010                         burst_sz = num_to_process - dequeued;
4011
4012                 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
                     TEST_ASSERT_SUCCESS(ret,
                                     "rte_bbdev_dec_op_alloc_bulk() failed");
4013                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4014                         copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4015                                         bufs->inputs,
4016                                         bufs->hard_outputs,
4017                                         bufs->soft_outputs,
4018                                         ref_op);
4019
4020                 /* Start time meas for enqueue function offload latency */
4021                 enq_start_time = rte_rdtsc_precise();
4022                 do {
4023                         enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
4024                                         &ops_enq[enq], burst_sz - enq);
4025                 } while (unlikely(burst_sz != enq));
4026
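                     /* Software enqueue cost = wall time of the enqueue loop minus the accelerator offload cycles reported in the queue stats */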
4027                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4028                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4029                 TEST_ASSERT_SUCCESS(ret,
4030                                 "Failed to get stats for queue (%u) of device (%u)",
4031                                 queue_id, dev_id);
4032
4033                 enq_sw_last_time -= stats.acc_offload_cycles;
4034                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4035                                 enq_sw_last_time);
4036                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4037                                 enq_sw_last_time);
4038                 time_st->enq_sw_total_time += enq_sw_last_time;
4039
4040                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4041                                 stats.acc_offload_cycles);
4042                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4043                                 stats.acc_offload_cycles);
4044                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
4045
4046                 /* give time for device to process ops */
4047                 rte_delay_us(200);
4048
4049                 /* Start time meas for dequeue function offload latency */
4050                 deq_start_time = rte_rdtsc_precise();
4051                 /* Dequeue one operation */
4052                 do {
4053                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4054                                         &ops_deq[deq], 1);
4055                 } while (unlikely(deq != 1));
4056
4057                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
4058                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4059                                 deq_last_time);
4060                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4061                                 deq_last_time);
4062                 time_st->deq_total_time += deq_last_time;
4063
4064                 /* Dequeue remaining operations if needed */
4065                 while (burst_sz != deq)
4066                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4067                                         &ops_deq[deq], burst_sz - deq);
4068
4069                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4070                 dequeued += deq;
4071         }
4072
4073         return i;
4074 }
4075
4076 static int
4077 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
4078                 struct test_buffers *bufs,
4079                 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4080                 uint16_t queue_id, const uint16_t num_to_process,
4081                 uint16_t burst_sz, struct test_time_stats *time_st)
4082 {
4083         int i, dequeued, ret;
4084         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4085         uint64_t enq_start_time, deq_start_time;
4086         uint64_t enq_sw_last_time, deq_last_time;
4087         struct rte_bbdev_stats stats;
4088         bool extDdr = ldpc_cap_flags &
4089                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4090
4091         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4092                 uint16_t enq = 0, deq = 0;
4093
4094                 if (unlikely(num_to_process - dequeued < burst_sz))
4095                         burst_sz = num_to_process - dequeued;
4096
4097                 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
                     TEST_ASSERT_SUCCESS(ret,
                                     "rte_bbdev_dec_op_alloc_bulk() failed");
4098                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4099                         copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4100                                         bufs->inputs,
4101                                         bufs->hard_outputs,
4102                                         bufs->soft_outputs,
4103                                         bufs->harq_inputs,
4104                                         bufs->harq_outputs,
4105                                         ref_op);
4106
4107                 if (extDdr)
4108                         preload_harq_ddr(dev_id, queue_id, ops_enq,
4109                                         burst_sz, true);
4110
4111                 /* Start time meas for enqueue function offload latency */
4112                 enq_start_time = rte_rdtsc_precise();
4113                 do {
4114                         enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4115                                         &ops_enq[enq], burst_sz - enq);
4116                 } while (unlikely(burst_sz != enq));
4117
4118                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4119                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4120                 TEST_ASSERT_SUCCESS(ret,
4121                                 "Failed to get stats for queue (%u) of device (%u)",
4122                                 queue_id, dev_id);
4123
4124                 enq_sw_last_time -= stats.acc_offload_cycles;
4125                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4126                                 enq_sw_last_time);
4127                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4128                                 enq_sw_last_time);
4129                 time_st->enq_sw_total_time += enq_sw_last_time;
4130
4131                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4132                                 stats.acc_offload_cycles);
4133                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4134                                 stats.acc_offload_cycles);
4135                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
4136
4137                 /* give time for device to process ops */
4138                 rte_delay_us(200);
4139
4140                 /* Start time meas for dequeue function offload latency */
4141                 deq_start_time = rte_rdtsc_precise();
4142                 /* Dequeue one operation */
4143                 do {
4144                         deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4145                                         &ops_deq[deq], 1);
4146                 } while (unlikely(deq != 1));
4147
4148                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
4149                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4150                                 deq_last_time);
4151                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4152                                 deq_last_time);
4153                 time_st->deq_total_time += deq_last_time;
4154
4155                 /* Dequeue remaining operations if needed */
4156                 while (burst_sz != deq)
4157                         deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4158                                         &ops_deq[deq], burst_sz - deq);
4159
4160                 if (extDdr) {
4161                         /* Read loopback is not thread safe */
4162                         retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4163                 }
4164
4165                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4166                 dequeued += deq;
4167         }
4168
4169         return i;
4170 }
4171
4172 static int
4173 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
4174                 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4175                 uint16_t queue_id, const uint16_t num_to_process,
4176                 uint16_t burst_sz, struct test_time_stats *time_st)
4177 {
4178         int i, dequeued, ret;
4179         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4180         uint64_t enq_start_time, deq_start_time;
4181         uint64_t enq_sw_last_time, deq_last_time;
4182         struct rte_bbdev_stats stats;
4183
4184         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4185                 uint16_t enq = 0, deq = 0;
4186
4187                 if (unlikely(num_to_process - dequeued < burst_sz))
4188                         burst_sz = num_to_process - dequeued;
4189
4190                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4191                 TEST_ASSERT_SUCCESS(ret,
4192                                 "rte_bbdev_enc_op_alloc_bulk() failed");
4193                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4194                         copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4195                                         bufs->inputs,
4196                                         bufs->hard_outputs,
4197                                         ref_op);
4198
4199                 /* Start time meas for enqueue function offload latency */
4200                 enq_start_time = rte_rdtsc_precise();
4201                 do {
4202                         enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
4203                                         &ops_enq[enq], burst_sz - enq);
4204                 } while (unlikely(burst_sz != enq));
4205
4206                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4207
4208                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4209                 TEST_ASSERT_SUCCESS(ret,
4210                                 "Failed to get stats for queue (%u) of device (%u)",
4211                                 queue_id, dev_id);
4212                 enq_sw_last_time -= stats.acc_offload_cycles;
4213                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4214                                 enq_sw_last_time);
4215                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4216                                 enq_sw_last_time);
4217                 time_st->enq_sw_total_time += enq_sw_last_time;
4218
4219                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4220                                 stats.acc_offload_cycles);
4221                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4222                                 stats.acc_offload_cycles);
4223                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
4224
4225                 /* give time for device to process ops */
4226                 rte_delay_us(200);
4227
4228                 /* Start time meas for dequeue function offload latency */
4229                 deq_start_time = rte_rdtsc_precise();
4230                 /* Dequeue one operation */
4231                 do {
4232                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4233                                         &ops_deq[deq], 1);
4234                 } while (unlikely(deq != 1));
4235
4236                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
4237                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4238                                 deq_last_time);
4239                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4240                                 deq_last_time);
4241                 time_st->deq_total_time += deq_last_time;
4242
4243                 while (burst_sz != deq)
4244                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4245                                         &ops_deq[deq], burst_sz - deq);
4246
4247                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4248                 dequeued += deq;
4249         }
4250
4251         return i;
4252 }
4253
4254 static int
4255 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
4256                 struct test_buffers *bufs,
4257                 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4258                 uint16_t queue_id, const uint16_t num_to_process,
4259                 uint16_t burst_sz, struct test_time_stats *time_st)
4260 {
4261         int i, dequeued, ret;
4262         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4263         uint64_t enq_start_time, deq_start_time;
4264         uint64_t enq_sw_last_time, deq_last_time;
4265         struct rte_bbdev_stats stats;
4266
4267         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4268                 uint16_t enq = 0, deq = 0;
4269
4270                 if (unlikely(num_to_process - dequeued < burst_sz))
4271                         burst_sz = num_to_process - dequeued;
4272
4273                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4274                 TEST_ASSERT_SUCCESS(ret,
4275                                 "rte_bbdev_enc_op_alloc_bulk() failed");
4276                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4277                         copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4278                                         bufs->inputs,
4279                                         bufs->hard_outputs,
4280                                         ref_op);
4281
4282                 /* Start time meas for enqueue function offload latency */
4283                 enq_start_time = rte_rdtsc_precise();
4284                 do {
4285                         enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4286                                         &ops_enq[enq], burst_sz - enq);
4287                 } while (unlikely(burst_sz != enq));
4288
4289                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4290                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4291                 TEST_ASSERT_SUCCESS(ret,
4292                                 "Failed to get stats for queue (%u) of device (%u)",
4293                                 queue_id, dev_id);
4294
4295                 enq_sw_last_time -= stats.acc_offload_cycles;
4296                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4297                                 enq_sw_last_time);
4298                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4299                                 enq_sw_last_time);
4300                 time_st->enq_sw_total_time += enq_sw_last_time;
4301
4302                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4303                                 stats.acc_offload_cycles);
4304                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4305                                 stats.acc_offload_cycles);
4306                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
4307
4308                 /* give time for device to process ops */
4309                 rte_delay_us(200);
4310
4311                 /* Start time meas for dequeue function offload latency */
4312                 deq_start_time = rte_rdtsc_precise();
4313                 /* Dequeue one operation */
4314                 do {
4315                         deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4316                                         &ops_deq[deq], 1);
4317                 } while (unlikely(deq != 1));
4318
4319                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
4320                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4321                                 deq_last_time);
4322                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4323                                 deq_last_time);
4324                 time_st->deq_total_time += deq_last_time;
4325
4326                 while (burst_sz != deq)
4327                         deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4328                                         &ops_deq[deq], burst_sz - deq);
4329
4330                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4331                 dequeued += deq;
4332         }
4333
4334         return i;
4335 }
4336 #endif
4337
4338 static int
4339 offload_cost_test(struct active_device *ad,
4340                 struct test_op_params *op_params)
4341 {
4342 #ifndef RTE_BBDEV_OFFLOAD_COST
4343         RTE_SET_USED(ad);
4344         RTE_SET_USED(op_params);
4345         printf("Offload latency test is disabled.\n");
4346         printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4347         return TEST_SKIPPED;
4348 #else
4349         int iter;
4350         uint16_t burst_sz = op_params->burst_sz;
4351         const uint16_t num_to_process = op_params->num_to_process;
4352         const enum rte_bbdev_op_type op_type = test_vector.op_type;
4353         const uint16_t queue_id = ad->queue_ids[0];
4354         struct test_buffers *bufs = NULL;
4355         struct rte_bbdev_info info;
4356         const char *op_type_str;
4357         struct test_time_stats time_st;
4358
4359         memset(&time_st, 0, sizeof(struct test_time_stats));
4360         time_st.enq_sw_min_time = UINT64_MAX;
4361         time_st.enq_acc_min_time = UINT64_MAX;
4362         time_st.deq_min_time = UINT64_MAX;
4363
4364         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4365                         "BURST_SIZE should be <= %u", MAX_BURST);
4366
4367         rte_bbdev_info_get(ad->dev_id, &info);
4368         bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4369
4370         op_type_str = rte_bbdev_op_type_str(op_type);
4371         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4372
4373         printf("+ ------------------------------------------------------- +\n");
4374         printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4375                         info.dev_name, burst_sz, num_to_process, op_type_str);
4376
4377         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4378                 iter = offload_latency_test_dec(op_params->mp, bufs,
4379                                 op_params->ref_dec_op, ad->dev_id, queue_id,
4380                                 num_to_process, burst_sz, &time_st);
4381         else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
4382                 iter = offload_latency_test_enc(op_params->mp, bufs,
4383                                 op_params->ref_enc_op, ad->dev_id, queue_id,
4384                                 num_to_process, burst_sz, &time_st);
4385         else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4386                 iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
4387                                 op_params->ref_enc_op, ad->dev_id, queue_id,
4388                                 num_to_process, burst_sz, &time_st);
4389         else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4390                 iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
4391                         op_params->ref_dec_op, ad->dev_id, queue_id,
4392                         num_to_process, burst_sz, &time_st);
4393         else
4394                 iter = offload_latency_test_enc(op_params->mp, bufs,
4395                                 op_params->ref_enc_op, ad->dev_id, queue_id,
4396                                 num_to_process, burst_sz, &time_st);
4397
4398         if (iter <= 0)
4399                 return TEST_FAILED;
4400
4401         printf("Enqueue driver offload cost latency:\n"
4402                         "\tavg: %lg cycles, %lg us\n"
4403                         "\tmin: %lg cycles, %lg us\n"
4404                         "\tmax: %lg cycles, %lg us\n"
4405                         "Enqueue accelerator offload cost latency:\n"
4406                         "\tavg: %lg cycles, %lg us\n"
4407                         "\tmin: %lg cycles, %lg us\n"
4408                         "\tmax: %lg cycles, %lg us\n",
4409                         (double)time_st.enq_sw_total_time / (double)iter,
4410                         (double)(time_st.enq_sw_total_time * 1000000) /
4411                         (double)iter / (double)rte_get_tsc_hz(),
4412                         (double)time_st.enq_sw_min_time,
4413                         (double)(time_st.enq_sw_min_time * 1000000) /
4414                         rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
4415                         (double)(time_st.enq_sw_max_time * 1000000) /
4416                         rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
4417                         (double)iter,
4418                         (double)(time_st.enq_acc_total_time * 1000000) /
4419                         (double)iter / (double)rte_get_tsc_hz(),
4420                         (double)time_st.enq_acc_min_time,
4421                         (double)(time_st.enq_acc_min_time * 1000000) /
4422                         rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
4423                         (double)(time_st.enq_acc_max_time * 1000000) /
4424                         rte_get_tsc_hz());
4425
4426         printf("Dequeue offload cost latency - one op:\n"
4427                         "\tavg: %lg cycles, %lg us\n"
4428                         "\tmin: %lg cycles, %lg us\n"
4429                         "\tmax: %lg cycles, %lg us\n",
4430                         (double)time_st.deq_total_time / (double)iter,
4431                         (double)(time_st.deq_total_time * 1000000) /
4432                         (double)iter / (double)rte_get_tsc_hz(),
4433                         (double)time_st.deq_min_time,
4434                         (double)(time_st.deq_min_time * 1000000) /
4435                         rte_get_tsc_hz(), (double)time_st.deq_max_time,
4436                         (double)(time_st.deq_max_time * 1000000) /
4437                         rte_get_tsc_hz());
4438
4439         return TEST_SUCCESS;
4440 #endif
4441 }
4442
4443 #ifdef RTE_BBDEV_OFFLOAD_COST
4444 static int
4445 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
4446                 const uint16_t num_to_process, uint16_t burst_sz,
4447                 uint64_t *deq_total_time, uint64_t *deq_min_time,
4448                 uint64_t *deq_max_time)
4449 {
4450         int i, deq_total;
4451         struct rte_bbdev_dec_op *ops[MAX_BURST];
4452         uint64_t deq_start_time, deq_last_time;
4453
4454         /* Test deq offload latency from an empty queue */
4455
4456         for (i = 0, deq_total = 0; deq_total < num_to_process;
4457                         ++i, deq_total += burst_sz) {
4458                 deq_start_time = rte_rdtsc_precise();
4459
4460                 if (unlikely(num_to_process - deq_total < burst_sz))
4461                         burst_sz = num_to_process - deq_total;
4462                 rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz);
4463
4464                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
4465                 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4466                 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4467                 *deq_total_time += deq_last_time;
4468         }
4469
4470         return i;
4471 }
4472
4473 static int
4474 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
4475                 const uint16_t num_to_process, uint16_t burst_sz,
4476                 uint64_t *deq_total_time, uint64_t *deq_min_time,
4477                 uint64_t *deq_max_time)
4478 {
4479         int i, deq_total;
4480         struct rte_bbdev_enc_op *ops[MAX_BURST];
4481         uint64_t deq_start_time, deq_last_time;
4482
4483         /* Test deq offload latency from an empty queue */
4484         for (i = 0, deq_total = 0; deq_total < num_to_process;
4485                         ++i, deq_total += burst_sz) {
4486                 deq_start_time = rte_rdtsc_precise();
4487
4488                 if (unlikely(num_to_process - deq_total < burst_sz))
4489                         burst_sz = num_to_process - deq_total;
4490                 rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz);
4491
4492                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
4493                 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4494                 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4495                 *deq_total_time += deq_last_time;
4496         }
4497
4498         return i;
4499 }
4500 #endif
4501
4502 static int
4503 offload_latency_empty_q_test(struct active_device *ad,
4504                 struct test_op_params *op_params)
4505 {
4506 #ifndef RTE_BBDEV_OFFLOAD_COST
4507         RTE_SET_USED(ad);
4508         RTE_SET_USED(op_params);
4509         printf("Offload latency empty dequeue test is disabled.\n");
4510         printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4511         return TEST_SKIPPED;
4512 #else
4513         int iter;
4514         uint64_t deq_total_time, deq_min_time, deq_max_time;
4515         uint16_t burst_sz = op_params->burst_sz;
4516         const uint16_t num_to_process = op_params->num_to_process;
4517         const enum rte_bbdev_op_type op_type = test_vector.op_type;
4518         const uint16_t queue_id = ad->queue_ids[0];
4519         struct rte_bbdev_info info;
4520         const char *op_type_str;
4521
4522         deq_total_time = deq_max_time = 0;
4523         deq_min_time = UINT64_MAX;
4524
4525         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4526                         "BURST_SIZE should be <= %u", MAX_BURST);
4527
4528         rte_bbdev_info_get(ad->dev_id, &info);
4529
4530         op_type_str = rte_bbdev_op_type_str(op_type);
4531         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4532
4533         printf("+ ------------------------------------------------------- +\n");
4534         printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4535                         info.dev_name, burst_sz, num_to_process, op_type_str);
4536
4537         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4538                 iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
4539                                 num_to_process, burst_sz, &deq_total_time,
4540                                 &deq_min_time, &deq_max_time);
4541         else
4542                 iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
4543                                 num_to_process, burst_sz, &deq_total_time,
4544                                 &deq_min_time, &deq_max_time);
4545
4546         if (iter <= 0)
4547                 return TEST_FAILED;
4548
4549         printf("Empty dequeue offload:\n"
4550                         "\tavg: %lg cycles, %lg us\n"
4551                         "\tmin: %lg cycles, %lg us\n"
4552                         "\tmax: %lg cycles, %lg us\n",
4553                         (double)deq_total_time / (double)iter,
4554                         (double)(deq_total_time * 1000000) / (double)iter /
4555                         (double)rte_get_tsc_hz(), (double)deq_min_time,
4556                         (double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
4557                         (double)deq_max_time, (double)(deq_max_time * 1000000) /
4558                         rte_get_tsc_hz());
4559
4560         return TEST_SUCCESS;
4561 #endif
4562 }
4563
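     /* Test case entry points: each wrapper runs its test body through run_test_case() */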
4564 static int
4565 bler_tc(void)
4566 {
4567         return run_test_case(bler_test);
4568 }
4569
4570 static int
4571 throughput_tc(void)
4572 {
4573         return run_test_case(throughput_test);
4574 }
4575
4576 static int
4577 offload_cost_tc(void)
4578 {
4579         return run_test_case(offload_cost_test);
4580 }
4581
4582 static int
4583 offload_latency_empty_q_tc(void)
4584 {
4585         return run_test_case(offload_latency_empty_q_test);
4586 }
4587
4588 static int
4589 latency_tc(void)
4590 {
4591         return run_test_case(latency_test);
4592 }
4593
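     /* The interrupt suite reuses the throughput test body */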
4594 static int
4595 interrupt_tc(void)
4596 {
4597         return run_test_case(throughput_test);
4598 }
4599
4600 static struct unit_test_suite bbdev_bler_testsuite = {
4601         .suite_name = "BBdev BLER Tests",
4602         .setup = testsuite_setup,
4603         .teardown = testsuite_teardown,
4604         .unit_test_cases = {
4605                 TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
4606                 TEST_CASES_END() /**< NULL terminate unit test array */
4607         }
4608 };
4609
4610 static struct unit_test_suite bbdev_throughput_testsuite = {
4611         .suite_name = "BBdev Throughput Tests",
4612         .setup = testsuite_setup,
4613         .teardown = testsuite_teardown,
4614         .unit_test_cases = {
4615                 TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
4616                 TEST_CASES_END() /**< NULL terminate unit test array */
4617         }
4618 };
4619
4620 static struct unit_test_suite bbdev_validation_testsuite = {
4621         .suite_name = "BBdev Validation Tests",
4622         .setup = testsuite_setup,
4623         .teardown = testsuite_teardown,
4624         .unit_test_cases = {
4625                 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
4626                 TEST_CASES_END() /**< NULL terminate unit test array */
4627         }
4628 };
4629
4630 static struct unit_test_suite bbdev_latency_testsuite = {
4631         .suite_name = "BBdev Latency Tests",
4632         .setup = testsuite_setup,
4633         .teardown = testsuite_teardown,
4634         .unit_test_cases = {
4635                 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
4636                 TEST_CASES_END() /**< NULL terminate unit test array */
4637         }
4638 };
4639
4640 static struct unit_test_suite bbdev_offload_cost_testsuite = {
4641         .suite_name = "BBdev Offload Cost Tests",
4642         .setup = testsuite_setup,
4643         .teardown = testsuite_teardown,
4644         .unit_test_cases = {
4645                 TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
4646                 TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
4647                 TEST_CASES_END() /**< NULL terminate unit test array */
4648         }
4649 };
4650
4651 static struct unit_test_suite bbdev_interrupt_testsuite = {
4652         .suite_name = "BBdev Interrupt Tests",
4653         .setup = interrupt_testsuite_setup,
4654         .teardown = testsuite_teardown,
4655         .unit_test_cases = {
4656                 TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
4657                 TEST_CASES_END() /**< NULL terminate unit test array */
4658         }
4659 };
4660
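     /* Register each suite as a test-bbdev test command */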
4661 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
4662 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
4663 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
4664 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
4665 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
4666 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);