1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20
21 #include "main.h"
22 #include "test_bbdev_vector.h"
23
24 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
25
26 #define MAX_QUEUES RTE_MAX_LCORE
27 #define TEST_REPETITIONS 1000
28
29 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
30 #include <fpga_lte_fec.h>
31 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
32 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
33 #define VF_UL_4G_QUEUE_VALUE 4
34 #define VF_DL_4G_QUEUE_VALUE 4
35 #define UL_4G_BANDWIDTH 3
36 #define DL_4G_BANDWIDTH 3
37 #define UL_4G_LOAD_BALANCE 128
38 #define DL_4G_LOAD_BALANCE 128
39 #define FLR_4G_TIMEOUT 610
40 #endif
41
42 #define OPS_CACHE_SIZE 256U
43 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
44
45 #define SYNC_WAIT 0
46 #define SYNC_START 1
47 #define INVALID_OPAQUE -1
48
49 #define INVALID_QUEUE_ID -1
50 /* Increment for next code block in external HARQ memory */
51 #define HARQ_INCR 32768
52 /* Headroom for filler LLRs insertion in HARQ buffer */
53 #define FILLER_HEADROOM 1024
54 /* Constants for K0 computation from 3GPP TS 38.212 Table 5.4.2.1-2 */
55 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
56 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
57 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
58 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
59 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
60 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
61 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
62 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
63
64 static struct test_bbdev_vector test_vector;
65
66 /* Switch between PMD and Interrupt for throughput TC */
67 static bool intr_enabled;
68
69 /* LLR arithmetic representation for numerical conversion */
70 static int ldpc_llr_decimals;
71 static int ldpc_llr_size;
72 /* Keep track of the LDPC decoder device capability flags */
73 static uint32_t ldpc_cap_flags;
74
75 /* Represents tested active devices */
76 static struct active_device {
77         const char *driver_name;
78         uint8_t dev_id;
79         uint16_t supported_ops;
80         uint16_t queue_ids[MAX_QUEUES];
81         uint16_t nb_queues;
82         struct rte_mempool *ops_mempool;
83         struct rte_mempool *in_mbuf_pool;
84         struct rte_mempool *hard_out_mbuf_pool;
85         struct rte_mempool *soft_out_mbuf_pool;
86         struct rte_mempool *harq_in_mbuf_pool;
87         struct rte_mempool *harq_out_mbuf_pool;
88 } active_devs[RTE_BBDEV_MAX_DEVS];
89
90 static uint8_t nb_active_devs;
91
92 /* Data buffers used by BBDEV ops */
93 struct test_buffers {
94         struct rte_bbdev_op_data *inputs;
95         struct rte_bbdev_op_data *hard_outputs;
96         struct rte_bbdev_op_data *soft_outputs;
97         struct rte_bbdev_op_data *harq_inputs;
98         struct rte_bbdev_op_data *harq_outputs;
99 };
100
101 /* Operation parameters specific to a given test case */
102 struct test_op_params {
103         struct rte_mempool *mp;
104         struct rte_bbdev_dec_op *ref_dec_op;
105         struct rte_bbdev_enc_op *ref_enc_op;
106         uint16_t burst_sz;
107         uint16_t num_to_process;
108         uint16_t num_lcores;
109         int vector_mask;
110         rte_atomic16_t sync;
111         struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
112 };
113
114 /* Contains per lcore params */
115 struct thread_params {
116         uint8_t dev_id;
117         uint16_t queue_id;
118         uint32_t lcore_id;
119         uint64_t start_time;
120         double ops_per_sec;
121         double mbps;
122         uint8_t iter_count;
123         rte_atomic16_t nb_dequeued;
124         rte_atomic16_t processing_status;
125         rte_atomic16_t burst_sz;
126         struct test_op_params *op_params;
127         struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
128         struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
129 };
130
131 #ifdef RTE_BBDEV_OFFLOAD_COST
132 /* Stores time statistics */
133 struct test_time_stats {
134         /* Stores software enqueue total working time */
135         uint64_t enq_sw_total_time;
136         /* Stores minimum value of software enqueue working time */
137         uint64_t enq_sw_min_time;
138         /* Stores maximum value of software enqueue working time */
139         uint64_t enq_sw_max_time;
140         /* Stores accelerator enqueue total working time */
141         uint64_t enq_acc_total_time;
142         /* Stores minimum value of accelerator enqueue working time */
143         uint64_t enq_acc_min_time;
144         /* Stores maximum value of accelerator enqueue working time */
145         uint64_t enq_acc_max_time;
146         /* Stores dequeue total working time */
147         uint64_t deq_total_time;
148         /* Stores minimum value of dequeue working time */
149         uint64_t deq_min_time;
150         /* Stores maximum value of dequeue working time */
151         uint64_t deq_max_time;
152 };
153 #endif
154
155 typedef int (test_case_function)(struct active_device *ad,
156                 struct test_op_params *op_params);
157
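/* Zero the packet length and the data length of every segment in a (possibly chained) mbuf */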
158 static inline void
159 mbuf_reset(struct rte_mbuf *m)
160 {
161         m->pkt_len = 0;
162
163         do {
164                 m->data_len = 0;
165                 m = m->next;
166         } while (m != NULL);
167 }
168
169 /* Read flag value 0/1 from bitmap */
170 static inline bool
171 check_bit(uint32_t bitmap, uint32_t bitmask)
172 {
173         return bitmap & bitmask;
174 }
175
176 static inline void
177 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
178 {
179         ad->supported_ops |= (1 << op_type);
180 }
181
182 static inline bool
183 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
184 {
185         return ad->supported_ops & (1 << op_type);
186 }
187
188 static inline bool
189 flags_match(uint32_t flags_req, uint32_t flags_present)
190 {
191         return (flags_req & flags_present) == flags_req;
192 }
193
194 static void
195 clear_soft_out_cap(uint32_t *op_flags)
196 {
197         *op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
198         *op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
199         *op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
200 }
201
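/* Check that the device capabilities cover the op type, flags and buffer counts required by the test vector */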
202 static int
203 check_dev_cap(const struct rte_bbdev_info *dev_info)
204 {
205         unsigned int i;
206         unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
207                 nb_harq_inputs, nb_harq_outputs;
208         const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
209
210         nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
211         nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
212         nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
213         nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
214         nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
215
216         for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
217                 if (op_cap->type != test_vector.op_type)
218                         continue;
219
220                 if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
221                         const struct rte_bbdev_op_cap_turbo_dec *cap =
222                                         &op_cap->cap.turbo_dec;
223                         /* Ignore lack of soft output capability, just skip
224                          * checking if soft output is valid.
225                          */
226                         if ((test_vector.turbo_dec.op_flags &
227                                         RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
228                                         !(cap->capability_flags &
229                                         RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
230                                 printf(
231                                         "INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
232                                         dev_info->dev_name);
233                                 clear_soft_out_cap(
234                                         &test_vector.turbo_dec.op_flags);
235                         }
236
237                         if (!flags_match(test_vector.turbo_dec.op_flags,
238                                         cap->capability_flags))
239                                 return TEST_FAILED;
240                         if (nb_inputs > cap->num_buffers_src) {
241                                 printf("Too many inputs defined: %u, max: %u\n",
242                                         nb_inputs, cap->num_buffers_src);
243                                 return TEST_FAILED;
244                         }
245                         if (nb_soft_outputs > cap->num_buffers_soft_out &&
246                                         (test_vector.turbo_dec.op_flags &
247                                         RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
248                                 printf(
249                                         "Too many soft outputs defined: %u, max: %u\n",
250                                                 nb_soft_outputs,
251                                                 cap->num_buffers_soft_out);
252                                 return TEST_FAILED;
253                         }
254                         if (nb_hard_outputs > cap->num_buffers_hard_out) {
255                                 printf(
256                                         "Too many hard outputs defined: %u, max: %u\n",
257                                                 nb_hard_outputs,
258                                                 cap->num_buffers_hard_out);
259                                 return TEST_FAILED;
260                         }
261                         if (intr_enabled && !(cap->capability_flags &
262                                         RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
263                                 printf(
264                                         "Dequeue interrupts are not supported!\n");
265                                 return TEST_FAILED;
266                         }
267
268                         return TEST_SUCCESS;
269                 } else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
270                         const struct rte_bbdev_op_cap_turbo_enc *cap =
271                                         &op_cap->cap.turbo_enc;
272
273                         if (!flags_match(test_vector.turbo_enc.op_flags,
274                                         cap->capability_flags))
275                                 return TEST_FAILED;
276                         if (nb_inputs > cap->num_buffers_src) {
277                                 printf("Too many inputs defined: %u, max: %u\n",
278                                         nb_inputs, cap->num_buffers_src);
279                                 return TEST_FAILED;
280                         }
281                         if (nb_hard_outputs > cap->num_buffers_dst) {
282                                 printf(
283                                         "Too many hard outputs defined: %u, max: %u\n",
284                                         nb_hard_outputs, cap->num_buffers_dst);
285                                 return TEST_FAILED;
286                         }
287                         if (intr_enabled && !(cap->capability_flags &
288                                         RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
289                                 printf(
290                                         "Dequeue interrupts are not supported!\n");
291                                 return TEST_FAILED;
292                         }
293
294                         return TEST_SUCCESS;
295                 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
296                         const struct rte_bbdev_op_cap_ldpc_enc *cap =
297                                         &op_cap->cap.ldpc_enc;
298
299                         if (!flags_match(test_vector.ldpc_enc.op_flags,
300                                         cap->capability_flags)){
301                                 printf("Flag Mismatch\n");
302                                 return TEST_FAILED;
303                         }
304                         if (nb_inputs > cap->num_buffers_src) {
305                                 printf("Too many inputs defined: %u, max: %u\n",
306                                         nb_inputs, cap->num_buffers_src);
307                                 return TEST_FAILED;
308                         }
309                         if (nb_hard_outputs > cap->num_buffers_dst) {
310                                 printf(
311                                         "Too many hard outputs defined: %u, max: %u\n",
312                                         nb_hard_outputs, cap->num_buffers_dst);
313                                 return TEST_FAILED;
314                         }
315                         if (intr_enabled && !(cap->capability_flags &
316                                         RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
317                                 printf(
318                                         "Dequeue interrupts are not supported!\n");
319                                 return TEST_FAILED;
320                         }
321
322                         return TEST_SUCCESS;
323                 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
324                         const struct rte_bbdev_op_cap_ldpc_dec *cap =
325                                         &op_cap->cap.ldpc_dec;
326
327                         if (!flags_match(test_vector.ldpc_dec.op_flags,
328                                         cap->capability_flags)){
329                                 printf("Flag Mismatch\n");
330                                 return TEST_FAILED;
331                         }
332                         if (nb_inputs > cap->num_buffers_src) {
333                                 printf("Too many inputs defined: %u, max: %u\n",
334                                         nb_inputs, cap->num_buffers_src);
335                                 return TEST_FAILED;
336                         }
337                         if (nb_hard_outputs > cap->num_buffers_hard_out) {
338                                 printf(
339                                         "Too many hard outputs defined: %u, max: %u\n",
340                                         nb_hard_outputs,
341                                         cap->num_buffers_hard_out);
342                                 return TEST_FAILED;
343                         }
344                         if (nb_harq_inputs > cap->num_buffers_hard_out) {
345                                 printf(
346                                         "Too many HARQ inputs defined: %u, max: %u\n",
347                                         nb_harq_inputs,
348                                         cap->num_buffers_hard_out);
349                                 return TEST_FAILED;
350                         }
351                         if (nb_harq_outputs > cap->num_buffers_hard_out) {
352                                 printf(
353                                         "Too many HARQ outputs defined: %u, max: %u\n",
354                                         nb_harq_outputs,
355                                         cap->num_buffers_hard_out);
356                                 return TEST_FAILED;
357                         }
358                         if (intr_enabled && !(cap->capability_flags &
359                                         RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
360                                 printf(
361                                         "Dequeue interrupts are not supported!\n");
362                                 return TEST_FAILED;
363                         }
364                         if (intr_enabled && (test_vector.ldpc_dec.op_flags &
365                                 (RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
366                                 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
367                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
368                                         ))) {
369                                 printf("Skip loop-back with interrupt\n");
370                                 return TEST_FAILED;
371                         }
372                         return TEST_SUCCESS;
373                 }
374         }
375
376         if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
377                 return TEST_SUCCESS; /* Special case for NULL device */
378
379         return TEST_FAILED;
380 }
381
382 /* Calculate an optimal mempool size (2^n - 1) not smaller than val */
383 static unsigned int
384 optimal_mempool_size(unsigned int val)
385 {
386         return rte_align32pow2(val + 1) - 1;
387 }
388
389 /* allocates mbuf mempool for inputs and outputs */
390 static struct rte_mempool *
391 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
392                 int socket_id, unsigned int mbuf_pool_size,
393                 const char *op_type_str)
394 {
395         unsigned int i;
396         uint32_t max_seg_sz = 0;
397         char pool_name[RTE_MEMPOOL_NAMESIZE];
398
399         /* find max input segment size */
400         for (i = 0; i < entries->nb_segments; ++i)
401                 if (entries->segments[i].length > max_seg_sz)
402                         max_seg_sz = entries->segments[i].length;
403
404         snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
405                         dev_id);
406         return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
407                         RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
408                                         + FILLER_HEADROOM,
409                         (unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
410 }
411
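/* Create the bbdev ops mempool and an mbuf pool for each data type present in the test vector */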
412 static int
413 create_mempools(struct active_device *ad, int socket_id,
414                 enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
415 {
416         struct rte_mempool *mp;
417         unsigned int ops_pool_size, mbuf_pool_size = 0;
418         char pool_name[RTE_MEMPOOL_NAMESIZE];
419         const char *op_type_str;
420         enum rte_bbdev_op_type op_type = org_op_type;
421
422         struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
423         struct op_data_entries *hard_out =
424                         &test_vector.entries[DATA_HARD_OUTPUT];
425         struct op_data_entries *soft_out =
426                         &test_vector.entries[DATA_SOFT_OUTPUT];
427         struct op_data_entries *harq_in =
428                         &test_vector.entries[DATA_HARQ_INPUT];
429         struct op_data_entries *harq_out =
430                         &test_vector.entries[DATA_HARQ_OUTPUT];
431
432         /* allocate ops mempool */
433         ops_pool_size = optimal_mempool_size(RTE_MAX(
434                         /* Ops used plus 1 reference op */
435                         RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
436                         /* Minimal cache size plus 1 reference op */
437                         (unsigned int)(1.5 * rte_lcore_count() *
438                                         OPS_CACHE_SIZE + 1)),
439                         OPS_POOL_SIZE_MIN));
440
441         if (org_op_type == RTE_BBDEV_OP_NONE)
442                 op_type = RTE_BBDEV_OP_TURBO_ENC;
443
444         op_type_str = rte_bbdev_op_type_str(op_type);
445         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
446
447         snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
448                         ad->dev_id);
449         mp = rte_bbdev_op_pool_create(pool_name, op_type,
450                         ops_pool_size, OPS_CACHE_SIZE, socket_id);
451         TEST_ASSERT_NOT_NULL(mp,
452                         "ERROR Failed to create %u items ops pool for dev %u on socket %u.",
453                         ops_pool_size,
454                         ad->dev_id,
455                         socket_id);
456         ad->ops_mempool = mp;
457
458         /* Do not create input and output mbufs for the BaseBand Null Device */
459         if (org_op_type == RTE_BBDEV_OP_NONE)
460                 return TEST_SUCCESS;
461
462         /* Inputs */
463         if (in->nb_segments > 0) {
464                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
465                                 in->nb_segments);
466                 mp = create_mbuf_pool(in, ad->dev_id, socket_id,
467                                 mbuf_pool_size, "in");
468                 TEST_ASSERT_NOT_NULL(mp,
469                                 "ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
470                                 mbuf_pool_size,
471                                 ad->dev_id,
472                                 socket_id);
473                 ad->in_mbuf_pool = mp;
474         }
475
476         /* Hard outputs */
477         if (hard_out->nb_segments > 0) {
478                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
479                                 hard_out->nb_segments);
480                 mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
481                                 mbuf_pool_size,
482                                 "hard_out");
483                 TEST_ASSERT_NOT_NULL(mp,
484                                 "ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
485                                 mbuf_pool_size,
486                                 ad->dev_id,
487                                 socket_id);
488                 ad->hard_out_mbuf_pool = mp;
489         }
490
491         /* Soft outputs */
492         if (soft_out->nb_segments > 0) {
493                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
494                                 soft_out->nb_segments);
495                 mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
496                                 mbuf_pool_size,
497                                 "soft_out");
498                 TEST_ASSERT_NOT_NULL(mp,
499                                 "ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
500                                 mbuf_pool_size,
501                                 ad->dev_id,
502                                 socket_id);
503                 ad->soft_out_mbuf_pool = mp;
504         }
505
506         /* HARQ inputs */
507         if (harq_in->nb_segments > 0) {
508                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
509                                 harq_in->nb_segments);
510                 mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
511                                 mbuf_pool_size,
512                                 "harq_in");
513                 TEST_ASSERT_NOT_NULL(mp,
514                                 "ERROR Failed to create %u items harq input pktmbuf pool for dev %u on socket %u.",
515                                 mbuf_pool_size,
516                                 ad->dev_id,
517                                 socket_id);
518                 ad->harq_in_mbuf_pool = mp;
519         }
520
521         /* HARQ outputs */
522         if (harq_out->nb_segments > 0) {
523                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
524                                 harq_out->nb_segments);
525                 mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
526                                 mbuf_pool_size,
527                                 "harq_out");
528                 TEST_ASSERT_NOT_NULL(mp,
529                                 "ERROR Failed to create %u items harq output pktmbuf pool for dev %u on socket %u.",
530                                 mbuf_pool_size,
531                                 ad->dev_id,
532                                 socket_id);
533                 ad->harq_out_mbuf_pool = mp;
534         }
535
536         return TEST_SUCCESS;
537 }
538
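/* Apply device-specific PF configuration when requested, then set up queues and optional interrupts for one bbdev */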
539 static int
540 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
541                 struct test_bbdev_vector *vector)
542 {
543         int ret;
544         unsigned int queue_id;
545         struct rte_bbdev_queue_conf qconf;
546         struct active_device *ad = &active_devs[nb_active_devs];
547         unsigned int nb_queues;
548         enum rte_bbdev_op_type op_type = vector->op_type;
549
550 /* Configure fpga lte fec with PF & VF values
551  * if the '-i' flag is set and an FPGA LTE FEC device is in use
552  */
553 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
554         if ((get_init_device() == true) &&
555                 (!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
556                 struct fpga_lte_fec_conf conf;
557                 unsigned int i;
558
559                 printf("Configure FPGA LTE FEC Driver %s with default values\n",
560                                 info->drv.driver_name);
561
562                 /* clear default configuration before initialization */
563                 memset(&conf, 0, sizeof(struct fpga_lte_fec_conf));
564
565                 /* Set PF mode :
566                  * true if PF is used for data plane
567                  * false for VFs
568                  */
569                 conf.pf_mode_en = true;
570
571                 for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
572                         /* Number of UL queues per VF (fpga supports 8 VFs) */
573                         conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
574                         /* Number of DL queues per VF (fpga supports 8 VFs) */
575                         conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
576                 }
577
578                 /* UL bandwidth. Needed for schedule algorithm */
579                 conf.ul_bandwidth = UL_4G_BANDWIDTH;
580                 /* DL bandwidth */
581                 conf.dl_bandwidth = DL_4G_BANDWIDTH;
582
583                 /* UL & DL load balance factor */
584                 conf.ul_load_balance = UL_4G_LOAD_BALANCE;
585                 conf.dl_load_balance = DL_4G_LOAD_BALANCE;
586
587                 /* FLR timeout value */
588                 conf.flr_time_out = FLR_4G_TIMEOUT;
589
590                 /* setup FPGA PF with configuration information */
591                 ret = fpga_lte_fec_configure(info->dev_name, &conf);
592                 TEST_ASSERT_SUCCESS(ret,
593                                 "Failed to configure 4G FPGA PF for bbdev %s",
594                                 info->dev_name);
595         }
596 #endif
597         nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
598         nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
599
600         /* setup device */
601         ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
602         if (ret < 0) {
603                 printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
604                                 dev_id, nb_queues, info->socket_id, ret);
605                 return TEST_FAILED;
606         }
607
608         /* configure interrupts if needed */
609         if (intr_enabled) {
610                 ret = rte_bbdev_intr_enable(dev_id);
611                 if (ret < 0) {
612                         printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
613                                         ret);
614                         return TEST_FAILED;
615                 }
616         }
617
618         /* setup device queues */
619         qconf.socket = info->socket_id;
620         qconf.queue_size = info->drv.default_queue_conf.queue_size;
621         qconf.priority = 0;
622         qconf.deferred_start = 0;
623         qconf.op_type = op_type;
624
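        /* Try to configure each queue at the current priority; if the device
         * rejects it, bump the priority once and retry before giving up.
         */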
625         for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
626                 ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
627                 if (ret != 0) {
628                         printf(
629                                         "Allocated all queues (id=%u) at prio%u on dev%u\n",
630                                         queue_id, qconf.priority, dev_id);
631                         qconf.priority++;
632                         ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
633                                         &qconf);
634                 }
635                 if (ret != 0) {
636                         printf("All queues on dev %u allocated: %u\n",
637                                         dev_id, queue_id);
638                         break;
639                 }
640                 ad->queue_ids[queue_id] = queue_id;
641         }
642         TEST_ASSERT(queue_id != 0,
643                         "ERROR Failed to configure any queues on dev %u",
644                         dev_id);
645         ad->nb_queues = queue_id;
646
647         set_avail_op(ad, op_type);
648
649         return TEST_SUCCESS;
650 }
651
652 static int
653 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
654                 struct test_bbdev_vector *vector)
655 {
656         int ret;
657
658         active_devs[nb_active_devs].driver_name = info->drv.driver_name;
659         active_devs[nb_active_devs].dev_id = dev_id;
660
661         ret = add_bbdev_dev(dev_id, info, vector);
662         if (ret == TEST_SUCCESS)
663                 ++nb_active_devs;
664         return ret;
665 }
666
667 static uint8_t
668 populate_active_devices(void)
669 {
670         int ret;
671         uint8_t dev_id;
672         uint8_t nb_devs_added = 0;
673         struct rte_bbdev_info info;
674
675         RTE_BBDEV_FOREACH(dev_id) {
676                 rte_bbdev_info_get(dev_id, &info);
677
678                 if (check_dev_cap(&info)) {
679                         printf(
680                                 "Device %d (%s) does not support specified capabilities\n",
681                                         dev_id, info.dev_name);
682                         continue;
683                 }
684
685                 ret = add_active_device(dev_id, &info, &test_vector);
686                 if (ret != 0) {
687                         printf("Adding active bbdev %s skipped\n",
688                                         info.dev_name);
689                         continue;
690                 }
691                 nb_devs_added++;
692         }
693
694         return nb_devs_added;
695 }
696
697 static int
698 read_test_vector(void)
699 {
700         int ret;
701
702         memset(&test_vector, 0, sizeof(test_vector));
703         printf("Test vector file = %s\n", get_vector_filename());
704         ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
705         TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
706                         get_vector_filename());
707
708         return TEST_SUCCESS;
709 }
710
711 static int
712 testsuite_setup(void)
713 {
714         TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
715
716         if (populate_active_devices() == 0) {
717                 printf("No suitable devices found!\n");
718                 return TEST_SKIPPED;
719         }
720
721         return TEST_SUCCESS;
722 }
723
724 static int
725 interrupt_testsuite_setup(void)
726 {
727         TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
728
729         /* Enable interrupts */
730         intr_enabled = true;
731
732         /* Special case for NULL device (RTE_BBDEV_OP_NONE) */
733         if (populate_active_devices() == 0 ||
734                         test_vector.op_type == RTE_BBDEV_OP_NONE) {
735                 intr_enabled = false;
736                 printf("No suitable devices found!\n");
737                 return TEST_SKIPPED;
738         }
739
740         return TEST_SUCCESS;
741 }
742
743 static void
744 testsuite_teardown(void)
745 {
746         uint8_t dev_id;
747
748         /* Unconfigure devices */
749         RTE_BBDEV_FOREACH(dev_id)
750                 rte_bbdev_close(dev_id);
751
752         /* Clear active devices structs. */
753         memset(active_devs, 0, sizeof(active_devs));
754         nb_active_devs = 0;
755 }
756
757 static int
758 ut_setup(void)
759 {
760         uint8_t i, dev_id;
761
762         for (i = 0; i < nb_active_devs; i++) {
763                 dev_id = active_devs[i].dev_id;
764                 /* reset bbdev stats */
765                 TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
766                                 "Failed to reset stats of bbdev %u", dev_id);
767                 /* start the device */
768                 TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
769                                 "Failed to start bbdev %u", dev_id);
770         }
771
772         return TEST_SUCCESS;
773 }
774
775 static void
776 ut_teardown(void)
777 {
778         uint8_t i, dev_id;
779         struct rte_bbdev_stats stats;
780
781         for (i = 0; i < nb_active_devs; i++) {
782                 dev_id = active_devs[i].dev_id;
783                 /* read stats and print */
784                 rte_bbdev_stats_get(dev_id, &stats);
785                 /* Stop the device */
786                 rte_bbdev_stop(dev_id);
787         }
788 }
789
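/* Allocate mbufs and fill rte_bbdev_op_data entries from the reference test
 * vector segments (input data is copied, output buffers are only chained).
 */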
790 static int
791 init_op_data_objs(struct rte_bbdev_op_data *bufs,
792                 struct op_data_entries *ref_entries,
793                 struct rte_mempool *mbuf_pool, const uint16_t n,
794                 enum op_data_type op_type, uint16_t min_alignment)
795 {
796         int ret;
797         unsigned int i, j;
798         bool large_input = false;
799
800         for (i = 0; i < n; ++i) {
801                 char *data;
802                 struct op_data_buf *seg = &ref_entries->segments[0];
803                 struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
804                 TEST_ASSERT_NOT_NULL(m_head,
805                                 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
806                                 op_type, n * ref_entries->nb_segments,
807                                 mbuf_pool->size);
808
809                 if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
810                         /*
811                          * Special case when DPDK mbuf cannot handle
812                          * the required input size
813                          */
814                         printf("Warning: Larger input size than DPDK mbuf %d\n",
815                                         seg->length);
816                         large_input = true;
817                 }
818                 bufs[i].data = m_head;
819                 bufs[i].offset = 0;
820                 bufs[i].length = 0;
821
822                 if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
823                         if ((op_type == DATA_INPUT) && large_input) {
824                                 /* Replace the mbuf data buffer with a larger rte_malloc'd area */
825                                 data = rte_malloc(NULL, seg->length, 0);
826                                 memcpy(data, seg->addr, seg->length);
827                                 m_head->buf_addr = data;
828                                 m_head->buf_iova = rte_malloc_virt2iova(data);
829                                 m_head->data_off = 0;
830                                 m_head->data_len = seg->length;
831                         } else {
832                                 data = rte_pktmbuf_append(m_head, seg->length);
833                                 TEST_ASSERT_NOT_NULL(data,
834                                         "Couldn't append %u bytes to mbuf from %d data type mbuf pool",
835                                         seg->length, op_type);
836
837                                 TEST_ASSERT(data == RTE_PTR_ALIGN(
838                                                 data, min_alignment),
839                                         "Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
840                                         data, min_alignment);
841                                 rte_memcpy(data, seg->addr, seg->length);
842                         }
843
844                         bufs[i].length += seg->length;
845
846                         for (j = 1; j < ref_entries->nb_segments; ++j) {
847                                 struct rte_mbuf *m_tail =
848                                                 rte_pktmbuf_alloc(mbuf_pool);
849                                 TEST_ASSERT_NOT_NULL(m_tail,
850                                                 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
851                                                 op_type,
852                                                 n * ref_entries->nb_segments,
853                                                 mbuf_pool->size);
854                                 seg += 1;
855
856                                 data = rte_pktmbuf_append(m_tail, seg->length);
857                                 TEST_ASSERT_NOT_NULL(data,
858                                                 "Couldn't append %u bytes to mbuf from %d data type mbuf pool",
859                                                 seg->length, op_type);
860
861                                 TEST_ASSERT(data == RTE_PTR_ALIGN(data,
862                                                 min_alignment),
863                                                 "Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
864                                                 data, min_alignment);
865                                 rte_memcpy(data, seg->addr, seg->length);
866                                 bufs[i].length += seg->length;
867
868                                 ret = rte_pktmbuf_chain(m_head, m_tail);
869                                 TEST_ASSERT_SUCCESS(ret,
870                                                 "Couldn't chain mbufs from %d data type mbuf pool",
871                                                 op_type);
872                         }
873                 } else {
874
875                         /* allocate chained-mbuf for output buffer */
876                         for (j = 1; j < ref_entries->nb_segments; ++j) {
877                                 struct rte_mbuf *m_tail =
878                                                 rte_pktmbuf_alloc(mbuf_pool);
879                                 TEST_ASSERT_NOT_NULL(m_tail,
880                                                 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
881                                                 op_type,
882                                                 n * ref_entries->nb_segments,
883                                                 mbuf_pool->size);
884
885                                 ret = rte_pktmbuf_chain(m_head, m_tail);
886                                 TEST_ASSERT_SUCCESS(ret,
887                                                 "Couldn't chain mbufs from %d data type mbuf pool",
888                                                 op_type);
889                         }
890                 }
891         }
892
893         return 0;
894 }
895
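/* Allocate the op_data array on the preferred socket, falling back to other sockets on failure */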
896 static int
897 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
898                 const int socket)
899 {
900         int i;
901
902         *buffers = rte_zmalloc_socket(NULL, len, 0, socket);
903         if (*buffers == NULL) {
904                 printf("WARNING: Failed to allocate op_data on socket %d\n",
905                                 socket);
906                 /* try to allocate memory on other detected sockets */
907                 for (i = 0; i < socket; i++) {
908                         *buffers = rte_zmalloc_socket(NULL, len, 0, i);
909                         if (*buffers != NULL)
910                                 break;
911                 }
912         }
913
914         return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
915 }
916
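/* Rescale turbo decoder input LLRs from the full int8_t range down to the device's max LLR modulus */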
917 static void
918 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
919                 const uint16_t n, const int8_t max_llr_modulus)
920 {
921         uint16_t i, byte_idx;
922
923         for (i = 0; i < n; ++i) {
924                 struct rte_mbuf *m = input_ops[i].data;
925                 while (m != NULL) {
926                         int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
927                                         input_ops[i].offset);
928                         for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
929                                         ++byte_idx)
930                                 llr[byte_idx] = round((double)max_llr_modulus *
931                                                 llr[byte_idx] / INT8_MAX);
932
933                         m = m->next;
934                 }
935         }
936 }
937
938 /*
939  * We may have to insert filler LLRs
940  * when the device keeps filler bits in its internal HARQ memory
941  */
942 static void
943 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
944                 const uint16_t n, struct test_op_params *op_params)
945 {
946         struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
947
948         if (input_ops == NULL)
949                 return;
950         /* No need to add filler if not required by device */
951         if (!(ldpc_cap_flags &
952                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
953                 return;
954         /* No need to add filler for loopback operation */
955         if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
956                 return;
957
958         uint16_t i, j, parity_offset;
959         for (i = 0; i < n; ++i) {
960                 struct rte_mbuf *m = input_ops[i].data;
961                 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
962                                 input_ops[i].offset);
963                 parity_offset = (dec.basegraph == 1 ? 20 : 8)
964                                 * dec.z_c - dec.n_filler;
965                 uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
966                 m->data_len = new_hin_size;
967                 input_ops[i].length = new_hin_size;
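                /* Shift the parity LLRs up by n_filler and insert saturated
                 * filler LLRs at the filler bit positions, matching the
                 * device's internal HARQ memory layout.
                 */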
968                 for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
969                                 j--)
970                         llr[j] = llr[j - dec.n_filler];
971                 uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
972                 for (j = 0; j < dec.n_filler; j++)
973                         llr[parity_offset + j] = llr_max_pre_scaling;
974         }
975 }
976
977 static void
978 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
979                 const uint16_t n, const int8_t llr_size,
980                 const int8_t llr_decimals)
981 {
982         if (input_ops == NULL)
983                 return;
984
985         uint16_t i, byte_idx;
986
987         int16_t llr_max, llr_min, llr_tmp;
988         llr_max = (1 << (llr_size - 1)) - 1;
989         llr_min = -llr_max;
990         for (i = 0; i < n; ++i) {
991                 struct rte_mbuf *m = input_ops[i].data;
992                 while (m != NULL) {
993                         int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
994                                         input_ops[i].offset);
995                         for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
996                                         ++byte_idx) {
997
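                                /* Test vectors are assumed to use 1 fractional
                                 * bit; rescale to the device's llr_decimals
                                 * format and saturate to llr_size bits.
                                 */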
998                                 llr_tmp = llr[byte_idx];
999                                 if (llr_decimals == 4)
1000                                         llr_tmp *= 8;
1001                                 else if (llr_decimals == 2)
1002                                         llr_tmp *= 2;
1003                                 else if (llr_decimals == 0)
1004                                         llr_tmp /= 2;
1005                                 llr_tmp = RTE_MIN(llr_max,
1006                                                 RTE_MAX(llr_min, llr_tmp));
1007                                 llr[byte_idx] = (int8_t) llr_tmp;
1008                         }
1009
1010                         m = m->next;
1011                 }
1012         }
1013 }
1014
1015
1016
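/* Allocate per-queue op_data arrays, populate them from the test vector and
 * apply the LLR pre-processing required by the decoder under test.
 */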
1017 static int
1018 fill_queue_buffers(struct test_op_params *op_params,
1019                 struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1020                 struct rte_mempool *soft_out_mp,
1021                 struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1022                 uint16_t queue_id,
1023                 const struct rte_bbdev_op_cap *capabilities,
1024                 uint16_t min_alignment, const int socket_id)
1025 {
1026         int ret;
1027         enum op_data_type type;
1028         const uint16_t n = op_params->num_to_process;
1029
1030         struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1031                 in_mp,
1032                 soft_out_mp,
1033                 hard_out_mp,
1034                 harq_in_mp,
1035                 harq_out_mp,
1036         };
1037
1038         struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1039                 &op_params->q_bufs[socket_id][queue_id].inputs,
1040                 &op_params->q_bufs[socket_id][queue_id].soft_outputs,
1041                 &op_params->q_bufs[socket_id][queue_id].hard_outputs,
1042                 &op_params->q_bufs[socket_id][queue_id].harq_inputs,
1043                 &op_params->q_bufs[socket_id][queue_id].harq_outputs,
1044         };
1045
1046         for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1047                 struct op_data_entries *ref_entries =
1048                                 &test_vector.entries[type];
1049                 if (ref_entries->nb_segments == 0)
1050                         continue;
1051
1052                 ret = allocate_buffers_on_socket(queue_ops[type],
1053                                 n * sizeof(struct rte_bbdev_op_data),
1054                                 socket_id);
1055                 TEST_ASSERT_SUCCESS(ret,
1056                                 "Couldn't allocate memory for rte_bbdev_op_data structs");
1057
1058                 ret = init_op_data_objs(*queue_ops[type], ref_entries,
1059                                 mbuf_pools[type], n, type, min_alignment);
1060                 TEST_ASSERT_SUCCESS(ret,
1061                                 "Couldn't init rte_bbdev_op_data structs");
1062         }
1063
1064         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1065                 limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1066                         capabilities->cap.turbo_dec.max_llr_modulus);
1067
1068         if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1069                 bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1070                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1071                 bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1072                                 RTE_BBDEV_LDPC_LLR_COMPRESSION;
1073                 bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1074                                 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1075                 ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1076                 ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1077                 ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1078                 if (!loopback && !llr_comp)
1079                         ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1080                                         ldpc_llr_size, ldpc_llr_decimals);
1081                 if (!loopback && !harq_comp)
1082                         ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1083                                         ldpc_llr_size, ldpc_llr_decimals);
1084                 if (!loopback)
1085                         ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1086                                         op_params);
1087         }
1088
1089         return 0;
1090 }
1091
1092 static void
1093 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1094 {
1095         unsigned int i, j;
1096
1097         rte_mempool_free(ad->ops_mempool);
1098         rte_mempool_free(ad->in_mbuf_pool);
1099         rte_mempool_free(ad->hard_out_mbuf_pool);
1100         rte_mempool_free(ad->soft_out_mbuf_pool);
1101         rte_mempool_free(ad->harq_in_mbuf_pool);
1102         rte_mempool_free(ad->harq_out_mbuf_pool);
1103
1104         for (i = 0; i < rte_lcore_count(); ++i) {
1105                 for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1106                         rte_free(op_params->q_bufs[j][i].inputs);
1107                         rte_free(op_params->q_bufs[j][i].hard_outputs);
1108                         rte_free(op_params->q_bufs[j][i].soft_outputs);
1109                         rte_free(op_params->q_bufs[j][i].harq_inputs);
1110                         rte_free(op_params->q_bufs[j][i].harq_outputs);
1111                 }
1112         }
1113 }
1114
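/* Copy the reference turbo decode op fields into every op of the burst and attach per-op input/output buffers */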
1115 static void
1116 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1117                 unsigned int start_idx,
1118                 struct rte_bbdev_op_data *inputs,
1119                 struct rte_bbdev_op_data *hard_outputs,
1120                 struct rte_bbdev_op_data *soft_outputs,
1121                 struct rte_bbdev_dec_op *ref_op)
1122 {
1123         unsigned int i;
1124         struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1125
1126         for (i = 0; i < n; ++i) {
1127                 if (turbo_dec->code_block_mode == 0) {
1128                         ops[i]->turbo_dec.tb_params.ea =
1129                                         turbo_dec->tb_params.ea;
1130                         ops[i]->turbo_dec.tb_params.eb =
1131                                         turbo_dec->tb_params.eb;
1132                         ops[i]->turbo_dec.tb_params.k_pos =
1133                                         turbo_dec->tb_params.k_pos;
1134                         ops[i]->turbo_dec.tb_params.k_neg =
1135                                         turbo_dec->tb_params.k_neg;
1136                         ops[i]->turbo_dec.tb_params.c =
1137                                         turbo_dec->tb_params.c;
1138                         ops[i]->turbo_dec.tb_params.c_neg =
1139                                         turbo_dec->tb_params.c_neg;
1140                         ops[i]->turbo_dec.tb_params.cab =
1141                                         turbo_dec->tb_params.cab;
1142                         ops[i]->turbo_dec.tb_params.r =
1143                                         turbo_dec->tb_params.r;
1144                 } else {
1145                         ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1146                         ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1147                 }
1148
1149                 ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1150                 ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1151                 ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1152                 ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1153                 ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1154                 ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1155                 ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1156
1157                 ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1158                 ops[i]->turbo_dec.input = inputs[start_idx + i];
1159                 if (soft_outputs != NULL)
1160                         ops[i]->turbo_dec.soft_output =
1161                                 soft_outputs[start_idx + i];
1162         }
1163 }
1164
1165 static void
1166 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1167                 unsigned int start_idx,
1168                 struct rte_bbdev_op_data *inputs,
1169                 struct rte_bbdev_op_data *outputs,
1170                 struct rte_bbdev_enc_op *ref_op)
1171 {
1172         unsigned int i;
1173         struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1174         for (i = 0; i < n; ++i) {
1175                 if (turbo_enc->code_block_mode == 0) {
1176                         ops[i]->turbo_enc.tb_params.ea =
1177                                         turbo_enc->tb_params.ea;
1178                         ops[i]->turbo_enc.tb_params.eb =
1179                                         turbo_enc->tb_params.eb;
1180                         ops[i]->turbo_enc.tb_params.k_pos =
1181                                         turbo_enc->tb_params.k_pos;
1182                         ops[i]->turbo_enc.tb_params.k_neg =
1183                                         turbo_enc->tb_params.k_neg;
1184                         ops[i]->turbo_enc.tb_params.c =
1185                                         turbo_enc->tb_params.c;
1186                         ops[i]->turbo_enc.tb_params.c_neg =
1187                                         turbo_enc->tb_params.c_neg;
1188                         ops[i]->turbo_enc.tb_params.cab =
1189                                         turbo_enc->tb_params.cab;
1190                         ops[i]->turbo_enc.tb_params.ncb_pos =
1191                                         turbo_enc->tb_params.ncb_pos;
1192                         ops[i]->turbo_enc.tb_params.ncb_neg =
1193                                         turbo_enc->tb_params.ncb_neg;
1194                         ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1195                 } else {
1196                         ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1197                         ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1198                         ops[i]->turbo_enc.cb_params.ncb =
1199                                         turbo_enc->cb_params.ncb;
1200                 }
1201                 ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1202                 ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1203                 ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1204
1205                 ops[i]->turbo_enc.output = outputs[start_idx + i];
1206                 ops[i]->turbo_enc.input = inputs[start_idx + i];
1207         }
1208 }
1209
1210 static void
1211 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1212                 unsigned int start_idx,
1213                 struct rte_bbdev_op_data *inputs,
1214                 struct rte_bbdev_op_data *hard_outputs,
1215                 struct rte_bbdev_op_data *soft_outputs,
1216                 struct rte_bbdev_op_data *harq_inputs,
1217                 struct rte_bbdev_op_data *harq_outputs,
1218                 struct rte_bbdev_dec_op *ref_op)
1219 {
1220         unsigned int i;
1221         struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1222
1223         for (i = 0; i < n; ++i) {
1224                 if (ldpc_dec->code_block_mode == 0) {
1225                         ops[i]->ldpc_dec.tb_params.ea =
1226                                         ldpc_dec->tb_params.ea;
1227                         ops[i]->ldpc_dec.tb_params.eb =
1228                                         ldpc_dec->tb_params.eb;
1229                         ops[i]->ldpc_dec.tb_params.c =
1230                                         ldpc_dec->tb_params.c;
1231                         ops[i]->ldpc_dec.tb_params.cab =
1232                                         ldpc_dec->tb_params.cab;
1233                         ops[i]->ldpc_dec.tb_params.r =
1234                                         ldpc_dec->tb_params.r;
1235                 } else {
1236                         ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1237                 }
1238
1239                 ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1240                 ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1241                 ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1242                 ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1243                 ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1244                 ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1245                 ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1246                 ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1247                 ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1248
1249                 if (hard_outputs != NULL)
1250                         ops[i]->ldpc_dec.hard_output =
1251                                         hard_outputs[start_idx + i];
1252                 if (inputs != NULL)
1253                         ops[i]->ldpc_dec.input =
1254                                         inputs[start_idx + i];
1255                 if (soft_outputs != NULL)
1256                         ops[i]->ldpc_dec.soft_output =
1257                                         soft_outputs[start_idx + i];
1258                 if (harq_inputs != NULL)
1259                         ops[i]->ldpc_dec.harq_combined_input =
1260                                         harq_inputs[start_idx + i];
1261                 if (harq_outputs != NULL)
1262                         ops[i]->ldpc_dec.harq_combined_output =
1263                                         harq_outputs[start_idx + i];
1264         }
1265 }
1266
1267
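/*
 * Copy the reference LDPC encode op into each op of the burst and attach
 * the per-op input and output buffers.
 */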
1268 static void
1269 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1270                 unsigned int start_idx,
1271                 struct rte_bbdev_op_data *inputs,
1272                 struct rte_bbdev_op_data *outputs,
1273                 struct rte_bbdev_enc_op *ref_op)
1274 {
1275         unsigned int i;
1276         struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1277         for (i = 0; i < n; ++i) {
1278                 if (ldpc_enc->code_block_mode == 0) {
1279                         ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1280                         ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1281                         ops[i]->ldpc_enc.tb_params.cab =
1282                                         ldpc_enc->tb_params.cab;
1283                         ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1284                         ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1285                 } else {
1286                         ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1287                 }
1288                 ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1289                 ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1290                 ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1291                 ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1292                 ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1293                 ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1294                 ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1295                 ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1296                 ops[i]->ldpc_enc.output = outputs[start_idx + i];
1297                 ops[i]->ldpc_enc.input = inputs[start_idx + i];
1298         }
1299 }
1300
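/*
 * Check a dequeued decode op against the expected status and verify that
 * opaque_data still holds the enqueue-time ordering index. Syndrome check
 * differences are tolerated when the maximum iteration count is large.
 */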
1301 static int
1302 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1303                 unsigned int order_idx, const int expected_status)
1304 {
1305         int status = op->status;
1306         /* ignore parity mismatch false alarms for long iterations */
1307         if (get_iter_max() >= 10) {
1308                 if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1309                                 (status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1310                         printf("WARNING: Ignore Syndrome Check mismatch\n");
1311                         status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1312                 }
1313                 if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1314                                 !(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1315                         printf("WARNING: Ignore Syndrome Check mismatch\n");
1316                         status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1317                 }
1318         }
1319
1320         TEST_ASSERT(status == expected_status,
1321                         "op_status (%d) != expected_status (%d)",
1322                         op->status, expected_status);
1323
1324         TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1325                         "Ordering error, expected %p, got %p",
1326                         (void *)(uintptr_t)order_idx, op->opaque_data);
1327
1328         return TEST_SUCCESS;
1329 }
1330
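/*
 * Check a dequeued encode op against the expected status and, unless marked
 * as INVALID_OPAQUE, the enqueue-time ordering index.
 */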
1331 static int
1332 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1333                 unsigned int order_idx, const int expected_status)
1334 {
1335         TEST_ASSERT(op->status == expected_status,
1336                         "op_status (%d) != expected_status (%d)",
1337                         op->status, expected_status);
1338
1339         if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
1340                 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1341                                 "Ordering error, expected %p, got %p",
1342                                 (void *)(uintptr_t)order_idx, op->opaque_data);
1343
1344         return TEST_SUCCESS;
1345 }
1346
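/*
 * Compare a dequeued mbuf chain against the reference data: number of
 * segments, per-segment length and content, and total packet length.
 */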
1347 static inline int
1348 validate_op_chain(struct rte_bbdev_op_data *op,
1349                 struct op_data_entries *orig_op)
1350 {
1351         uint8_t i;
1352         struct rte_mbuf *m = op->data;
1353         uint8_t nb_dst_segments = orig_op->nb_segments;
1354         uint32_t total_data_size = 0;
1355
1356         TEST_ASSERT(nb_dst_segments == m->nb_segs,
1357                         "Number of segments differs in original (%u) and filled (%u) op",
1358                         nb_dst_segments, m->nb_segs);
1359
1360         /* Validate each mbuf segment length */
1361         for (i = 0; i < nb_dst_segments; ++i) {
1362                 /* Apply offset to the first mbuf segment */
1363                 uint16_t offset = (i == 0) ? op->offset : 0;
1364                 uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1365                 total_data_size += orig_op->segments[i].length;
1366
1367                 TEST_ASSERT(orig_op->segments[i].length == data_len,
1368                                 "Length of segment differs in original (%u) and filled (%u) op",
1369                                 orig_op->segments[i].length, data_len);
1370                 TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1371                                 rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1372                                 data_len,
1373                                 "Output buffers (CB=%u) are not equal", i);
1374                 m = m->next;
1375         }
1376
1377         /* Validate total mbuf pkt length */
1378         uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1379         TEST_ASSERT(total_data_size == pkt_len,
1380                         "Length of data differs in original (%u) and filled (%u) op",
1381                         total_data_size, pkt_len);
1382
1383         return TEST_SUCCESS;
1384 }
1385
1386 /*
1387  * Compute K0 for a given configuration for HARQ output length computation
1388  * As per definition in 3GPP 38.212 Table 5.4.2.1-2
1389  */
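/* e.g. for BG1, Zc = 224, full buffer (Ncb = 66 * Zc) and rv_index = 2:
 * k0 = 33 * 224 = 7392
 */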
1390 static inline uint16_t
1391 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
1392 {
1393         if (rv_index == 0)
1394                 return 0;
1395         uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
1396         if (n_cb == n) {
1397                 if (rv_index == 1)
1398                         return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
1399                 else if (rv_index == 2)
1400                         return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
1401                 else
1402                         return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
1403         }
1404         /* LBRM case - includes a division by N */
1405         if (rv_index == 1)
1406                 return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
1407                                 / n) * z_c;
1408         else if (rv_index == 2)
1409                 return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
1410                                 / n) * z_c;
1411         else
1412                 return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
1413                                 / n) * z_c;
1414 }
1415
1416 /* HARQ output length including the Filler bits */
1417 static inline uint16_t
1418 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
1419 {
1420         uint16_t k0 = 0;
1421         uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
1422         k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
1423         /* Compute RM out size and number of rows */
1424         uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1425                         * ops_ld->z_c - ops_ld->n_filler;
1426         uint16_t deRmOutSize = RTE_MIN(
1427                         k0 + ops_ld->cb_params.e +
1428                         ((k0 > parity_offset) ?
1429                                         0 : ops_ld->n_filler),
1430                                         ops_ld->n_cb);
1431         uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
1432                         / ops_ld->z_c);
1433         uint16_t harq_output_len = numRows * ops_ld->z_c;
1434         return harq_output_len;
1435 }
1436
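/*
 * Validate the HARQ combined output against the reference HARQ data,
 * tolerating small LLR quantization and scaling differences and handling
 * the filler-bit region separately.
 */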
1437 static inline int
1438 validate_op_harq_chain(struct rte_bbdev_op_data *op,
1439                 struct op_data_entries *orig_op,
1440                 struct rte_bbdev_op_ldpc_dec *ops_ld)
1441 {
1442         uint8_t i;
1443         uint32_t j, jj, k;
1444         struct rte_mbuf *m = op->data;
1445         uint8_t nb_dst_segments = orig_op->nb_segments;
1446         uint32_t total_data_size = 0;
1447         int8_t *harq_orig, *harq_out, abs_harq_origin;
1448         uint32_t byte_error = 0, cum_error = 0, error;
1449         int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
1450         int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1451         uint16_t parity_offset;
1452
1453         TEST_ASSERT(nb_dst_segments == m->nb_segs,
1454                         "Number of segments differs in original (%u) and filled (%u) op",
1455                         nb_dst_segments, m->nb_segs);
1456
1457         /* Validate each mbuf segment length */
1458         for (i = 0; i < nb_dst_segments; ++i) {
1459                 /* Apply offset to the first mbuf segment */
1460                 uint16_t offset = (i == 0) ? op->offset : 0;
1461                 uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1462                 total_data_size += orig_op->segments[i].length;
1463
1464                 TEST_ASSERT(orig_op->segments[i].length <
1465                                 (uint32_t)(data_len + 64),
1466                                 "Length of segment differs in original (%u) and filled (%u) op",
1467                                 orig_op->segments[i].length, data_len);
1468                 harq_orig = (int8_t *) orig_op->segments[i].addr;
1469                 harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
1470
1471                 if (!(ldpc_cap_flags &
1472                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
1473                                 ) || (ops_ld->op_flags &
1474                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1475                         data_len -= ops_ld->z_c;
1476                         parity_offset = data_len;
1477                 } else {
1478                         /* Compute RM out size and number of rows */
1479                         parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1480                                         * ops_ld->z_c - ops_ld->n_filler;
1481                         uint16_t deRmOutSize = compute_harq_len(ops_ld) -
1482                                         ops_ld->n_filler;
1483                         if (data_len > deRmOutSize)
1484                                 data_len = deRmOutSize;
1485                         if (data_len > orig_op->segments[i].length)
1486                                 data_len = orig_op->segments[i].length;
1487                 }
1488                 /*
1489                  * HARQ output can have minor differences
1490                  * due to integer representation and related scaling
1491                  */
1492                 for (j = 0, jj = 0; j < data_len; j++, jj++) {
1493                         if (j == parity_offset) {
1494                                 /* Special Handling of the filler bits */
1495                                 for (k = 0; k < ops_ld->n_filler; k++) {
1496                                         if (harq_out[jj] !=
1497                                                         llr_max_pre_scaling) {
1498                                                 printf("HARQ Filler issue %d: %d %d\n",
1499                                                         jj, harq_out[jj],
1500                                                         llr_max_pre_scaling);
1501                                                 byte_error++;
1502                                         }
1503                                         jj++;
1504                                 }
1505                         }
1506                         if (!(ops_ld->op_flags &
1507                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1508                                 if (ldpc_llr_decimals > 1)
1509                                         harq_out[jj] = (harq_out[jj] + 1)
1510                                                 >> (ldpc_llr_decimals - 1);
1511                                 /* Saturated to S7 */
1512                                 if (harq_orig[j] > llr_max)
1513                                         harq_orig[j] = llr_max;
1514                                 if (harq_orig[j] < -llr_max)
1515                                         harq_orig[j] = -llr_max;
1516                         }
1517                         if (harq_orig[j] != harq_out[jj]) {
1518                                 error = (harq_orig[j] > harq_out[jj]) ?
1519                                                 harq_orig[j] - harq_out[jj] :
1520                                                 harq_out[jj] - harq_orig[j];
1521                                 abs_harq_origin = harq_orig[j] > 0 ?
1522                                                         harq_orig[j] :
1523                                                         -harq_orig[j];
1524                                 /* Residual quantization error */
1525                                 if ((error > 8 && (abs_harq_origin <
1526                                                 (llr_max - 16))) ||
1527                                                 (error > 16)) {
1528                                         printf("HARQ mismatch %d: exp %d act %d => %d\n",
1529                                                         j, harq_orig[j],
1530                                                         harq_out[jj], error);
1531                                         byte_error++;
1532                                         cum_error += error;
1533                                 }
1534                         }
1535                 }
1536                 m = m->next;
1537         }
1538
1539         if (byte_error)
1540                 TEST_ASSERT(byte_error <= 1,
1541                                 "HARQ output mismatch (%d) %d",
1542                                 byte_error, cum_error);
1543
1544         /* Validate total mbuf pkt length */
1545         uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1546         TEST_ASSERT(total_data_size < pkt_len + 64,
1547                         "Length of data differs in original (%u) and filled (%u) op",
1548                         total_data_size, pkt_len);
1549
1550         return TEST_SUCCESS;
1551 }
1552
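/*
 * Validate a burst of turbo decode ops: status, ordering, iteration count
 * (when requested by the vector mask) and hard/soft output buffers.
 */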
1553 static int
1554 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1555                 struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1556 {
1557         unsigned int i;
1558         int ret;
1559         struct op_data_entries *hard_data_orig =
1560                         &test_vector.entries[DATA_HARD_OUTPUT];
1561         struct op_data_entries *soft_data_orig =
1562                         &test_vector.entries[DATA_SOFT_OUTPUT];
1563         struct rte_bbdev_op_turbo_dec *ops_td;
1564         struct rte_bbdev_op_data *hard_output;
1565         struct rte_bbdev_op_data *soft_output;
1566         struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
1567
1568         for (i = 0; i < n; ++i) {
1569                 ops_td = &ops[i]->turbo_dec;
1570                 hard_output = &ops_td->hard_output;
1571                 soft_output = &ops_td->soft_output;
1572
1573                 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1574                         TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1575                                         "Returned iter_count (%d) > expected iter_count (%d)",
1576                                         ops_td->iter_count, ref_td->iter_count);
1577                 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1578                 TEST_ASSERT_SUCCESS(ret,
1579                                 "Checking status and ordering for decoder failed");
1580
1581                 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1582                                 hard_data_orig),
1583                                 "Hard output buffers (CB=%u) are not equal",
1584                                 i);
1585
1586                 if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
1587                         TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1588                                         soft_data_orig),
1589                                         "Soft output buffers (CB=%u) are not equal",
1590                                         i);
1591         }
1592
1593         return TEST_SUCCESS;
1594 }
1595
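/*
 * Validate a burst of LDPC decode ops: status, ordering, iteration count and
 * the hard/soft/HARQ outputs enabled by the op flags. Hard output data is
 * not checked when decoding did not converge or in HARQ loop-back mode.
 */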
1596 static int
1597 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
1598                 struct rte_bbdev_dec_op *ref_op, const int vector_mask)
1599 {
1600         unsigned int i;
1601         int ret;
1602         struct op_data_entries *hard_data_orig =
1603                         &test_vector.entries[DATA_HARD_OUTPUT];
1604         struct op_data_entries *soft_data_orig =
1605                         &test_vector.entries[DATA_SOFT_OUTPUT];
1606         struct op_data_entries *harq_data_orig =
1607                                 &test_vector.entries[DATA_HARQ_OUTPUT];
1608         struct rte_bbdev_op_ldpc_dec *ops_td;
1609         struct rte_bbdev_op_data *hard_output;
1610         struct rte_bbdev_op_data *harq_output;
1611         struct rte_bbdev_op_data *soft_output;
1612         struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
1613
1614         for (i = 0; i < n; ++i) {
1615                 ops_td = &ops[i]->ldpc_dec;
1616                 hard_output = &ops_td->hard_output;
1617                 harq_output = &ops_td->harq_combined_output;
1618                 soft_output = &ops_td->soft_output;
1619
1620                 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
1621                 TEST_ASSERT_SUCCESS(ret,
1622                                 "Checking status and ordering for decoder failed");
1623                 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
1624                         TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
1625                                         "Returned iter_count (%d) > expected iter_count (%d)",
1626                                         ops_td->iter_count, ref_td->iter_count);
1627                 /*
1628                  * We can ignore output data when the decoding failed to
1629                  * converge or for loop-back cases
1630                  */
1631                 if (!check_bit(ops[i]->ldpc_dec.op_flags,
1632                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
1633                                 ) && (
1634                                 ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
1635                                                 )) == 0)
1636                         TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
1637                                         hard_data_orig),
1638                                         "Hard output buffers (CB=%u) are not equal",
1639                                         i);
1640
1641                 if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
1642                         TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
1643                                         soft_data_orig),
1644                                         "Soft output buffers (CB=%u) are not equal",
1645                                         i);
1646                 if (ref_op->ldpc_dec.op_flags &
1647                                 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
1648                         TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
1649                                         harq_data_orig, ops_td),
1650                                         "HARQ output buffers (CB=%u) are not equal",
1651                                         i);
1652                 }
1653                 if (ref_op->ldpc_dec.op_flags &
1654                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1655                         TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
1656                                         harq_data_orig, ops_td),
1657                                         "HARQ output buffers (CB=%u) are not equal",
1658                                         i);
1659
1660         }
1661
1662         return TEST_SUCCESS;
1663 }
1664
1665
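/* Validate a burst of turbo encode ops: status, ordering and output buffers. */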
1666 static int
1667 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
1668                 struct rte_bbdev_enc_op *ref_op)
1669 {
1670         unsigned int i;
1671         int ret;
1672         struct op_data_entries *hard_data_orig =
1673                         &test_vector.entries[DATA_HARD_OUTPUT];
1674
1675         for (i = 0; i < n; ++i) {
1676                 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
1677                 TEST_ASSERT_SUCCESS(ret,
1678                                 "Checking status and ordering for encoder failed");
1679                 TEST_ASSERT_SUCCESS(validate_op_chain(
1680                                 &ops[i]->turbo_enc.output,
1681                                 hard_data_orig),
1682                                 "Output buffers (CB=%u) are not equal",
1683                                 i);
1684         }
1685
1686         return TEST_SUCCESS;
1687 }
1688
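/* Validate a burst of LDPC encode ops: status, ordering and output buffers. */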
1689 static int
1690 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
1691                 struct rte_bbdev_enc_op *ref_op)
1692 {
1693         unsigned int i;
1694         int ret;
1695         struct op_data_entries *hard_data_orig =
1696                         &test_vector.entries[DATA_HARD_OUTPUT];
1697
1698         for (i = 0; i < n; ++i) {
1699                 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
1700                 TEST_ASSERT_SUCCESS(ret,
1701                                 "Checking status and ordering for encoder failed");
1702                 TEST_ASSERT_SUCCESS(validate_op_chain(
1703                                 &ops[i]->ldpc_enc.output,
1704                                 hard_data_orig),
1705                                 "Output buffers (CB=%u) are not equal",
1706                                 i);
1707         }
1708
1709         return TEST_SUCCESS;
1710 }
1711
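/*
 * Build the reference ops from the test vector. The reference input length
 * is the sum of all input segment lengths (plus the HARQ input length for
 * LDPC decode when HARQ combined input is enabled).
 */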
1712 static void
1713 create_reference_dec_op(struct rte_bbdev_dec_op *op)
1714 {
1715         unsigned int i;
1716         struct op_data_entries *entry;
1717
1718         op->turbo_dec = test_vector.turbo_dec;
1719         entry = &test_vector.entries[DATA_INPUT];
1720         for (i = 0; i < entry->nb_segments; ++i)
1721                 op->turbo_dec.input.length +=
1722                                 entry->segments[i].length;
1723 }
1724
1725 static void
1726 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
1727 {
1728         unsigned int i;
1729         struct op_data_entries *entry;
1730
1731         op->ldpc_dec = test_vector.ldpc_dec;
1732         entry = &test_vector.entries[DATA_INPUT];
1733         for (i = 0; i < entry->nb_segments; ++i)
1734                 op->ldpc_dec.input.length +=
1735                                 entry->segments[i].length;
1736         if (test_vector.ldpc_dec.op_flags &
1737                         RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
1738                 entry = &test_vector.entries[DATA_HARQ_INPUT];
1739                 for (i = 0; i < entry->nb_segments; ++i)
1740                         op->ldpc_dec.harq_combined_input.length +=
1741                                 entry->segments[i].length;
1742         }
1743 }
1744
1745
1746 static void
1747 create_reference_enc_op(struct rte_bbdev_enc_op *op)
1748 {
1749         unsigned int i;
1750         struct op_data_entries *entry;
1751
1752         op->turbo_enc = test_vector.turbo_enc;
1753         entry = &test_vector.entries[DATA_INPUT];
1754         for (i = 0; i < entry->nb_segments; ++i)
1755                 op->turbo_enc.input.length +=
1756                                 entry->segments[i].length;
1757 }
1758
1759 static void
1760 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
1761 {
1762         unsigned int i;
1763         struct op_data_entries *entry;
1764
1765         op->ldpc_enc = test_vector.ldpc_enc;
1766         entry = &test_vector.entries[DATA_INPUT];
1767         for (i = 0; i < entry->nb_segments; ++i)
1768                 op->ldpc_enc.input.length +=
1769                                 entry->segments[i].length;
1770 }
1771
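/*
 * Transport block sizes (in bits) below are used to convert the measured
 * op rate into Mbps. In TB mode the size is accumulated over the code
 * blocks covered by the op.
 */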
1772 static uint32_t
1773 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
1774 {
1775         uint8_t i;
1776         uint32_t c, r, tb_size = 0;
1777
1778         if (op->turbo_dec.code_block_mode) {
1779                 tb_size = op->turbo_dec.tb_params.k_neg;
1780         } else {
1781                 c = op->turbo_dec.tb_params.c;
1782                 r = op->turbo_dec.tb_params.r;
1783                 for (i = 0; i < c-r; i++)
1784                         tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
1785                                 op->turbo_dec.tb_params.k_neg :
1786                                 op->turbo_dec.tb_params.k_pos;
1787         }
1788         return tb_size;
1789 }
1790
1791 static uint32_t
1792 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
1793 {
1794         uint8_t i;
1795         uint32_t c, r, tb_size = 0;
1796         uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
1797
1798         if (op->ldpc_dec.code_block_mode) {
1799                 tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
1800         } else {
1801                 c = op->ldpc_dec.tb_params.c;
1802                 r = op->ldpc_dec.tb_params.r;
1803                 for (i = 0; i < c-r; i++)
1804                         tb_size += sys_cols * op->ldpc_dec.z_c
1805                                         - op->ldpc_dec.n_filler;
1806         }
1807         return tb_size;
1808 }
1809
1810 static uint32_t
1811 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
1812 {
1813         uint8_t i;
1814         uint32_t c, r, tb_size = 0;
1815
1816         if (op->turbo_enc.code_block_mode) {
1817                 tb_size = op->turbo_enc.tb_params.k_neg;
1818         } else {
1819                 c = op->turbo_enc.tb_params.c;
1820                 r = op->turbo_enc.tb_params.r;
1821                 for (i = 0; i < c-r; i++)
1822                         tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
1823                                 op->turbo_enc.tb_params.k_neg :
1824                                 op->turbo_enc.tb_params.k_pos;
1825         }
1826         return tb_size;
1827 }
1828
1829 static uint32_t
1830 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
1831 {
1832         uint8_t i;
1833         uint32_t c, r, tb_size = 0;
1834         uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
1835
1836         if (op->ldpc_enc.code_block_mode) {
1837                 tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
1838         } else {
1839                 c = op->ldpc_enc.tb_params.c;
1840                 r = op->ldpc_enc.tb_params.r;
1841                 for (i = 0; i < c-r; i++)
1842                         tb_size += sys_cols * op->ldpc_enc.z_c
1843                                         - op->ldpc_enc.n_filler;
1844         }
1845         return tb_size;
1846 }
1847
1848
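/*
 * Allocate the reference op and fill in the common test parameters:
 * mempool, burst size, number of ops, number of lcores, vector mask and
 * expected op status.
 */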
1849 static int
1850 init_test_op_params(struct test_op_params *op_params,
1851                 enum rte_bbdev_op_type op_type, const int expected_status,
1852                 const int vector_mask, struct rte_mempool *ops_mp,
1853                 uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
1854 {
1855         int ret = 0;
1856         if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
1857                         op_type == RTE_BBDEV_OP_LDPC_DEC)
1858                 ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
1859                                 &op_params->ref_dec_op, 1);
1860         else
1861                 ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
1862                                 &op_params->ref_enc_op, 1);
1863
1864         TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
1865
1866         op_params->mp = ops_mp;
1867         op_params->burst_sz = burst_sz;
1868         op_params->num_to_process = num_to_process;
1869         op_params->num_lcores = num_lcores;
1870         op_params->vector_mask = vector_mask;
1871         if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
1872                         op_type == RTE_BBDEV_OP_LDPC_DEC)
1873                 op_params->ref_dec_op->status = expected_status;
1874         else if (op_type == RTE_BBDEV_OP_TURBO_ENC
1875                         || op_type == RTE_BBDEV_OP_LDPC_ENC)
1876                 op_params->ref_enc_op->status = expected_status;
1877         return 0;
1878 }
1879
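/*
 * Prepare mempools, op parameters and queue buffers for one device, run the
 * given test case function on it and free the buffers afterwards. Devices
 * that do not support the vector's op type are skipped.
 */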
1880 static int
1881 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
1882                 struct test_op_params *op_params)
1883 {
1884         int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
1885         unsigned int i;
1886         struct active_device *ad;
1887         unsigned int burst_sz = get_burst_sz();
1888         enum rte_bbdev_op_type op_type = test_vector.op_type;
1889         const struct rte_bbdev_op_cap *capabilities = NULL;
1890
1891         ad = &active_devs[dev_id];
1892
1893         /* Check if device supports op_type */
1894         if (!is_avail_op(ad, test_vector.op_type))
1895                 return TEST_SUCCESS;
1896
1897         struct rte_bbdev_info info;
1898         rte_bbdev_info_get(ad->dev_id, &info);
1899         socket_id = GET_SOCKET(info.socket_id);
1900
1901         f_ret = create_mempools(ad, socket_id, op_type,
1902                         get_num_ops());
1903         if (f_ret != TEST_SUCCESS) {
1904                 printf("Couldn't create mempools");
1905                 goto fail;
1906         }
1907         if (op_type == RTE_BBDEV_OP_NONE)
1908                 op_type = RTE_BBDEV_OP_TURBO_ENC;
1909
1910         f_ret = init_test_op_params(op_params, test_vector.op_type,
1911                         test_vector.expected_status,
1912                         test_vector.mask,
1913                         ad->ops_mempool,
1914                         burst_sz,
1915                         get_num_ops(),
1916                         get_num_lcores());
1917         if (f_ret != TEST_SUCCESS) {
1918                 printf("Couldn't init test op params");
1919                 goto fail;
1920         }
1921
1922
1923         /* Find capabilities */
1924         const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
1925         for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
1926                 if (cap->type == test_vector.op_type) {
1927                         capabilities = cap;
1928                         break;
1929                 }
1930                 cap++;
1931         }
1932         TEST_ASSERT_NOT_NULL(capabilities,
1933                         "Couldn't find capabilities");
1934
1935         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1936                 create_reference_dec_op(op_params->ref_dec_op);
1937         } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
1938                 create_reference_enc_op(op_params->ref_enc_op);
1939         else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
1940                 create_reference_ldpc_enc_op(op_params->ref_enc_op);
1941         else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
1942                 create_reference_ldpc_dec_op(op_params->ref_dec_op);
1943
1944         for (i = 0; i < ad->nb_queues; ++i) {
1945                 f_ret = fill_queue_buffers(op_params,
1946                                 ad->in_mbuf_pool,
1947                                 ad->hard_out_mbuf_pool,
1948                                 ad->soft_out_mbuf_pool,
1949                                 ad->harq_in_mbuf_pool,
1950                                 ad->harq_out_mbuf_pool,
1951                                 ad->queue_ids[i],
1952                                 capabilities,
1953                                 info.drv.min_alignment,
1954                                 socket_id);
1955                 if (f_ret != TEST_SUCCESS) {
1956                         printf("Couldn't init queue buffers");
1957                         goto fail;
1958                 }
1959         }
1960
1961         /* Run test case function */
1962         t_ret = test_case_func(ad, op_params);
1963
1964         /* Free active device resources and return */
1965         free_buffers(ad, op_params);
1966         return t_ret;
1967
1968 fail:
1969         free_buffers(ad, op_params);
1970         return TEST_FAILED;
1971 }
1972
1973 /* Run given test function per active device per supported op type
1974  * per burst size.
1975  */
1976 static int
1977 run_test_case(test_case_function *test_case_func)
1978 {
1979         int ret = 0;
1980         uint8_t dev;
1981
1982         /* Alloc op_params */
1983         struct test_op_params *op_params = rte_zmalloc(NULL,
1984                         sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
1985         TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
1986                         RTE_ALIGN(sizeof(struct test_op_params),
1987                                 RTE_CACHE_LINE_SIZE));
1988
1989         /* For each device run test case function */
1990         for (dev = 0; dev < nb_active_devs; ++dev)
1991                 ret |= run_test_case_on_device(test_case_func, dev, op_params);
1992
1993         rte_free(op_params);
1994
1995         return ret;
1996 }
1997
1998
1999 /* Push back the HARQ output from DDR to host */
2000 static void
2001 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2002                 struct rte_bbdev_dec_op **ops,
2003                 const uint16_t n)
2004 {
2005         uint16_t j;
2006         int save_status, ret;
2007         uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
2008         struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2009         uint32_t flags = ops[0]->ldpc_dec.op_flags;
2010         bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2011         bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2012         bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2013         bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2014         for (j = 0; j < n; ++j) {
2015                 if ((loopback && mem_out) || hc_out) {
2016                         save_status = ops[j]->status;
2017                         ops[j]->ldpc_dec.op_flags =
2018                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2019                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2020                         if (h_comp)
2021                                 ops[j]->ldpc_dec.op_flags +=
2022                                         RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2023                         ops[j]->ldpc_dec.harq_combined_input.offset =
2024                                         harq_offset;
2025                         ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2026                         harq_offset += HARQ_INCR;
2027                         if (!loopback)
2028                                 ops[j]->ldpc_dec.harq_combined_input.length =
2029                                 ops[j]->ldpc_dec.harq_combined_output.length;
2030                         rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2031                                         &ops[j], 1);
2032                         ret = 0;
2033                         while (ret == 0)
2034                                 ret = rte_bbdev_dequeue_ldpc_dec_ops(
2035                                                 dev_id, queue_id,
2036                                                 &ops_deq[j], 1);
2037                         ops[j]->ldpc_dec.op_flags = flags;
2038                         ops[j]->status = save_status;
2039                 }
2040         }
2041 }
2042
2043 /*
2044  * Preload the HARQ memory input into HW external DDR when requested
2045  * and adjust the HARQ combined input/output offsets for each op
2046  */
2047 static void
2048 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2049                 struct rte_bbdev_dec_op **ops, const uint16_t n,
2050                 bool preload)
2051 {
2052         uint16_t j;
2053         int ret;
2054         uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
2055         struct rte_bbdev_op_data save_hc_in, save_hc_out;
2056         struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2057         uint32_t flags = ops[0]->ldpc_dec.op_flags;
2058         bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2059         bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2060         bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2061         bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2062         bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2063         for (j = 0; j < n; ++j) {
2064                 if ((mem_in || hc_in) && preload) {
2065                         save_hc_in = ops[j]->ldpc_dec.harq_combined_input;
2066                         save_hc_out = ops[j]->ldpc_dec.harq_combined_output;
2067                         ops[j]->ldpc_dec.op_flags =
2068                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
2069                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2070                         if (h_comp)
2071                                 ops[j]->ldpc_dec.op_flags +=
2072                                         RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2073                         ops[j]->ldpc_dec.harq_combined_output.offset =
2074                                         harq_offset;
2075                         ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2076                         rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2077                                         &ops[j], 1);
2078                         ret = 0;
2079                         while (ret == 0)
2080                                 ret = rte_bbdev_dequeue_ldpc_dec_ops(
2081                                         dev_id, queue_id, &ops_deq[j], 1);
2082                         ops[j]->ldpc_dec.op_flags = flags;
2083                         ops[j]->ldpc_dec.harq_combined_input = save_hc_in;
2084                         ops[j]->ldpc_dec.harq_combined_output = save_hc_out;
2085                 }
2086                 /* Adjust HARQ offset when we reach external DDR */
2087                 if (mem_in || hc_in)
2088                         ops[j]->ldpc_dec.harq_combined_input.offset
2089                                 = harq_offset;
2090                 if (mem_out || hc_out)
2091                         ops[j]->ldpc_dec.harq_combined_output.offset
2092                                 = harq_offset;
2093                 harq_offset += HARQ_INCR;
2094         }
2095 }
2096
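/*
 * Interrupt-mode dequeue callback: drain the completed ops for the matching
 * queue, and once all ops are dequeued validate them and accumulate the
 * per-thread throughput figures.
 */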
2097 static void
2098 dequeue_event_callback(uint16_t dev_id,
2099                 enum rte_bbdev_event_type event, void *cb_arg,
2100                 void *ret_param)
2101 {
2102         int ret;
2103         uint16_t i;
2104         uint64_t total_time;
2105         uint16_t deq, burst_sz, num_ops;
2106         uint16_t queue_id = *(uint16_t *) ret_param;
2107         struct rte_bbdev_info info;
2108         double tb_len_bits;
2109         struct thread_params *tp = cb_arg;
2110
2111         /* Find matching thread params using queue_id */
2112         for (i = 0; i < MAX_QUEUES; ++i, ++tp)
2113                 if (tp->queue_id == queue_id)
2114                         break;
2115
2116         if (i == MAX_QUEUES) {
2117                 printf("%s: Queue_id from interrupt details was not found!\n",
2118                                 __func__);
2119                 return;
2120         }
2121
2122         if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
2123                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2124                 printf(
2125                         "Dequeue interrupt handler called for incorrect event!\n");
2126                 return;
2127         }
2128
2129         burst_sz = rte_atomic16_read(&tp->burst_sz);
2130         num_ops = tp->op_params->num_to_process;
2131
2132         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2133                 deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2134                                 &tp->dec_ops[
2135                                         rte_atomic16_read(&tp->nb_dequeued)],
2136                                 burst_sz);
2137         else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2138                 deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2139                                 &tp->dec_ops[
2140                                         rte_atomic16_read(&tp->nb_dequeued)],
2141                                 burst_sz);
2142         else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2143                 deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2144                                 &tp->enc_ops[
2145                                         rte_atomic16_read(&tp->nb_dequeued)],
2146                                 burst_sz);
2147         else /*RTE_BBDEV_OP_TURBO_ENC*/
2148                 deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2149                                 &tp->enc_ops[
2150                                         rte_atomic16_read(&tp->nb_dequeued)],
2151                                 burst_sz);
2152
2153         if (deq < burst_sz) {
2154                 printf(
2155                         "After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
2156                         burst_sz, deq);
2157                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2158                 return;
2159         }
2160
2161         if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
2162                 rte_atomic16_add(&tp->nb_dequeued, deq);
2163                 return;
2164         }
2165
2166         total_time = rte_rdtsc_precise() - tp->start_time;
2167
2168         rte_bbdev_info_get(dev_id, &info);
2169
2170         ret = TEST_SUCCESS;
2171
2172         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2173                 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2174                 ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
2175                                 tp->op_params->vector_mask);
2176                 /* get the max of iter_count for all dequeued ops */
2177                 for (i = 0; i < num_ops; ++i)
2178                         tp->iter_count = RTE_MAX(
2179                                         tp->dec_ops[i]->turbo_dec.iter_count,
2180                                         tp->iter_count);
2181                 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2182         } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
2183                 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2184                 ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
2185                 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2186         } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
2187                 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2188                 ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
2189                 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2190         } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
2191                 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2192                 ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
2193                                 tp->op_params->vector_mask);
2194                 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2195         }
2196
2197         if (ret) {
2198                 printf("Buffers validation failed\n");
2199                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2200         }
2201
2202         switch (test_vector.op_type) {
2203         case RTE_BBDEV_OP_TURBO_DEC:
2204                 tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
2205                 break;
2206         case RTE_BBDEV_OP_TURBO_ENC:
2207                 tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
2208                 break;
2209         case RTE_BBDEV_OP_LDPC_DEC:
2210                 tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
2211                 break;
2212         case RTE_BBDEV_OP_LDPC_ENC:
2213                 tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
2214                 break;
2215         case RTE_BBDEV_OP_NONE:
2216                 tb_len_bits = 0.0;
2217                 break;
2218         default:
2219                 printf("Unknown op type: %d\n", test_vector.op_type);
2220                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2221                 return;
2222         }
2223
2224         tp->ops_per_sec += ((double)num_ops) /
2225                         ((double)total_time / (double)rte_get_tsc_hz());
2226         tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
2227                         ((double)total_time / (double)rte_get_tsc_hz());
2228
2229         rte_atomic16_add(&tp->nb_dequeued, deq);
2230 }
2231
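/*
 * Interrupt-mode throughput worker for decode ops: enqueue in bursts and
 * let dequeue_event_callback() drain and time the completions.
 */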
2232 static int
2233 throughput_intr_lcore_dec(void *arg)
2234 {
2235         struct thread_params *tp = arg;
2236         unsigned int enqueued;
2237         const uint16_t queue_id = tp->queue_id;
2238         const uint16_t burst_sz = tp->op_params->burst_sz;
2239         const uint16_t num_to_process = tp->op_params->num_to_process;
2240         struct rte_bbdev_dec_op *ops[num_to_process];
2241         struct test_buffers *bufs = NULL;
2242         struct rte_bbdev_info info;
2243         int ret, i, j;
2244         uint16_t num_to_enq, enq;
2245
2246         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2247                         "BURST_SIZE should be <= %u", MAX_BURST);
2248
2249         TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2250                         "Failed to enable interrupts for dev: %u, queue_id: %u",
2251                         tp->dev_id, queue_id);
2252
2253         rte_bbdev_info_get(tp->dev_id, &info);
2254
2255         TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2256                         "NUM_OPS cannot exceed %u for this device",
2257                         info.drv.queue_size_lim);
2258
2259         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2260
2261         rte_atomic16_clear(&tp->processing_status);
2262         rte_atomic16_clear(&tp->nb_dequeued);
2263
2264         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2265                 rte_pause();
2266
2267         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2268                                 num_to_process);
2269         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2270                         num_to_process);
2271         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2272                 copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
2273                                 bufs->hard_outputs, bufs->soft_outputs,
2274                                 tp->op_params->ref_dec_op);
2275
2276         /* Set counter to validate the ordering */
2277         for (j = 0; j < num_to_process; ++j)
2278                 ops[j]->opaque_data = (void *)(uintptr_t)j;
2279
2280         for (j = 0; j < TEST_REPETITIONS; ++j) {
2281                 for (i = 0; i < num_to_process; ++i)
2282                         rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
2283
2284                 tp->start_time = rte_rdtsc_precise();
2285                 for (enqueued = 0; enqueued < num_to_process;) {
2286                         num_to_enq = burst_sz;
2287
2288                         if (unlikely(num_to_process - enqueued < num_to_enq))
2289                                 num_to_enq = num_to_process - enqueued;
2290
2291                         enq = 0;
2292                         do {
2293                                 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2294                                                 queue_id, &ops[enqueued],
2295                                                 num_to_enq);
2296                         } while (unlikely(num_to_enq != enq));
2297                         enqueued += enq;
2298
2299                         /* Write the current number of enqueued descriptors
2300                          * to the thread's burst_sz. It ensures that the
2301                          * proper number of descriptors will be dequeued in
2302                          * the callback function - needed for the last batch
2303                          * when the number of operations is not a multiple
2304                          * of the burst size.
2305                          */
2306                         rte_atomic16_set(&tp->burst_sz, num_to_enq);
2307
2308                         /* Wait until processing of previous batch is
2309                          * completed
2310                          */
2311                         while (rte_atomic16_read(&tp->nb_dequeued) !=
2312                                         (int16_t) enqueued)
2313                                 rte_pause();
2314                 }
2315                 if (j != TEST_REPETITIONS - 1)
2316                         rte_atomic16_clear(&tp->nb_dequeued);
2317         }
2318
2319         return TEST_SUCCESS;
2320 }
2321
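/*
 * Interrupt-mode throughput worker for encode ops, mirroring the decode
 * variant above.
 */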
2322 static int
2323 throughput_intr_lcore_enc(void *arg)
2324 {
2325         struct thread_params *tp = arg;
2326         unsigned int enqueued;
2327         const uint16_t queue_id = tp->queue_id;
2328         const uint16_t burst_sz = tp->op_params->burst_sz;
2329         const uint16_t num_to_process = tp->op_params->num_to_process;
2330         struct rte_bbdev_enc_op *ops[num_to_process];
2331         struct test_buffers *bufs = NULL;
2332         struct rte_bbdev_info info;
2333         int ret, i, j;
2334         uint16_t num_to_enq, enq;
2335
2336         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2337                         "BURST_SIZE should be <= %u", MAX_BURST);
2338
2339         TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2340                         "Failed to enable interrupts for dev: %u, queue_id: %u",
2341                         tp->dev_id, queue_id);
2342
2343         rte_bbdev_info_get(tp->dev_id, &info);
2344
2345         TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2346                         "NUM_OPS cannot exceed %u for this device",
2347                         info.drv.queue_size_lim);
2348
2349         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2350
2351         rte_atomic16_clear(&tp->processing_status);
2352         rte_atomic16_clear(&tp->nb_dequeued);
2353
2354         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2355                 rte_pause();
2356
2357         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2358                         num_to_process);
2359         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2360                         num_to_process);
2361         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2362                 copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
2363                                 bufs->hard_outputs, tp->op_params->ref_enc_op);
2364
2365         /* Set counter to validate the ordering */
2366         for (j = 0; j < num_to_process; ++j)
2367                 ops[j]->opaque_data = (void *)(uintptr_t)j;
2368
2369         for (j = 0; j < TEST_REPETITIONS; ++j) {
2370                 for (i = 0; i < num_to_process; ++i)
2371                         rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
2372
2373                 tp->start_time = rte_rdtsc_precise();
2374                 for (enqueued = 0; enqueued < num_to_process;) {
2375                         num_to_enq = burst_sz;
2376
2377                         if (unlikely(num_to_process - enqueued < num_to_enq))
2378                                 num_to_enq = num_to_process - enqueued;
2379
2380                         enq = 0;
2381                         do {
2382                                 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2383                                                 queue_id, &ops[enqueued],
2384                                                 num_to_enq);
2385                         } while (unlikely(enq != num_to_enq));
2386                         enqueued += enq;
2387
2388                         /* Write the current number of enqueued descriptors
2389                          * to the thread's burst_sz. It ensures that the
2390                          * proper number of descriptors will be dequeued in
2391                          * the callback function - needed for the last batch
2392                          * when the number of operations is not a multiple
2393                          * of the burst size.
2394                          */
2395                         rte_atomic16_set(&tp->burst_sz, num_to_enq);
2396
2397                         /* Wait until processing of previous batch is
2398                          * completed
2399                          */
2400                         while (rte_atomic16_read(&tp->nb_dequeued) !=
2401                                         (int16_t) enqueued)
2402                                 rte_pause();
2403                 }
2404                 if (j != TEST_REPETITIONS - 1)
2405                         rte_atomic16_clear(&tp->nb_dequeued);
2406         }
2407
2408         return TEST_SUCCESS;
2409 }
2410
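/*
 * Polling-mode (PMD) throughput worker for turbo decode: enqueue and dequeue
 * in bursts for TEST_REPETITIONS iterations, then validate the outputs (when
 * a vector is provided) and compute the throughput figures.
 */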
2411 static int
2412 throughput_pmd_lcore_dec(void *arg)
2413 {
2414         struct thread_params *tp = arg;
2415         uint16_t enq, deq;
2416         uint64_t total_time = 0, start_time;
2417         const uint16_t queue_id = tp->queue_id;
2418         const uint16_t burst_sz = tp->op_params->burst_sz;
2419         const uint16_t num_ops = tp->op_params->num_to_process;
2420         struct rte_bbdev_dec_op *ops_enq[num_ops];
2421         struct rte_bbdev_dec_op *ops_deq[num_ops];
2422         struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2423         struct test_buffers *bufs = NULL;
2424         int i, j, ret;
2425         struct rte_bbdev_info info;
2426         uint16_t num_to_enq;
2427
2428         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2429                         "BURST_SIZE should be <= %u", MAX_BURST);
2430
2431         rte_bbdev_info_get(tp->dev_id, &info);
2432
2433         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2434                         "NUM_OPS cannot exceed %u for this device",
2435                         info.drv.queue_size_lim);
2436
2437         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2438
2439         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2440                 rte_pause();
2441
2442         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
2443         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
2444
2445         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2446                 copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
2447                                 bufs->hard_outputs, bufs->soft_outputs, ref_op);
2448
2449         /* Set counter to validate the ordering */
2450         for (j = 0; j < num_ops; ++j)
2451                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2452
2453         for (i = 0; i < TEST_REPETITIONS; ++i) {
2454
2455                 for (j = 0; j < num_ops; ++j)
2456                         mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
2457
2458                 start_time = rte_rdtsc_precise();
2459
2460                 for (enq = 0, deq = 0; enq < num_ops;) {
2461                         num_to_enq = burst_sz;
2462
2463                         if (unlikely(num_ops - enq < num_to_enq))
2464                                 num_to_enq = num_ops - enq;
2465
2466                         enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2467                                         queue_id, &ops_enq[enq], num_to_enq);
2468
2469                         deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
2470                                         queue_id, &ops_deq[deq], enq - deq);
2471                 }
2472
2473                 /* dequeue the remaining */
2474                 while (deq < enq) {
2475                         deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
2476                                         queue_id, &ops_deq[deq], enq - deq);
2477                 }
2478
2479                 total_time += rte_rdtsc_precise() - start_time;
2480         }
2481
2482         tp->iter_count = 0;
2483         /* get the max of iter_count for all dequeued ops */
2484         for (i = 0; i < num_ops; ++i) {
2485                 tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
2486                                 tp->iter_count);
2487         }
2488
2489         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2490                 ret = validate_dec_op(ops_deq, num_ops, ref_op,
2491                                 tp->op_params->vector_mask);
2492                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2493         }
2494
2495         rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
2496
2497         double tb_len_bits = calc_dec_TB_size(ref_op);
2498
2499         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2500                         ((double)total_time / (double)rte_get_tsc_hz());
2501         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
2502                         1000000.0) / ((double)total_time /
2503                         (double)rte_get_tsc_hz());
2504
2505         return TEST_SUCCESS;
2506 }
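
/*
 * Illustrative helper, not called by the tests: a minimal sketch of the
 * throughput arithmetic used in the PMD lcore functions above, assuming
 * total_time is the TSC cycle count accumulated over TEST_REPETITIONS runs
 * of num_ops operations and tb_len_bits is the transport block size in bits.
 * The name example_throughput_mbps is hypothetical.
 */
static inline double
example_throughput_mbps(uint16_t num_ops, double tb_len_bits,
                uint64_t total_time)
{
        double seconds = (double)total_time / (double)rte_get_tsc_hz();

        /* Total bits processed across all repetitions, scaled to Mbps */
        return ((double)num_ops * TEST_REPETITIONS * tb_len_bits / 1000000.0) /
                        seconds;
}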
2507
2508 static int
2509 throughput_pmd_lcore_ldpc_dec(void *arg)
2510 {
2511         struct thread_params *tp = arg;
2512         uint16_t enq, deq;
2513         uint64_t total_time = 0, start_time;
2514         const uint16_t queue_id = tp->queue_id;
2515         const uint16_t burst_sz = tp->op_params->burst_sz;
2516         const uint16_t num_ops = tp->op_params->num_to_process;
2517         struct rte_bbdev_dec_op *ops_enq[num_ops];
2518         struct rte_bbdev_dec_op *ops_deq[num_ops];
2519         struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2520         struct test_buffers *bufs = NULL;
2521         int i, j, ret;
2522         struct rte_bbdev_info info;
2523         uint16_t num_to_enq;
2524         bool extDdr = check_bit(ldpc_cap_flags,
2525                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
2526         bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
2527                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
2528         bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
2529                         RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
2530
2531         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2532                         "BURST_SIZE should be <= %u", MAX_BURST);
2533
2534         rte_bbdev_info_get(tp->dev_id, &info);
2535
2536         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2537                         "NUM_OPS cannot exceed %u for this device",
2538                         info.drv.queue_size_lim);
2539
2540         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2541
2542         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2543                 rte_pause();
2544
2545         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
2546         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
2547
2548         /* For throughput tests we need to disable early termination */
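        /* The subtraction below only runs when check_bit() confirms the flag
         * is set, so it is equivalent to clearing that single bit
         * (op_flags &= ~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE).
         */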
2549         if (check_bit(ref_op->ldpc_dec.op_flags,
2550                         RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
2551                 ref_op->ldpc_dec.op_flags -=
2552                                 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
2553         ref_op->ldpc_dec.iter_max = 6;
2554         ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
2555
2556         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2557                 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
2558                                 bufs->hard_outputs, bufs->soft_outputs,
2559                                 bufs->harq_inputs, bufs->harq_outputs, ref_op);
2560
2561         /* Set counter to validate the ordering */
2562         for (j = 0; j < num_ops; ++j)
2563                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2564
2565         for (i = 0; i < TEST_REPETITIONS; ++i) {
2566                 for (j = 0; j < num_ops; ++j) {
2567                         if (!loopback)
2568                                 mbuf_reset(
2569                                 ops_enq[j]->ldpc_dec.hard_output.data);
2570                         if (hc_out || loopback)
2571                                 mbuf_reset(
2572                                 ops_enq[j]->ldpc_dec.harq_combined_output.data);
2573                 }
2574                 if (extDdr) {
2575                         bool preload = i == (TEST_REPETITIONS - 1);
2576                         preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
2577                                         num_ops, preload);
2578                 }
2579                 start_time = rte_rdtsc_precise();
2580
2581                 for (enq = 0, deq = 0; enq < num_ops;) {
2582                         num_to_enq = burst_sz;
2583
2584                         if (unlikely(num_ops - enq < num_to_enq))
2585                                 num_to_enq = num_ops - enq;
2586
2587                         enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
2588                                         queue_id, &ops_enq[enq], num_to_enq);
2589
2590                         deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
2591                                         queue_id, &ops_deq[deq], enq - deq);
2592                 }
2593
2594                 /* dequeue the remaining */
2595                 while (deq < enq) {
2596                         deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
2597                                         queue_id, &ops_deq[deq], enq - deq);
2598                 }
2599
2600                 total_time += rte_rdtsc_precise() - start_time;
2601         }
2602
2603         tp->iter_count = 0;
2604         /* get the max of iter_count for all dequeued ops */
2605         for (i = 0; i < num_ops; ++i) {
2606                 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
2607                                 tp->iter_count);
2608         }
2609         if (extDdr) {
2610                 /* Read loopback is not thread safe */
2611                 retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
2612         }
2613
2614         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2615                 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
2616                                 tp->op_params->vector_mask);
2617                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2618         }
2619
2620         rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
2621
2622         double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
2623
2624         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2625                         ((double)total_time / (double)rte_get_tsc_hz());
2626         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
2627                         1000000.0) / ((double)total_time /
2628                         (double)rte_get_tsc_hz());
2629
2630         return TEST_SUCCESS;
2631 }
2632
2633 static int
2634 throughput_pmd_lcore_enc(void *arg)
2635 {
2636         struct thread_params *tp = arg;
2637         uint16_t enq, deq;
2638         uint64_t total_time = 0, start_time;
2639         const uint16_t queue_id = tp->queue_id;
2640         const uint16_t burst_sz = tp->op_params->burst_sz;
2641         const uint16_t num_ops = tp->op_params->num_to_process;
2642         struct rte_bbdev_enc_op *ops_enq[num_ops];
2643         struct rte_bbdev_enc_op *ops_deq[num_ops];
2644         struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2645         struct test_buffers *bufs = NULL;
2646         int i, j, ret;
2647         struct rte_bbdev_info info;
2648         uint16_t num_to_enq;
2649
2650         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2651                         "BURST_SIZE should be <= %u", MAX_BURST);
2652
2653         rte_bbdev_info_get(tp->dev_id, &info);
2654
2655         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2656                         "NUM_OPS cannot exceed %u for this device",
2657                         info.drv.queue_size_lim);
2658
2659         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2660
2661         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2662                 rte_pause();
2663
2664         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
2665                         num_ops);
2666         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2667                         num_ops);
2668         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2669                 copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
2670                                 bufs->hard_outputs, ref_op);
2671
2672         /* Set counter to validate the ordering */
2673         for (j = 0; j < num_ops; ++j)
2674                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2675
2676         for (i = 0; i < TEST_REPETITIONS; ++i) {
2677
2678                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2679                         for (j = 0; j < num_ops; ++j)
2680                                 mbuf_reset(ops_enq[j]->turbo_enc.output.data);
2681
2682                 start_time = rte_rdtsc_precise();
2683
2684                 for (enq = 0, deq = 0; enq < num_ops;) {
2685                         num_to_enq = burst_sz;
2686
2687                         if (unlikely(num_ops - enq < num_to_enq))
2688                                 num_to_enq = num_ops - enq;
2689
2690                         enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
2691                                         queue_id, &ops_enq[enq], num_to_enq);
2692
2693                         deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
2694                                         queue_id, &ops_deq[deq], enq - deq);
2695                 }
2696
2697                 /* dequeue the remaining */
2698                 while (deq < enq) {
2699                         deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
2700                                         queue_id, &ops_deq[deq], enq - deq);
2701                 }
2702
2703                 total_time += rte_rdtsc_precise() - start_time;
2704         }
2705
2706         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2707                 ret = validate_enc_op(ops_deq, num_ops, ref_op);
2708                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2709         }
2710
2711         rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
2712
2713         double tb_len_bits = calc_enc_TB_size(ref_op);
2714
2715         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2716                         ((double)total_time / (double)rte_get_tsc_hz());
2717         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
2718                         / 1000000.0) / ((double)total_time /
2719                         (double)rte_get_tsc_hz());
2720
2721         return TEST_SUCCESS;
2722 }
2723
2724 static int
2725 throughput_pmd_lcore_ldpc_enc(void *arg)
2726 {
2727         struct thread_params *tp = arg;
2728         uint16_t enq, deq;
2729         uint64_t total_time = 0, start_time;
2730         const uint16_t queue_id = tp->queue_id;
2731         const uint16_t burst_sz = tp->op_params->burst_sz;
2732         const uint16_t num_ops = tp->op_params->num_to_process;
2733         struct rte_bbdev_enc_op *ops_enq[num_ops];
2734         struct rte_bbdev_enc_op *ops_deq[num_ops];
2735         struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2736         struct test_buffers *bufs = NULL;
2737         int i, j, ret;
2738         struct rte_bbdev_info info;
2739         uint16_t num_to_enq;
2740
2741         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2742                         "BURST_SIZE should be <= %u", MAX_BURST);
2743
2744         rte_bbdev_info_get(tp->dev_id, &info);
2745
2746         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
2747                         "NUM_OPS cannot exceed %u for this device",
2748                         info.drv.queue_size_lim);
2749
2750         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2751
2752         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2753                 rte_pause();
2754
2755         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
2756                         num_ops);
2757         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2758                         num_ops);
2759         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2760                 copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
2761                                 bufs->hard_outputs, ref_op);
2762
2763         /* Set counter to validate the ordering */
2764         for (j = 0; j < num_ops; ++j)
2765                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
2766
2767         for (i = 0; i < TEST_REPETITIONS; ++i) {
2768
2769                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2770                         for (j = 0; j < num_ops; ++j)
2771                                 mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
2772
2773                 start_time = rte_rdtsc_precise();
2774
2775                 for (enq = 0, deq = 0; enq < num_ops;) {
2776                         num_to_enq = burst_sz;
2777
2778                         if (unlikely(num_ops - enq < num_to_enq))
2779                                 num_to_enq = num_ops - enq;
2780
2781                         enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
2782                                         queue_id, &ops_enq[enq], num_to_enq);
2783
2784                         deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
2785                                         queue_id, &ops_deq[deq], enq - deq);
2786                 }
2787
2788                 /* dequeue the remaining */
2789                 while (deq < enq) {
2790                         deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
2791                                         queue_id, &ops_deq[deq], enq - deq);
2792                 }
2793
2794                 total_time += rte_rdtsc_precise() - start_time;
2795         }
2796
2797         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
2798                 ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
2799                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2800         }
2801
2802         rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
2803
2804         double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
2805
2806         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
2807                         ((double)total_time / (double)rte_get_tsc_hz());
2808         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
2809                         / 1000000.0) / ((double)total_time /
2810                         (double)rte_get_tsc_hz());
2811
2812         return TEST_SUCCESS;
2813 }
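
/*
 * Illustrative sketch, not called by the tests: the enqueue/dequeue pairing
 * used by the PMD throughput loops above. The enq_burst()/deq_burst()
 * callbacks are hypothetical stand-ins for the rte_bbdev enqueue/dequeue
 * burst calls, which may process fewer elements than requested.
 */
static inline void
example_burst_loop(uint16_t num_ops, uint16_t burst_sz,
                uint16_t (*enq_burst)(uint16_t num),
                uint16_t (*deq_burst)(uint16_t num))
{
        uint16_t enq = 0, deq = 0, num_to_enq;

        while (enq < num_ops) {
                num_to_enq = RTE_MIN(burst_sz, (uint16_t)(num_ops - enq));
                /* Enqueue up to one burst, then drain whatever is ready */
                enq += enq_burst(num_to_enq);
                deq += deq_burst((uint16_t)(enq - deq));
        }
        /* Dequeue the remaining operations */
        while (deq < enq)
                deq += deq_burst((uint16_t)(enq - deq));
}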
2814
2815 static void
2816 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
2817 {
2818         unsigned int iter = 0;
2819         double total_mops = 0, total_mbps = 0;
2820
2821         for (iter = 0; iter < used_cores; iter++) {
2822                 printf(
2823                         "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
2824                         t_params[iter].lcore_id, t_params[iter].ops_per_sec,
2825                         t_params[iter].mbps);
2826                 total_mops += t_params[iter].ops_per_sec;
2827                 total_mbps += t_params[iter].mbps;
2828         }
2829         printf(
2830                 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
2831                 used_cores, total_mops, total_mbps);
2832 }
2833
2834 static void
2835 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
2836 {
2837         unsigned int iter = 0;
2838         double total_mops = 0, total_mbps = 0;
2839         uint8_t iter_count = 0;
2840
2841         for (iter = 0; iter < used_cores; iter++) {
2842                 printf(
2843                         "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
2844                         t_params[iter].lcore_id, t_params[iter].ops_per_sec,
2845                         t_params[iter].mbps, t_params[iter].iter_count);
2846                 total_mops += t_params[iter].ops_per_sec;
2847                 total_mbps += t_params[iter].mbps;
2848                 iter_count = RTE_MAX(iter_count, t_params[iter].iter_count);
2849         }
2850         printf(
2851                 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
2852                 used_cores, total_mops, total_mbps, iter_count);
2853 }
2854
2855 /*
2856  * Test function that determines how long an enqueue + dequeue of a burst
2857  * takes on available lcores.
2858  */
2859 static int
2860 throughput_test(struct active_device *ad,
2861                 struct test_op_params *op_params)
2862 {
2863         int ret;
2864         unsigned int lcore_id, used_cores = 0;
2865         struct thread_params *t_params, *tp;
2866         struct rte_bbdev_info info;
2867         lcore_function_t *throughput_function;
2868         uint16_t num_lcores;
2869         const char *op_type_str;
2870
2871         rte_bbdev_info_get(ad->dev_id, &info);
2872
2873         op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
2874         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
2875                         test_vector.op_type);
2876
2877         printf("+ ------------------------------------------------------- +\n");
2878         printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
2879                         info.dev_name, ad->nb_queues, op_params->burst_sz,
2880                         op_params->num_to_process, op_params->num_lcores,
2881                         op_type_str,
2882                         intr_enabled ? "Interrupt mode" : "PMD mode",
2883                         (double)rte_get_tsc_hz() / 1000000000.0);
2884
2885         /* Set number of lcores */
2886         num_lcores = (ad->nb_queues < (op_params->num_lcores))
2887                         ? ad->nb_queues
2888                         : op_params->num_lcores;
2889
2890         /* Allocate memory for thread parameters structure */
2891         t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
2892                         RTE_CACHE_LINE_SIZE);
2893         TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
2894                         RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
2895                                 RTE_CACHE_LINE_SIZE));
2896
2897         if (intr_enabled) {
2898                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2899                         throughput_function = throughput_intr_lcore_dec;
2900                 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2901                         throughput_function = throughput_intr_lcore_dec;
2902                 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2903                         throughput_function = throughput_intr_lcore_enc;
2904                 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2905                         throughput_function = throughput_intr_lcore_enc;
2906                 else
2907                         throughput_function = throughput_intr_lcore_enc;
2908
2909                 /* Dequeue interrupt callback registration */
2910                 ret = rte_bbdev_callback_register(ad->dev_id,
2911                                 RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
2912                                 t_params);
2913                 if (ret < 0) {
2914                         rte_free(t_params);
2915                         return ret;
2916                 }
2917         } else {
2918                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2919                         throughput_function = throughput_pmd_lcore_dec;
2920                 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2921                         throughput_function = throughput_pmd_lcore_ldpc_dec;
2922                 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2923                         throughput_function = throughput_pmd_lcore_enc;
2924                 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2925                         throughput_function = throughput_pmd_lcore_ldpc_enc;
2926                 else
2927                         throughput_function = throughput_pmd_lcore_enc;
2928         }
2929
2930         rte_atomic16_set(&op_params->sync, SYNC_WAIT);
2931
2932         /* The master core occupies the first entry */
2933         t_params[0].dev_id = ad->dev_id;
2934         t_params[0].lcore_id = rte_lcore_id();
2935         t_params[0].op_params = op_params;
2936         t_params[0].queue_id = ad->queue_ids[used_cores++];
2937         t_params[0].iter_count = 0;
2938
2939         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
2940                 if (used_cores >= num_lcores)
2941                         break;
2942
2943                 t_params[used_cores].dev_id = ad->dev_id;
2944                 t_params[used_cores].lcore_id = lcore_id;
2945                 t_params[used_cores].op_params = op_params;
2946                 t_params[used_cores].queue_id = ad->queue_ids[used_cores];
2947                 t_params[used_cores].iter_count = 0;
2948
2949                 rte_eal_remote_launch(throughput_function,
2950                                 &t_params[used_cores++], lcore_id);
2951         }
2952
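        /* Release the worker lcores, then run the same throughput function
         * on the master core itself.
         */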
2953         rte_atomic16_set(&op_params->sync, SYNC_START);
2954         ret = throughput_function(&t_params[0]);
2955
2956         /* Master core is always used */
2957         for (used_cores = 1; used_cores < num_lcores; used_cores++)
2958                 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
2959
2960         /* Return if test failed */
2961         if (ret) {
2962                 rte_free(t_params);
2963                 return ret;
2964         }
2965
2966         /* Print throughput if interrupts are disabled and test passed */
2967         if (!intr_enabled) {
2968                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
2969                                 test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2970                         print_dec_throughput(t_params, num_lcores);
2971                 else
2972                         print_enc_throughput(t_params, num_lcores);
2973                 rte_free(t_params);
2974                 return ret;
2975         }
2976
2977         /* In the interrupt TC we need to wait for the interrupt callback to
2978          * dequeue all pending operations. Skip waiting for queues which
2979          * reported an error via the processing_status variable.
2980          * Wait for the master lcore operations first.
2981          */
2982         tp = &t_params[0];
2983         while ((rte_atomic16_read(&tp->nb_dequeued) <
2984                         op_params->num_to_process) &&
2985                         (rte_atomic16_read(&tp->processing_status) !=
2986                         TEST_FAILED))
2987                 rte_pause();
2988
2989         tp->ops_per_sec /= TEST_REPETITIONS;
2990         tp->mbps /= TEST_REPETITIONS;
2991         ret |= (int)rte_atomic16_read(&tp->processing_status);
2992
2993         /* Wait for slave lcores operations */
2994         for (used_cores = 1; used_cores < num_lcores; used_cores++) {
2995                 tp = &t_params[used_cores];
2996
2997                 while ((rte_atomic16_read(&tp->nb_dequeued) <
2998                                 op_params->num_to_process) &&
2999                                 (rte_atomic16_read(&tp->processing_status) !=
3000                                 TEST_FAILED))
3001                         rte_pause();
3002
3003                 tp->ops_per_sec /= TEST_REPETITIONS;
3004                 tp->mbps /= TEST_REPETITIONS;
3005                 ret |= (int)rte_atomic16_read(&tp->processing_status);
3006         }
3007
3008         /* Print throughput if test passed */
3009         if (!ret) {
3010                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3011                                 test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3012                         print_dec_throughput(t_params, num_lcores);
3013                 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
3014                                 test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3015                         print_enc_throughput(t_params, num_lcores);
3016         }
3017
3018         rte_free(t_params);
3019         return ret;
3020 }
3021
3022 static int
3023 latency_test_dec(struct rte_mempool *mempool,
3024                 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3025                 int vector_mask, uint16_t dev_id, uint16_t queue_id,
3026                 const uint16_t num_to_process, uint16_t burst_sz,
3027                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3028 {
3029         int ret = TEST_SUCCESS;
3030         uint16_t i, j, dequeued;
3031         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3032         uint64_t start_time = 0, last_time = 0;
3033
3034         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3035                 uint16_t enq = 0, deq = 0;
3036                 bool first_time = true;
3037                 last_time = 0;
3038
3039                 if (unlikely(num_to_process - dequeued < burst_sz))
3040                         burst_sz = num_to_process - dequeued;
3041
3042                 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3043                 TEST_ASSERT_SUCCESS(ret,
3044                                 "rte_bbdev_dec_op_alloc_bulk() failed");
3045                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3046                         copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3047                                         bufs->inputs,
3048                                         bufs->hard_outputs,
3049                                         bufs->soft_outputs,
3050                                         ref_op);
3051
3052                 /* Set counter to validate the ordering */
3053                 for (j = 0; j < burst_sz; ++j)
3054                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3055
3056                 start_time = rte_rdtsc_precise();
3057
3058                 enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
3059                                 burst_sz);
3060                 TEST_ASSERT(enq == burst_sz,
3061                                 "Error enqueueing burst, expected %u, got %u",
3062                                 burst_sz, enq);
3063
3064                 /* Dequeue */
3065                 do {
3066                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3067                                         &ops_deq[deq], burst_sz - deq);
3068                         if (likely(first_time && (deq > 0))) {
3069                                 last_time = rte_rdtsc_precise() - start_time;
3070                                 first_time = false;
3071                         }
3072                 } while (unlikely(burst_sz != deq));
3073
3074                 *max_time = RTE_MAX(*max_time, last_time);
3075                 *min_time = RTE_MIN(*min_time, last_time);
3076                 *total_time += last_time;
3077
3078                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3079                         ret = validate_dec_op(ops_deq, burst_sz, ref_op,
3080                                         vector_mask);
3081                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3082                 }
3083
3084                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3085                 dequeued += deq;
3086         }
3087
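        /* i counts the measured bursts; latency_test() divides the
         * accumulated time by this value to report the average latency.
         */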
3088         return i;
3089 }
3090
3091 static int
3092 latency_test_ldpc_dec(struct rte_mempool *mempool,
3093                 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3094                 int vector_mask, uint16_t dev_id, uint16_t queue_id,
3095                 const uint16_t num_to_process, uint16_t burst_sz,
3096                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3097 {
3098         int ret = TEST_SUCCESS;
3099         uint16_t i, j, dequeued;
3100         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3101         uint64_t start_time = 0, last_time = 0;
3102         bool extDdr = ldpc_cap_flags &
3103                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
3104
3105         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3106                 uint16_t enq = 0, deq = 0;
3107                 bool first_time = true;
3108                 last_time = 0;
3109
3110                 if (unlikely(num_to_process - dequeued < burst_sz))
3111                         burst_sz = num_to_process - dequeued;
3112
3113                 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3114                 TEST_ASSERT_SUCCESS(ret,
3115                                 "rte_bbdev_dec_op_alloc_bulk() failed");
3116
3117                 /* For latency tests we need to disable early termination */
3118                 if (check_bit(ref_op->ldpc_dec.op_flags,
3119                                 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3120                         ref_op->ldpc_dec.op_flags -=
3121                                         RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3122                 ref_op->ldpc_dec.iter_max = 6;
3123                 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3124
3125                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3126                         copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
3127                                         bufs->inputs,
3128                                         bufs->hard_outputs,
3129                                         bufs->soft_outputs,
3130                                         bufs->harq_inputs,
3131                                         bufs->harq_outputs,
3132                                         ref_op);
3133
3134                 if (extDdr)
3135                         preload_harq_ddr(dev_id, queue_id, ops_enq,
3136                                         burst_sz, true);
3137
3138                 /* Set counter to validate the ordering */
3139                 for (j = 0; j < burst_sz; ++j)
3140                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3141
3142                 start_time = rte_rdtsc_precise();
3143
3144                 enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
3145                                 &ops_enq[enq], burst_sz);
3146                 TEST_ASSERT(enq == burst_sz,
3147                                 "Error enqueueing burst, expected %u, got %u",
3148                                 burst_sz, enq);
3149
3150                 /* Dequeue */
3151                 do {
3152                         deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3153                                         &ops_deq[deq], burst_sz - deq);
3154                         if (likely(first_time && (deq > 0))) {
3155                                 last_time = rte_rdtsc_precise() - start_time;
3156                                 first_time = false;
3157                         }
3158                 } while (unlikely(burst_sz != deq));
3159
3160                 *max_time = RTE_MAX(*max_time, last_time);
3161                 *min_time = RTE_MIN(*min_time, last_time);
3162                 *total_time += last_time;
3163
3164                 if (extDdr)
3165                         retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
3166
3167                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3168                         ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
3169                                         vector_mask);
3170                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3171                 }
3172
3173                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3174                 dequeued += deq;
3175         }
3176         return i;
3177 }
3178
3179 static int
3180 latency_test_enc(struct rte_mempool *mempool,
3181                 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
3182                 uint16_t dev_id, uint16_t queue_id,
3183                 const uint16_t num_to_process, uint16_t burst_sz,
3184                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3185 {
3186         int ret = TEST_SUCCESS;
3187         uint16_t i, j, dequeued;
3188         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3189         uint64_t start_time = 0, last_time = 0;
3190
3191         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3192                 uint16_t enq = 0, deq = 0;
3193                 bool first_time = true;
3194                 last_time = 0;
3195
3196                 if (unlikely(num_to_process - dequeued < burst_sz))
3197                         burst_sz = num_to_process - dequeued;
3198
3199                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3200                 TEST_ASSERT_SUCCESS(ret,
3201                                 "rte_bbdev_enc_op_alloc_bulk() failed");
3202                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3203                         copy_reference_enc_op(ops_enq, burst_sz, dequeued,
3204                                         bufs->inputs,
3205                                         bufs->hard_outputs,
3206                                         ref_op);
3207
3208                 /* Set counter to validate the ordering */
3209                 for (j = 0; j < burst_sz; ++j)
3210                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3211
3212                 start_time = rte_rdtsc_precise();
3213
3214                 enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
3215                                 burst_sz);
3216                 TEST_ASSERT(enq == burst_sz,
3217                                 "Error enqueueing burst, expected %u, got %u",
3218                                 burst_sz, enq);
3219
3220                 /* Dequeue */
3221                 do {
3222                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3223                                         &ops_deq[deq], burst_sz - deq);
3224                         if (likely(first_time && (deq > 0))) {
3225                                 last_time = rte_rdtsc_precise() - start_time;
3226                                 first_time = false;
3227                         }
3228                 } while (unlikely(burst_sz != deq));
3229
3230                 *max_time = RTE_MAX(*max_time, last_time);
3231                 *min_time = RTE_MIN(*min_time, last_time);
3232                 *total_time += last_time;
3233
3234                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3235                         ret = validate_enc_op(ops_deq, burst_sz, ref_op);
3236                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3237                 }
3238
3239                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3240                 dequeued += deq;
3241         }
3242
3243         return i;
3244 }
3245
3246 static int
3247 latency_test_ldpc_enc(struct rte_mempool *mempool,
3248                 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
3249                 uint16_t dev_id, uint16_t queue_id,
3250                 const uint16_t num_to_process, uint16_t burst_sz,
3251                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3252 {
3253         int ret = TEST_SUCCESS;
3254         uint16_t i, j, dequeued;
3255         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3256         uint64_t start_time = 0, last_time = 0;
3257
3258         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3259                 uint16_t enq = 0, deq = 0;
3260                 bool first_time = true;
3261                 last_time = 0;
3262
3263                 if (unlikely(num_to_process - dequeued < burst_sz))
3264                         burst_sz = num_to_process - dequeued;
3265
3266                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3267                 TEST_ASSERT_SUCCESS(ret,
3268                                 "rte_bbdev_enc_op_alloc_bulk() failed");
3269                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3270                         copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
3271                                         bufs->inputs,
3272                                         bufs->hard_outputs,
3273                                         ref_op);
3274
3275                 /* Set counter to validate the ordering */
3276                 for (j = 0; j < burst_sz; ++j)
3277                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3278
3279                 start_time = rte_rdtsc_precise();
3280
3281                 enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
3282                                 &ops_enq[enq], burst_sz);
3283                 TEST_ASSERT(enq == burst_sz,
3284                                 "Error enqueueing burst, expected %u, got %u",
3285                                 burst_sz, enq);
3286
3287                 /* Dequeue */
3288                 do {
3289                         deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3290                                         &ops_deq[deq], burst_sz - deq);
3291                         if (likely(first_time && (deq > 0))) {
3292                                 last_time = rte_rdtsc_precise() - start_time;
3293                                 first_time = false;
3294                         }
3295                 } while (unlikely(burst_sz != deq));
3296
3297                 *max_time = RTE_MAX(*max_time, last_time);
3298                 *min_time = RTE_MIN(*min_time, last_time);
3299                 *total_time += last_time;
3300
3301                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3302                         ret = validate_enc_op(ops_deq, burst_sz, ref_op);
3303                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3304                 }
3305
3306                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3307                 dequeued += deq;
3308         }
3309
3310         return i;
3311 }
3312
3313 static int
3314 latency_test(struct active_device *ad,
3315                 struct test_op_params *op_params)
3316 {
3317         int iter;
3318         uint16_t burst_sz = op_params->burst_sz;
3319         const uint16_t num_to_process = op_params->num_to_process;
3320         const enum rte_bbdev_op_type op_type = test_vector.op_type;
3321         const uint16_t queue_id = ad->queue_ids[0];
3322         struct test_buffers *bufs = NULL;
3323         struct rte_bbdev_info info;
3324         uint64_t total_time, min_time, max_time;
3325         const char *op_type_str;
3326
3327         total_time = max_time = 0;
3328         min_time = UINT64_MAX;
3329
3330         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3331                         "BURST_SIZE should be <= %u", MAX_BURST);
3332
3333         rte_bbdev_info_get(ad->dev_id, &info);
3334         bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3335
3336         op_type_str = rte_bbdev_op_type_str(op_type);
3337         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
3338
3339         printf("+ ------------------------------------------------------- +\n");
3340         printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
3341                         info.dev_name, burst_sz, num_to_process, op_type_str);
3342
3343         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
3344                 iter = latency_test_dec(op_params->mp, bufs,
3345                                 op_params->ref_dec_op, op_params->vector_mask,
3346                                 ad->dev_id, queue_id, num_to_process,
3347                                 burst_sz, &total_time, &min_time, &max_time);
3348         else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
3349                 iter = latency_test_enc(op_params->mp, bufs,
3350                                 op_params->ref_enc_op, ad->dev_id, queue_id,
3351                                 num_to_process, burst_sz, &total_time,
3352                                 &min_time, &max_time);
3353         else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
3354                 iter = latency_test_ldpc_enc(op_params->mp, bufs,
3355                                 op_params->ref_enc_op, ad->dev_id, queue_id,
3356                                 num_to_process, burst_sz, &total_time,
3357                                 &min_time, &max_time);
3358         else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
3359                 iter = latency_test_ldpc_dec(op_params->mp, bufs,
3360                                 op_params->ref_dec_op, op_params->vector_mask,
3361                                 ad->dev_id, queue_id, num_to_process,
3362                                 burst_sz, &total_time, &min_time, &max_time);
3363         else
3364                 iter = latency_test_enc(op_params->mp, bufs,
3365                                         op_params->ref_enc_op,
3366                                         ad->dev_id, queue_id,
3367                                         num_to_process, burst_sz, &total_time,
3368                                         &min_time, &max_time);
3369
3370         if (iter <= 0)
3371                 return TEST_FAILED;
3372
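        /* Cycle counts are converted to microseconds below as
         * us = cycles * 1000000 / rte_get_tsc_hz().
         */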
3373         printf("Operation latency:\n"
3374                         "\tavg: %lg cycles, %lg us\n"
3375                         "\tmin: %lg cycles, %lg us\n"
3376                         "\tmax: %lg cycles, %lg us\n",
3377                         (double)total_time / (double)iter,
3378                         (double)(total_time * 1000000) / (double)iter /
3379                         (double)rte_get_tsc_hz(), (double)min_time,
3380                         (double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
3381                         (double)max_time, (double)(max_time * 1000000) /
3382                         (double)rte_get_tsc_hz());
3383
3384         return TEST_SUCCESS;
3385 }
3386
3387 #ifdef RTE_BBDEV_OFFLOAD_COST
3388 static int
3389 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
3390                 struct rte_bbdev_stats *stats)
3391 {
3392         struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
3393         struct rte_bbdev_stats *q_stats;
3394
3395         if (queue_id >= dev->data->num_queues)
3396                 return -1;
3397
3398         q_stats = &dev->data->queues[queue_id].queue_stats;
3399
3400         stats->enqueued_count = q_stats->enqueued_count;
3401         stats->dequeued_count = q_stats->dequeued_count;
3402         stats->enqueue_err_count = q_stats->enqueue_err_count;
3403         stats->dequeue_err_count = q_stats->dequeue_err_count;
3404         stats->acc_offload_cycles = q_stats->acc_offload_cycles;
3405
3406         return 0;
3407 }
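
/*
 * Minimal usage sketch for the helper above (illustrative only, not part of
 * the test flow): split a measured enqueue window into software cycles and
 * accelerator offload cycles, assuming a hypothetical device 0 / queue 0.
 */
static inline void
example_split_enqueue_time(uint64_t enq_window_cycles)
{
        struct rte_bbdev_stats stats;

        if (get_bbdev_queue_stats(0, 0, &stats) != 0)
                return;

        printf("enqueue: %" PRIu64 " SW cycles, %" PRIu64 " offload cycles\n",
                        enq_window_cycles - stats.acc_offload_cycles,
                        stats.acc_offload_cycles);
}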
3408
3409 static int
3410 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
3411                 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
3412                 uint16_t queue_id, const uint16_t num_to_process,
3413                 uint16_t burst_sz, struct test_time_stats *time_st)
3414 {
3415         int i, dequeued, ret;
3416         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3417         uint64_t enq_start_time, deq_start_time;
3418         uint64_t enq_sw_last_time, deq_last_time;
3419         struct rte_bbdev_stats stats;
3420
3421         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3422                 uint16_t enq = 0, deq = 0;
3423
3424                 if (unlikely(num_to_process - dequeued < burst_sz))
3425                         burst_sz = num_to_process - dequeued;
3426
3427                 rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3428                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3429                         copy_reference_dec_op(ops_enq, burst_sz, dequeued,
3430                                         bufs->inputs,
3431                                         bufs->hard_outputs,
3432                                         bufs->soft_outputs,
3433                                         ref_op);
3434
3435                 /* Start time meas for enqueue function offload latency */
3436                 enq_start_time = rte_rdtsc_precise();
3437                 do {
3438                         enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
3439                                         &ops_enq[enq], burst_sz - enq);
3440                 } while (unlikely(burst_sz != enq));
3441
3442                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3443                 TEST_ASSERT_SUCCESS(ret,
3444                                 "Failed to get stats for queue (%u) of device (%u)",
3445                                 queue_id, dev_id);
3446
3447                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
3448                                 stats.acc_offload_cycles;
3449                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3450                                 enq_sw_last_time);
3451                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3452                                 enq_sw_last_time);
3453                 time_st->enq_sw_total_time += enq_sw_last_time;
3454
3455                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3456                                 stats.acc_offload_cycles);
3457                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3458                                 stats.acc_offload_cycles);
3459                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
3460
3461                 /* give time for device to process ops */
3462                 rte_delay_us(200);
3463
3464                 /* Start time meas for dequeue function offload latency */
3465                 deq_start_time = rte_rdtsc_precise();
3466                 /* Dequeue one operation */
3467                 do {
3468                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3469                                         &ops_deq[deq], 1);
3470                 } while (unlikely(deq != 1));
3471
3472                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
3473                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3474                                 deq_last_time);
3475                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3476                                 deq_last_time);
3477                 time_st->deq_total_time += deq_last_time;
3478
3479                 /* Dequeue remaining operations if needed */
3480                 while (burst_sz != deq)
3481                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
3482                                         &ops_deq[deq], burst_sz - deq);
3483
3484                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3485                 dequeued += deq;
3486         }
3487
3488         return i;
3489 }
3490
3491 static int
3492 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
3493                 struct test_buffers *bufs,
3494                 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
3495                 uint16_t queue_id, const uint16_t num_to_process,
3496                 uint16_t burst_sz, struct test_time_stats *time_st)
3497 {
3498         int i, dequeued, ret;
3499         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3500         uint64_t enq_start_time, deq_start_time;
3501         uint64_t enq_sw_last_time, deq_last_time;
3502         struct rte_bbdev_stats stats;
3503         bool extDdr = ldpc_cap_flags &
3504                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
3505
3506         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3507                 uint16_t enq = 0, deq = 0;
3508
3509                 if (unlikely(num_to_process - dequeued < burst_sz))
3510                         burst_sz = num_to_process - dequeued;
3511
3512                 rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
3513                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3514                         copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
3515                                         bufs->inputs,
3516                                         bufs->hard_outputs,
3517                                         bufs->soft_outputs,
3518                                         bufs->harq_inputs,
3519                                         bufs->harq_outputs,
3520                                         ref_op);
3521
3522                 if (extDdr)
3523                         preload_harq_ddr(dev_id, queue_id, ops_enq,
3524                                         burst_sz, true);
3525
3526                 /* Start time meas for enqueue function offload latency */
3527                 enq_start_time = rte_rdtsc_precise();
3528                 do {
3529                         enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
3530                                         &ops_enq[enq], burst_sz - enq);
3531                 } while (unlikely(burst_sz != enq));
3532
3533                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
3534                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3535                 TEST_ASSERT_SUCCESS(ret,
3536                                 "Failed to get stats for queue (%u) of device (%u)",
3537                                 queue_id, dev_id);
3538
3539                 enq_sw_last_time -= stats.acc_offload_cycles;
3540                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3541                                 enq_sw_last_time);
3542                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3543                                 enq_sw_last_time);
3544                 time_st->enq_sw_total_time += enq_sw_last_time;
3545
3546                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3547                                 stats.acc_offload_cycles);
3548                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3549                                 stats.acc_offload_cycles);
3550                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
3551
3552                 /* give time for device to process ops */
3553                 rte_delay_us(200);
3554
3555                 /* Start time meas for dequeue function offload latency */
3556                 deq_start_time = rte_rdtsc_precise();
3557                 /* Dequeue one operation */
3558                 do {
3559                         deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3560                                         &ops_deq[deq], 1);
3561                 } while (unlikely(deq != 1));
3562
3563                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
3564                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3565                                 deq_last_time);
3566                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3567                                 deq_last_time);
3568                 time_st->deq_total_time += deq_last_time;
3569
3570                 /* Dequeue remaining operations if needed */
3571                 while (burst_sz != deq)
3572                         deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
3573                                         &ops_deq[deq], burst_sz - deq);
3574
3575                 if (extDdr) {
3576                         /* Read loopback is not thread safe */
3577                         retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
3578                 }
3579
3580                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
3581                 dequeued += deq;
3582         }
3583
3584         return i;
3585 }
3586
3587 static int
3588 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
3589                 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
3590                 uint16_t queue_id, const uint16_t num_to_process,
3591                 uint16_t burst_sz, struct test_time_stats *time_st)
3592 {
3593         int i, dequeued, ret;
3594         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3595         uint64_t enq_start_time, deq_start_time;
3596         uint64_t enq_sw_last_time, deq_last_time;
3597         struct rte_bbdev_stats stats;
3598
3599         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3600                 uint16_t enq = 0, deq = 0;
3601
3602                 if (unlikely(num_to_process - dequeued < burst_sz))
3603                         burst_sz = num_to_process - dequeued;
3604
3605                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3606                 TEST_ASSERT_SUCCESS(ret,
3607                                 "rte_bbdev_enc_op_alloc_bulk() failed");
3608                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3609                         copy_reference_enc_op(ops_enq, burst_sz, dequeued,
3610                                         bufs->inputs,
3611                                         bufs->hard_outputs,
3612                                         ref_op);
3613
3614                 /* Start time meas for enqueue function offload latency */
3615                 enq_start_time = rte_rdtsc_precise();
3616                 do {
3617                         enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
3618                                         &ops_enq[enq], burst_sz - enq);
3619                 } while (unlikely(burst_sz != enq));
3620
3621                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
3622
3623                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3624                 TEST_ASSERT_SUCCESS(ret,
3625                                 "Failed to get stats for queue (%u) of device (%u)",
3626                                 queue_id, dev_id);
3627                 enq_sw_last_time -= stats.acc_offload_cycles;
3628                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3629                                 enq_sw_last_time);
3630                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3631                                 enq_sw_last_time);
3632                 time_st->enq_sw_total_time += enq_sw_last_time;
3633
3634                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3635                                 stats.acc_offload_cycles);
3636                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3637                                 stats.acc_offload_cycles);
3638                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
3639
3640                 /* Give the device time to process the ops */
3641                 rte_delay_us(200);
3642
3643                 /* Start time measurement for dequeue function offload latency */
3644                 deq_start_time = rte_rdtsc_precise();
3645                 /* Dequeue one operation */
3646                 do {
3647                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3648                                         &ops_deq[deq], 1);
3649                 } while (unlikely(deq != 1));
3650
3651                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
3652                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3653                                 deq_last_time);
3654                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3655                                 deq_last_time);
3656                 time_st->deq_total_time += deq_last_time;
3657
3658                 while (burst_sz != deq)
3659                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
3660                                         &ops_deq[deq], burst_sz - deq);
3661
3662                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3663                 dequeued += deq;
3664         }
3665
3666         return i;
3667 }
3668
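/*
 * Same measurement as offload_latency_test_enc, but driven through the
 * LDPC encode enqueue/dequeue API.
 */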
3669 static int
3670 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
3671                 struct test_buffers *bufs,
3672                 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
3673                 uint16_t queue_id, const uint16_t num_to_process,
3674                 uint16_t burst_sz, struct test_time_stats *time_st)
3675 {
3676         int i, dequeued, ret;
3677         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3678         uint64_t enq_start_time, deq_start_time;
3679         uint64_t enq_sw_last_time, deq_last_time;
3680         struct rte_bbdev_stats stats;
3681
3682         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
3683                 uint16_t enq = 0, deq = 0;
3684
3685                 if (unlikely(num_to_process - dequeued < burst_sz))
3686                         burst_sz = num_to_process - dequeued;
3687
3688                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
3689                 TEST_ASSERT_SUCCESS(ret,
3690                                 "rte_bbdev_enc_op_alloc_bulk() failed");
3691                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3692                         copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
3693                                         bufs->inputs,
3694                                         bufs->hard_outputs,
3695                                         ref_op);
3696
3697                 /* Start time measurement for enqueue function offload latency */
3698                 enq_start_time = rte_rdtsc_precise();
3699                 do {
3700                         enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
3701                                         &ops_enq[enq], burst_sz - enq);
3702                 } while (unlikely(burst_sz != enq));
3703
3704                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
3705                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
3706                 TEST_ASSERT_SUCCESS(ret,
3707                                 "Failed to get stats for queue (%u) of device (%u)",
3708                                 queue_id, dev_id);
3709
3710                 enq_sw_last_time -= stats.acc_offload_cycles;
3711                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
3712                                 enq_sw_last_time);
3713                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
3714                                 enq_sw_last_time);
3715                 time_st->enq_sw_total_time += enq_sw_last_time;
3716
3717                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
3718                                 stats.acc_offload_cycles);
3719                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
3720                                 stats.acc_offload_cycles);
3721                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
3722
3723                 /* Give the device time to process the ops */
3724                 rte_delay_us(200);
3725
3726                 /* Start time measurement for dequeue function offload latency */
3727                 deq_start_time = rte_rdtsc_precise();
3728                 /* Dequeue one operation */
3729                 do {
3730                         deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3731                                         &ops_deq[deq], 1);
3732                 } while (unlikely(deq != 1));
3733
3734                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
3735                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
3736                                 deq_last_time);
3737                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
3738                                 deq_last_time);
3739                 time_st->deq_total_time += deq_last_time;
3740
3741                 while (burst_sz != deq)
3742                         deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
3743                                         &ops_deq[deq], burst_sz - deq);
3744
3745                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
3746                 dequeued += deq;
3747         }
3748
3749         return i;
3750 }
3751 #endif
3752
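/*
 * Offload cost test entry point: selects the measurement routine matching
 * the vector op type (unknown types, including RTE_BBDEV_OP_NONE, fall back
 * to the Turbo encode path) and reports enqueue/dequeue cost statistics.
 */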
3753 static int
3754 offload_cost_test(struct active_device *ad,
3755                 struct test_op_params *op_params)
3756 {
3757 #ifndef RTE_BBDEV_OFFLOAD_COST
3758         RTE_SET_USED(ad);
3759         RTE_SET_USED(op_params);
3760         printf("Offload latency test is disabled.\n");
3761         printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
3762         return TEST_SKIPPED;
3763 #else
3764         int iter;
3765         uint16_t burst_sz = op_params->burst_sz;
3766         const uint16_t num_to_process = op_params->num_to_process;
3767         const enum rte_bbdev_op_type op_type = test_vector.op_type;
3768         const uint16_t queue_id = ad->queue_ids[0];
3769         struct test_buffers *bufs = NULL;
3770         struct rte_bbdev_info info;
3771         const char *op_type_str;
3772         struct test_time_stats time_st;
3773
3774         memset(&time_st, 0, sizeof(struct test_time_stats));
3775         time_st.enq_sw_min_time = UINT64_MAX;
3776         time_st.enq_acc_min_time = UINT64_MAX;
3777         time_st.deq_min_time = UINT64_MAX;
3778
3779         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3780                         "BURST_SIZE should be <= %u", MAX_BURST);
3781
3782         rte_bbdev_info_get(ad->dev_id, &info);
3783         bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3784
3785         op_type_str = rte_bbdev_op_type_str(op_type);
3786         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
3787
3788         printf("+ ------------------------------------------------------- +\n");
3789         printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
3790                         info.dev_name, burst_sz, num_to_process, op_type_str);
3791
3792         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
3793                 iter = offload_latency_test_dec(op_params->mp, bufs,
3794                                 op_params->ref_dec_op, ad->dev_id, queue_id,
3795                                 num_to_process, burst_sz, &time_st);
3796         else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
3797                 iter = offload_latency_test_enc(op_params->mp, bufs,
3798                                 op_params->ref_enc_op, ad->dev_id, queue_id,
3799                                 num_to_process, burst_sz, &time_st);
3800         else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
3801                 iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
3802                                 op_params->ref_enc_op, ad->dev_id, queue_id,
3803                                 num_to_process, burst_sz, &time_st);
3804         else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
3805                 iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
3806                         op_params->ref_dec_op, ad->dev_id, queue_id,
3807                         num_to_process, burst_sz, &time_st);
3808         else
3809                 iter = offload_latency_test_enc(op_params->mp, bufs,
3810                                 op_params->ref_enc_op, ad->dev_id, queue_id,
3811                                 num_to_process, burst_sz, &time_st);
3812
3813         if (iter <= 0)
3814                 return TEST_FAILED;
3815
3816         printf("Enqueue driver offload cost latency:\n"
3817                         "\tavg: %lg cycles, %lg us\n"
3818                         "\tmin: %lg cycles, %lg us\n"
3819                         "\tmax: %lg cycles, %lg us\n"
3820                         "Enqueue accelerator offload cost latency:\n"
3821                         "\tavg: %lg cycles, %lg us\n"
3822                         "\tmin: %lg cycles, %lg us\n"
3823                         "\tmax: %lg cycles, %lg us\n",
3824                         (double)time_st.enq_sw_total_time / (double)iter,
3825                         (double)(time_st.enq_sw_total_time * 1000000) /
3826                         (double)iter / (double)rte_get_tsc_hz(),
3827                         (double)time_st.enq_sw_min_time,
3828                         (double)(time_st.enq_sw_min_time * 1000000) /
3829                         rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
3830                         (double)(time_st.enq_sw_max_time * 1000000) /
3831                         rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
3832                         (double)iter,
3833                         (double)(time_st.enq_acc_total_time * 1000000) /
3834                         (double)iter / (double)rte_get_tsc_hz(),
3835                         (double)time_st.enq_acc_min_time,
3836                         (double)(time_st.enq_acc_min_time * 1000000) /
3837                         rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
3838                         (double)(time_st.enq_acc_max_time * 1000000) /
3839                         rte_get_tsc_hz());
3840
3841         printf("Dequeue offload cost latency - one op:\n"
3842                         "\tavg: %lg cycles, %lg us\n"
3843                         "\tmin: %lg cycles, %lg us\n"
3844                         "\tmax: %lg cycles, %lg us\n",
3845                         (double)time_st.deq_total_time / (double)iter,
3846                         (double)(time_st.deq_total_time * 1000000) /
3847                         (double)iter / (double)rte_get_tsc_hz(),
3848                         (double)time_st.deq_min_time,
3849                         (double)(time_st.deq_min_time * 1000000) /
3850                         rte_get_tsc_hz(), (double)time_st.deq_max_time,
3851                         (double)(time_st.deq_max_time * 1000000) /
3852                         rte_get_tsc_hz());
3853
3854         return TEST_SUCCESS;
3855 #endif
3856 }
3857
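/*
 * Hypothetical helper (not part of this file) illustrating the
 * cycles-to-microseconds conversion used in the reports above:
 * us = cycles * 1e6 / rte_get_tsc_hz(). With such a helper a report value
 * could be written as e.g. cycles_to_us(time_st.deq_min_time).
 */
static inline double
cycles_to_us(uint64_t cycles)
{
        /* TSC frequency (Hz) converts elapsed cycles to wall-clock time. */
        return (double)cycles * 1000000.0 / (double)rte_get_tsc_hz();
}
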
3858 #ifdef RTE_BBDEV_OFFLOAD_COST
3859 static int
3860 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
3861                 const uint16_t num_to_process, uint16_t burst_sz,
3862                 uint64_t *deq_total_time, uint64_t *deq_min_time,
3863                 uint64_t *deq_max_time)
3864 {
3865         int i, deq_total;
3866         struct rte_bbdev_dec_op *ops[MAX_BURST];
3867         uint64_t deq_start_time, deq_last_time;
3868
3869         /* Test deq offload latency from an empty queue */
3870
3871         for (i = 0, deq_total = 0; deq_total < num_to_process;
3872                         ++i, deq_total += burst_sz) {
3873                 deq_start_time = rte_rdtsc_precise();
3874
3875                 if (unlikely(num_to_process - deq_total < burst_sz))
3876                         burst_sz = num_to_process - deq_total;
3877                 rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz);
3878
3879                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
3880                 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
3881                 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
3882                 *deq_total_time += deq_last_time;
3883         }
3884
3885         return i;
3886 }
3887
3888 static int
3889 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
3890                 const uint16_t num_to_process, uint16_t burst_sz,
3891                 uint64_t *deq_total_time, uint64_t *deq_min_time,
3892                 uint64_t *deq_max_time)
3893 {
3894         int i, deq_total;
3895         struct rte_bbdev_enc_op *ops[MAX_BURST];
3896         uint64_t deq_start_time, deq_last_time;
3897
3898         /* Test deq offload latency from an empty queue */
3899         for (i = 0, deq_total = 0; deq_total < num_to_process;
3900                         ++i, deq_total += burst_sz) {
3901                 deq_start_time = rte_rdtsc_precise();
3902
3903                 if (unlikely(num_to_process - deq_total < burst_sz))
3904                         burst_sz = num_to_process - deq_total;
3905                 rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz);
3906
3907                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
3908                 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
3909                 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
3910                 *deq_total_time += deq_last_time;
3911         }
3912
3913         return i;
3914 }
3915 #endif
3916
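/*
 * Empty-queue test entry point: measures the cost of dequeue calls that
 * return no operations, using the decode or encode dequeue API according
 * to the vector op type.
 */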
3917 static int
3918 offload_latency_empty_q_test(struct active_device *ad,
3919                 struct test_op_params *op_params)
3920 {
3921 #ifndef RTE_BBDEV_OFFLOAD_COST
3922         RTE_SET_USED(ad);
3923         RTE_SET_USED(op_params);
3924         printf("Offload latency empty dequeue test is disabled.\n");
3925         printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
3926         return TEST_SKIPPED;
3927 #else
3928         int iter;
3929         uint64_t deq_total_time, deq_min_time, deq_max_time;
3930         uint16_t burst_sz = op_params->burst_sz;
3931         const uint16_t num_to_process = op_params->num_to_process;
3932         const enum rte_bbdev_op_type op_type = test_vector.op_type;
3933         const uint16_t queue_id = ad->queue_ids[0];
3934         struct rte_bbdev_info info;
3935         const char *op_type_str;
3936
3937         deq_total_time = deq_max_time = 0;
3938         deq_min_time = UINT64_MAX;
3939
3940         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3941                         "BURST_SIZE should be <= %u", MAX_BURST);
3942
3943         rte_bbdev_info_get(ad->dev_id, &info);
3944
3945         op_type_str = rte_bbdev_op_type_str(op_type);
3946         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
3947
3948         printf("+ ------------------------------------------------------- +\n");
3949         printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
3950                         info.dev_name, burst_sz, num_to_process, op_type_str);
3951
3952         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
3953                 iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
3954                                 num_to_process, burst_sz, &deq_total_time,
3955                                 &deq_min_time, &deq_max_time);
3956         else
3957                 iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
3958                                 num_to_process, burst_sz, &deq_total_time,
3959                                 &deq_min_time, &deq_max_time);
3960
3961         if (iter <= 0)
3962                 return TEST_FAILED;
3963
3964         printf("Empty dequeue offload:\n"
3965                         "\tavg: %lg cycles, %lg us\n"
3966                         "\tmin: %lg cycles, %lg us\n"
3967                         "\tmax: %lg cycles, %lg us\n",
3968                         (double)deq_total_time / (double)iter,
3969                         (double)(deq_total_time * 1000000) / (double)iter /
3970                         (double)rte_get_tsc_hz(), (double)deq_min_time,
3971                         (double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
3972                         (double)deq_max_time, (double)(deq_max_time * 1000000) /
3973                         rte_get_tsc_hz());
3974
3975         return TEST_SUCCESS;
3976 #endif
3977 }
3978
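/* Thin wrappers binding each test suite entry to its test body. */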
3979 static int
3980 throughput_tc(void)
3981 {
3982         return run_test_case(throughput_test);
3983 }
3984
3985 static int
3986 offload_cost_tc(void)
3987 {
3988         return run_test_case(offload_cost_test);
3989 }
3990
3991 static int
3992 offload_latency_empty_q_tc(void)
3993 {
3994         return run_test_case(offload_latency_empty_q_test);
3995 }
3996
3997 static int
3998 latency_tc(void)
3999 {
4000         return run_test_case(latency_test);
4001 }
4002
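/*
 * Reuses the throughput test body; the interrupt-specific behaviour comes
 * from the suite setup (interrupt_testsuite_setup) rather than from a
 * separate test function.
 */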
4003 static int
4004 interrupt_tc(void)
4005 {
4006         return run_test_case(throughput_test);
4007 }
4008
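/*
 * Test suite definitions: each suite wraps a single test case with the
 * common per-test setup/teardown (ut_setup/ut_teardown).
 */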
4009 static struct unit_test_suite bbdev_throughput_testsuite = {
4010         .suite_name = "BBdev Throughput Tests",
4011         .setup = testsuite_setup,
4012         .teardown = testsuite_teardown,
4013         .unit_test_cases = {
4014                 TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
4015                 TEST_CASES_END() /**< NULL terminate unit test array */
4016         }
4017 };
4018
4019 static struct unit_test_suite bbdev_validation_testsuite = {
4020         .suite_name = "BBdev Validation Tests",
4021         .setup = testsuite_setup,
4022         .teardown = testsuite_teardown,
4023         .unit_test_cases = {
4024                 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
4025                 TEST_CASES_END() /**< NULL terminate unit test array */
4026         }
4027 };
4028
4029 static struct unit_test_suite bbdev_latency_testsuite = {
4030         .suite_name = "BBdev Latency Tests",
4031         .setup = testsuite_setup,
4032         .teardown = testsuite_teardown,
4033         .unit_test_cases = {
4034                 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
4035                 TEST_CASES_END() /**< NULL terminate unit test array */
4036         }
4037 };
4038
4039 static struct unit_test_suite bbdev_offload_cost_testsuite = {
4040         .suite_name = "BBdev Offload Cost Tests",
4041         .setup = testsuite_setup,
4042         .teardown = testsuite_teardown,
4043         .unit_test_cases = {
4044                 TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
4045                 TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
4046                 TEST_CASES_END() /**< NULL terminate unit test array */
4047         }
4048 };
4049
4050 static struct unit_test_suite bbdev_interrupt_testsuite = {
4051         .suite_name = "BBdev Interrupt Tests",
4052         .setup = interrupt_testsuite_setup,
4053         .teardown = testsuite_teardown,
4054         .unit_test_cases = {
4055                 TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
4056                 TEST_CASES_END() /**< NULL terminate unit test array */
4057         }
4058 };
4059
4060 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
4061 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
4062 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
4063 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
4064 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
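
/*
 * The command names registered above (throughput, validation, latency,
 * offload, interrupt) are the test-case identifiers exposed by the bbdev
 * test application; selecting one of them runs the corresponding suite.
 */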