app/test-bbdev/test_bbdev_perf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19 #include <rte_interrupts.h>
20
21 #include "main.h"
22 #include "test_bbdev_vector.h"
23
24 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
25
26 #define MAX_QUEUES RTE_MAX_LCORE
27 #define TEST_REPETITIONS 100
28 #define WAIT_OFFLOAD_US 1000
29
30 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
31 #include <fpga_lte_fec.h>
32 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
33 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
34 #define VF_UL_4G_QUEUE_VALUE 4
35 #define VF_DL_4G_QUEUE_VALUE 4
36 #define UL_4G_BANDWIDTH 3
37 #define DL_4G_BANDWIDTH 3
38 #define UL_4G_LOAD_BALANCE 128
39 #define DL_4G_LOAD_BALANCE 128
40 #define FLR_4G_TIMEOUT 610
41 #endif
42
43 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
44 #include <rte_pmd_fpga_5gnr_fec.h>
45 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
46 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
47 #define VF_UL_5G_QUEUE_VALUE 4
48 #define VF_DL_5G_QUEUE_VALUE 4
49 #define UL_5G_BANDWIDTH 3
50 #define DL_5G_BANDWIDTH 3
51 #define UL_5G_LOAD_BALANCE 128
52 #define DL_5G_LOAD_BALANCE 128
53 #define FLR_5G_TIMEOUT 610
54 #endif
55
56 #ifdef RTE_BASEBAND_ACC100
57 #include <rte_acc100_cfg.h>
58 #define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
59 #define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
60 #define ACC100_QMGR_NUM_AQS 16
61 #define ACC100_QMGR_NUM_QGS 2
62 #define ACC100_QMGR_AQ_DEPTH 5
63 #define ACC100_QMGR_INVALID_IDX -1
64 #define ACC100_QMGR_RR 1
65 #define ACC100_QOS_GBR 0
66 #endif
67
68 #define OPS_CACHE_SIZE 256U
69 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
70
71 #define SYNC_WAIT 0
72 #define SYNC_START 1
73 #define INVALID_OPAQUE -1
74
75 #define INVALID_QUEUE_ID -1
76 /* Increment for next code block in external HARQ memory */
77 #define HARQ_INCR 32768
78 /* Headroom for filler LLRs insertion in HARQ buffer */
79 #define FILLER_HEADROOM 1024
80 /* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
81 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */
82 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */
83 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
84 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
85 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
86 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
87 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
88 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
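/*
 * Illustrative sketch, not part of the original file: the numerators above
 * map to the k0 starting positions of 3GPP 38.212 Table 5.4.2.1-2, where
 * k0 = floor(numerator * n_cb / N) * z_c with N = N_ZC_1 * z_c for BG 1 and
 * N = N_ZC_2 * z_c for BG 2. The helper name and parameters below are
 * hypothetical and only show how these constants are meant to be used.
 */
static inline uint16_t
example_ldpc_k0(uint16_t n_cb, uint16_t z_c, uint8_t basegraph, uint8_t rv)
{
	uint16_t num, n_zc = (basegraph == 1) ? N_ZC_1 : N_ZC_2;

	if (rv == 0)
		return 0;
	if (rv == 1)
		num = (basegraph == 1) ? K0_1_1 : K0_1_2;
	else if (rv == 2)
		num = (basegraph == 1) ? K0_2_1 : K0_2_2;
	else
		num = (basegraph == 1) ? K0_3_1 : K0_3_2;

	/* Integer division provides the floor() of the 38.212 formula */
	return ((num * n_cb) / (n_zc * z_c)) * z_c;
}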
89
90 static struct test_bbdev_vector test_vector;
91
92 /* Switch between PMD and Interrupt for throughput TC */
93 static bool intr_enabled;
94
95 /* LLR arithmetic representation for numerical conversion */
96 static int ldpc_llr_decimals;
97 static int ldpc_llr_size;
98 /* Keep track of the LDPC decoder device capability flag */
99 static uint32_t ldpc_cap_flags;
100
101 /* Represents tested active devices */
102 static struct active_device {
103         const char *driver_name;
104         uint8_t dev_id;
105         uint16_t supported_ops;
106         uint16_t queue_ids[MAX_QUEUES];
107         uint16_t nb_queues;
108         struct rte_mempool *ops_mempool;
109         struct rte_mempool *in_mbuf_pool;
110         struct rte_mempool *hard_out_mbuf_pool;
111         struct rte_mempool *soft_out_mbuf_pool;
112         struct rte_mempool *harq_in_mbuf_pool;
113         struct rte_mempool *harq_out_mbuf_pool;
114 } active_devs[RTE_BBDEV_MAX_DEVS];
115
116 static uint8_t nb_active_devs;
117
118 /* Data buffers used by BBDEV ops */
119 struct test_buffers {
120         struct rte_bbdev_op_data *inputs;
121         struct rte_bbdev_op_data *hard_outputs;
122         struct rte_bbdev_op_data *soft_outputs;
123         struct rte_bbdev_op_data *harq_inputs;
124         struct rte_bbdev_op_data *harq_outputs;
125 };
126
127 /* Operation parameters specific for given test case */
128 struct test_op_params {
129         struct rte_mempool *mp;
130         struct rte_bbdev_dec_op *ref_dec_op;
131         struct rte_bbdev_enc_op *ref_enc_op;
132         uint16_t burst_sz;
133         uint16_t num_to_process;
134         uint16_t num_lcores;
135         int vector_mask;
136         rte_atomic16_t sync;
137         struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
138 };
139
140 /* Contains per lcore params */
141 struct thread_params {
142         uint8_t dev_id;
143         uint16_t queue_id;
144         uint32_t lcore_id;
145         uint64_t start_time;
146         double ops_per_sec;
147         double mbps;
148         uint8_t iter_count;
149         double iter_average;
150         double bler;
151         rte_atomic16_t nb_dequeued;
152         rte_atomic16_t processing_status;
153         rte_atomic16_t burst_sz;
154         struct test_op_params *op_params;
155         struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
156         struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
157 };
158
159 #ifdef RTE_BBDEV_OFFLOAD_COST
160 /* Stores time statistics */
161 struct test_time_stats {
162         /* Stores software enqueue total working time */
163         uint64_t enq_sw_total_time;
164         /* Stores minimum value of software enqueue working time */
165         uint64_t enq_sw_min_time;
166         /* Stores maximum value of software enqueue working time */
167         uint64_t enq_sw_max_time;
168         /* Stores accelerator enqueue total working time */
169         uint64_t enq_acc_total_time;
170         /* Stores minimum value of accelerator enqueue working time */
171         uint64_t enq_acc_min_time;
172         /* Stores maximum value of accelerator enqueue working time */
173         uint64_t enq_acc_max_time;
174         /* Stores dequeue total working time */
175         uint64_t deq_total_time;
176         /* Stores minimum value of dequeue working time */
177         uint64_t deq_min_time;
178         /* Stores maximum value of dequeue working time */
179         uint64_t deq_max_time;
180 };
181 #endif
182
183 typedef int (test_case_function)(struct active_device *ad,
184                 struct test_op_params *op_params);
185
186 static inline void
187 mbuf_reset(struct rte_mbuf *m)
188 {
189         m->pkt_len = 0;
190
191         do {
192                 m->data_len = 0;
193                 m = m->next;
194         } while (m != NULL);
195 }
196
197 /* Read flag value 0/1 from bitmap */
198 static inline bool
199 check_bit(uint32_t bitmap, uint32_t bitmask)
200 {
201         return bitmap & bitmask;
202 }
203
204 static inline void
205 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
206 {
207         ad->supported_ops |= (1 << op_type);
208 }
209
210 static inline bool
211 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
212 {
213         return ad->supported_ops & (1 << op_type);
214 }
215
216 static inline bool
217 flags_match(uint32_t flags_req, uint32_t flags_present)
218 {
219         return (flags_req & flags_present) == flags_req;
220 }
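/*
 * Worked example, illustrative only: flags_match(0x5, 0x7) is true because
 * every requested bit is present, while flags_match(0x5, 0x6) is false
 * because bit 0x1 is requested but missing.
 */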
221
222 static void
223 clear_soft_out_cap(uint32_t *op_flags)
224 {
225         *op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
226         *op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
227         *op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
228 }
229
230 /* Convert all the test vector op data entries to big-endian
231  * format. Used when the device expects its input in big-endian
232  * format.
233  */
234 static inline void
235 convert_op_data_to_be(void)
236 {
237         struct op_data_entries *op;
238         enum op_data_type type;
239         uint8_t nb_segs, *rem_data, temp;
240         uint32_t *data, len;
241         int complete, rem, i, j;
242
243         for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
244                 nb_segs = test_vector.entries[type].nb_segments;
245                 op = &test_vector.entries[type];
246
247                 /* Invert byte endianness for all the segments */
248                 for (i = 0; i < nb_segs; ++i) {
249                         len = op->segments[i].length;
250                         data = op->segments[i].addr;
251
252                         /* Swap complete u32 bytes */
253                         complete = len / 4;
254                         for (j = 0; j < complete; j++)
255                                 data[j] = rte_bswap32(data[j]);
256
257                         /* Swap any remaining bytes */
258                         rem = len % 4;
259                         rem_data = (uint8_t *)&data[j];
260                         for (j = 0; j < rem/2; j++) {
261                                 temp = rem_data[j];
262                                 rem_data[j] = rem_data[rem - j - 1];
263                                 rem_data[rem - j - 1] = temp;
264                         }
265                 }
266         }
267 }
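/*
 * Worked example, illustrative only: for a 6-byte segment
 * {0x01, 0x02, 0x03, 0x04, 0x05, 0x06}, the first four bytes are swapped
 * as a single u32, giving {0x04, 0x03, 0x02, 0x01}, and the remaining two
 * bytes are reversed in place, giving {0x06, 0x05}.
 */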
268
269 static int
270 check_dev_cap(const struct rte_bbdev_info *dev_info)
271 {
272         unsigned int i;
273         unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
274                 nb_harq_inputs, nb_harq_outputs;
275         const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
276         uint8_t dev_data_endianness = dev_info->drv.data_endianness;
277
278         nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
279         nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
280         nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
281         nb_harq_inputs  = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
282         nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;
283
284         for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
285                 if (op_cap->type != test_vector.op_type)
286                         continue;
287
288                 if (dev_data_endianness == RTE_BIG_ENDIAN)
289                         convert_op_data_to_be();
290
291                 if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
292                         const struct rte_bbdev_op_cap_turbo_dec *cap =
293                                         &op_cap->cap.turbo_dec;
294                         /* Ignore lack of soft output capability, just skip
295                          * checking if soft output is valid.
296                          */
297                         if ((test_vector.turbo_dec.op_flags &
298                                         RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
299                                         !(cap->capability_flags &
300                                         RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
301                                 printf(
302                                         "INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
303                                         dev_info->dev_name);
304                                 clear_soft_out_cap(
305                                         &test_vector.turbo_dec.op_flags);
306                         }
307
308                         if (!flags_match(test_vector.turbo_dec.op_flags,
309                                         cap->capability_flags))
310                                 return TEST_FAILED;
311                         if (nb_inputs > cap->num_buffers_src) {
312                                 printf("Too many inputs defined: %u, max: %u\n",
313                                         nb_inputs, cap->num_buffers_src);
314                                 return TEST_FAILED;
315                         }
316                         if (nb_soft_outputs > cap->num_buffers_soft_out &&
317                                         (test_vector.turbo_dec.op_flags &
318                                         RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
319                                 printf(
320                                         "Too many soft outputs defined: %u, max: %u\n",
321                                                 nb_soft_outputs,
322                                                 cap->num_buffers_soft_out);
323                                 return TEST_FAILED;
324                         }
325                         if (nb_hard_outputs > cap->num_buffers_hard_out) {
326                                 printf(
327                                         "Too many hard outputs defined: %u, max: %u\n",
328                                                 nb_hard_outputs,
329                                                 cap->num_buffers_hard_out);
330                                 return TEST_FAILED;
331                         }
332                         if (intr_enabled && !(cap->capability_flags &
333                                         RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
334                                 printf(
335                                         "Dequeue interrupts are not supported!\n");
336                                 return TEST_FAILED;
337                         }
338
339                         return TEST_SUCCESS;
340                 } else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
341                         const struct rte_bbdev_op_cap_turbo_enc *cap =
342                                         &op_cap->cap.turbo_enc;
343
344                         if (!flags_match(test_vector.turbo_enc.op_flags,
345                                         cap->capability_flags))
346                                 return TEST_FAILED;
347                         if (nb_inputs > cap->num_buffers_src) {
348                                 printf("Too many inputs defined: %u, max: %u\n",
349                                         nb_inputs, cap->num_buffers_src);
350                                 return TEST_FAILED;
351                         }
352                         if (nb_hard_outputs > cap->num_buffers_dst) {
353                                 printf(
354                                         "Too many hard outputs defined: %u, max: %u\n",
355                                         nb_hard_outputs, cap->num_buffers_dst);
356                                 return TEST_FAILED;
357                         }
358                         if (intr_enabled && !(cap->capability_flags &
359                                         RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
360                                 printf(
361                                         "Dequeue interrupts are not supported!\n");
362                                 return TEST_FAILED;
363                         }
364
365                         return TEST_SUCCESS;
366                 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
367                         const struct rte_bbdev_op_cap_ldpc_enc *cap =
368                                         &op_cap->cap.ldpc_enc;
369
370                         if (!flags_match(test_vector.ldpc_enc.op_flags,
371                                         cap->capability_flags)){
372                                 printf("Flag Mismatch\n");
373                                 return TEST_FAILED;
374                         }
375                         if (nb_inputs > cap->num_buffers_src) {
376                                 printf("Too many inputs defined: %u, max: %u\n",
377                                         nb_inputs, cap->num_buffers_src);
378                                 return TEST_FAILED;
379                         }
380                         if (nb_hard_outputs > cap->num_buffers_dst) {
381                                 printf(
382                                         "Too many hard outputs defined: %u, max: %u\n",
383                                         nb_hard_outputs, cap->num_buffers_dst);
384                                 return TEST_FAILED;
385                         }
386                         if (intr_enabled && !(cap->capability_flags &
387                                         RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
388                                 printf(
389                                         "Dequeue interrupts are not supported!\n");
390                                 return TEST_FAILED;
391                         }
392
393                         return TEST_SUCCESS;
394                 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
395                         const struct rte_bbdev_op_cap_ldpc_dec *cap =
396                                         &op_cap->cap.ldpc_dec;
397
398                         if (!flags_match(test_vector.ldpc_dec.op_flags,
399                                         cap->capability_flags)){
400                                 printf("Flag Mismatch\n");
401                                 return TEST_FAILED;
402                         }
403                         if (nb_inputs > cap->num_buffers_src) {
404                                 printf("Too many inputs defined: %u, max: %u\n",
405                                         nb_inputs, cap->num_buffers_src);
406                                 return TEST_FAILED;
407                         }
408                         if (nb_hard_outputs > cap->num_buffers_hard_out) {
409                                 printf(
410                                         "Too many hard outputs defined: %u, max: %u\n",
411                                         nb_hard_outputs,
412                                         cap->num_buffers_hard_out);
413                                 return TEST_FAILED;
414                         }
415                         if (nb_harq_inputs > cap->num_buffers_hard_out) {
416                                 printf(
417                                         "Too many HARQ inputs defined: %u, max: %u\n",
418                                         nb_harq_inputs,
419                                         cap->num_buffers_hard_out);
420                                 return TEST_FAILED;
421                         }
422                         if (nb_harq_outputs > cap->num_buffers_hard_out) {
423                                 printf(
424                                         "Too many HARQ outputs defined: %u, max: %u\n",
425                                         nb_harq_outputs,
426                                         cap->num_buffers_hard_out);
427                                 return TEST_FAILED;
428                         }
429                         if (intr_enabled && !(cap->capability_flags &
430                                         RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
431                                 printf(
432                                         "Dequeue interrupts are not supported!\n");
433                                 return TEST_FAILED;
434                         }
435                         if (intr_enabled && (test_vector.ldpc_dec.op_flags &
436                                 (RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
437                                 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
438                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
439                                         ))) {
440                                 printf("Skip loop-back with interrupt\n");
441                                 return TEST_FAILED;
442                         }
443                         return TEST_SUCCESS;
444                 }
445         }
446
447         if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
448                 return TEST_SUCCESS; /* Special case for NULL device */
449
450         return TEST_FAILED;
451 }
452
453 /* Calculate the optimal mempool size, not smaller than val */
454 static unsigned int
455 optimal_mempool_size(unsigned int val)
456 {
457         return rte_align32pow2(val + 1) - 1;
458 }
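/*
 * Illustrative values, not part of the original file: DPDK mempools are
 * most memory efficient when sized to a power of two minus one, so
 * optimal_mempool_size(511) returns 511 and optimal_mempool_size(512)
 * returns 1023.
 */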
459
460 /* allocates mbuf mempool for inputs and outputs */
461 static struct rte_mempool *
462 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
463                 int socket_id, unsigned int mbuf_pool_size,
464                 const char *op_type_str)
465 {
466         unsigned int i;
467         uint32_t max_seg_sz = 0;
468         char pool_name[RTE_MEMPOOL_NAMESIZE];
469
470         /* find max input segment size */
471         for (i = 0; i < entries->nb_segments; ++i)
472                 if (entries->segments[i].length > max_seg_sz)
473                         max_seg_sz = entries->segments[i].length;
474
475         snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
476                         dev_id);
477         return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
478                         RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
479                                         + FILLER_HEADROOM,
480                         (unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
481 }
482
483 static int
484 create_mempools(struct active_device *ad, int socket_id,
485                 enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
486 {
487         struct rte_mempool *mp;
488         unsigned int ops_pool_size, mbuf_pool_size = 0;
489         char pool_name[RTE_MEMPOOL_NAMESIZE];
490         const char *op_type_str;
491         enum rte_bbdev_op_type op_type = org_op_type;
492
493         struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
494         struct op_data_entries *hard_out =
495                         &test_vector.entries[DATA_HARD_OUTPUT];
496         struct op_data_entries *soft_out =
497                         &test_vector.entries[DATA_SOFT_OUTPUT];
498         struct op_data_entries *harq_in =
499                         &test_vector.entries[DATA_HARQ_INPUT];
500         struct op_data_entries *harq_out =
501                         &test_vector.entries[DATA_HARQ_OUTPUT];
502
503         /* allocate ops mempool */
504         ops_pool_size = optimal_mempool_size(RTE_MAX(
505                         /* Ops used plus 1 reference op */
506                         RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
507                         /* Minimal cache size plus 1 reference op */
508                         (unsigned int)(1.5 * rte_lcore_count() *
509                                         OPS_CACHE_SIZE + 1)),
510                         OPS_POOL_SIZE_MIN));
511
512         if (org_op_type == RTE_BBDEV_OP_NONE)
513                 op_type = RTE_BBDEV_OP_TURBO_ENC;
514
515         op_type_str = rte_bbdev_op_type_str(op_type);
516         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
517
518         snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
519                         ad->dev_id);
520         mp = rte_bbdev_op_pool_create(pool_name, op_type,
521                         ops_pool_size, OPS_CACHE_SIZE, socket_id);
522         TEST_ASSERT_NOT_NULL(mp,
523                         "ERROR Failed to create %u items ops pool for dev %u on socket %u.",
524                         ops_pool_size,
525                         ad->dev_id,
526                         socket_id);
527         ad->ops_mempool = mp;
528
529         /* Do not create inputs and outputs mbufs for BaseBand Null Device */
530         if (org_op_type == RTE_BBDEV_OP_NONE)
531                 return TEST_SUCCESS;
532
533         /* Inputs */
534         if (in->nb_segments > 0) {
535                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
536                                 in->nb_segments);
537                 mp = create_mbuf_pool(in, ad->dev_id, socket_id,
538                                 mbuf_pool_size, "in");
539                 TEST_ASSERT_NOT_NULL(mp,
540                                 "ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
541                                 mbuf_pool_size,
542                                 ad->dev_id,
543                                 socket_id);
544                 ad->in_mbuf_pool = mp;
545         }
546
547         /* Hard outputs */
548         if (hard_out->nb_segments > 0) {
549                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
550                                 hard_out->nb_segments);
551                 mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
552                                 mbuf_pool_size,
553                                 "hard_out");
554                 TEST_ASSERT_NOT_NULL(mp,
555                                 "ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
556                                 mbuf_pool_size,
557                                 ad->dev_id,
558                                 socket_id);
559                 ad->hard_out_mbuf_pool = mp;
560         }
561
562         /* Soft outputs */
563         if (soft_out->nb_segments > 0) {
564                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
565                                 soft_out->nb_segments);
566                 mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
567                                 mbuf_pool_size,
568                                 "soft_out");
569                 TEST_ASSERT_NOT_NULL(mp,
570                                 "ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.",
571                                 mbuf_pool_size,
572                                 ad->dev_id,
573                                 socket_id);
574                 ad->soft_out_mbuf_pool = mp;
575         }
576
577         /* HARQ inputs */
578         if (harq_in->nb_segments > 0) {
579                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
580                                 harq_in->nb_segments);
581                 mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
582                                 mbuf_pool_size,
583                                 "harq_in");
584                 TEST_ASSERT_NOT_NULL(mp,
585                                 "ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.",
586                                 mbuf_pool_size,
587                                 ad->dev_id,
588                                 socket_id);
589                 ad->harq_in_mbuf_pool = mp;
590         }
591
592         /* HARQ outputs */
593         if (harq_out->nb_segments > 0) {
594                 mbuf_pool_size = optimal_mempool_size(ops_pool_size *
595                                 harq_out->nb_segments);
596                 mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
597                                 mbuf_pool_size,
598                                 "harq_out");
599                 TEST_ASSERT_NOT_NULL(mp,
600                                 "ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.",
601                                 mbuf_pool_size,
602                                 ad->dev_id,
603                                 socket_id);
604                 ad->harq_out_mbuf_pool = mp;
605         }
606
607         return TEST_SUCCESS;
608 }
609
610 static int
611 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
612                 struct test_bbdev_vector *vector)
613 {
614         int ret;
615         unsigned int queue_id;
616         struct rte_bbdev_queue_conf qconf;
617         struct active_device *ad = &active_devs[nb_active_devs];
618         unsigned int nb_queues;
619         enum rte_bbdev_op_type op_type = vector->op_type;
620
621 /* Configure FPGA LTE FEC with PF & VF values
622  * if the '-i' flag is set and an FPGA device is used
623  */
624 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
625         if ((get_init_device() == true) &&
626                 (!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
627                 struct rte_fpga_lte_fec_conf conf;
628                 unsigned int i;
629
630                 printf("Configure FPGA LTE FEC Driver %s with default values\n",
631                                 info->drv.driver_name);
632
633                 /* clear default configuration before initialization */
634                 memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf));
635
636                 /* Set PF mode:
637                  * true if the PF is used for the data plane,
638                  * false if VFs are used.
639                  */
640                 conf.pf_mode_en = true;
641
642                 for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
643                         /* Number of UL queues per VF (fpga supports 8 VFs) */
644                         conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
645                         /* Number of DL queues per VF (fpga supports 8 VFs) */
646                         conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
647                 }
648
649                 /* UL bandwidth. Needed for the scheduling algorithm */
650                 conf.ul_bandwidth = UL_4G_BANDWIDTH;
651                 /* DL bandwidth */
652                 conf.dl_bandwidth = DL_4G_BANDWIDTH;
653
654                 /* UL & DL load balance factor set to 128 */
655                 conf.ul_load_balance = UL_4G_LOAD_BALANCE;
656                 conf.dl_load_balance = DL_4G_LOAD_BALANCE;
657
658                 /* FLR timeout value */
659                 conf.flr_time_out = FLR_4G_TIMEOUT;
660
661                 /* setup FPGA PF with configuration information */
662                 ret = rte_fpga_lte_fec_configure(info->dev_name, &conf);
663                 TEST_ASSERT_SUCCESS(ret,
664                                 "Failed to configure 4G FPGA PF for bbdev %s",
665                                 info->dev_name);
666         }
667 #endif
668 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC
669         if ((get_init_device() == true) &&
670                 (!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
671                 struct rte_fpga_5gnr_fec_conf conf;
672                 unsigned int i;
673
674                 printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
675                                 info->drv.driver_name);
676
677                 /* clear default configuration before initialization */
678                 memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf));
679
680                 /* Set PF mode:
681                  * true if the PF is used for the data plane,
682                  * false if VFs are used.
683                  */
684                 conf.pf_mode_en = true;
685
686                 for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) {
687                         /* Number of UL queues per VF (fpga supports 8 VFs) */
688                         conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE;
689                         /* Number of DL queues per VF (fpga supports 8 VFs) */
690                         conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE;
691                 }
692
693                 /* UL bandwidth. Needed for the scheduling algorithm */
694                 conf.ul_bandwidth = UL_5G_BANDWIDTH;
695                 /* DL bandwidth */
696                 conf.dl_bandwidth = DL_5G_BANDWIDTH;
697
698                 /* UL & DL load balance factor set to 128 */
699                 conf.ul_load_balance = UL_5G_LOAD_BALANCE;
700                 conf.dl_load_balance = DL_5G_LOAD_BALANCE;
701
702                 /* FLR timeout value */
703                 conf.flr_time_out = FLR_5G_TIMEOUT;
704
705                 /* setup FPGA PF with configuration information */
706                 ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf);
707                 TEST_ASSERT_SUCCESS(ret,
708                                 "Failed to configure 5G FPGA PF for bbdev %s",
709                                 info->dev_name);
710         }
711 #endif
712 #ifdef RTE_BASEBAND_ACC100
713         if ((get_init_device() == true) &&
714                 (!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
715                 struct rte_acc100_conf conf;
716                 unsigned int i;
717
718                 printf("Configure ACC100 FEC Driver %s with default values\n",
719                                 info->drv.driver_name);
720
721                 /* clear default configuration before initialization */
722                 memset(&conf, 0, sizeof(struct rte_acc100_conf));
723
724                 /* Always set in PF mode for built-in configuration */
725                 conf.pf_mode_en = true;
726                 for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
727                         conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
728                         conf.arb_dl_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
729                         conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
730                         conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
731                         conf.arb_ul_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
732                         conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
733                         conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
734                         conf.arb_dl_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
735                         conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
736                         conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
737                         conf.arb_ul_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
738                         conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
739                 }
740
741                 conf.input_pos_llr_1_bit = true;
742                 conf.output_pos_llr_1_bit = true;
743                 conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */
744
745                 conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
746                 conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
747                 conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
748                 conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
749                 conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
750                 conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
751                 conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
752                 conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
753                 conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
754                 conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
755                 conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
756                 conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
757                 conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
758                 conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
759                 conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
760                 conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
761
762                 /* setup PF with configuration information */
763                 ret = rte_acc100_configure(info->dev_name, &conf);
764                 TEST_ASSERT_SUCCESS(ret,
765                                 "Failed to configure ACC100 PF for bbdev %s",
766                                 info->dev_name);
767         }
768 #endif
769         /* Refresh the device info now that it has been configured */
770         rte_bbdev_info_get(dev_id, info);
771         nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
772         nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);
773
774         /* setup device */
775         ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
776         if (ret < 0) {
777                 printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
778                                 dev_id, nb_queues, info->socket_id, ret);
779                 return TEST_FAILED;
780         }
781
782         /* configure interrupts if needed */
783         if (intr_enabled) {
784                 ret = rte_bbdev_intr_enable(dev_id);
785                 if (ret < 0) {
786                         printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
787                                         ret);
788                         return TEST_FAILED;
789                 }
790         }
791
792         /* setup device queues */
793         qconf.socket = info->socket_id;
794         qconf.queue_size = info->drv.default_queue_conf.queue_size;
795         qconf.priority = 0;
796         qconf.deferred_start = 0;
797         qconf.op_type = op_type;
798
799         for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
800                 ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
801                 if (ret != 0) {
802                         printf(
803                                         "Allocated all queues (id=%u) at prio%u on dev%u\n",
804                                         queue_id, qconf.priority, dev_id);
805                         qconf.priority++;
806                         ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
807                                         &qconf);
808                 }
809                 if (ret != 0) {
810                         printf("All queues on dev %u allocated: %u\n",
811                                         dev_id, queue_id);
812                         break;
813                 }
814                 ad->queue_ids[queue_id] = queue_id;
815         }
816         TEST_ASSERT(queue_id != 0,
817                         "ERROR Failed to configure any queues on dev %u",
818                         dev_id);
819         ad->nb_queues = queue_id;
820
821         set_avail_op(ad, op_type);
822
823         return TEST_SUCCESS;
824 }
825
826 static int
827 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
828                 struct test_bbdev_vector *vector)
829 {
830         int ret;
831
832         active_devs[nb_active_devs].driver_name = info->drv.driver_name;
833         active_devs[nb_active_devs].dev_id = dev_id;
834
835         ret = add_bbdev_dev(dev_id, info, vector);
836         if (ret == TEST_SUCCESS)
837                 ++nb_active_devs;
838         return ret;
839 }
840
841 static uint8_t
842 populate_active_devices(void)
843 {
844         int ret;
845         uint8_t dev_id;
846         uint8_t nb_devs_added = 0;
847         struct rte_bbdev_info info;
848
849         RTE_BBDEV_FOREACH(dev_id) {
850                 rte_bbdev_info_get(dev_id, &info);
851
852                 if (check_dev_cap(&info)) {
853                         printf(
854                                 "Device %d (%s) does not support specified capabilities\n",
855                                         dev_id, info.dev_name);
856                         continue;
857                 }
858
859                 ret = add_active_device(dev_id, &info, &test_vector);
860                 if (ret != 0) {
861                         printf("Adding active bbdev %s skipped\n",
862                                         info.dev_name);
863                         continue;
864                 }
865                 nb_devs_added++;
866         }
867
868         return nb_devs_added;
869 }
870
871 static int
872 read_test_vector(void)
873 {
874         int ret;
875
876         memset(&test_vector, 0, sizeof(test_vector));
877         printf("Test vector file = %s\n", get_vector_filename());
878         ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
879         TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
880                         get_vector_filename());
881
882         return TEST_SUCCESS;
883 }
884
885 static int
886 testsuite_setup(void)
887 {
888         TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
889
890         if (populate_active_devices() == 0) {
891                 printf("No suitable devices found!\n");
892                 return TEST_SKIPPED;
893         }
894
895         return TEST_SUCCESS;
896 }
897
898 static int
899 interrupt_testsuite_setup(void)
900 {
901         TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
902
903         /* Enable interrupts */
904         intr_enabled = true;
905
906         /* Special case for NULL device (RTE_BBDEV_OP_NONE) */
907         if (populate_active_devices() == 0 ||
908                         test_vector.op_type == RTE_BBDEV_OP_NONE) {
909                 intr_enabled = false;
910                 printf("No suitable devices found!\n");
911                 return TEST_SKIPPED;
912         }
913
914         return TEST_SUCCESS;
915 }
916
917 static void
918 testsuite_teardown(void)
919 {
920         uint8_t dev_id;
921
922         /* Unconfigure devices */
923         RTE_BBDEV_FOREACH(dev_id)
924                 rte_bbdev_close(dev_id);
925
926         /* Clear active devices structs. */
927         memset(active_devs, 0, sizeof(active_devs));
928         nb_active_devs = 0;
929
930         /* Disable interrupts */
931         intr_enabled = false;
932 }
933
934 static int
935 ut_setup(void)
936 {
937         uint8_t i, dev_id;
938
939         for (i = 0; i < nb_active_devs; i++) {
940                 dev_id = active_devs[i].dev_id;
941                 /* reset bbdev stats */
942                 TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
943                                 "Failed to reset stats of bbdev %u", dev_id);
944                 /* start the device */
945                 TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
946                                 "Failed to start bbdev %u", dev_id);
947         }
948
949         return TEST_SUCCESS;
950 }
951
952 static void
953 ut_teardown(void)
954 {
955         uint8_t i, dev_id;
956         struct rte_bbdev_stats stats;
957
958         for (i = 0; i < nb_active_devs; i++) {
959                 dev_id = active_devs[i].dev_id;
960                 /* read stats and print */
961                 rte_bbdev_stats_get(dev_id, &stats);
962                 /* Stop the device */
963                 rte_bbdev_stop(dev_id);
964         }
965 }
966
967 static int
968 init_op_data_objs(struct rte_bbdev_op_data *bufs,
969                 struct op_data_entries *ref_entries,
970                 struct rte_mempool *mbuf_pool, const uint16_t n,
971                 enum op_data_type op_type, uint16_t min_alignment)
972 {
973         int ret;
974         unsigned int i, j;
975         bool large_input = false;
976
977         for (i = 0; i < n; ++i) {
978                 char *data;
979                 struct op_data_buf *seg = &ref_entries->segments[0];
980                 struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
981                 TEST_ASSERT_NOT_NULL(m_head,
982                                 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
983                                 op_type, n * ref_entries->nb_segments,
984                                 mbuf_pool->size);
985
986                 if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
987                         /*
988                          * Special case when DPDK mbuf cannot handle
989                          * the required input size
990                          */
991                         printf("Warning: Larger input size than DPDK mbuf %d\n",
992                                         seg->length);
993                         large_input = true;
994                 }
995                 bufs[i].data = m_head;
996                 bufs[i].offset = 0;
997                 bufs[i].length = 0;
998
999                 if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
1000                         if ((op_type == DATA_INPUT) && large_input) {
1001                                 /* Allocate an oversized external buffer and attach it to the mbuf */
1002                                 data = rte_malloc(NULL, seg->length, 0);
1003                                 TEST_ASSERT_NOT_NULL(data,
1004                                         "rte malloc failed with %u bytes",
1005                                         seg->length);
1006                                 memcpy(data, seg->addr, seg->length);
1007                                 m_head->buf_addr = data;
1008                                 m_head->buf_iova = rte_malloc_virt2iova(data);
1009                                 m_head->data_off = 0;
1010                                 m_head->data_len = seg->length;
1011                         } else {
1012                                 data = rte_pktmbuf_append(m_head, seg->length);
1013                                 TEST_ASSERT_NOT_NULL(data,
1014                                         "Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1015                                         seg->length, op_type);
1016
1017                                 TEST_ASSERT(data == RTE_PTR_ALIGN(
1018                                                 data, min_alignment),
1019                                         "Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1020                                         data, min_alignment);
1021                                 rte_memcpy(data, seg->addr, seg->length);
1022                         }
1023
1024                         bufs[i].length += seg->length;
1025
1026                         for (j = 1; j < ref_entries->nb_segments; ++j) {
1027                                 struct rte_mbuf *m_tail =
1028                                                 rte_pktmbuf_alloc(mbuf_pool);
1029                                 TEST_ASSERT_NOT_NULL(m_tail,
1030                                                 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1031                                                 op_type,
1032                                                 n * ref_entries->nb_segments,
1033                                                 mbuf_pool->size);
1034                                 seg += 1;
1035
1036                                 data = rte_pktmbuf_append(m_tail, seg->length);
1037                                 TEST_ASSERT_NOT_NULL(data,
1038                                                 "Couldn't append %u bytes to mbuf from %d data type mbuf pool",
1039                                                 seg->length, op_type);
1040
1041                                 TEST_ASSERT(data == RTE_PTR_ALIGN(data,
1042                                                 min_alignment),
1043                                                 "Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
1044                                                 data, min_alignment);
1045                                 rte_memcpy(data, seg->addr, seg->length);
1046                                 bufs[i].length += seg->length;
1047
1048                                 ret = rte_pktmbuf_chain(m_head, m_tail);
1049                                 TEST_ASSERT_SUCCESS(ret,
1050                                                 "Couldn't chain mbufs from %d data type mbuf pool",
1051                                                 op_type);
1052                         }
1053                 } else {
1054
1055                         /* allocate chained-mbuf for output buffer */
1056                         for (j = 1; j < ref_entries->nb_segments; ++j) {
1057                                 struct rte_mbuf *m_tail =
1058                                                 rte_pktmbuf_alloc(mbuf_pool);
1059                                 TEST_ASSERT_NOT_NULL(m_tail,
1060                                                 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
1061                                                 op_type,
1062                                                 n * ref_entries->nb_segments,
1063                                                 mbuf_pool->size);
1064
1065                                 ret = rte_pktmbuf_chain(m_head, m_tail);
1066                                 TEST_ASSERT_SUCCESS(ret,
1067                                                 "Couldn't chain mbufs from %d data type mbuf pool",
1068                                                 op_type);
1069                         }
1070                 }
1071         }
1072
1073         return 0;
1074 }
1075
1076 static int
1077 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
1078                 const int socket)
1079 {
1080         int i;
1081
1082         *buffers = rte_zmalloc_socket(NULL, len, 0, socket);
1083         if (*buffers == NULL) {
1084                 printf("WARNING: Failed to allocate op_data on socket %d\n",
1085                                 socket);
1086                 /* try to allocate memory on other detected sockets */
1087                 for (i = 0; i < socket; i++) {
1088                         *buffers = rte_zmalloc_socket(NULL, len, 0, i);
1089                         if (*buffers != NULL)
1090                                 break;
1091                 }
1092         }
1093
1094         return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
1095 }
1096
1097 static void
1098 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
1099                 const uint16_t n, const int8_t max_llr_modulus)
1100 {
1101         uint16_t i, byte_idx;
1102
1103         for (i = 0; i < n; ++i) {
1104                 struct rte_mbuf *m = input_ops[i].data;
1105                 while (m != NULL) {
1106                         int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1107                                         input_ops[i].offset);
1108                         for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1109                                         ++byte_idx)
1110                                 llr[byte_idx] = round((double)max_llr_modulus *
1111                                                 llr[byte_idx] / INT8_MAX);
1112
1113                         m = m->next;
1114                 }
1115         }
1116 }
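/*
 * Worked example, illustrative only: with max_llr_modulus = 16, an input
 * LLR of 127 is rescaled to round(16.0 * 127 / 127) = 16 and an LLR of -64
 * to round(16.0 * -64 / 127) = -8, so the vector values stay within the
 * range the decoder supports.
 */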
1117
1118 /*
1119  * Filler LLRs may need to be inserted into the HARQ input
1120  * when the device expects them in its internal HARQ memory.
1121  */
1122 static void
1123 ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
1124                 const uint16_t n, struct test_op_params *op_params)
1125 {
1126         struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;
1127
1128         if (input_ops == NULL)
1129                 return;
1130         /* No need to add filler if not required by device */
1131         if (!(ldpc_cap_flags &
1132                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
1133                 return;
1134         /* No need to add filler for loopback operation */
1135         if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
1136                 return;
1137
1138         uint16_t i, j, parity_offset;
1139         for (i = 0; i < n; ++i) {
1140                 struct rte_mbuf *m = input_ops[i].data;
1141                 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1142                                 input_ops[i].offset);
1143                 parity_offset = (dec.basegraph == 1 ? 20 : 8)
1144                                 * dec.z_c - dec.n_filler;
1145                 uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
1146                 m->data_len = new_hin_size;
1147                 input_ops[i].length = new_hin_size;
1148                 for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
1149                                 j--)
1150                         llr[j] = llr[j - dec.n_filler];
1151                 uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1152                 for (j = 0; j < dec.n_filler; j++)
1153                         llr[parity_offset + j] = llr_max_pre_scaling;
1154         }
1155 }
1156
1157 static void
1158 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
1159                 const uint16_t n, const int8_t llr_size,
1160                 const int8_t llr_decimals)
1161 {
1162         if (input_ops == NULL)
1163                 return;
1164
1165         uint16_t i, byte_idx;
1166
1167         int16_t llr_max, llr_min, llr_tmp;
1168         llr_max = (1 << (llr_size - 1)) - 1;
1169         llr_min = -llr_max;
1170         for (i = 0; i < n; ++i) {
1171                 struct rte_mbuf *m = input_ops[i].data;
1172                 while (m != NULL) {
1173                         int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
1174                                         input_ops[i].offset);
1175                         for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
1176                                         ++byte_idx) {
1177
1178                                 llr_tmp = llr[byte_idx];
1179                                 if (llr_decimals == 4)
1180                                         llr_tmp *= 8;
1181                                 else if (llr_decimals == 2)
1182                                         llr_tmp *= 2;
1183                                 else if (llr_decimals == 0)
1184                                         llr_tmp /= 2;
1185                                 llr_tmp = RTE_MIN(llr_max,
1186                                                 RTE_MAX(llr_min, llr_tmp));
1187                                 llr[byte_idx] = (int8_t) llr_tmp;
1188                         }
1189
1190                         m = m->next;
1191                 }
1192         }
1193 }
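/*
 * Worked example, illustrative only: with llr_size = 8 and llr_decimals = 4,
 * llr_max is 127 and an input LLR of 20 is first scaled to 160 and then
 * saturated to 127; with llr_decimals = 0 the same input is halved to 10.
 */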
1194
1195
1196
1197 static int
1198 fill_queue_buffers(struct test_op_params *op_params,
1199                 struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
1200                 struct rte_mempool *soft_out_mp,
1201                 struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
1202                 uint16_t queue_id,
1203                 const struct rte_bbdev_op_cap *capabilities,
1204                 uint16_t min_alignment, const int socket_id)
1205 {
1206         int ret;
1207         enum op_data_type type;
1208         const uint16_t n = op_params->num_to_process;
1209
1210         struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
1211                 in_mp,
1212                 soft_out_mp,
1213                 hard_out_mp,
1214                 harq_in_mp,
1215                 harq_out_mp,
1216         };
1217
1218         struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
1219                 &op_params->q_bufs[socket_id][queue_id].inputs,
1220                 &op_params->q_bufs[socket_id][queue_id].soft_outputs,
1221                 &op_params->q_bufs[socket_id][queue_id].hard_outputs,
1222                 &op_params->q_bufs[socket_id][queue_id].harq_inputs,
1223                 &op_params->q_bufs[socket_id][queue_id].harq_outputs,
1224         };
1225
1226         for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
1227                 struct op_data_entries *ref_entries =
1228                                 &test_vector.entries[type];
1229                 if (ref_entries->nb_segments == 0)
1230                         continue;
1231
1232                 ret = allocate_buffers_on_socket(queue_ops[type],
1233                                 n * sizeof(struct rte_bbdev_op_data),
1234                                 socket_id);
1235                 TEST_ASSERT_SUCCESS(ret,
1236                                 "Couldn't allocate memory for rte_bbdev_op_data structs");
1237
1238                 ret = init_op_data_objs(*queue_ops[type], ref_entries,
1239                                 mbuf_pools[type], n, type, min_alignment);
1240                 TEST_ASSERT_SUCCESS(ret,
1241                                 "Couldn't init rte_bbdev_op_data structs");
1242         }
1243
1244         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1245                 limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
1246                         capabilities->cap.turbo_dec.max_llr_modulus);
1247
1248         if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
1249                 bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
1250                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
1251                 bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1252                                 RTE_BBDEV_LDPC_LLR_COMPRESSION;
1253                 bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
1254                                 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
1255                 ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
1256                 ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
1257                 ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
1258                 if (!loopback && !llr_comp)
1259                         ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
1260                                         ldpc_llr_size, ldpc_llr_decimals);
1261                 if (!loopback && !harq_comp)
1262                         ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
1263                                         ldpc_llr_size, ldpc_llr_decimals);
1264                 if (!loopback)
1265                         ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
1266                                         op_params);
1267         }
1268
1269         return 0;
1270 }
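
/*
 * Note on the LDPC decoder case above: unless HARQ loopback or the relevant
 * compression flag is set, the vector LLRs are assumed to be rescaled and
 * saturated by ldpc_input_llr_scaling() so that they match the fixed-point
 * LLR format reported in the device capabilities (llr_size total bits, of
 * which llr_decimals are fractional). For example, with llr_size = 8 the
 * saturation bound is +/-127, the same bound used by generate_llr_input()
 * further below.
 */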
1271
1272 static void
1273 free_buffers(struct active_device *ad, struct test_op_params *op_params)
1274 {
1275         unsigned int i, j;
1276
1277         rte_mempool_free(ad->ops_mempool);
1278         rte_mempool_free(ad->in_mbuf_pool);
1279         rte_mempool_free(ad->hard_out_mbuf_pool);
1280         rte_mempool_free(ad->soft_out_mbuf_pool);
1281         rte_mempool_free(ad->harq_in_mbuf_pool);
1282         rte_mempool_free(ad->harq_out_mbuf_pool);
1283
1284         for (i = 0; i < rte_lcore_count(); ++i) {
1285                 for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
1286                         rte_free(op_params->q_bufs[j][i].inputs);
1287                         rte_free(op_params->q_bufs[j][i].hard_outputs);
1288                         rte_free(op_params->q_bufs[j][i].soft_outputs);
1289                         rte_free(op_params->q_bufs[j][i].harq_inputs);
1290                         rte_free(op_params->q_bufs[j][i].harq_outputs);
1291                 }
1292         }
1293 }
1294
1295 static void
1296 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1297                 unsigned int start_idx,
1298                 struct rte_bbdev_op_data *inputs,
1299                 struct rte_bbdev_op_data *hard_outputs,
1300                 struct rte_bbdev_op_data *soft_outputs,
1301                 struct rte_bbdev_dec_op *ref_op)
1302 {
1303         unsigned int i;
1304         struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
1305
1306         for (i = 0; i < n; ++i) {
1307                 if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1308                         ops[i]->turbo_dec.tb_params.ea =
1309                                         turbo_dec->tb_params.ea;
1310                         ops[i]->turbo_dec.tb_params.eb =
1311                                         turbo_dec->tb_params.eb;
1312                         ops[i]->turbo_dec.tb_params.k_pos =
1313                                         turbo_dec->tb_params.k_pos;
1314                         ops[i]->turbo_dec.tb_params.k_neg =
1315                                         turbo_dec->tb_params.k_neg;
1316                         ops[i]->turbo_dec.tb_params.c =
1317                                         turbo_dec->tb_params.c;
1318                         ops[i]->turbo_dec.tb_params.c_neg =
1319                                         turbo_dec->tb_params.c_neg;
1320                         ops[i]->turbo_dec.tb_params.cab =
1321                                         turbo_dec->tb_params.cab;
1322                         ops[i]->turbo_dec.tb_params.r =
1323                                         turbo_dec->tb_params.r;
1324                 } else {
1325                         ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
1326                         ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
1327                 }
1328
1329                 ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
1330                 ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
1331                 ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
1332                 ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
1333                 ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
1334                 ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
1335                 ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
1336
1337                 ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
1338                 ops[i]->turbo_dec.input = inputs[start_idx + i];
1339                 if (soft_outputs != NULL)
1340                         ops[i]->turbo_dec.soft_output =
1341                                 soft_outputs[start_idx + i];
1342         }
1343 }
1344
1345 static void
1346 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1347                 unsigned int start_idx,
1348                 struct rte_bbdev_op_data *inputs,
1349                 struct rte_bbdev_op_data *outputs,
1350                 struct rte_bbdev_enc_op *ref_op)
1351 {
1352         unsigned int i;
1353         struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
1354         for (i = 0; i < n; ++i) {
1355                 if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1356                         ops[i]->turbo_enc.tb_params.ea =
1357                                         turbo_enc->tb_params.ea;
1358                         ops[i]->turbo_enc.tb_params.eb =
1359                                         turbo_enc->tb_params.eb;
1360                         ops[i]->turbo_enc.tb_params.k_pos =
1361                                         turbo_enc->tb_params.k_pos;
1362                         ops[i]->turbo_enc.tb_params.k_neg =
1363                                         turbo_enc->tb_params.k_neg;
1364                         ops[i]->turbo_enc.tb_params.c =
1365                                         turbo_enc->tb_params.c;
1366                         ops[i]->turbo_enc.tb_params.c_neg =
1367                                         turbo_enc->tb_params.c_neg;
1368                         ops[i]->turbo_enc.tb_params.cab =
1369                                         turbo_enc->tb_params.cab;
1370                         ops[i]->turbo_enc.tb_params.ncb_pos =
1371                                         turbo_enc->tb_params.ncb_pos;
1372                         ops[i]->turbo_enc.tb_params.ncb_neg =
1373                                         turbo_enc->tb_params.ncb_neg;
1374                         ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
1375                 } else {
1376                         ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
1377                         ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
1378                         ops[i]->turbo_enc.cb_params.ncb =
1379                                         turbo_enc->cb_params.ncb;
1380                 }
1381                 ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
1382                 ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
1383                 ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
1384
1385                 ops[i]->turbo_enc.output = outputs[start_idx + i];
1386                 ops[i]->turbo_enc.input = inputs[start_idx + i];
1387         }
1388 }
1389
1390
1391 /* Returns a random number drawn from a normal distribution
1392  * with mean 0 and variance 1, generated with the
1393  * Marsaglia polar method.
1394  */
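/*
 * Sketch of the method as implemented below: draw u and v uniformly in
 * (-1, 1), keep the pair only when s = u*u + v*v lies in (0, 1); then
 * u * sqrt(-2 * ln(s) / s) and v * sqrt(-2 * ln(s) / s) are two independent
 * standard normal samples, and the parity of n selects which of the two is
 * returned.
 */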
1395 static double
1396 randn(int n)
1397 {
1398         double S, Z, U1, U2, u, v, fac;
1399
1400         do {
1401                 U1 = (double)rand() / RAND_MAX;
1402                 U2 = (double)rand() / RAND_MAX;
1403                 u = 2. * U1 - 1.;
1404                 v = 2. * U2 - 1.;
1405                 S = u * u + v * v;
1406         } while (S >= 1 || S == 0);
1407         fac = sqrt(-2. * log(S) / S);
1408         Z = (n % 2) ? u * fac : v * fac;
1409         return Z;
1410 }
1411
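/*
 * Jacobian logarithm (max*) helper used for the LLR computation:
 * max*(A, B) = ln(exp(A) + exp(B)) = max(A, B) + ln(1 + exp(-|A - B|)).
 * When |A - B| > 5 the correction term is below ln(1 + e^-5) ~= 0.0067,
 * so plain max() is used as an approximation.
 */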
1412 static inline double
1413 maxstar(double A, double B)
1414 {
1415         if (fabs(A - B) > 5)
1416                 return RTE_MAX(A, B);
1417         else
1418                 return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
1419 }
1420
1421 /*
1422  * Generate Qm LLRs for Qm == 8 (256QAM)
1423  * Modulation, AWGN channel and LLR estimation based on the max-log approximation
1424  */
1425 static void
1426 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1427 {
1428         int qm = 8;
1429         int qam = 256;
1430         int m, k;
1431         double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1432         /* 5.1.4 of TS38.211 */
1433         const double symbols_I[256] = {
1434                         5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
1435                         5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
1436                         11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
1437                         15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
1438                         15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
1439                         1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
1440                         1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
1441                         15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
1442                         13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
1443                         -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
1444                         -7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
1445                         -1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
1446                         -13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
1447                         -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1448                         -13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
1449                         -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
1450                         -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
1451                         -9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
1452                         -13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
1453                         -13, -13, -15, -15, -13, -13, -15, -15};
1454         const double symbols_Q[256] = {
1455                         5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1456                         9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
1457                         15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1458                         11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
1459                         15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
1460                         -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
1461                         -15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
1462                         -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
1463                         -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1464                         -13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
1465                         7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
1466                         9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
1467                         13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
1468                         3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
1469                         13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
1470                         -5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
1471                         -13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
1472                         -13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
1473                         -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
1474                         -13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
1475         /* Average constellation point energy */
1476         N0 *= 170.0;
1477         for (k = 0; k < qm; k++)
1478                 b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1479         /* 5.1.4 of TS38.211 */
1480         I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
1481                         (4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
1482         Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
1483                         (4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
1484         /* AWGN channel */
1485         I += sqrt(N0 / 2) * randn(0);
1486         Q += sqrt(N0 / 2) * randn(1);
1487         /*
1488          * Calculate the log of the probability that each of
1489          * the constellation points was transmitted
1490          */
1491         for (m = 0; m < qam; m++)
1492                 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1493                                 + pow(Q - symbols_Q[m], 2.0)) / N0;
1494         /* Calculate an LLR for each of the qm bits in the symbol */
1495         for (k = 0; k < qm; k++) {
1496                 p0 = -999999;
1497                 p1 = -999999;
1498                 /* For each constellation point */
1499                 for (m = 0; m < qam; m++) {
1500                         if ((m >> (qm - k - 1)) & 1)
1501                                 p1 = maxstar(p1, log_syml_prob[m]);
1502                         else
1503                                 p0 = maxstar(p0, log_syml_prob[m]);
1504                 }
1505                 /* Calculate the LLR */
1506                 llr_ = p0 - p1;
1507                 llr_ *= (1 << ldpc_llr_decimals);
1508                 llr_ = round(llr_);
1509                 if (llr_ > llr_max)
1510                         llr_ = llr_max;
1511                 if (llr_ < -llr_max)
1512                         llr_ = -llr_max;
1513                 llrs[qm * i + k] = (int8_t) llr_;
1514         }
1515 }
1516
1517
1518 /*
1519  * Generate Qm LLRs for Qm == 6 (64QAM)
1520  * Modulation, AWGN channel and LLR estimation based on the max-log approximation
1521  */
1522 static void
1523 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1524 {
1525         int qm = 6;
1526         int qam = 64;
1527         int m, k;
1528         double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1529         /* 5.1.4 of TS38.211 */
1530         const double symbols_I[64] = {
1531                         3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1532                         3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
1533                         -3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
1534                         -5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
1535                         -5, -5, -7, -7, -5, -5, -7, -7};
1536         const double symbols_Q[64] = {
1537                         3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
1538                         -3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
1539                         -5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
1540                         5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
1541                         -3, -1, -3, -1, -5, -7, -5, -7};
1542         /* Average constellation point energy */
1543         N0 *= 42.0;
1544         for (k = 0; k < qm; k++)
1545                 b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1546         /* 5.1.4 of TS38.211 */
1547         I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
1548         Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
1549         /* AWGN channel */
1550         I += sqrt(N0 / 2) * randn(0);
1551         Q += sqrt(N0 / 2) * randn(1);
1552         /*
1553          * Calculate the log of the probability that each of
1554          * the constellation points was transmitted
1555          */
1556         for (m = 0; m < qam; m++)
1557                 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1558                                 + pow(Q - symbols_Q[m], 2.0)) / N0;
1559         /* Calculate an LLR for each of the qm bits in the symbol */
1560         for (k = 0; k < qm; k++) {
1561                 p0 = -999999;
1562                 p1 = -999999;
1563                 /* For each constellation point */
1564                 for (m = 0; m < qam; m++) {
1565                         if ((m >> (qm - k - 1)) & 1)
1566                                 p1 = maxstar(p1, log_syml_prob[m]);
1567                         else
1568                                 p0 = maxstar(p0, log_syml_prob[m]);
1569                 }
1570                 /* Calculate the LLR */
1571                 llr_ = p0 - p1;
1572                 llr_ *= (1 << ldpc_llr_decimals);
1573                 llr_ = round(llr_);
1574                 if (llr_ > llr_max)
1575                         llr_ = llr_max;
1576                 if (llr_ < -llr_max)
1577                         llr_ = -llr_max;
1578                 llrs[qm * i + k] = (int8_t) llr_;
1579         }
1580 }
1581
1582 /*
1583  * Generate Qm LLRs for Qm == 4 (16QAM)
1584  * Modulation, AWGN channel and LLR estimation based on the max-log approximation
1585  */
1586 static void
1587 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
1588 {
1589         int qm = 4;
1590         int qam = 16;
1591         int m, k;
1592         double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
1593         /* 5.1.4 of TS38.211 */
1594         const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
1595                         -1, -1, -3, -3, -1, -1, -3, -3};
1596         const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
1597                         1, 3, 1, 3, -1, -3, -1, -3};
1598         /* Average constellation point energy */
1599         N0 *= 10.0;
1600         for (k = 0; k < qm; k++)
1601                 b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
1602         /* 5.1.4 of TS38.211 */
1603         I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
1604         Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
1605         /* AWGN channel */
1606         I += sqrt(N0 / 2) * randn(0);
1607         Q += sqrt(N0 / 2) * randn(1);
1608         /*
1609          * Calculate the log of the probability that each of
1610          * the constellation points was transmitted
1611          */
1612         for (m = 0; m < qam; m++)
1613                 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
1614                                 + pow(Q - symbols_Q[m], 2.0)) / N0;
1615         /* Calculate an LLR for each of the qm bits in the symbol */
1616         for (k = 0; k < qm; k++) {
1617                 p0 = -999999;
1618                 p1 = -999999;
1619                 /* For each constellation point */
1620                 for (m = 0; m < qam; m++) {
1621                         if ((m >> (qm - k - 1)) & 1)
1622                                 p1 = maxstar(p1, log_syml_prob[m]);
1623                         else
1624                                 p0 = maxstar(p0, log_syml_prob[m]);
1625                 }
1626                 /* Calculate the LLR */
1627                 llr_ = p0 - p1;
1628                 llr_ *= (1 << ldpc_llr_decimals);
1629                 llr_ = round(llr_);
1630                 if (llr_ > llr_max)
1631                         llr_ = llr_max;
1632                 if (llr_ < -llr_max)
1633                         llr_ = -llr_max;
1634                 llrs[qm * i + k] = (int8_t) llr_;
1635         }
1636 }
1637
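/*
 * Generate an LLR for Qm == 2 (QPSK).
 * A Gaussian noise term scaled by 2 * sqrt(N0) is added to the nominal
 * +/-2.0 LLR, the sum is normalized by N0, then quantized to the fixed-point
 * LLR format and saturated to +/-llr_max. Quasi-null vector LLRs are left
 * untouched (see below).
 */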
1638 static void
1639 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
1640 {
1641         double b, b1, n;
1642         double coeff = 2.0 * sqrt(N0);
1643
1644         /* Leave the rare quasi-null LLRs from the vector untouched so they are not saturated */
1645         if (llrs[j] < 8 && llrs[j] > -8)
1646                 return;
1647
1648         /* Note: the sign must not be changed here */
1649         n = randn(j % 2);
1650         b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
1651                         + coeff * n) / N0;
1652         b = b1 * (1 << ldpc_llr_decimals);
1653         b = round(b);
1654         if (b > llr_max)
1655                 b = llr_max;
1656         if (b < -llr_max)
1657                 b = -llr_max;
1658         llrs[j] = (int8_t) b;
1659 }
1660
1661 /* Generate LLRs for a given SNR */
1662 static void
1663 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
1664                 struct rte_bbdev_dec_op *ref_op)
1665 {
1666         struct rte_mbuf *m;
1667         uint16_t qm;
1668         uint32_t i, j, e, range;
1669         double N0, llr_max;
1670
1671         e = ref_op->ldpc_dec.cb_params.e;
1672         qm = ref_op->ldpc_dec.q_m;
1673         llr_max = (1 << (ldpc_llr_size - 1)) - 1;
1674         range = e / qm;
1675         N0 = 1.0 / pow(10.0, get_snr() / 10.0);
1676
1677         for (i = 0; i < n; ++i) {
1678                 m = inputs[i].data;
1679                 int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
1680                 if (qm == 8) {
1681                         for (j = 0; j < range; ++j)
1682                                 gen_qm8_llr(llrs, j, N0, llr_max);
1683                 } else if (qm == 6) {
1684                         for (j = 0; j < range; ++j)
1685                                 gen_qm6_llr(llrs, j, N0, llr_max);
1686                 } else if (qm == 4) {
1687                         for (j = 0; j < range; ++j)
1688                                 gen_qm4_llr(llrs, j, N0, llr_max);
1689                 } else {
1690                         for (j = 0; j < e; ++j)
1691                                 gen_qm2_llr(llrs, j, N0, llr_max);
1692                 }
1693         }
1694 }
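
/*
 * The noise variance above follows N0 = 10^(-SNR/10) for the SNR in dB
 * returned by get_snr(); e.g. an SNR of 10 dB gives N0 = 0.1 and 0 dB gives
 * N0 = 1.0. llr_max is the widest value representable in the device LLR
 * word, e.g. 127 for an 8-bit llr_size.
 */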
1695
1696 static void
1697 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
1698                 unsigned int start_idx,
1699                 struct rte_bbdev_op_data *inputs,
1700                 struct rte_bbdev_op_data *hard_outputs,
1701                 struct rte_bbdev_op_data *soft_outputs,
1702                 struct rte_bbdev_op_data *harq_inputs,
1703                 struct rte_bbdev_op_data *harq_outputs,
1704                 struct rte_bbdev_dec_op *ref_op)
1705 {
1706         unsigned int i;
1707         struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
1708
1709         for (i = 0; i < n; ++i) {
1710                 if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1711                         ops[i]->ldpc_dec.tb_params.ea =
1712                                         ldpc_dec->tb_params.ea;
1713                         ops[i]->ldpc_dec.tb_params.eb =
1714                                         ldpc_dec->tb_params.eb;
1715                         ops[i]->ldpc_dec.tb_params.c =
1716                                         ldpc_dec->tb_params.c;
1717                         ops[i]->ldpc_dec.tb_params.cab =
1718                                         ldpc_dec->tb_params.cab;
1719                         ops[i]->ldpc_dec.tb_params.r =
1720                                         ldpc_dec->tb_params.r;
1721                 } else {
1722                         ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
1723                 }
1724
1725                 ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
1726                 ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
1727                 ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
1728                 ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
1729                 ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
1730                 ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
1731                 ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
1732                 ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
1733                 ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;
1734
1735                 if (hard_outputs != NULL)
1736                         ops[i]->ldpc_dec.hard_output =
1737                                         hard_outputs[start_idx + i];
1738                 if (inputs != NULL)
1739                         ops[i]->ldpc_dec.input =
1740                                         inputs[start_idx + i];
1741                 if (soft_outputs != NULL)
1742                         ops[i]->ldpc_dec.soft_output =
1743                                         soft_outputs[start_idx + i];
1744                 if (harq_inputs != NULL)
1745                         ops[i]->ldpc_dec.harq_combined_input =
1746                                         harq_inputs[start_idx + i];
1747                 if (harq_outputs != NULL)
1748                         ops[i]->ldpc_dec.harq_combined_output =
1749                                         harq_outputs[start_idx + i];
1750         }
1751 }
1752
1753
1754 static void
1755 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
1756                 unsigned int start_idx,
1757                 struct rte_bbdev_op_data *inputs,
1758                 struct rte_bbdev_op_data *outputs,
1759                 struct rte_bbdev_enc_op *ref_op)
1760 {
1761         unsigned int i;
1762         struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
1763         for (i = 0; i < n; ++i) {
1764                 if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1765                         ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
1766                         ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
1767                         ops[i]->ldpc_enc.tb_params.cab =
1768                                         ldpc_enc->tb_params.cab;
1769                         ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
1770                         ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
1771                 } else {
1772                         ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
1773                 }
1774                 ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
1775                 ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
1776                 ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
1777                 ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
1778                 ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
1779                 ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
1780                 ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
1781                 ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
1782                 ops[i]->ldpc_enc.output = outputs[start_idx + i];
1783                 ops[i]->ldpc_enc.input = inputs[start_idx + i];
1784         }
1785 }
1786
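/*
 * Check a dequeued decode op against the expected status from the test
 * vector and verify ordering: opaque_data is expected to hold the op's
 * enqueue index, so it must match order_idx on dequeue. When iter_max >= 10,
 * a mismatch on the syndrome-check bit alone is tolerated (see the warnings
 * below).
 */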
1787 static int
1788 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
1789                 unsigned int order_idx, const int expected_status)
1790 {
1791         int status = op->status;
1792         /* Ignore parity-mismatch false alarms when the iteration count is large */
1793         if (get_iter_max() >= 10) {
1794                 if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1795                                 (status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1796                         printf("WARNING: Ignore Syndrome Check mismatch\n");
1797                         status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
1798                 }
1799                 if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
1800                                 !(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
1801                         printf("WARNING: Ignore Syndrome Check mismatch\n");
1802                         status += (1 << RTE_BBDEV_SYNDROME_ERROR);
1803                 }
1804         }
1805
1806         TEST_ASSERT(status == expected_status,
1807                         "op_status (%d) != expected_status (%d)",
1808                         op->status, expected_status);
1809
1810         TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1811                         "Ordering error, expected %p, got %p",
1812                         (void *)(uintptr_t)order_idx, op->opaque_data);
1813
1814         return TEST_SUCCESS;
1815 }
1816
1817 static int
1818 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
1819                 unsigned int order_idx, const int expected_status)
1820 {
1821         TEST_ASSERT(op->status == expected_status,
1822                         "op_status (%d) != expected_status (%d)",
1823                         op->status, expected_status);
1824
1825         if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
1826                 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
1827                                 "Ordering error, expected %p, got %p",
1828                                 (void *)(uintptr_t)order_idx, op->opaque_data);
1829
1830         return TEST_SUCCESS;
1831 }
1832
1833 static inline int
1834 validate_op_chain(struct rte_bbdev_op_data *op,
1835                 struct op_data_entries *orig_op)
1836 {
1837         uint8_t i;
1838         struct rte_mbuf *m = op->data;
1839         uint8_t nb_dst_segments = orig_op->nb_segments;
1840         uint32_t total_data_size = 0;
1841
1842         TEST_ASSERT(nb_dst_segments == m->nb_segs,
1843                         "Number of segments differ in original (%u) and filled (%u) op",
1844                         nb_dst_segments, m->nb_segs);
1845
1846         /* Validate each mbuf segment length */
1847         for (i = 0; i < nb_dst_segments; ++i) {
1848                 /* Apply offset to the first mbuf segment */
1849                 uint16_t offset = (i == 0) ? op->offset : 0;
1850                 uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1851                 total_data_size += orig_op->segments[i].length;
1852
1853                 TEST_ASSERT(orig_op->segments[i].length == data_len,
1854                                 "Length of segment differ in original (%u) and filled (%u) op",
1855                                 orig_op->segments[i].length, data_len);
1856                 TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
1857                                 rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
1858                                 data_len,
1859                                 "Output buffers (CB=%u) are not equal", i);
1860                 m = m->next;
1861         }
1862
1863         /* Validate total mbuf pkt length */
1864         uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
1865         TEST_ASSERT(total_data_size == pkt_len,
1866                         "Length of data differ in original (%u) and filled (%u) op",
1867                         total_data_size, pkt_len);
1868
1869         return TEST_SUCCESS;
1870 }
1871
1872 /*
1873  * Compute K0 for a given configuration, used for HARQ output length
1874  * computation, as defined in 3GPP TS 38.212 Table 5.4.2.1-2.
1875  */
1876 static inline uint16_t
1877 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
1878 {
1879         if (rv_index == 0)
1880                 return 0;
1881         uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
1882         if (n_cb == n) {
1883                 if (rv_index == 1)
1884                         return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
1885                 else if (rv_index == 2)
1886                         return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
1887                 else
1888                         return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
1889         }
1890         /* LBRM case - includes a division by N */
1891         if (rv_index == 1)
1892                 return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
1893                                 / n) * z_c;
1894         else if (rv_index == 2)
1895                 return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
1896                                 / n) * z_c;
1897         else
1898                 return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
1899                                 / n) * z_c;
1900 }
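
/*
 * Worked example (full soft buffer): BG1, z_c = 384, n_cb = 66 * 384 = 25344.
 * rv_index 0, 1, 2, 3 then give k0 = 0, 17 * 384 = 6528, 33 * 384 = 12672 and
 * 56 * 384 = 21504 respectively, matching Table 5.4.2.1-2.
 */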
1901
1902 /* HARQ output length including the Filler bits */
1903 static inline uint16_t
1904 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
1905 {
1906         uint16_t k0 = 0;
1907         uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
1908         k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
1909         /* Compute RM out size and number of rows */
1910         uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1911                         * ops_ld->z_c - ops_ld->n_filler;
1912         uint16_t deRmOutSize = RTE_MIN(
1913                         k0 + ops_ld->cb_params.e +
1914                         ((k0 > parity_offset) ?
1915                                         0 : ops_ld->n_filler),
1916                                         ops_ld->n_cb);
1917         uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
1918                         / ops_ld->z_c);
1919         uint16_t harq_output_len = numRows * ops_ld->z_c;
1920         return harq_output_len;
1921 }
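
/*
 * Illustrative example with hypothetical parameters: BG1, z_c = 64,
 * n_cb = 4224, n_filler = 32, rv_index = 0 (so k0 = 0) and e = 3000.
 * parity_offset = 20 * 64 - 32 = 1248, deRmOutSize = min(0 + 3000 + 32, 4224)
 * = 3032, numRows = ceil(3032 / 64) = 48, so the HARQ length is 48 * 64 = 3072.
 */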
1922
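/*
 * Compare the HARQ combined output against the reference vector. Small
 * per-LLR differences are expected from fixed-point scaling: a byte only
 * counts as an error when it differs by more than 8 (or more than 16 close
 * to saturation), and at most one such byte is tolerated overall.
 */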
1923 static inline int
1924 validate_op_harq_chain(struct rte_bbdev_op_data *op,
1925                 struct op_data_entries *orig_op,
1926                 struct rte_bbdev_op_ldpc_dec *ops_ld)
1927 {
1928         uint8_t i;
1929         uint32_t j, jj, k;
1930         struct rte_mbuf *m = op->data;
1931         uint8_t nb_dst_segments = orig_op->nb_segments;
1932         uint32_t total_data_size = 0;
1933         int8_t *harq_orig, *harq_out, abs_harq_origin;
1934         uint32_t byte_error = 0, cum_error = 0, error;
1935         int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
1936         int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
1937         uint16_t parity_offset;
1938
1939         TEST_ASSERT(nb_dst_segments == m->nb_segs,
1940                         "Number of segments differ in original (%u) and filled (%u) op",
1941                         nb_dst_segments, m->nb_segs);
1942
1943         /* Validate each mbuf segment length */
1944         for (i = 0; i < nb_dst_segments; ++i) {
1945                 /* Apply offset to the first mbuf segment */
1946                 uint16_t offset = (i == 0) ? op->offset : 0;
1947                 uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
1948                 total_data_size += orig_op->segments[i].length;
1949
1950                 TEST_ASSERT(orig_op->segments[i].length <
1951                                 (uint32_t)(data_len + 64),
1952                                 "Length of segment differ in original (%u) and filled (%u) op",
1953                                 orig_op->segments[i].length, data_len);
1954                 harq_orig = (int8_t *) orig_op->segments[i].addr;
1955                 harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);
1956
1957                 if (!(ldpc_cap_flags &
1958                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
1959                                 ) || (ops_ld->op_flags &
1960                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1961                         data_len -= ops_ld->z_c;
1962                         parity_offset = data_len;
1963                 } else {
1964                         /* Compute RM out size and number of rows */
1965                         parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
1966                                         * ops_ld->z_c - ops_ld->n_filler;
1967                         uint16_t deRmOutSize = compute_harq_len(ops_ld) -
1968                                         ops_ld->n_filler;
1969                         if (data_len > deRmOutSize)
1970                                 data_len = deRmOutSize;
1971                         if (data_len > orig_op->segments[i].length)
1972                                 data_len = orig_op->segments[i].length;
1973                 }
1974                 /*
1975                  * HARQ output can have minor differences
1976                  * due to integer representation and related scaling
1977                  */
1978                 for (j = 0, jj = 0; j < data_len; j++, jj++) {
1979                         if (j == parity_offset) {
1980                                 /* Special Handling of the filler bits */
1981                                 for (k = 0; k < ops_ld->n_filler; k++) {
1982                                         if (harq_out[jj] !=
1983                                                         llr_max_pre_scaling) {
1984                                                 printf("HARQ Filler issue %d: %d %d\n",
1985                                                         jj, harq_out[jj],
1986                                                         llr_max_pre_scaling);
1987                                                 byte_error++;
1988                                         }
1989                                         jj++;
1990                                 }
1991                         }
1992                         if (!(ops_ld->op_flags &
1993                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
1994                                 if (ldpc_llr_decimals > 1)
1995                                         harq_out[jj] = (harq_out[jj] + 1)
1996                                                 >> (ldpc_llr_decimals - 1);
1997                                 /* Saturated to S7 */
1998                                 if (harq_orig[j] > llr_max)
1999                                         harq_orig[j] = llr_max;
2000                                 if (harq_orig[j] < -llr_max)
2001                                         harq_orig[j] = -llr_max;
2002                         }
2003                         if (harq_orig[j] != harq_out[jj]) {
2004                                 error = (harq_orig[j] > harq_out[jj]) ?
2005                                                 harq_orig[j] - harq_out[jj] :
2006                                                 harq_out[jj] - harq_orig[j];
2007                                 abs_harq_origin = harq_orig[j] > 0 ?
2008                                                         harq_orig[j] :
2009                                                         -harq_orig[j];
2010                                 /* Residual quantization error */
2011                                 if ((error > 8 && (abs_harq_origin <
2012                                                 (llr_max - 16))) ||
2013                                                 (error > 16)) {
2014                                         printf("HARQ mismatch %d: exp %d act %d => %d\n",
2015                                                         j, harq_orig[j],
2016                                                         harq_out[jj], error);
2017                                         byte_error++;
2018                                         cum_error += error;
2019                                 }
2020                         }
2021                 }
2022                 m = m->next;
2023         }
2024
2025         if (byte_error)
2026                 TEST_ASSERT(byte_error <= 1,
2027                                 "HARQ output mismatch (%d) %d",
2028                                 byte_error, cum_error);
2029
2030         /* Validate total mbuf pkt length */
2031         uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
2032         TEST_ASSERT(total_data_size < pkt_len + 64,
2033                         "Length of data differ in original (%u) and filled (%u) op",
2034                         total_data_size, pkt_len);
2035
2036         return TEST_SUCCESS;
2037 }
2038
2039 static int
2040 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2041                 struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2042 {
2043         unsigned int i;
2044         int ret;
2045         struct op_data_entries *hard_data_orig =
2046                         &test_vector.entries[DATA_HARD_OUTPUT];
2047         struct op_data_entries *soft_data_orig =
2048                         &test_vector.entries[DATA_SOFT_OUTPUT];
2049         struct rte_bbdev_op_turbo_dec *ops_td;
2050         struct rte_bbdev_op_data *hard_output;
2051         struct rte_bbdev_op_data *soft_output;
2052         struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
2053
2054         for (i = 0; i < n; ++i) {
2055                 ops_td = &ops[i]->turbo_dec;
2056                 hard_output = &ops_td->hard_output;
2057                 soft_output = &ops_td->soft_output;
2058
2059                 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2060                         TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2061                                         "Returned iter_count (%d) > expected iter_count (%d)",
2062                                         ops_td->iter_count, ref_td->iter_count);
2063                 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2064                 TEST_ASSERT_SUCCESS(ret,
2065                                 "Checking status and ordering for decoder failed");
2066
2067                 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2068                                 hard_data_orig),
2069                                 "Hard output buffers (CB=%u) are not equal",
2070                                 i);
2071
2072                 if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
2073                         TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2074                                         soft_data_orig),
2075                                         "Soft output buffers (CB=%u) are not equal",
2076                                         i);
2077         }
2078
2079         return TEST_SUCCESS;
2080 }
2081
2082 /* Count the number of code blocks in error */
2083 static int
2084 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n)
2085 {
2086         unsigned int i;
2087         struct op_data_entries *hard_data_orig =
2088                         &test_vector.entries[DATA_HARD_OUTPUT];
2089         struct rte_bbdev_op_ldpc_dec *ops_td;
2090         struct rte_bbdev_op_data *hard_output;
2091         int errors = 0;
2092         struct rte_mbuf *m;
2093
2094         for (i = 0; i < n; ++i) {
2095                 ops_td = &ops[i]->ldpc_dec;
2096                 hard_output = &ops_td->hard_output;
2097                 m = hard_output->data;
2098                 if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0),
2099                                 hard_data_orig->segments[0].addr,
2100                                 hard_data_orig->segments[0].length))
2101                         errors++;
2102         }
2103         return errors;
2104 }
2105
2106 static int
2107 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
2108                 struct rte_bbdev_dec_op *ref_op, const int vector_mask)
2109 {
2110         unsigned int i;
2111         int ret;
2112         struct op_data_entries *hard_data_orig =
2113                         &test_vector.entries[DATA_HARD_OUTPUT];
2114         struct op_data_entries *soft_data_orig =
2115                         &test_vector.entries[DATA_SOFT_OUTPUT];
2116         struct op_data_entries *harq_data_orig =
2117                                 &test_vector.entries[DATA_HARQ_OUTPUT];
2118         struct rte_bbdev_op_ldpc_dec *ops_td;
2119         struct rte_bbdev_op_data *hard_output;
2120         struct rte_bbdev_op_data *harq_output;
2121         struct rte_bbdev_op_data *soft_output;
2122         struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec;
2123
2124         for (i = 0; i < n; ++i) {
2125                 ops_td = &ops[i]->ldpc_dec;
2126                 hard_output = &ops_td->hard_output;
2127                 harq_output = &ops_td->harq_combined_output;
2128                 soft_output = &ops_td->soft_output;
2129
2130                 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
2131                 TEST_ASSERT_SUCCESS(ret,
2132                                 "Checking status and ordering for decoder failed");
2133                 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
2134                         TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
2135                                         "Returned iter_count (%d) > expected iter_count (%d)",
2136                                         ops_td->iter_count, ref_td->iter_count);
2137                 /*
2138                  * We can ignore output data when the decoding failed to
2139                  * converge or for loop-back cases
2140                  */
2141                 if (!check_bit(ops[i]->ldpc_dec.op_flags,
2142                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
2143                                 ) && (
2144                                 ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR
2145                                                 )) == 0)
2146                         TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
2147                                         hard_data_orig),
2148                                         "Hard output buffers (CB=%u) are not equal",
2149                                         i);
2150
2151                 if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)
2152                         TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
2153                                         soft_data_orig),
2154                                         "Soft output buffers (CB=%u) are not equal",
2155                                         i);
2156                 if (ref_op->ldpc_dec.op_flags &
2157                                 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) {
2158                         TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2159                                         harq_data_orig, ops_td),
2160                                         "HARQ output buffers (CB=%u) are not equal",
2161                                         i);
2162                 }
2163                 if (ref_op->ldpc_dec.op_flags &
2164                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
2165                         TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output,
2166                                         harq_data_orig, ops_td),
2167                                         "HARQ output buffers (CB=%u) are not equal",
2168                                         i);
2169
2170         }
2171
2172         return TEST_SUCCESS;
2173 }
2174
2175
2176 static int
2177 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2178                 struct rte_bbdev_enc_op *ref_op)
2179 {
2180         unsigned int i;
2181         int ret;
2182         struct op_data_entries *hard_data_orig =
2183                         &test_vector.entries[DATA_HARD_OUTPUT];
2184
2185         for (i = 0; i < n; ++i) {
2186                 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2187                 TEST_ASSERT_SUCCESS(ret,
2188                                 "Checking status and ordering for encoder failed");
2189                 TEST_ASSERT_SUCCESS(validate_op_chain(
2190                                 &ops[i]->turbo_enc.output,
2191                                 hard_data_orig),
2192                                 "Output buffers (CB=%u) are not equal",
2193                                 i);
2194         }
2195
2196         return TEST_SUCCESS;
2197 }
2198
2199 static int
2200 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
2201                 struct rte_bbdev_enc_op *ref_op)
2202 {
2203         unsigned int i;
2204         int ret;
2205         struct op_data_entries *hard_data_orig =
2206                         &test_vector.entries[DATA_HARD_OUTPUT];
2207
2208         for (i = 0; i < n; ++i) {
2209                 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
2210                 TEST_ASSERT_SUCCESS(ret,
2211                                 "Checking status and ordering for encoder failed");
2212                 TEST_ASSERT_SUCCESS(validate_op_chain(
2213                                 &ops[i]->ldpc_enc.output,
2214                                 hard_data_orig),
2215                                 "Output buffers (CB=%u) are not equal",
2216                                 i);
2217         }
2218
2219         return TEST_SUCCESS;
2220 }
2221
2222 static void
2223 create_reference_dec_op(struct rte_bbdev_dec_op *op)
2224 {
2225         unsigned int i;
2226         struct op_data_entries *entry;
2227
2228         op->turbo_dec = test_vector.turbo_dec;
2229         entry = &test_vector.entries[DATA_INPUT];
2230         for (i = 0; i < entry->nb_segments; ++i)
2231                 op->turbo_dec.input.length +=
2232                                 entry->segments[i].length;
2233 }
2234
2235 static void
2236 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2237 {
2238         unsigned int i;
2239         struct op_data_entries *entry;
2240
2241         op->ldpc_dec = test_vector.ldpc_dec;
2242         entry = &test_vector.entries[DATA_INPUT];
2243         for (i = 0; i < entry->nb_segments; ++i)
2244                 op->ldpc_dec.input.length +=
2245                                 entry->segments[i].length;
2246         if (test_vector.ldpc_dec.op_flags &
2247                         RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
2248                 entry = &test_vector.entries[DATA_HARQ_INPUT];
2249                 for (i = 0; i < entry->nb_segments; ++i)
2250                         op->ldpc_dec.harq_combined_input.length +=
2251                                 entry->segments[i].length;
2252         }
2253 }
2254
2255
2256 static void
2257 create_reference_enc_op(struct rte_bbdev_enc_op *op)
2258 {
2259         unsigned int i;
2260         struct op_data_entries *entry;
2261
2262         op->turbo_enc = test_vector.turbo_enc;
2263         entry = &test_vector.entries[DATA_INPUT];
2264         for (i = 0; i < entry->nb_segments; ++i)
2265                 op->turbo_enc.input.length +=
2266                                 entry->segments[i].length;
2267 }
2268
2269 static void
2270 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2271 {
2272         unsigned int i;
2273         struct op_data_entries *entry;
2274
2275         op->ldpc_enc = test_vector.ldpc_enc;
2276         entry = &test_vector.entries[DATA_INPUT];
2277         for (i = 0; i < entry->nb_segments; ++i)
2278                 op->ldpc_enc.input.length +=
2279                                 entry->segments[i].length;
2280 }
2281
2282 static uint32_t
2283 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
2284 {
2285         uint8_t i;
2286         uint32_t c, r, tb_size = 0;
2287
2288         if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2289                 tb_size = op->turbo_dec.tb_params.k_neg;
2290         } else {
2291                 c = op->turbo_dec.tb_params.c;
2292                 r = op->turbo_dec.tb_params.r;
2293                 for (i = 0; i < c-r; i++)
2294                         tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
2295                                 op->turbo_dec.tb_params.k_neg :
2296                                 op->turbo_dec.tb_params.k_pos;
2297         }
2298         return tb_size;
2299 }
2300
2301 static uint32_t
2302 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
2303 {
2304         uint8_t i;
2305         uint32_t c, r, tb_size = 0;
2306         uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
2307
2308         if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2309                 tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
2310         } else {
2311                 c = op->ldpc_dec.tb_params.c;
2312                 r = op->ldpc_dec.tb_params.r;
2313                 for (i = 0; i < c-r; i++)
2314                         tb_size += sys_cols * op->ldpc_dec.z_c
2315                                         - op->ldpc_dec.n_filler;
2316         }
2317         return tb_size;
2318 }
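
/*
 * sys_cols above is the number of systematic (information) columns in the
 * LDPC base graph: 22 for BG1 and 10 for BG2, so each code block carries
 * sys_cols * z_c - n_filler information bits. For example, BG1 with
 * z_c = 384 and no filler gives 22 * 384 = 8448 bits per code block.
 */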
2319
2320 static uint32_t
2321 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
2322 {
2323         uint8_t i;
2324         uint32_t c, r, tb_size = 0;
2325
2326         if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2327                 tb_size = op->turbo_enc.tb_params.k_neg;
2328         } else {
2329                 c = op->turbo_enc.tb_params.c;
2330                 r = op->turbo_enc.tb_params.r;
2331                 for (i = 0; i < c-r; i++)
2332                         tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
2333                                 op->turbo_enc.tb_params.k_neg :
2334                                 op->turbo_enc.tb_params.k_pos;
2335         }
2336         return tb_size;
2337 }
2338
2339 static uint32_t
2340 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
2341 {
2342         uint8_t i;
2343         uint32_t c, r, tb_size = 0;
2344         uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
2345
2346         if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2347                 tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
2348         } else {
2349                 c = op->ldpc_enc.tb_params.c;
2350                 r = op->ldpc_enc.tb_params.r;
2351                 for (i = 0; i < c-r; i++)
2352                         tb_size += sys_cols * op->ldpc_enc.z_c
2353                                         - op->ldpc_enc.n_filler;
2354         }
2355         return tb_size;
2356 }
2357
2358
2359 static int
2360 init_test_op_params(struct test_op_params *op_params,
2361                 enum rte_bbdev_op_type op_type, const int expected_status,
2362                 const int vector_mask, struct rte_mempool *ops_mp,
2363                 uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2364 {
2365         int ret = 0;
2366         if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2367                         op_type == RTE_BBDEV_OP_LDPC_DEC)
2368                 ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2369                                 &op_params->ref_dec_op, 1);
2370         else
2371                 ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2372                                 &op_params->ref_enc_op, 1);
2373
2374         TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2375
2376         op_params->mp = ops_mp;
2377         op_params->burst_sz = burst_sz;
2378         op_params->num_to_process = num_to_process;
2379         op_params->num_lcores = num_lcores;
2380         op_params->vector_mask = vector_mask;
2381         if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2382                         op_type == RTE_BBDEV_OP_LDPC_DEC)
2383                 op_params->ref_dec_op->status = expected_status;
2384         else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2385                         || op_type == RTE_BBDEV_OP_LDPC_ENC)
2386                 op_params->ref_enc_op->status = expected_status;
2387         return 0;
2388 }
2389
2390 static int
2391 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2392                 struct test_op_params *op_params)
2393 {
2394         int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2395         unsigned int i;
2396         struct active_device *ad;
2397         unsigned int burst_sz = get_burst_sz();
2398         enum rte_bbdev_op_type op_type = test_vector.op_type;
2399         const struct rte_bbdev_op_cap *capabilities = NULL;
2400
2401         ad = &active_devs[dev_id];
2402
2403         /* Check if device supports op_type */
2404         if (!is_avail_op(ad, test_vector.op_type))
2405                 return TEST_SUCCESS;
2406
2407         struct rte_bbdev_info info;
2408         rte_bbdev_info_get(ad->dev_id, &info);
2409         socket_id = GET_SOCKET(info.socket_id);
2410
2411         f_ret = create_mempools(ad, socket_id, op_type,
2412                         get_num_ops());
2413         if (f_ret != TEST_SUCCESS) {
2414                 printf("Couldn't create mempools\n");
2415                 goto fail;
2416         }
2417         if (op_type == RTE_BBDEV_OP_NONE)
2418                 op_type = RTE_BBDEV_OP_TURBO_ENC;
2419
2420         f_ret = init_test_op_params(op_params, test_vector.op_type,
2421                         test_vector.expected_status,
2422                         test_vector.mask,
2423                         ad->ops_mempool,
2424                         burst_sz,
2425                         get_num_ops(),
2426                         get_num_lcores());
2427         if (f_ret != TEST_SUCCESS) {
2428                 printf("Couldn't init test op params\n");
2429                 goto fail;
2430         }
2431
2432
2433         /* Find capabilities */
2434         const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2435         for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
2436                 if (cap->type == test_vector.op_type) {
2437                         capabilities = cap;
2438                         break;
2439                 }
2440                 cap++;
2441         }
2442         TEST_ASSERT_NOT_NULL(capabilities,
2443                         "Couldn't find capabilities");
2444
2445         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2446                 create_reference_dec_op(op_params->ref_dec_op);
2447         else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2448                 create_reference_enc_op(op_params->ref_enc_op);
2449         else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2450                 create_reference_ldpc_enc_op(op_params->ref_enc_op);
2451         else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2452                 create_reference_ldpc_dec_op(op_params->ref_dec_op);
2453
2454         for (i = 0; i < ad->nb_queues; ++i) {
2455                 f_ret = fill_queue_buffers(op_params,
2456                                 ad->in_mbuf_pool,
2457                                 ad->hard_out_mbuf_pool,
2458                                 ad->soft_out_mbuf_pool,
2459                                 ad->harq_in_mbuf_pool,
2460                                 ad->harq_out_mbuf_pool,
2461                                 ad->queue_ids[i],
2462                                 capabilities,
2463                                 info.drv.min_alignment,
2464                                 socket_id);
2465                 if (f_ret != TEST_SUCCESS) {
2466                         printf("Couldn't init queue buffers\n");
2467                         goto fail;
2468                 }
2469         }
2470
2471         /* Run test case function */
2472         t_ret = test_case_func(ad, op_params);
2473
2474         /* Free active device resources and return */
2475         free_buffers(ad, op_params);
2476         return t_ret;
2477
2478 fail:
2479         free_buffers(ad, op_params);
2480         return TEST_FAILED;
2481 }
2482
2483 /* Run given test function per active device per supported op type
2484  * per burst size.
2485  */
2486 static int
2487 run_test_case(test_case_function *test_case_func)
2488 {
2489         int ret = 0;
2490         uint8_t dev;
2491
2492         /* Alloc op_params */
2493         struct test_op_params *op_params = rte_zmalloc(NULL,
2494                         sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
2495         TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
2496                         RTE_ALIGN(sizeof(struct test_op_params),
2497                                 RTE_CACHE_LINE_SIZE));
2498
2499         /* For each device run test case function */
2500         for (dev = 0; dev < nb_active_devs; ++dev)
2501                 ret |= run_test_case_on_device(test_case_func, dev, op_params);
2502
2503         rte_free(op_params);
2504
2505         return ret;
2506 }
2507
2508
2509 /* Push back the HARQ output from DDR to host */
2510 static void
2511 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2512                 struct rte_bbdev_dec_op **ops,
2513                 const uint16_t n)
2514 {
2515         uint16_t j;
2516         int save_status, ret;
2517         uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2518         struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
2519         uint32_t flags = ops[0]->ldpc_dec.op_flags;
2520         bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
2521         bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2522         bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2523         bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
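        /* Re-enqueue each op in loopback mode with internal HARQ memory input
         * enabled so the device copies its HARQ data from DDR back into the
         * host output buffer, then restore the original flags and status.
         */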
2524         for (j = 0; j < n; ++j) {
2525                 if ((loopback && mem_out) || hc_out) {
2526                         save_status = ops[j]->status;
2527                         ops[j]->ldpc_dec.op_flags =
2528                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK |
2529                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2530                         if (h_comp)
2531                                 ops[j]->ldpc_dec.op_flags |=
2532                                         RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2533                         ops[j]->ldpc_dec.harq_combined_input.offset =
2534                                         harq_offset;
2535                         ops[j]->ldpc_dec.harq_combined_output.offset = 0;
2536                         harq_offset += HARQ_INCR;
2537                         if (!loopback)
2538                                 ops[j]->ldpc_dec.harq_combined_input.length =
2539                                 ops[j]->ldpc_dec.harq_combined_output.length;
2540                         rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
2541                                         &ops[j], 1);
2542                         ret = 0;
2543                         while (ret == 0)
2544                                 ret = rte_bbdev_dequeue_ldpc_dec_ops(
2545                                                 dev_id, queue_id,
2546                                                 &ops_deq[j], 1);
2547                         ops[j]->ldpc_dec.op_flags = flags;
2548                         ops[j]->status = save_status;
2549                 }
2550         }
2551 }
2552
2553 /*
2554  * Preload the HARQ memory input from host into HW DDR when requested
2555  * and adjust the HARQ offset for each code block
2556  */
2557 static void
2558 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
2559                 struct rte_bbdev_dec_op **ops, const uint16_t n,
2560                 bool preload)
2561 {
2562         uint16_t j;
2563         int deq;
2564         uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2565         struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
2566         struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
2567         uint32_t flags = ops[0]->ldpc_dec.op_flags;
2568         bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
2569         bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
2570         bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2571         bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
2572         bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2573         if ((mem_in || hc_in) && preload) {
2574                 for (j = 0; j < n; ++j) {
2575                         save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
2576                         save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
2577                         ops[j]->ldpc_dec.op_flags =
2578                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK |
2579                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
2580                         if (h_comp)
2581                                 ops[j]->ldpc_dec.op_flags |=
2582                                         RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
2583                         ops[j]->ldpc_dec.harq_combined_output.offset =
2584                                         harq_offset;
2585                         ops[j]->ldpc_dec.harq_combined_input.offset = 0;
2586                         harq_offset += HARQ_INCR;
2587                 }
2588                 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
2589                 deq = 0;
2590                 while (deq != n)
2591                         deq += rte_bbdev_dequeue_ldpc_dec_ops(
2592                                         dev_id, queue_id, &ops_deq[deq],
2593                                         n - deq);
2594                 /* Restore the operations */
2595                 for (j = 0; j < n; ++j) {
2596                         ops[j]->ldpc_dec.op_flags = flags;
2597                         ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
2598                         ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
2599                 }
2600         }
2601         harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
2602         for (j = 0; j < n; ++j) {
2603                 /* Adjust HARQ offset when we reach external DDR */
2604                 if (mem_in || hc_in)
2605                         ops[j]->ldpc_dec.harq_combined_input.offset
2606                                 = harq_offset;
2607                 if (mem_out || hc_out)
2608                         ops[j]->ldpc_dec.harq_combined_output.offset
2609                                 = harq_offset;
2610                 harq_offset += HARQ_INCR;
2611         }
2612 }
2613
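/* Interrupt callback: dequeue the completed ops for the matching queue and,
 * once all expected ops have been received, validate them and accumulate the
 * per-lcore throughput figures.
 */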
2614 static void
2615 dequeue_event_callback(uint16_t dev_id,
2616                 enum rte_bbdev_event_type event, void *cb_arg,
2617                 void *ret_param)
2618 {
2619         int ret;
2620         uint16_t i;
2621         uint64_t total_time;
2622         uint16_t deq, burst_sz, num_ops;
2623         uint16_t queue_id = *(uint16_t *) ret_param;
2624         struct rte_bbdev_info info;
2625         double tb_len_bits;
2626         struct thread_params *tp = cb_arg;
2627
2628         /* Find matching thread params using queue_id */
2629         for (i = 0; i < MAX_QUEUES; ++i, ++tp)
2630                 if (tp->queue_id == queue_id)
2631                         break;
2632
2633         if (i == MAX_QUEUES) {
2634                 printf("%s: Queue_id from interrupt details was not found!\n",
2635                                 __func__);
2636                 return;
2637         }
2638
2639         if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
2640                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2641                 printf(
2642                         "Dequeue interrupt handler called for incorrect event!\n");
2643                 return;
2644         }
2645
2646         burst_sz = rte_atomic16_read(&tp->burst_sz);
2647         num_ops = tp->op_params->num_to_process;
2648
2649         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
2650                 deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2651                                 &tp->dec_ops[
2652                                         rte_atomic16_read(&tp->nb_dequeued)],
2653                                 burst_sz);
2654         else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2655                 deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
2656                                 &tp->dec_ops[
2657                                         rte_atomic16_read(&tp->nb_dequeued)],
2658                                 burst_sz);
2659         else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2660                 deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
2661                                 &tp->enc_ops[
2662                                         rte_atomic16_read(&tp->nb_dequeued)],
2663                                 burst_sz);
2664         else /*RTE_BBDEV_OP_TURBO_ENC*/
2665                 deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2666                                 &tp->enc_ops[
2667                                         rte_atomic16_read(&tp->nb_dequeued)],
2668                                 burst_sz);
2669
2670         if (deq < burst_sz) {
2671                 printf(
2672                         "After receiving the interrupt, all operations should be dequeued. Expected: %u, got: %u\n",
2673                         burst_sz, deq);
2674                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2675                 return;
2676         }
2677
2678         if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
2679                 rte_atomic16_add(&tp->nb_dequeued, deq);
2680                 return;
2681         }
2682
2683         total_time = rte_rdtsc_precise() - tp->start_time;
2684
2685         rte_bbdev_info_get(dev_id, &info);
2686
2687         ret = TEST_SUCCESS;
2688
2689         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2690                 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2691                 ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
2692                                 tp->op_params->vector_mask);
2693                 /* get the max of iter_count for all dequeued ops */
2694                 for (i = 0; i < num_ops; ++i)
2695                         tp->iter_count = RTE_MAX(
2696                                         tp->dec_ops[i]->turbo_dec.iter_count,
2697                                         tp->iter_count);
2698                 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2699         } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
2700                 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2701                 ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
2702                 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2703         } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) {
2704                 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
2705                 ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op);
2706                 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
2707         } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
2708                 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2709                 ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op,
2710                                 tp->op_params->vector_mask);
2711                 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
2712         }
2713
2714         if (ret) {
2715                 printf("Buffers validation failed\n");
2716                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2717         }
2718
2719         switch (test_vector.op_type) {
2720         case RTE_BBDEV_OP_TURBO_DEC:
2721                 tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
2722                 break;
2723         case RTE_BBDEV_OP_TURBO_ENC:
2724                 tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
2725                 break;
2726         case RTE_BBDEV_OP_LDPC_DEC:
2727                 tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op);
2728                 break;
2729         case RTE_BBDEV_OP_LDPC_ENC:
2730                 tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op);
2731                 break;
2732         case RTE_BBDEV_OP_NONE:
2733                 tb_len_bits = 0.0;
2734                 break;
2735         default:
2736                 printf("Unknown op type: %d\n", test_vector.op_type);
2737                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
2738                 return;
2739         }
2740
2741         tp->ops_per_sec += ((double)num_ops) /
2742                         ((double)total_time / (double)rte_get_tsc_hz());
2743         tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
2744                         ((double)total_time / (double)rte_get_tsc_hz());
2745
2746         rte_atomic16_add(&tp->nb_dequeued, deq);
2747 }
2748
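/* Per-lcore throughput test body for LDPC decode in interrupt mode: ops are
 * enqueued in bursts here and dequeued from dequeue_event_callback().
 */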
2749 static int
2750 throughput_intr_lcore_ldpc_dec(void *arg)
2751 {
2752         struct thread_params *tp = arg;
2753         unsigned int enqueued;
2754         const uint16_t queue_id = tp->queue_id;
2755         const uint16_t burst_sz = tp->op_params->burst_sz;
2756         const uint16_t num_to_process = tp->op_params->num_to_process;
2757         struct rte_bbdev_dec_op *ops[num_to_process];
2758         struct test_buffers *bufs = NULL;
2759         struct rte_bbdev_info info;
2760         int ret, i, j;
2761         struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
2762         uint16_t num_to_enq, enq;
2763
2764         bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
2765                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
2766         bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
2767                         RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
2768
2769         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2770                         "BURST_SIZE should be <= %u", MAX_BURST);
2771
2772         TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2773                         "Failed to enable interrupts for dev: %u, queue_id: %u",
2774                         tp->dev_id, queue_id);
2775
2776         rte_bbdev_info_get(tp->dev_id, &info);
2777
2778         TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2779                         "NUM_OPS cannot exceed %u for this device",
2780                         info.drv.queue_size_lim);
2781
2782         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2783
2784         rte_atomic16_clear(&tp->processing_status);
2785         rte_atomic16_clear(&tp->nb_dequeued);
2786
2787         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2788                 rte_pause();
2789
2790         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2791                                 num_to_process);
2792         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2793                         num_to_process);
2794         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2795                 copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs,
2796                                 bufs->hard_outputs, bufs->soft_outputs,
2797                                 bufs->harq_inputs, bufs->harq_outputs, ref_op);
2798
2799         /* Set counter to validate the ordering */
2800         for (j = 0; j < num_to_process; ++j)
2801                 ops[j]->opaque_data = (void *)(uintptr_t)j;
2802
2803         for (j = 0; j < TEST_REPETITIONS; ++j) {
2804                 for (i = 0; i < num_to_process; ++i) {
2805                         if (!loopback)
2806                                 rte_pktmbuf_reset(
2807                                         ops[i]->ldpc_dec.hard_output.data);
2808                         if (hc_out || loopback)
2809                                 mbuf_reset(
2810                                 ops[i]->ldpc_dec.harq_combined_output.data);
2811                 }
2812
2813                 tp->start_time = rte_rdtsc_precise();
2814                 for (enqueued = 0; enqueued < num_to_process;) {
2815                         num_to_enq = burst_sz;
2816
2817                         if (unlikely(num_to_process - enqueued < num_to_enq))
2818                                 num_to_enq = num_to_process - enqueued;
2819
2820                         enq = 0;
2821                         do {
2822                                 enq += rte_bbdev_enqueue_ldpc_dec_ops(
2823                                                 tp->dev_id,
2824                                                 queue_id, &ops[enqueued],
2825                                                 num_to_enq);
2826                         } while (unlikely(num_to_enq != enq));
2827                         enqueued += enq;
2828
2829                         /* Publish to the thread's burst_sz the number of
2830                          * descriptors enqueued in this burst. It ensures
2831                          * that the proper number of descriptors will be
2832                          * dequeued in the callback function - needed for
2833                          * the last batch when the number of operations
2834                          * is not a multiple of the burst size.
2835                          */
2836                         rte_atomic16_set(&tp->burst_sz, num_to_enq);
2837
2838                         /* Wait until processing of previous batch is
2839                          * completed
2840                          */
2841                         while (rte_atomic16_read(&tp->nb_dequeued) !=
2842                                         (int16_t) enqueued)
2843                                 rte_pause();
2844                 }
2845                 if (j != TEST_REPETITIONS - 1)
2846                         rte_atomic16_clear(&tp->nb_dequeued);
2847         }
2848
2849         return TEST_SUCCESS;
2850 }
2851
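/* Per-lcore throughput test body for Turbo decode in interrupt mode. */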
2852 static int
2853 throughput_intr_lcore_dec(void *arg)
2854 {
2855         struct thread_params *tp = arg;
2856         unsigned int enqueued;
2857         const uint16_t queue_id = tp->queue_id;
2858         const uint16_t burst_sz = tp->op_params->burst_sz;
2859         const uint16_t num_to_process = tp->op_params->num_to_process;
2860         struct rte_bbdev_dec_op *ops[num_to_process];
2861         struct test_buffers *bufs = NULL;
2862         struct rte_bbdev_info info;
2863         int ret, i, j;
2864         uint16_t num_to_enq, enq;
2865
2866         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2867                         "BURST_SIZE should be <= %u", MAX_BURST);
2868
2869         TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2870                         "Failed to enable interrupts for dev: %u, queue_id: %u",
2871                         tp->dev_id, queue_id);
2872
2873         rte_bbdev_info_get(tp->dev_id, &info);
2874
2875         TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2876                         "NUM_OPS cannot exceed %u for this device",
2877                         info.drv.queue_size_lim);
2878
2879         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2880
2881         rte_atomic16_clear(&tp->processing_status);
2882         rte_atomic16_clear(&tp->nb_dequeued);
2883
2884         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2885                 rte_pause();
2886
2887         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
2888                                 num_to_process);
2889         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2890                         num_to_process);
2891         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2892                 copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
2893                                 bufs->hard_outputs, bufs->soft_outputs,
2894                                 tp->op_params->ref_dec_op);
2895
2896         /* Set counter to validate the ordering */
2897         for (j = 0; j < num_to_process; ++j)
2898                 ops[j]->opaque_data = (void *)(uintptr_t)j;
2899
2900         for (j = 0; j < TEST_REPETITIONS; ++j) {
2901                 for (i = 0; i < num_to_process; ++i)
2902                         rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
2903
2904                 tp->start_time = rte_rdtsc_precise();
2905                 for (enqueued = 0; enqueued < num_to_process;) {
2906                         num_to_enq = burst_sz;
2907
2908                         if (unlikely(num_to_process - enqueued < num_to_enq))
2909                                 num_to_enq = num_to_process - enqueued;
2910
2911                         enq = 0;
2912                         do {
2913                                 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
2914                                                 queue_id, &ops[enqueued],
2915                                                 num_to_enq);
2916                         } while (unlikely(num_to_enq != enq));
2917                         enqueued += enq;
2918
2919                         /* Publish to the thread's burst_sz the number of
2920                          * descriptors enqueued in this burst. It ensures
2921                          * that the proper number of descriptors will be
2922                          * dequeued in the callback function - needed for
2923                          * the last batch when the number of operations
2924                          * is not a multiple of the burst size.
2925                          */
2926                         rte_atomic16_set(&tp->burst_sz, num_to_enq);
2927
2928                         /* Wait until processing of previous batch is
2929                          * completed
2930                          */
2931                         while (rte_atomic16_read(&tp->nb_dequeued) !=
2932                                         (int16_t) enqueued)
2933                                 rte_pause();
2934                 }
2935                 if (j != TEST_REPETITIONS - 1)
2936                         rte_atomic16_clear(&tp->nb_dequeued);
2937         }
2938
2939         return TEST_SUCCESS;
2940 }
2941
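/* Per-lcore throughput test body for Turbo encode in interrupt mode. */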
2942 static int
2943 throughput_intr_lcore_enc(void *arg)
2944 {
2945         struct thread_params *tp = arg;
2946         unsigned int enqueued;
2947         const uint16_t queue_id = tp->queue_id;
2948         const uint16_t burst_sz = tp->op_params->burst_sz;
2949         const uint16_t num_to_process = tp->op_params->num_to_process;
2950         struct rte_bbdev_enc_op *ops[num_to_process];
2951         struct test_buffers *bufs = NULL;
2952         struct rte_bbdev_info info;
2953         int ret, i, j;
2954         uint16_t num_to_enq, enq;
2955
2956         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2957                         "BURST_SIZE should be <= %u", MAX_BURST);
2958
2959         TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
2960                         "Failed to enable interrupts for dev: %u, queue_id: %u",
2961                         tp->dev_id, queue_id);
2962
2963         rte_bbdev_info_get(tp->dev_id, &info);
2964
2965         TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
2966                         "NUM_OPS cannot exceed %u for this device",
2967                         info.drv.queue_size_lim);
2968
2969         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2970
2971         rte_atomic16_clear(&tp->processing_status);
2972         rte_atomic16_clear(&tp->nb_dequeued);
2973
2974         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
2975                 rte_pause();
2976
2977         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
2978                         num_to_process);
2979         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2980                         num_to_process);
2981         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2982                 copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
2983                                 bufs->hard_outputs, tp->op_params->ref_enc_op);
2984
2985         /* Set counter to validate the ordering */
2986         for (j = 0; j < num_to_process; ++j)
2987                 ops[j]->opaque_data = (void *)(uintptr_t)j;
2988
2989         for (j = 0; j < TEST_REPETITIONS; ++j) {
2990                 for (i = 0; i < num_to_process; ++i)
2991                         rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
2992
2993                 tp->start_time = rte_rdtsc_precise();
2994                 for (enqueued = 0; enqueued < num_to_process;) {
2995                         num_to_enq = burst_sz;
2996
2997                         if (unlikely(num_to_process - enqueued < num_to_enq))
2998                                 num_to_enq = num_to_process - enqueued;
2999
3000                         enq = 0;
3001                         do {
3002                                 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3003                                                 queue_id, &ops[enqueued],
3004                                                 num_to_enq);
3005                         } while (unlikely(enq != num_to_enq));
3006                         enqueued += enq;
3007
3008                         /* Publish to the thread's burst_sz the number of
3009                          * descriptors enqueued in this burst. It ensures
3010                          * that the proper number of descriptors will be
3011                          * dequeued in the callback function - needed for
3012                          * the last batch when the number of operations
3013                          * is not a multiple of the burst size.
3014                          */
3015                         rte_atomic16_set(&tp->burst_sz, num_to_enq);
3016
3017                         /* Wait until processing of previous batch is
3018                          * completed
3019                          */
3020                         while (rte_atomic16_read(&tp->nb_dequeued) !=
3021                                         (int16_t) enqueued)
3022                                 rte_pause();
3023                 }
3024                 if (j != TEST_REPETITIONS - 1)
3025                         rte_atomic16_clear(&tp->nb_dequeued);
3026         }
3027
3028         return TEST_SUCCESS;
3029 }
3030
3031
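/* Per-lcore throughput test body for LDPC encode in interrupt mode. */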
3032 static int
3033 throughput_intr_lcore_ldpc_enc(void *arg)
3034 {
3035         struct thread_params *tp = arg;
3036         unsigned int enqueued;
3037         const uint16_t queue_id = tp->queue_id;
3038         const uint16_t burst_sz = tp->op_params->burst_sz;
3039         const uint16_t num_to_process = tp->op_params->num_to_process;
3040         struct rte_bbdev_enc_op *ops[num_to_process];
3041         struct test_buffers *bufs = NULL;
3042         struct rte_bbdev_info info;
3043         int ret, i, j;
3044         uint16_t num_to_enq, enq;
3045
3046         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3047                         "BURST_SIZE should be <= %u", MAX_BURST);
3048
3049         TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
3050                         "Failed to enable interrupts for dev: %u, queue_id: %u",
3051                         tp->dev_id, queue_id);
3052
3053         rte_bbdev_info_get(tp->dev_id, &info);
3054
3055         TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
3056                         "NUM_OPS cannot exceed %u for this device",
3057                         info.drv.queue_size_lim);
3058
3059         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3060
3061         rte_atomic16_clear(&tp->processing_status);
3062         rte_atomic16_clear(&tp->nb_dequeued);
3063
3064         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3065                 rte_pause();
3066
3067         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
3068                         num_to_process);
3069         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3070                         num_to_process);
3071         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3072                 copy_reference_ldpc_enc_op(ops, num_to_process, 0,
3073                                 bufs->inputs, bufs->hard_outputs,
3074                                 tp->op_params->ref_enc_op);
3075
3076         /* Set counter to validate the ordering */
3077         for (j = 0; j < num_to_process; ++j)
3078                 ops[j]->opaque_data = (void *)(uintptr_t)j;
3079
3080         for (j = 0; j < TEST_REPETITIONS; ++j) {
3081                 for (i = 0; i < num_to_process; ++i)
3082                         rte_pktmbuf_reset(ops[i]->ldpc_enc.output.data);
3083
3084                 tp->start_time = rte_rdtsc_precise();
3085                 for (enqueued = 0; enqueued < num_to_process;) {
3086                         num_to_enq = burst_sz;
3087
3088                         if (unlikely(num_to_process - enqueued < num_to_enq))
3089                                 num_to_enq = num_to_process - enqueued;
3090
3091                         enq = 0;
3092                         do {
3093                                 enq += rte_bbdev_enqueue_ldpc_enc_ops(
3094                                                 tp->dev_id,
3095                                                 queue_id, &ops[enqueued],
3096                                                 num_to_enq);
3097                         } while (unlikely(enq != num_to_enq));
3098                         enqueued += enq;
3099
3100                         /* Publish to the thread's burst_sz the number of
3101                          * descriptors enqueued in this burst. It ensures
3102                          * that the proper number of descriptors will be
3103                          * dequeued in the callback function - needed for
3104                          * the last batch when the number of operations
3105                          * is not a multiple of the burst size.
3106                          */
3107                         rte_atomic16_set(&tp->burst_sz, num_to_enq);
3108
3109                         /* Wait until processing of previous batch is
3110                          * completed
3111                          */
3112                         while (rte_atomic16_read(&tp->nb_dequeued) !=
3113                                         (int16_t) enqueued)
3114                                 rte_pause();
3115                 }
3116                 if (j != TEST_REPETITIONS - 1)
3117                         rte_atomic16_clear(&tp->nb_dequeued);
3118         }
3119
3120         return TEST_SUCCESS;
3121 }
3122
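/* Per-lcore throughput test body for Turbo decode in polling (PMD) mode. */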
3123 static int
3124 throughput_pmd_lcore_dec(void *arg)
3125 {
3126         struct thread_params *tp = arg;
3127         uint16_t enq, deq;
3128         uint64_t total_time = 0, start_time;
3129         const uint16_t queue_id = tp->queue_id;
3130         const uint16_t burst_sz = tp->op_params->burst_sz;
3131         const uint16_t num_ops = tp->op_params->num_to_process;
3132         struct rte_bbdev_dec_op *ops_enq[num_ops];
3133         struct rte_bbdev_dec_op *ops_deq[num_ops];
3134         struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3135         struct test_buffers *bufs = NULL;
3136         int i, j, ret;
3137         struct rte_bbdev_info info;
3138         uint16_t num_to_enq;
3139
3140         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3141                         "BURST_SIZE should be <= %u", MAX_BURST);
3142
3143         rte_bbdev_info_get(tp->dev_id, &info);
3144
3145         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3146                         "NUM_OPS cannot exceed %u for this device",
3147                         info.drv.queue_size_lim);
3148
3149         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3150
3151         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3152                 rte_pause();
3153
3154         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3155         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3156
3157         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3158                 copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3159                                 bufs->hard_outputs, bufs->soft_outputs, ref_op);
3160
3161         /* Set counter to validate the ordering */
3162         for (j = 0; j < num_ops; ++j)
3163                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3164
3165         for (i = 0; i < TEST_REPETITIONS; ++i) {
3166
3167                 for (j = 0; j < num_ops; ++j)
3168                         mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
3169
3170                 start_time = rte_rdtsc_precise();
3171
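                /* Enqueue in bursts and dequeue completed ops as they become
                 * available so the hardware queue stays busy throughout the
                 * batch.
                 */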
3172                 for (enq = 0, deq = 0; enq < num_ops;) {
3173                         num_to_enq = burst_sz;
3174
3175                         if (unlikely(num_ops - enq < num_to_enq))
3176                                 num_to_enq = num_ops - enq;
3177
3178                         enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
3179                                         queue_id, &ops_enq[enq], num_to_enq);
3180
3181                         deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3182                                         queue_id, &ops_deq[deq], enq - deq);
3183                 }
3184
3185                 /* dequeue the remaining */
3186                 while (deq < enq) {
3187                         deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
3188                                         queue_id, &ops_deq[deq], enq - deq);
3189                 }
3190
3191                 total_time += rte_rdtsc_precise() - start_time;
3192         }
3193
3194         tp->iter_count = 0;
3195         /* get the max of iter_count for all dequeued ops */
3196         for (i = 0; i < num_ops; ++i) {
3197                 tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
3198                                 tp->iter_count);
3199         }
3200
3201         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3202                 ret = validate_dec_op(ops_deq, num_ops, ref_op,
3203                                 tp->op_params->vector_mask);
3204                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3205         }
3206
3207         rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3208
3209         double tb_len_bits = calc_dec_TB_size(ref_op);
3210
3211         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3212                         ((double)total_time / (double)rte_get_tsc_hz());
3213         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3214                         1000000.0) / ((double)total_time /
3215                         (double)rte_get_tsc_hz());
3216
3217         return TEST_SUCCESS;
3218 }
3219
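/* Per-lcore BLER test body for LDPC decode: run the ops over generated LLR
 * input with early termination enabled and derive BLER and the average
 * iteration count.
 */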
3220 static int
3221 bler_pmd_lcore_ldpc_dec(void *arg)
3222 {
3223         struct thread_params *tp = arg;
3224         uint16_t enq, deq;
3225         uint64_t total_time = 0, start_time;
3226         const uint16_t queue_id = tp->queue_id;
3227         const uint16_t burst_sz = tp->op_params->burst_sz;
3228         const uint16_t num_ops = tp->op_params->num_to_process;
3229         struct rte_bbdev_dec_op *ops_enq[num_ops];
3230         struct rte_bbdev_dec_op *ops_deq[num_ops];
3231         struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3232         struct test_buffers *bufs = NULL;
3233         int i, j, ret;
3234         float parity_bler = 0;
3235         struct rte_bbdev_info info;
3236         uint16_t num_to_enq;
3237         bool extDdr = check_bit(ldpc_cap_flags,
3238                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3239         bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3240                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3241         bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3242                         RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3243
3244         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3245                         "BURST_SIZE should be <= %u", MAX_BURST);
3246
3247         rte_bbdev_info_get(tp->dev_id, &info);
3248
3249         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3250                         "NUM_OPS cannot exceed %u for this device",
3251                         info.drv.queue_size_lim);
3252
3253         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3254
3255         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3256                 rte_pause();
3257
3258         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3259         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3260
3261         /* For BLER tests we need to enable early termination */
3262         if (!check_bit(ref_op->ldpc_dec.op_flags,
3263                         RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3264                 ref_op->ldpc_dec.op_flags |=
3265                                 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3266         ref_op->ldpc_dec.iter_max = get_iter_max();
3267         ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3268
3269         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3270                 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3271                                 bufs->hard_outputs, bufs->soft_outputs,
3272                                 bufs->harq_inputs, bufs->harq_outputs, ref_op);
3273         generate_llr_input(num_ops, bufs->inputs, ref_op);
3274
3275         /* Set counter to validate the ordering */
3276         for (j = 0; j < num_ops; ++j)
3277                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3278
3279         for (i = 0; i < 1; ++i) { /* Could add more iterations */
3280                 for (j = 0; j < num_ops; ++j) {
3281                         if (!loopback)
3282                                 mbuf_reset(
3283                                 ops_enq[j]->ldpc_dec.hard_output.data);
3284                         if (hc_out || loopback)
3285                                 mbuf_reset(
3286                                 ops_enq[j]->ldpc_dec.harq_combined_output.data);
3287                 }
3288                 if (extDdr)
3289                         preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3290                                         num_ops, true);
3291                 start_time = rte_rdtsc_precise();
3292
3293                 for (enq = 0, deq = 0; enq < num_ops;) {
3294                         num_to_enq = burst_sz;
3295
3296                         if (unlikely(num_ops - enq < num_to_enq))
3297                                 num_to_enq = num_ops - enq;
3298
3299                         enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3300                                         queue_id, &ops_enq[enq], num_to_enq);
3301
3302                         deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3303                                         queue_id, &ops_deq[deq], enq - deq);
3304                 }
3305
3306                 /* dequeue the remaining */
3307                 while (deq < enq) {
3308                         deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3309                                         queue_id, &ops_deq[deq], enq - deq);
3310                 }
3311
3312                 total_time += rte_rdtsc_precise() - start_time;
3313         }
3314
3315         tp->iter_count = 0;
3316         tp->iter_average = 0;
3317         /* get the max of iter_count for all dequeued ops */
3318         for (i = 0; i < num_ops; ++i) {
3319                 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3320                                 tp->iter_count);
3321                 tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count;
3322                 if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR))
3323                         parity_bler += 1.0;
3324         }
3325
3326         parity_bler /= num_ops; /* This one is based on SYND */
3327         tp->iter_average /= num_ops;
3328         tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops;
3329
3330         if (test_vector.op_type != RTE_BBDEV_OP_NONE
3331                         && tp->bler == 0
3332                         && parity_bler == 0
3333                         && !hc_out) {
3334                 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3335                                 tp->op_params->vector_mask);
3336                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3337         }
3338
3339         rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3340
3341         double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3342         tp->ops_per_sec = ((double)num_ops * 1) /
3343                         ((double)total_time / (double)rte_get_tsc_hz());
3344         tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) /
3345                         1000000.0) / ((double)total_time /
3346                         (double)rte_get_tsc_hz());
3347
3348         return TEST_SUCCESS;
3349 }
3350
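/* Per-lcore throughput test body for LDPC decode in polling (PMD) mode:
 * early termination is disabled so the decoder always runs up to iter_max.
 */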
3351 static int
3352 throughput_pmd_lcore_ldpc_dec(void *arg)
3353 {
3354         struct thread_params *tp = arg;
3355         uint16_t enq, deq;
3356         uint64_t total_time = 0, start_time;
3357         const uint16_t queue_id = tp->queue_id;
3358         const uint16_t burst_sz = tp->op_params->burst_sz;
3359         const uint16_t num_ops = tp->op_params->num_to_process;
3360         struct rte_bbdev_dec_op *ops_enq[num_ops];
3361         struct rte_bbdev_dec_op *ops_deq[num_ops];
3362         struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
3363         struct test_buffers *bufs = NULL;
3364         int i, j, ret;
3365         struct rte_bbdev_info info;
3366         uint16_t num_to_enq;
3367         bool extDdr = check_bit(ldpc_cap_flags,
3368                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE);
3369         bool loopback = check_bit(ref_op->ldpc_dec.op_flags,
3370                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK);
3371         bool hc_out = check_bit(ref_op->ldpc_dec.op_flags,
3372                         RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
3373
3374         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3375                         "BURST_SIZE should be <= %u", MAX_BURST);
3376
3377         rte_bbdev_info_get(tp->dev_id, &info);
3378
3379         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3380                         "NUM_OPS cannot exceed %u for this device",
3381                         info.drv.queue_size_lim);
3382
3383         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3384
3385         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3386                 rte_pause();
3387
3388         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
3389         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
3390
3391         /* For throughput tests we need to disable early termination */
3392         if (check_bit(ref_op->ldpc_dec.op_flags,
3393                         RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
3394                 ref_op->ldpc_dec.op_flags &=
3395                                 ~RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
3396         ref_op->ldpc_dec.iter_max = get_iter_max();
3397         ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
3398
3399         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3400                 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs,
3401                                 bufs->hard_outputs, bufs->soft_outputs,
3402                                 bufs->harq_inputs, bufs->harq_outputs, ref_op);
3403
3404         /* Set counter to validate the ordering */
3405         for (j = 0; j < num_ops; ++j)
3406                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3407
3408         for (i = 0; i < TEST_REPETITIONS; ++i) {
3409                 for (j = 0; j < num_ops; ++j) {
3410                         if (!loopback)
3411                                 mbuf_reset(
3412                                 ops_enq[j]->ldpc_dec.hard_output.data);
3413                         if (hc_out || loopback)
3414                                 mbuf_reset(
3415                                 ops_enq[j]->ldpc_dec.harq_combined_output.data);
3416                 }
3417                 if (extDdr)
3418                         preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
3419                                         num_ops, true);
3420                 start_time = rte_rdtsc_precise();
3421
3422                 for (enq = 0, deq = 0; enq < num_ops;) {
3423                         num_to_enq = burst_sz;
3424
3425                         if (unlikely(num_ops - enq < num_to_enq))
3426                                 num_to_enq = num_ops - enq;
3427
3428                         enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id,
3429                                         queue_id, &ops_enq[enq], num_to_enq);
3430
3431                         deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3432                                         queue_id, &ops_deq[deq], enq - deq);
3433                 }
3434
3435                 /* dequeue the remaining */
3436                 while (deq < enq) {
3437                         deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id,
3438                                         queue_id, &ops_deq[deq], enq - deq);
3439                 }
3440
3441                 total_time += rte_rdtsc_precise() - start_time;
3442         }
3443
3444         tp->iter_count = 0;
3445         /* get the max of iter_count for all dequeued ops */
3446         for (i = 0; i < num_ops; ++i) {
3447                 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count,
3448                                 tp->iter_count);
3449         }
3450         if (extDdr) {
3451                 /* Read loopback is not thread safe */
3452                 retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops);
3453         }
3454
3455         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3456                 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op,
3457                                 tp->op_params->vector_mask);
3458                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3459         }
3460
3461         rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
3462
3463         double tb_len_bits = calc_ldpc_dec_TB_size(ref_op);
3464
3465         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3466                         ((double)total_time / (double)rte_get_tsc_hz());
3467         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
3468                         1000000.0) / ((double)total_time /
3469                         (double)rte_get_tsc_hz());
3470
3471         return TEST_SUCCESS;
3472 }
3473
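/* Per-lcore throughput test body for Turbo encode in polling (PMD) mode. */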
3474 static int
3475 throughput_pmd_lcore_enc(void *arg)
3476 {
3477         struct thread_params *tp = arg;
3478         uint16_t enq, deq;
3479         uint64_t total_time = 0, start_time;
3480         const uint16_t queue_id = tp->queue_id;
3481         const uint16_t burst_sz = tp->op_params->burst_sz;
3482         const uint16_t num_ops = tp->op_params->num_to_process;
3483         struct rte_bbdev_enc_op *ops_enq[num_ops];
3484         struct rte_bbdev_enc_op *ops_deq[num_ops];
3485         struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3486         struct test_buffers *bufs = NULL;
3487         int i, j, ret;
3488         struct rte_bbdev_info info;
3489         uint16_t num_to_enq;
3490
3491         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3492                         "BURST_SIZE should be <= %u", MAX_BURST);
3493
3494         rte_bbdev_info_get(tp->dev_id, &info);
3495
3496         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3497                         "NUM_OPS cannot exceed %u for this device",
3498                         info.drv.queue_size_lim);
3499
3500         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3501
3502         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3503                 rte_pause();
3504
3505         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3506                         num_ops);
3507         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3508                         num_ops);
3509         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3510                 copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3511                                 bufs->hard_outputs, ref_op);
3512
3513         /* Set counter to validate the ordering */
3514         for (j = 0; j < num_ops; ++j)
3515                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3516
3517         for (i = 0; i < TEST_REPETITIONS; ++i) {
3518
3519                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3520                         for (j = 0; j < num_ops; ++j)
3521                                 mbuf_reset(ops_enq[j]->turbo_enc.output.data);
3522
3523                 start_time = rte_rdtsc_precise();
3524
3525                 for (enq = 0, deq = 0; enq < num_ops;) {
3526                         num_to_enq = burst_sz;
3527
3528                         if (unlikely(num_ops - enq < num_to_enq))
3529                                 num_to_enq = num_ops - enq;
3530
3531                         enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
3532                                         queue_id, &ops_enq[enq], num_to_enq);
3533
3534                         deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3535                                         queue_id, &ops_deq[deq], enq - deq);
3536                 }
3537
3538                 /* dequeue the remaining */
3539                 while (deq < enq) {
3540                         deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
3541                                         queue_id, &ops_deq[deq], enq - deq);
3542                 }
3543
3544                 total_time += rte_rdtsc_precise() - start_time;
3545         }
3546
3547         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3548                 ret = validate_enc_op(ops_deq, num_ops, ref_op);
3549                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3550         }
3551
3552         rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3553
3554         double tb_len_bits = calc_enc_TB_size(ref_op);
3555
3556         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3557                         ((double)total_time / (double)rte_get_tsc_hz());
3558         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3559                         / 1000000.0) / ((double)total_time /
3560                         (double)rte_get_tsc_hz());
3561
3562         return TEST_SUCCESS;
3563 }
3564
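/* Per-lcore throughput test body for LDPC encode in polling (PMD) mode. */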
3565 static int
3566 throughput_pmd_lcore_ldpc_enc(void *arg)
3567 {
3568         struct thread_params *tp = arg;
3569         uint16_t enq, deq;
3570         uint64_t total_time = 0, start_time;
3571         const uint16_t queue_id = tp->queue_id;
3572         const uint16_t burst_sz = tp->op_params->burst_sz;
3573         const uint16_t num_ops = tp->op_params->num_to_process;
3574         struct rte_bbdev_enc_op *ops_enq[num_ops];
3575         struct rte_bbdev_enc_op *ops_deq[num_ops];
3576         struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
3577         struct test_buffers *bufs = NULL;
3578         int i, j, ret;
3579         struct rte_bbdev_info info;
3580         uint16_t num_to_enq;
3581
3582         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
3583                         "BURST_SIZE should be <= %u", MAX_BURST);
3584
3585         rte_bbdev_info_get(tp->dev_id, &info);
3586
3587         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
3588                         "NUM_OPS cannot exceed %u for this device",
3589                         info.drv.queue_size_lim);
3590
3591         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
3592
3593         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
3594                 rte_pause();
3595
3596         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
3597                         num_ops);
3598         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
3599                         num_ops);
3600         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3601                 copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs,
3602                                 bufs->hard_outputs, ref_op);
3603
3604         /* Set counter to validate the ordering */
3605         for (j = 0; j < num_ops; ++j)
3606                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
3607
3608         for (i = 0; i < TEST_REPETITIONS; ++i) {
3609
3610                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
3611                         for (j = 0; j < num_ops; ++j)
3612                                 mbuf_reset(ops_enq[j]->ldpc_enc.output.data);
3613
3614                 start_time = rte_rdtsc_precise();
3615
3616                 for (enq = 0, deq = 0; enq < num_ops;) {
3617                         num_to_enq = burst_sz;
3618
3619                         if (unlikely(num_ops - enq < num_to_enq))
3620                                 num_to_enq = num_ops - enq;
3621
3622                         enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id,
3623                                         queue_id, &ops_enq[enq], num_to_enq);
3624
3625                         deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3626                                         queue_id, &ops_deq[deq], enq - deq);
3627                 }
3628
3629                 /* dequeue the remaining */
3630                 while (deq < enq) {
3631                         deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id,
3632                                         queue_id, &ops_deq[deq], enq - deq);
3633                 }
3634
3635                 total_time += rte_rdtsc_precise() - start_time;
3636         }
3637
3638         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
3639                 ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
3640                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
3641         }
3642
3643         rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);
3644
3645         double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);
3646
3647         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
3648                         ((double)total_time / (double)rte_get_tsc_hz());
3649         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
3650                         / 1000000.0) / ((double)total_time /
3651                         (double)rte_get_tsc_hz());
3652
3653         return TEST_SUCCESS;
3654 }
3655
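/* Aggregate the performance results over the number of cores used */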
3656 static void
3657 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
3658 {
3659         unsigned int iter = 0;
3660         double total_mops = 0, total_mbps = 0;
3661
3662         for (iter = 0; iter < used_cores; iter++) {
3663                 printf(
3664                         "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
3665                         t_params[iter].lcore_id, t_params[iter].ops_per_sec,
3666                         t_params[iter].mbps);
3667                 total_mops += t_params[iter].ops_per_sec;
3668                 total_mbps += t_params[iter].mbps;
3669         }
3670         printf(
3671                 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
3672                 used_cores, total_mops, total_mbps);
3673 }
3674
3675 /* Aggregate the performance results over the number of cores used */
3676 static void
3677 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
3678 {
3679         unsigned int core_idx = 0;
3680         double total_mops = 0, total_mbps = 0;
3681         uint8_t iter_count = 0;
3682
3683         for (core_idx = 0; core_idx < used_cores; core_idx++) {
3684                 printf(
3685                         "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
3686                         t_params[core_idx].lcore_id,
3687                         t_params[core_idx].ops_per_sec,
3688                         t_params[core_idx].mbps,
3689                         t_params[core_idx].iter_count);
3690                 total_mops += t_params[core_idx].ops_per_sec;
3691                 total_mbps += t_params[core_idx].mbps;
3692                 iter_count = RTE_MAX(iter_count,
3693                                 t_params[core_idx].iter_count);
3694         }
3695         printf(
3696                 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
3697                 used_cores, total_mops, total_mbps, iter_count);
3698 }
3699
3700 /* Aggregate the performance results over the number of cores used */
3701 static void
3702 print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
3703 {
3704         unsigned int core_idx = 0;
3705         double total_mbps = 0, total_bler = 0, total_iter = 0;
3706         double snr = get_snr();
3707
3708         for (core_idx = 0; core_idx < used_cores; core_idx++) {
3709                 printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
3710                                 t_params[core_idx].lcore_id,
3711                                 t_params[core_idx].bler * 100,
3712                                 t_params[core_idx].iter_average,
3713                                 t_params[core_idx].mbps,
3714                                 get_vector_filename());
3715                 total_mbps += t_params[core_idx].mbps;
3716                 total_bler += t_params[core_idx].bler;
3717                 total_iter += t_params[core_idx].iter_average;
3718         }
3719         total_bler /= used_cores;
3720         total_iter /= used_cores;
3721
3722         printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
3723                         snr, total_bler * 100, total_iter, get_iter_max(),
3724                         total_mbps, get_vector_filename());
3725 }
3726
3727 /*
3728  * Test function that determines BLER wireless performance
3729  */
3730 static int
3731 bler_test(struct active_device *ad,
3732                 struct test_op_params *op_params)
3733 {
3734         int ret;
3735         unsigned int lcore_id, used_cores = 0;
3736         struct thread_params *t_params;
3737         struct rte_bbdev_info info;
3738         lcore_function_t *bler_function;
3739         uint16_t num_lcores;
3740         const char *op_type_str;
3741
3742         rte_bbdev_info_get(ad->dev_id, &info);
3743
3744         op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3745         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3746                         test_vector.op_type);
3747
3748         printf("+ ------------------------------------------------------- +\n");
3749         printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3750                         info.dev_name, ad->nb_queues, op_params->burst_sz,
3751                         op_params->num_to_process, op_params->num_lcores,
3752                         op_type_str,
3753                         intr_enabled ? "Interrupt mode" : "PMD mode",
3754                         (double)rte_get_tsc_hz() / 1000000000.0);
3755
3756         /* Set number of lcores */
3757         num_lcores = (ad->nb_queues < (op_params->num_lcores))
3758                         ? ad->nb_queues
3759                         : op_params->num_lcores;
3760
3761         /* Allocate memory for thread parameters structure */
3762         t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3763                         RTE_CACHE_LINE_SIZE);
3764         TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3765                         RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3766                                 RTE_CACHE_LINE_SIZE));
3767
3768         if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) &&
3769                         !check_bit(test_vector.ldpc_dec.op_flags,
3770                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
3771                         && !check_bit(test_vector.ldpc_dec.op_flags,
3772                         RTE_BBDEV_LDPC_LLR_COMPRESSION))
3773                 bler_function = bler_pmd_lcore_ldpc_dec;
3774         else
3775                 return TEST_SKIPPED;
3776
3777         rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3778
3779         /* Main core is set at first entry */
3780         t_params[0].dev_id = ad->dev_id;
3781         t_params[0].lcore_id = rte_lcore_id();
3782         t_params[0].op_params = op_params;
3783         t_params[0].queue_id = ad->queue_ids[used_cores++];
3784         t_params[0].iter_count = 0;
3785
3786         RTE_LCORE_FOREACH_WORKER(lcore_id) {
3787                 if (used_cores >= num_lcores)
3788                         break;
3789
3790                 t_params[used_cores].dev_id = ad->dev_id;
3791                 t_params[used_cores].lcore_id = lcore_id;
3792                 t_params[used_cores].op_params = op_params;
3793                 t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3794                 t_params[used_cores].iter_count = 0;
3795
3796                 rte_eal_remote_launch(bler_function,
3797                                 &t_params[used_cores++], lcore_id);
3798         }
3799
3800         rte_atomic16_set(&op_params->sync, SYNC_START);
3801         ret = bler_function(&t_params[0]);
3802
3803         /* Main core is always used */
3804         for (used_cores = 1; used_cores < num_lcores; used_cores++)
3805                 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3806
3807         print_dec_bler(t_params, num_lcores);
3808
3809         /* Return if test failed */
3810         if (ret) {
3811                 rte_free(t_params);
3812                 return ret;
3813         }
3814
3815         /* Test passed - free the thread parameters and return the status */
3816         rte_free(t_params);
3817         return ret;
3818 }
3819
3820 /*
3821  * Test function that determines how long an enqueue + dequeue of a burst
3822  * takes on available lcores.
3823  */
3824 static int
3825 throughput_test(struct active_device *ad,
3826                 struct test_op_params *op_params)
3827 {
3828         int ret;
3829         unsigned int lcore_id, used_cores = 0;
3830         struct thread_params *t_params, *tp;
3831         struct rte_bbdev_info info;
3832         lcore_function_t *throughput_function;
3833         uint16_t num_lcores;
3834         const char *op_type_str;
3835
3836         rte_bbdev_info_get(ad->dev_id, &info);
3837
3838         op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
3839         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
3840                         test_vector.op_type);
3841
3842         printf("+ ------------------------------------------------------- +\n");
3843         printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
3844                         info.dev_name, ad->nb_queues, op_params->burst_sz,
3845                         op_params->num_to_process, op_params->num_lcores,
3846                         op_type_str,
3847                         intr_enabled ? "Interrupt mode" : "PMD mode",
3848                         (double)rte_get_tsc_hz() / 1000000000.0);
3849
3850         /* Set number of lcores */
3851         num_lcores = (ad->nb_queues < (op_params->num_lcores))
3852                         ? ad->nb_queues
3853                         : op_params->num_lcores;
3854
3855         /* Allocate memory for thread parameters structure */
3856         t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
3857                         RTE_CACHE_LINE_SIZE);
3858         TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
3859                         RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
3860                                 RTE_CACHE_LINE_SIZE));
3861
3862         if (intr_enabled) {
3863                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3864                         throughput_function = throughput_intr_lcore_dec;
3865                 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3866                         throughput_function = throughput_intr_lcore_ldpc_dec;
3867                 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3868                         throughput_function = throughput_intr_lcore_enc;
3869                 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3870                         throughput_function = throughput_intr_lcore_ldpc_enc;
3871                 else
3872                         throughput_function = throughput_intr_lcore_enc;
3873
3874                 /* Dequeue interrupt callback registration */
3875                 ret = rte_bbdev_callback_register(ad->dev_id,
3876                                 RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
3877                                 t_params);
3878                 if (ret < 0) {
3879                         rte_free(t_params);
3880                         return ret;
3881                 }
3882         } else {
3883                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
3884                         throughput_function = throughput_pmd_lcore_dec;
3885                 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3886                         throughput_function = throughput_pmd_lcore_ldpc_dec;
3887                 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
3888                         throughput_function = throughput_pmd_lcore_enc;
3889                 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3890                         throughput_function = throughput_pmd_lcore_ldpc_enc;
3891                 else
3892                         throughput_function = throughput_pmd_lcore_enc;
3893         }
3894
3895         rte_atomic16_set(&op_params->sync, SYNC_WAIT);
3896
3897         /* Main core is set at first entry */
3898         t_params[0].dev_id = ad->dev_id;
3899         t_params[0].lcore_id = rte_lcore_id();
3900         t_params[0].op_params = op_params;
3901         t_params[0].queue_id = ad->queue_ids[used_cores++];
3902         t_params[0].iter_count = 0;
3903
3904         RTE_LCORE_FOREACH_WORKER(lcore_id) {
3905                 if (used_cores >= num_lcores)
3906                         break;
3907
3908                 t_params[used_cores].dev_id = ad->dev_id;
3909                 t_params[used_cores].lcore_id = lcore_id;
3910                 t_params[used_cores].op_params = op_params;
3911                 t_params[used_cores].queue_id = ad->queue_ids[used_cores];
3912                 t_params[used_cores].iter_count = 0;
3913
3914                 rte_eal_remote_launch(throughput_function,
3915                                 &t_params[used_cores++], lcore_id);
3916         }
3917
3918         rte_atomic16_set(&op_params->sync, SYNC_START);
3919         ret = throughput_function(&t_params[0]);
3920
3921         /* Main core is always used */
3922         for (used_cores = 1; used_cores < num_lcores; used_cores++)
3923                 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
3924
3925         /* Return if test failed */
3926         if (ret) {
3927                 rte_free(t_params);
3928                 return ret;
3929         }
3930
3931         /* Print throughput if interrupts are disabled and test passed */
3932         if (!intr_enabled) {
3933                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3934                                 test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3935                         print_dec_throughput(t_params, num_lcores);
3936                 else
3937                         print_enc_throughput(t_params, num_lcores);
3938                 rte_free(t_params);
3939                 return ret;
3940         }
3941
3942         /* In interrupt TC we need to wait for the interrupt callback to dequeue
3943          * all pending operations. Skip waiting for queues which reported an
3944          * error using processing_status variable.
3945          * Wait for main lcore operations.
3946          */
3947         tp = &t_params[0];
3948         while ((rte_atomic16_read(&tp->nb_dequeued) <
3949                         op_params->num_to_process) &&
3950                         (rte_atomic16_read(&tp->processing_status) !=
3951                         TEST_FAILED))
3952                 rte_pause();
3953
3954         tp->ops_per_sec /= TEST_REPETITIONS;
3955         tp->mbps /= TEST_REPETITIONS;
3956         ret |= (int)rte_atomic16_read(&tp->processing_status);
3957
3958         /* Wait for worker lcores operations */
3959         for (used_cores = 1; used_cores < num_lcores; used_cores++) {
3960                 tp = &t_params[used_cores];
3961
3962                 while ((rte_atomic16_read(&tp->nb_dequeued) <
3963                                 op_params->num_to_process) &&
3964                                 (rte_atomic16_read(&tp->processing_status) !=
3965                                 TEST_FAILED))
3966                         rte_pause();
3967
3968                 tp->ops_per_sec /= TEST_REPETITIONS;
3969                 tp->mbps /= TEST_REPETITIONS;
3970                 ret |= (int)rte_atomic16_read(&tp->processing_status);
3971         }
3972
3973         /* Print throughput if test passed */
3974         if (!ret) {
3975                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
3976                                 test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
3977                         print_dec_throughput(t_params, num_lcores);
3978                 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
3979                                 test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
3980                         print_enc_throughput(t_params, num_lcores);
3981         }
3982
3983         rte_free(t_params);
3984         return ret;
3985 }
3986
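/* Test case for latency/validation for the Turbo decoder */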
3987 static int
3988 latency_test_dec(struct rte_mempool *mempool,
3989                 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
3990                 int vector_mask, uint16_t dev_id, uint16_t queue_id,
3991                 const uint16_t num_to_process, uint16_t burst_sz,
3992                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
3993 {
3994         int ret = TEST_SUCCESS;
3995         uint16_t i, j, dequeued;
3996         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
3997         uint64_t start_time = 0, last_time = 0;
3998
3999         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4000                 uint16_t enq = 0, deq = 0;
4001                 bool first_time = true;
4002                 last_time = 0;
4003
4004                 if (unlikely(num_to_process - dequeued < burst_sz))
4005                         burst_sz = num_to_process - dequeued;
4006
4007                 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4008                 TEST_ASSERT_SUCCESS(ret,
4009                                 "rte_bbdev_dec_op_alloc_bulk() failed");
4010                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4011                         copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4012                                         bufs->inputs,
4013                                         bufs->hard_outputs,
4014                                         bufs->soft_outputs,
4015                                         ref_op);
4016
4017                 /* Set counter to validate the ordering */
4018                 for (j = 0; j < burst_sz; ++j)
4019                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4020
4021                 start_time = rte_rdtsc_precise();
4022
4023                 enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
4024                                 burst_sz);
4025                 TEST_ASSERT(enq == burst_sz,
4026                                 "Error enqueueing burst, expected %u, got %u",
4027                                 burst_sz, enq);
4028
4029                 /* Dequeue */
4030                 do {
4031                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4032                                         &ops_deq[deq], burst_sz - deq);
4033                         if (likely(first_time && (deq > 0))) {
4034                                 last_time = rte_rdtsc_precise() - start_time;
4035                                 first_time = false;
4036                         }
4037                 } while (unlikely(burst_sz != deq));
4038
4039                 *max_time = RTE_MAX(*max_time, last_time);
4040                 *min_time = RTE_MIN(*min_time, last_time);
4041                 *total_time += last_time;
4042
4043                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4044                         ret = validate_dec_op(ops_deq, burst_sz, ref_op,
4045                                         vector_mask);
4046                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4047                 }
4048
4049                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4050                 dequeued += deq;
4051         }
4052
4053         return i;
4054 }
4055
4056 /* Test case for latency/validation for LDPC Decoder */
4057 static int
4058 latency_test_ldpc_dec(struct rte_mempool *mempool,
4059                 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
4060                 int vector_mask, uint16_t dev_id, uint16_t queue_id,
4061                 const uint16_t num_to_process, uint16_t burst_sz,
4062                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time,
4063                 bool disable_et)
4064 {
4065         int ret = TEST_SUCCESS;
4066         uint16_t i, j, dequeued;
4067         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4068         uint64_t start_time = 0, last_time = 0;
4069         bool extDdr = ldpc_cap_flags &
4070                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4071
4072         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4073                 uint16_t enq = 0, deq = 0;
4074                 bool first_time = true;
4075                 last_time = 0;
4076
4077                 if (unlikely(num_to_process - dequeued < burst_sz))
4078                         burst_sz = num_to_process - dequeued;
4079
4080                 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4081                 TEST_ASSERT_SUCCESS(ret,
4082                                 "rte_bbdev_dec_op_alloc_bulk() failed");
4083
4084                 /* For latency tests we need to disable early termination */
4085                 if (disable_et && check_bit(ref_op->ldpc_dec.op_flags,
4086                                 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
4087                         ref_op->ldpc_dec.op_flags -=
4088                                         RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
4089                 ref_op->ldpc_dec.iter_max = get_iter_max();
4090                 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;
4091
4092                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4093                         copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4094                                         bufs->inputs,
4095                                         bufs->hard_outputs,
4096                                         bufs->soft_outputs,
4097                                         bufs->harq_inputs,
4098                                         bufs->harq_outputs,
4099                                         ref_op);
4100
4101                 if (extDdr)
4102                         preload_harq_ddr(dev_id, queue_id, ops_enq,
4103                                         burst_sz, true);
4104
4105                 /* Set counter to validate the ordering */
4106                 for (j = 0; j < burst_sz; ++j)
4107                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4108
4109                 start_time = rte_rdtsc_precise();
4110
4111                 enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4112                                 &ops_enq[enq], burst_sz);
4113                 TEST_ASSERT(enq == burst_sz,
4114                                 "Error enqueueing burst, expected %u, got %u",
4115                                 burst_sz, enq);
4116
4117                 /* Dequeue */
4118                 do {
4119                         deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4120                                         &ops_deq[deq], burst_sz - deq);
4121                         if (likely(first_time && (deq > 0))) {
4122                                 last_time = rte_rdtsc_precise() - start_time;
4123                                 first_time = false;
4124                         }
4125                 } while (unlikely(burst_sz != deq));
4126
4127                 *max_time = RTE_MAX(*max_time, last_time);
4128                 *min_time = RTE_MIN(*min_time, last_time);
4129                 *total_time += last_time;
4130
4131                 if (extDdr)
4132                         retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4133
4134                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4135                         ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
4136                                         vector_mask);
4137                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4138                 }
4139
4140                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4141                 dequeued += deq;
4142         }
4143         return i;
4144 }
4145
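/* Test case for latency/validation for the Turbo encoder */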
4146 static int
4147 latency_test_enc(struct rte_mempool *mempool,
4148                 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4149                 uint16_t dev_id, uint16_t queue_id,
4150                 const uint16_t num_to_process, uint16_t burst_sz,
4151                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4152 {
4153         int ret = TEST_SUCCESS;
4154         uint16_t i, j, dequeued;
4155         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4156         uint64_t start_time = 0, last_time = 0;
4157
4158         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4159                 uint16_t enq = 0, deq = 0;
4160                 bool first_time = true;
4161                 last_time = 0;
4162
4163                 if (unlikely(num_to_process - dequeued < burst_sz))
4164                         burst_sz = num_to_process - dequeued;
4165
4166                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4167                 TEST_ASSERT_SUCCESS(ret,
4168                                 "rte_bbdev_enc_op_alloc_bulk() failed");
4169                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4170                         copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4171                                         bufs->inputs,
4172                                         bufs->hard_outputs,
4173                                         ref_op);
4174
4175                 /* Set counter to validate the ordering */
4176                 for (j = 0; j < burst_sz; ++j)
4177                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4178
4179                 start_time = rte_rdtsc_precise();
4180
4181                 enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
4182                                 burst_sz);
4183                 TEST_ASSERT(enq == burst_sz,
4184                                 "Error enqueueing burst, expected %u, got %u",
4185                                 burst_sz, enq);
4186
4187                 /* Dequeue */
4188                 do {
4189                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4190                                         &ops_deq[deq], burst_sz - deq);
4191                         if (likely(first_time && (deq > 0))) {
4192                                 last_time += rte_rdtsc_precise() - start_time;
4193                                 first_time = false;
4194                         }
4195                 } while (unlikely(burst_sz != deq));
4196
4197                 *max_time = RTE_MAX(*max_time, last_time);
4198                 *min_time = RTE_MIN(*min_time, last_time);
4199                 *total_time += last_time;
4200
4201                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4202                         ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4203                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4204                 }
4205
4206                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4207                 dequeued += deq;
4208         }
4209
4210         return i;
4211 }
4212
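/* Test case for latency/validation for the LDPC encoder */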
4213 static int
4214 latency_test_ldpc_enc(struct rte_mempool *mempool,
4215                 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
4216                 uint16_t dev_id, uint16_t queue_id,
4217                 const uint16_t num_to_process, uint16_t burst_sz,
4218                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
4219 {
4220         int ret = TEST_SUCCESS;
4221         uint16_t i, j, dequeued;
4222         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4223         uint64_t start_time = 0, last_time = 0;
4224
4225         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4226                 uint16_t enq = 0, deq = 0;
4227                 bool first_time = true;
4228                 last_time = 0;
4229
4230                 if (unlikely(num_to_process - dequeued < burst_sz))
4231                         burst_sz = num_to_process - dequeued;
4232
4233                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4234                 TEST_ASSERT_SUCCESS(ret,
4235                                 "rte_bbdev_enc_op_alloc_bulk() failed");
4236                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4237                         copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4238                                         bufs->inputs,
4239                                         bufs->hard_outputs,
4240                                         ref_op);
4241
4242                 /* Set counter to validate the ordering */
4243                 for (j = 0; j < burst_sz; ++j)
4244                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
4245
4246                 start_time = rte_rdtsc_precise();
4247
4248                 enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4249                                 &ops_enq[enq], burst_sz);
4250                 TEST_ASSERT(enq == burst_sz,
4251                                 "Error enqueueing burst, expected %u, got %u",
4252                                 burst_sz, enq);
4253
4254                 /* Dequeue */
4255                 do {
4256                         deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4257                                         &ops_deq[deq], burst_sz - deq);
4258                         if (likely(first_time && (deq > 0))) {
4259                                 last_time += rte_rdtsc_precise() - start_time;
4260                                 first_time = false;
4261                         }
4262                 } while (unlikely(burst_sz != deq));
4263
4264                 *max_time = RTE_MAX(*max_time, last_time);
4265                 *min_time = RTE_MIN(*min_time, last_time);
4266                 *total_time += last_time;
4267
4268                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
4269                         ret = validate_enc_op(ops_deq, burst_sz, ref_op);
4270                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
4271                 }
4272
4273                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4274                 dequeued += deq;
4275         }
4276
4277         return i;
4278 }
4279
4280 /* Common function for running validation and latency test cases */
4281 static int
4282 validation_latency_test(struct active_device *ad,
4283                 struct test_op_params *op_params, bool latency_flag)
4284 {
4285         int iter;
4286         uint16_t burst_sz = op_params->burst_sz;
4287         const uint16_t num_to_process = op_params->num_to_process;
4288         const enum rte_bbdev_op_type op_type = test_vector.op_type;
4289         const uint16_t queue_id = ad->queue_ids[0];
4290         struct test_buffers *bufs = NULL;
4291         struct rte_bbdev_info info;
4292         uint64_t total_time, min_time, max_time;
4293         const char *op_type_str;
4294
4295         total_time = max_time = 0;
4296         min_time = UINT64_MAX;
4297
4298         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4299                         "BURST_SIZE should be <= %u", MAX_BURST);
4300
4301         rte_bbdev_info_get(ad->dev_id, &info);
4302         bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4303
4304         op_type_str = rte_bbdev_op_type_str(op_type);
4305         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4306
4307         printf("+ ------------------------------------------------------- +\n");
4308         if (latency_flag)
4309                 printf("== test: latency\ndev:");
4310         else
4311                 printf("== test: validation\ndev:");
4312         printf("%s, burst size: %u, num ops: %u, op type: %s\n",
4313                         info.dev_name, burst_sz, num_to_process, op_type_str);
4314
4315         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4316                 iter = latency_test_dec(op_params->mp, bufs,
4317                                 op_params->ref_dec_op, op_params->vector_mask,
4318                                 ad->dev_id, queue_id, num_to_process,
4319                                 burst_sz, &total_time, &min_time, &max_time);
4320         else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4321                 iter = latency_test_ldpc_enc(op_params->mp, bufs,
4322                                 op_params->ref_enc_op, ad->dev_id, queue_id,
4323                                 num_to_process, burst_sz, &total_time,
4324                                 &min_time, &max_time);
4325         else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4326                 iter = latency_test_ldpc_dec(op_params->mp, bufs,
4327                                 op_params->ref_dec_op, op_params->vector_mask,
4328                                 ad->dev_id, queue_id, num_to_process,
4329                                 burst_sz, &total_time, &min_time, &max_time,
4330                                 latency_flag);
4331         else /* RTE_BBDEV_OP_TURBO_ENC */
4332                 iter = latency_test_enc(op_params->mp, bufs,
4333                                 op_params->ref_enc_op,
4334                                 ad->dev_id, queue_id,
4335                                 num_to_process, burst_sz, &total_time,
4336                                 &min_time, &max_time);
4337
4338         if (iter <= 0)
4339                 return TEST_FAILED;
4340
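        /* Report per-burst latency: average over all measured bursts plus the
         * minimum and maximum, in TSC cycles and microseconds.
         */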
4341         printf("Operation latency:\n"
4342                         "\tavg: %lg cycles, %lg us\n"
4343                         "\tmin: %lg cycles, %lg us\n"
4344                         "\tmax: %lg cycles, %lg us\n",
4345                         (double)total_time / (double)iter,
4346                         (double)(total_time * 1000000) / (double)iter /
4347                         (double)rte_get_tsc_hz(), (double)min_time,
4348                         (double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
4349                         (double)max_time, (double)(max_time * 1000000) /
4350                         (double)rte_get_tsc_hz());
4351
4352         return TEST_SUCCESS;
4353 }
4354
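/* Wrappers around validation_latency_test(): the latency variant additionally
 * requests that LDPC early termination be disabled for the measurement.
 */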
4355 static int
4356 latency_test(struct active_device *ad, struct test_op_params *op_params)
4357 {
4358         return validation_latency_test(ad, op_params, true);
4359 }
4360
4361 static int
4362 validation_test(struct active_device *ad, struct test_op_params *op_params)
4363 {
4364         return validation_latency_test(ad, op_params, false);
4365 }
4366
4367 #ifdef RTE_BBDEV_OFFLOAD_COST
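/* Copy the per-queue statistics kept by the driver into the caller's structure */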
4368 static int
4369 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
4370                 struct rte_bbdev_stats *stats)
4371 {
4372         struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
4373         struct rte_bbdev_stats *q_stats;
4374
4375         if (queue_id >= dev->data->num_queues)
4376                 return -1;
4377
4378         q_stats = &dev->data->queues[queue_id].queue_stats;
4379
4380         stats->enqueued_count = q_stats->enqueued_count;
4381         stats->dequeued_count = q_stats->dequeued_count;
4382         stats->enqueue_err_count = q_stats->enqueue_err_count;
4383         stats->dequeue_err_count = q_stats->dequeue_err_count;
4384         stats->acc_offload_cycles = q_stats->acc_offload_cycles;
4385
4386         return 0;
4387 }
4388
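/* Measure offload cost for the Turbo decoder: CPU enqueue time (excluding
 * accelerator cycles), accelerator offload cycles reported by the driver,
 * and the time to dequeue the first operation(s).
 */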
4389 static int
4390 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
4391                 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4392                 uint16_t queue_id, const uint16_t num_to_process,
4393                 uint16_t burst_sz, struct test_time_stats *time_st)
4394 {
4395         int i, dequeued, ret;
4396         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4397         uint64_t enq_start_time, deq_start_time;
4398         uint64_t enq_sw_last_time, deq_last_time;
4399         struct rte_bbdev_stats stats;
4400
4401         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4402                 uint16_t enq = 0, deq = 0;
4403
4404                 if (unlikely(num_to_process - dequeued < burst_sz))
4405                         burst_sz = num_to_process - dequeued;
4406
4407                 rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4408                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4409                         copy_reference_dec_op(ops_enq, burst_sz, dequeued,
4410                                         bufs->inputs,
4411                                         bufs->hard_outputs,
4412                                         bufs->soft_outputs,
4413                                         ref_op);
4414
4415                 /* Start time meas for enqueue function offload latency */
4416                 enq_start_time = rte_rdtsc_precise();
4417                 do {
4418                         enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
4419                                         &ops_enq[enq], burst_sz - enq);
4420                 } while (unlikely(burst_sz != enq));
4421
4422                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4423                 TEST_ASSERT_SUCCESS(ret,
4424                                 "Failed to get stats for queue (%u) of device (%u)",
4425                                 queue_id, dev_id);
4426
4427                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
4428                                 stats.acc_offload_cycles;
4429                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4430                                 enq_sw_last_time);
4431                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4432                                 enq_sw_last_time);
4433                 time_st->enq_sw_total_time += enq_sw_last_time;
4434
4435                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4436                                 stats.acc_offload_cycles);
4437                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4438                                 stats.acc_offload_cycles);
4439                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
4440
4441                 /* give time for device to process ops */
4442                 rte_delay_us(WAIT_OFFLOAD_US);
4443
4444                 /* Start time meas for dequeue function offload latency */
4445                 deq_start_time = rte_rdtsc_precise();
4446                 /* Dequeue the first operation(s) to measure dequeue latency */
4447                 do {
4448                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4449                                         &ops_deq[deq], enq);
4450                 } while (unlikely(deq == 0));
4451
4452                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
4453                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4454                                 deq_last_time);
4455                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4456                                 deq_last_time);
4457                 time_st->deq_total_time += deq_last_time;
4458
4459                 /* Dequeue remaining operations if needed */
4460                 while (burst_sz != deq)
4461                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
4462                                         &ops_deq[deq], burst_sz - deq);
4463
4464                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4465                 dequeued += deq;
4466         }
4467
4468         return i;
4469 }
4470
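/* Measure offload cost for the LDPC decoder: CPU enqueue time (excluding
 * accelerator cycles), accelerator offload cycles reported by the driver,
 * and the time to dequeue the first operation(s). When external HARQ memory
 * is enabled, HARQ data is preloaded before and read back after each burst.
 */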
4471 static int
4472 offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
4473                 struct test_buffers *bufs,
4474                 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
4475                 uint16_t queue_id, const uint16_t num_to_process,
4476                 uint16_t burst_sz, struct test_time_stats *time_st)
4477 {
4478         int i, dequeued, ret;
4479         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4480         uint64_t enq_start_time, deq_start_time;
4481         uint64_t enq_sw_last_time, deq_last_time;
4482         struct rte_bbdev_stats stats;
4483         bool extDdr = ldpc_cap_flags &
4484                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
4485
4486         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4487                 uint16_t enq = 0, deq = 0;
4488
4489                 if (unlikely(num_to_process - dequeued < burst_sz))
4490                         burst_sz = num_to_process - dequeued;
4491
4492                 rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
4493                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4494                         copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
4495                                         bufs->inputs,
4496                                         bufs->hard_outputs,
4497                                         bufs->soft_outputs,
4498                                         bufs->harq_inputs,
4499                                         bufs->harq_outputs,
4500                                         ref_op);
4501
4502                 if (extDdr)
4503                         preload_harq_ddr(dev_id, queue_id, ops_enq,
4504                                         burst_sz, true);
4505
4506                 /* Start time meas for enqueue function offload latency */
4507                 enq_start_time = rte_rdtsc_precise();
4508                 do {
4509                         enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
4510                                         &ops_enq[enq], burst_sz - enq);
4511                 } while (unlikely(burst_sz != enq));
4512
4513                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4514                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4515                 TEST_ASSERT_SUCCESS(ret,
4516                                 "Failed to get stats for queue (%u) of device (%u)",
4517                                 queue_id, dev_id);
4518
4519                 enq_sw_last_time -= stats.acc_offload_cycles;
4520                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4521                                 enq_sw_last_time);
4522                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4523                                 enq_sw_last_time);
4524                 time_st->enq_sw_total_time += enq_sw_last_time;
4525
4526                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4527                                 stats.acc_offload_cycles);
4528                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4529                                 stats.acc_offload_cycles);
4530                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
4531
4532                 /* give time for device to process ops */
4533                 rte_delay_us(WAIT_OFFLOAD_US);
4534
4535                 /* Start time meas for dequeue function offload latency */
4536                 deq_start_time = rte_rdtsc_precise();
4537                 /* Dequeue the first operation(s) to measure dequeue latency */
4538                 do {
4539                         deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4540                                         &ops_deq[deq], enq);
4541                 } while (unlikely(deq == 0));
4542
4543                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
4544                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4545                                 deq_last_time);
4546                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4547                                 deq_last_time);
4548                 time_st->deq_total_time += deq_last_time;
4549
4550                 /* Dequeue remaining operations if needed */
4551                 while (burst_sz != deq)
4552                         deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
4553                                         &ops_deq[deq], burst_sz - deq);
4554
4555                 if (extDdr) {
4556                         /* Read loopback is not thread safe */
4557                         retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
4558                 }
4559
4560                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
4561                 dequeued += deq;
4562         }
4563
4564         return i;
4565 }
4566
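/* Measure offload cost for the Turbo encoder: CPU enqueue time (excluding
 * accelerator cycles), accelerator offload cycles reported by the driver,
 * and the time to dequeue the first operation(s).
 */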
4567 static int
4568 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
4569                 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4570                 uint16_t queue_id, const uint16_t num_to_process,
4571                 uint16_t burst_sz, struct test_time_stats *time_st)
4572 {
4573         int i, dequeued, ret;
4574         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4575         uint64_t enq_start_time, deq_start_time;
4576         uint64_t enq_sw_last_time, deq_last_time;
4577         struct rte_bbdev_stats stats;
4578
4579         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4580                 uint16_t enq = 0, deq = 0;
4581
4582                 if (unlikely(num_to_process - dequeued < burst_sz))
4583                         burst_sz = num_to_process - dequeued;
4584
4585                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4586                 TEST_ASSERT_SUCCESS(ret,
4587                                 "rte_bbdev_enc_op_alloc_bulk() failed");
4588                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4589                         copy_reference_enc_op(ops_enq, burst_sz, dequeued,
4590                                         bufs->inputs,
4591                                         bufs->hard_outputs,
4592                                         ref_op);
4593
4594                 /* Start time meas for enqueue function offload latency */
4595                 enq_start_time = rte_rdtsc_precise();
4596                 do {
4597                         enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
4598                                         &ops_enq[enq], burst_sz - enq);
4599                 } while (unlikely(burst_sz != enq));
4600
4601                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4602
4603                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4604                 TEST_ASSERT_SUCCESS(ret,
4605                                 "Failed to get stats for queue (%u) of device (%u)",
4606                                 queue_id, dev_id);
4607                 enq_sw_last_time -= stats.acc_offload_cycles;
4608                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4609                                 enq_sw_last_time);
4610                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4611                                 enq_sw_last_time);
4612                 time_st->enq_sw_total_time += enq_sw_last_time;
4613
4614                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4615                                 stats.acc_offload_cycles);
4616                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4617                                 stats.acc_offload_cycles);
4618                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
4619
4620                 /* give time for device to process ops */
4621                 rte_delay_us(WAIT_OFFLOAD_US);
4622
4623                 /* Start time meas for dequeue function offload latency */
4624                 deq_start_time = rte_rdtsc_precise();
4625                 /* Dequeue the first operation(s) to measure dequeue latency */
4626                 do {
4627                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4628                                         &ops_deq[deq], enq);
4629                 } while (unlikely(deq == 0));
4630
4631                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
4632                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4633                                 deq_last_time);
4634                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4635                                 deq_last_time);
4636                 time_st->deq_total_time += deq_last_time;
4637
4638                 while (burst_sz != deq)
4639                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
4640                                         &ops_deq[deq], burst_sz - deq);
4641
4642                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4643                 dequeued += deq;
4644         }
4645
4646         return i;
4647 }
4648
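/* Measure offload cost for the LDPC encoder: CPU enqueue time (excluding
 * accelerator cycles), accelerator offload cycles reported by the driver,
 * and the time to dequeue the first operation(s).
 */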
4649 static int
4650 offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
4651                 struct test_buffers *bufs,
4652                 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
4653                 uint16_t queue_id, const uint16_t num_to_process,
4654                 uint16_t burst_sz, struct test_time_stats *time_st)
4655 {
4656         int i, dequeued, ret;
4657         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
4658         uint64_t enq_start_time, deq_start_time;
4659         uint64_t enq_sw_last_time, deq_last_time;
4660         struct rte_bbdev_stats stats;
4661
4662         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
4663                 uint16_t enq = 0, deq = 0;
4664
4665                 if (unlikely(num_to_process - dequeued < burst_sz))
4666                         burst_sz = num_to_process - dequeued;
4667
4668                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
4669                 TEST_ASSERT_SUCCESS(ret,
4670                                 "rte_bbdev_enc_op_alloc_bulk() failed");
4671                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
4672                         copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
4673                                         bufs->inputs,
4674                                         bufs->hard_outputs,
4675                                         ref_op);
4676
4677                 /* Start time meas for enqueue function offload latency */
4678                 enq_start_time = rte_rdtsc_precise();
4679                 do {
4680                         enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
4681                                         &ops_enq[enq], burst_sz - enq);
4682                 } while (unlikely(burst_sz != enq));
4683
4684                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
4685                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
4686                 TEST_ASSERT_SUCCESS(ret,
4687                                 "Failed to get stats for queue (%u) of device (%u)",
4688                                 queue_id, dev_id);
4689
4690                 enq_sw_last_time -= stats.acc_offload_cycles;
4691                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
4692                                 enq_sw_last_time);
4693                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
4694                                 enq_sw_last_time);
4695                 time_st->enq_sw_total_time += enq_sw_last_time;
4696
4697                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
4698                                 stats.acc_offload_cycles);
4699                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
4700                                 stats.acc_offload_cycles);
4701                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
4702
4703                 /* give time for device to process ops */
4704                 rte_delay_us(WAIT_OFFLOAD_US);
4705
4706                 /* Start time meas for dequeue function offload latency */
4707                 deq_start_time = rte_rdtsc_precise();
4708                 /* Dequeue the first operation(s) to measure dequeue latency */
4709                 do {
4710                         deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4711                                         &ops_deq[deq], enq);
4712                 } while (unlikely(deq == 0));
4713
4714                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
4715                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
4716                                 deq_last_time);
4717                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
4718                                 deq_last_time);
4719                 time_st->deq_total_time += deq_last_time;
4720
4721                 while (burst_sz != deq)
4722                         deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
4723                                         &ops_deq[deq], burst_sz - deq);
4724
4725                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
4726                 dequeued += deq;
4727         }
4728
4729         return i;
4730 }
4731 #endif
4732
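/* Offload cost test: dispatch to the per-op-type measurement routine, print
 * the enqueue (CPU and accelerator) and dequeue offload latencies, then
 * sanity-check the queue statistics.
 */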
4733 static int
4734 offload_cost_test(struct active_device *ad,
4735                 struct test_op_params *op_params)
4736 {
4737 #ifndef RTE_BBDEV_OFFLOAD_COST
4738         RTE_SET_USED(ad);
4739         RTE_SET_USED(op_params);
4740         printf("Offload latency test is disabled.\n");
4741         printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4742         return TEST_SKIPPED;
4743 #else
4744         int iter;
4745         uint16_t burst_sz = op_params->burst_sz;
4746         const uint16_t num_to_process = op_params->num_to_process;
4747         const enum rte_bbdev_op_type op_type = test_vector.op_type;
4748         const uint16_t queue_id = ad->queue_ids[0];
4749         struct test_buffers *bufs = NULL;
4750         struct rte_bbdev_info info;
4751         const char *op_type_str;
4752         struct test_time_stats time_st;
4753
4754         memset(&time_st, 0, sizeof(struct test_time_stats));
4755         time_st.enq_sw_min_time = UINT64_MAX;
4756         time_st.enq_acc_min_time = UINT64_MAX;
4757         time_st.deq_min_time = UINT64_MAX;
4758
4759         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4760                         "BURST_SIZE should be <= %u", MAX_BURST);
4761
4762         rte_bbdev_info_get(ad->dev_id, &info);
4763         bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
4764
4765         op_type_str = rte_bbdev_op_type_str(op_type);
4766         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4767
4768         printf("+ ------------------------------------------------------- +\n");
4769         printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4770                         info.dev_name, burst_sz, num_to_process, op_type_str);
4771
4772         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
4773                 iter = offload_latency_test_dec(op_params->mp, bufs,
4774                                 op_params->ref_dec_op, ad->dev_id, queue_id,
4775                                 num_to_process, burst_sz, &time_st);
4776         else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
4777                 iter = offload_latency_test_enc(op_params->mp, bufs,
4778                                 op_params->ref_enc_op, ad->dev_id, queue_id,
4779                                 num_to_process, burst_sz, &time_st);
4780         else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4781                 iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
4782                                 op_params->ref_enc_op, ad->dev_id, queue_id,
4783                                 num_to_process, burst_sz, &time_st);
4784         else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4785                 iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
4786                         op_params->ref_dec_op, ad->dev_id, queue_id,
4787                         num_to_process, burst_sz, &time_st);
4788         else
4789                 iter = offload_latency_test_enc(op_params->mp, bufs,
4790                                 op_params->ref_enc_op, ad->dev_id, queue_id,
4791                                 num_to_process, burst_sz, &time_st);
4792
4793         if (iter <= 0)
4794                 return TEST_FAILED;
4795
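        /* Cycle counts are converted to microseconds using the TSC frequency */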
4796         printf("Enqueue driver offload cost latency:\n"
4797                         "\tavg: %lg cycles, %lg us\n"
4798                         "\tmin: %lg cycles, %lg us\n"
4799                         "\tmax: %lg cycles, %lg us\n"
4800                         "Enqueue accelerator offload cost latency:\n"
4801                         "\tavg: %lg cycles, %lg us\n"
4802                         "\tmin: %lg cycles, %lg us\n"
4803                         "\tmax: %lg cycles, %lg us\n",
4804                         (double)time_st.enq_sw_total_time / (double)iter,
4805                         (double)(time_st.enq_sw_total_time * 1000000) /
4806                         (double)iter / (double)rte_get_tsc_hz(),
4807                         (double)time_st.enq_sw_min_time,
4808                         (double)(time_st.enq_sw_min_time * 1000000) /
4809                         rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
4810                         (double)(time_st.enq_sw_max_time * 1000000) /
4811                         rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
4812                         (double)iter,
4813                         (double)(time_st.enq_acc_total_time * 1000000) /
4814                         (double)iter / (double)rte_get_tsc_hz(),
4815                         (double)time_st.enq_acc_min_time,
4816                         (double)(time_st.enq_acc_min_time * 1000000) /
4817                         rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
4818                         (double)(time_st.enq_acc_max_time * 1000000) /
4819                         rte_get_tsc_hz());
4820
4821         printf("Dequeue offload cost latency - one op:\n"
4822                         "\tavg: %lg cycles, %lg us\n"
4823                         "\tmin: %lg cycles, %lg us\n"
4824                         "\tmax: %lg cycles, %lg us\n",
4825                         (double)time_st.deq_total_time / (double)iter,
4826                         (double)(time_st.deq_total_time * 1000000) /
4827                         (double)iter / (double)rte_get_tsc_hz(),
4828                         (double)time_st.deq_min_time,
4829                         (double)(time_st.deq_min_time * 1000000) /
4830                         rte_get_tsc_hz(), (double)time_st.deq_max_time,
4831                         (double)(time_st.deq_max_time * 1000000) /
4832                         rte_get_tsc_hz());
4833
4834         struct rte_bbdev_stats stats = {0};
4835         get_bbdev_queue_stats(ad->dev_id, queue_id, &stats);
4836         if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
4837                 TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
4838                                 "Mismatch in enqueue count %10"PRIu64" %d",
4839                                 stats.enqueued_count, num_to_process);
4840                 TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
4841                                 "Mismatch in dequeue count %10"PRIu64" %d",
4842                                 stats.dequeued_count, num_to_process);
4843         }
4844         TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
4845                         "Enqueue error count %10"PRIu64"",
4846                         stats.enqueue_err_count);
4847         TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
4848                         "Dequeue error count %10"PRIu64"",
4849                         stats.dequeue_err_count);
4850
4851         return TEST_SUCCESS;
4852 #endif
4853 }
4854
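/*
 * Illustrative sketch only (not part of the original file): the latency
 * reports above and below convert raw TSC cycle counts to microseconds as
 * us = cycles * 1e6 / rte_get_tsc_hz(). A minimal helper expressing that
 * conversion could look as follows; the function name is hypothetical.
 */
static inline double
tsc_cycles_to_us(uint64_t cycles)
{
        /* Scale to microseconds using the calibrated TSC frequency. */
        return (double)cycles * 1000000.0 / (double)rte_get_tsc_hz();
}
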
4855 #ifdef RTE_BBDEV_OFFLOAD_COST
4856 static int
4857 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
4858                 const uint16_t num_to_process, uint16_t burst_sz,
4859                 uint64_t *deq_total_time, uint64_t *deq_min_time,
4860                 uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4861 {
4862         int i, deq_total;
4863         struct rte_bbdev_dec_op *ops[MAX_BURST];
4864         uint64_t deq_start_time, deq_last_time;
4865
4866         /* Test deq offload latency from an empty queue */
4867
4868         for (i = 0, deq_total = 0; deq_total < num_to_process;
4869                         ++i, deq_total += burst_sz) {
4870                 deq_start_time = rte_rdtsc_precise();
4871
4872                 if (unlikely(num_to_process - deq_total < burst_sz))
4873                         burst_sz = num_to_process - deq_total;
4874                 if (op_type == RTE_BBDEV_OP_LDPC_DEC)
4875                         rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
4876                                         burst_sz);
4877                 else
4878                         rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
4879                                         burst_sz);
4880
4881                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
4882                 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4883                 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4884                 *deq_total_time += deq_last_time;
4885         }
4886
4887         return i;
4888 }
4889
4890 static int
4891 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
4892                 const uint16_t num_to_process, uint16_t burst_sz,
4893                 uint64_t *deq_total_time, uint64_t *deq_min_time,
4894                 uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
4895 {
4896         int i, deq_total;
4897         struct rte_bbdev_enc_op *ops[MAX_BURST];
4898         uint64_t deq_start_time, deq_last_time;
4899
4900         /* Test deq offload latency from an empty queue */
4901         for (i = 0, deq_total = 0; deq_total < num_to_process;
4902                         ++i, deq_total += burst_sz) {
4903                 deq_start_time = rte_rdtsc_precise();
4904
4905                 if (unlikely(num_to_process - deq_total < burst_sz))
4906                         burst_sz = num_to_process - deq_total;
4907                 if (op_type == RTE_BBDEV_OP_LDPC_ENC)
4908                         rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
4909                                         burst_sz);
4910                 else
4911                         rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
4912                                         burst_sz);
4913
4914                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
4915                 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
4916                 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
4917                 *deq_total_time += deq_last_time;
4918         }
4919
4920         return i;
4921 }
4922
4923 #endif
4924
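/*
 * Illustrative sketch only (not part of the original file): the two
 * empty-queue helpers above share the same bookkeeping - time one dequeue
 * burst with rte_rdtsc_precise() and fold the measurement into the
 * total/min/max counters used for the report. Factored out, that shared
 * step could look as follows; the helper name is hypothetical.
 */
static inline void
empty_q_record_burst(uint64_t burst_cycles, uint64_t *total_time,
                uint64_t *min_time, uint64_t *max_time)
{
        /* Accumulate for the average reported by the caller. */
        *total_time += burst_cycles;
        /* Track the fastest and slowest individual burst. */
        *min_time = RTE_MIN(*min_time, burst_cycles);
        *max_time = RTE_MAX(*max_time, burst_cycles);
}
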
4925 static int
4926 offload_latency_empty_q_test(struct active_device *ad,
4927                 struct test_op_params *op_params)
4928 {
4929 #ifndef RTE_BBDEV_OFFLOAD_COST
4930         RTE_SET_USED(ad);
4931         RTE_SET_USED(op_params);
4932         printf("Offload latency empty dequeue test is disabled.\n");
4933         printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
4934         return TEST_SKIPPED;
4935 #else
4936         int iter;
4937         uint64_t deq_total_time, deq_min_time, deq_max_time;
4938         uint16_t burst_sz = op_params->burst_sz;
4939         const uint16_t num_to_process = op_params->num_to_process;
4940         const enum rte_bbdev_op_type op_type = test_vector.op_type;
4941         const uint16_t queue_id = ad->queue_ids[0];
4942         struct rte_bbdev_info info;
4943         const char *op_type_str;
4944
4945         deq_total_time = deq_max_time = 0;
4946         deq_min_time = UINT64_MAX;
4947
4948         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
4949                         "BURST_SIZE should be <= %u", MAX_BURST);
4950
4951         rte_bbdev_info_get(ad->dev_id, &info);
4952
4953         op_type_str = rte_bbdev_op_type_str(op_type);
4954         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
4955
4956         printf("+ ------------------------------------------------------- +\n");
4957         printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
4958                         info.dev_name, burst_sz, num_to_process, op_type_str);
4959
4960         if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
4961                         op_type == RTE_BBDEV_OP_LDPC_DEC)
4962                 iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
4963                                 num_to_process, burst_sz, &deq_total_time,
4964                                 &deq_min_time, &deq_max_time, op_type);
4965         else
4966                 iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
4967                                 num_to_process, burst_sz, &deq_total_time,
4968                                 &deq_min_time, &deq_max_time, op_type);
4969
4970         if (iter <= 0)
4971                 return TEST_FAILED;
4972
4973         printf("Empty dequeue offload:\n"
4974                         "\tavg: %lg cycles, %lg us\n"
4975                         "\tmin: %lg cycles, %lg us\n"
4976                         "\tmax: %lg cycles, %lg us\n",
4977                         (double)deq_total_time / (double)iter,
4978                         (double)(deq_total_time * 1000000) / (double)iter /
4979                         (double)rte_get_tsc_hz(), (double)deq_min_time,
4980                         (double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
4981                         (double)deq_max_time, (double)(deq_max_time * 1000000) /
4982                         rte_get_tsc_hz());
4983
4984         return TEST_SUCCESS;
4985 #endif
4986 }
4987
4988 static int
4989 bler_tc(void)
4990 {
4991         return run_test_case(bler_test);
4992 }
4993
4994 static int
4995 throughput_tc(void)
4996 {
4997         return run_test_case(throughput_test);
4998 }
4999
5000 static int
5001 offload_cost_tc(void)
5002 {
5003         return run_test_case(offload_cost_test);
5004 }
5005
5006 static int
5007 offload_latency_empty_q_tc(void)
5008 {
5009         return run_test_case(offload_latency_empty_q_test);
5010 }
5011
5012 static int
5013 latency_tc(void)
5014 {
5015         return run_test_case(latency_test);
5016 }
5017
5018 static int
5019 validation_tc(void)
5020 {
5021         return run_test_case(validation_test);
5022 }
5023
5024 static int
5025 interrupt_tc(void)
5026 {
5027         return run_test_case(throughput_test);
5028 }
5029
5030 static struct unit_test_suite bbdev_bler_testsuite = {
5031         .suite_name = "BBdev BLER Tests",
5032         .setup = testsuite_setup,
5033         .teardown = testsuite_teardown,
5034         .unit_test_cases = {
5035                 TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
5036                 TEST_CASES_END() /**< NULL terminate unit test array */
5037         }
5038 };
5039
5040 static struct unit_test_suite bbdev_throughput_testsuite = {
5041         .suite_name = "BBdev Throughput Tests",
5042         .setup = testsuite_setup,
5043         .teardown = testsuite_teardown,
5044         .unit_test_cases = {
5045                 TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
5046                 TEST_CASES_END() /**< NULL terminate unit test array */
5047         }
5048 };
5049
5050 static struct unit_test_suite bbdev_validation_testsuite = {
5051         .suite_name = "BBdev Validation Tests",
5052         .setup = testsuite_setup,
5053         .teardown = testsuite_teardown,
5054         .unit_test_cases = {
5055                 TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
5056                 TEST_CASES_END() /**< NULL terminate unit test array */
5057         }
5058 };
5059
5060 static struct unit_test_suite bbdev_latency_testsuite = {
5061         .suite_name = "BBdev Latency Tests",
5062         .setup = testsuite_setup,
5063         .teardown = testsuite_teardown,
5064         .unit_test_cases = {
5065                 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
5066                 TEST_CASES_END() /**< NULL terminate unit test array */
5067         }
5068 };
5069
5070 static struct unit_test_suite bbdev_offload_cost_testsuite = {
5071         .suite_name = "BBdev Offload Cost Tests",
5072         .setup = testsuite_setup,
5073         .teardown = testsuite_teardown,
5074         .unit_test_cases = {
5075                 TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
5076                 TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
5077                 TEST_CASES_END() /**< NULL terminate unit test array */
5078         }
5079 };
5080
5081 static struct unit_test_suite bbdev_interrupt_testsuite = {
5082         .suite_name = "BBdev Interrupt Tests",
5083         .setup = interrupt_testsuite_setup,
5084         .teardown = testsuite_teardown,
5085         .unit_test_cases = {
5086                 TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
5087                 TEST_CASES_END() /**< NULL terminate unit test array */
5088         }
5089 };
5090
5091 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
5092 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
5093 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
5094 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
5095 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
5096 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
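
/*
 * Illustrative sketch only (not part of the original file): a new perf test
 * would follow the same registration pattern as the suites above - wrap the
 * test body with run_test_case(), list it with TEST_CASE_ST() and expose it
 * through REGISTER_TEST_COMMAND(). The names "example_test", "example_tc"
 * and "bbdev_example_testsuite" below are hypothetical.
 *
 * static int
 * example_tc(void)
 * {
 *         return run_test_case(example_test);
 * }
 *
 * static struct unit_test_suite bbdev_example_testsuite = {
 *         .suite_name = "BBdev Example Tests",
 *         .setup = testsuite_setup,
 *         .teardown = testsuite_teardown,
 *         .unit_test_cases = {
 *                 TEST_CASE_ST(ut_setup, ut_teardown, example_tc),
 *                 TEST_CASES_END()
 *         }
 * };
 *
 * REGISTER_TEST_COMMAND(example, bbdev_example_testsuite);
 */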