bbdev: enhance throughput test
[dpdk.git] app/test-bbdev/test_bbdev_perf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19
20 #include "main.h"
21 #include "test_bbdev_vector.h"
22
23 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
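/* Map SOCKET_ID_ANY to socket 0 so the result can be used to index the
 * per-socket buffer arrays (e.g. op_params->q_bufs).
 */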
24
25 #define MAX_QUEUES RTE_MAX_LCORE
26 #define TEST_REPETITIONS 1000
27
28 #define OPS_CACHE_SIZE 256U
29 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
30
31 #define SYNC_WAIT 0
32 #define SYNC_START 1
33
34 #define INVALID_QUEUE_ID -1
35
36 static struct test_bbdev_vector test_vector;
37
38 /* Switch between PMD and Interrupt for throughput TC */
39 static bool intr_enabled;
40
41 /* Represents tested active devices */
42 static struct active_device {
43         const char *driver_name;
44         uint8_t dev_id;
45         uint16_t supported_ops;
46         uint16_t queue_ids[MAX_QUEUES];
47         uint16_t nb_queues;
48         struct rte_mempool *ops_mempool;
49         struct rte_mempool *in_mbuf_pool;
50         struct rte_mempool *hard_out_mbuf_pool;
51         struct rte_mempool *soft_out_mbuf_pool;
52 } active_devs[RTE_BBDEV_MAX_DEVS];
53
54 static uint8_t nb_active_devs;
55
56 /* Data buffers used by BBDEV ops */
57 struct test_buffers {
58         struct rte_bbdev_op_data *inputs;
59         struct rte_bbdev_op_data *hard_outputs;
60         struct rte_bbdev_op_data *soft_outputs;
61 };
62
63 /* Operation parameters specific for given test case */
64 struct test_op_params {
65         struct rte_mempool *mp;
66         struct rte_bbdev_dec_op *ref_dec_op;
67         struct rte_bbdev_enc_op *ref_enc_op;
68         uint16_t burst_sz;
69         uint16_t num_to_process;
70         uint16_t num_lcores;
71         int vector_mask;
72         rte_atomic16_t sync;
73         struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
74 };
75
76 /* Contains per lcore params */
77 struct thread_params {
78         uint8_t dev_id;
79         uint16_t queue_id;
80         uint64_t start_time;
81         double ops_per_sec;
82         double mbps;
83         uint8_t iter_count;
84         rte_atomic16_t nb_dequeued;
85         rte_atomic16_t processing_status;
86         struct test_op_params *op_params;
87 };
88
89 #ifdef RTE_BBDEV_OFFLOAD_COST
90 /* Stores time statistics */
91 struct test_time_stats {
92         /* Stores software enqueue total working time */
93         uint64_t enq_sw_total_time;
94         /* Stores minimum value of software enqueue working time */
95         uint64_t enq_sw_min_time;
96         /* Stores maximum value of software enqueue working time */
97         uint64_t enq_sw_max_time;
98         /* Stores accelerator enqueue total working time */
99         uint64_t enq_acc_total_time;
100         /* Stores minimum value of accelerator enqueue working time */
101         uint64_t enq_acc_min_time;
102         /* Stores maximum value of accelerator enqueue working time */
103         uint64_t enq_acc_max_time;
104         /* Stores dequeue total working time */
105         uint64_t deq_total_time;
106         /* Stores minimum value of dequeue working time */
107         uint64_t deq_min_time;
108         /* Stores maximum value of dequeue working time */
109         uint64_t deq_max_time;
110 };
111 #endif
112
113 typedef int (test_case_function)(struct active_device *ad,
114                 struct test_op_params *op_params);
115
116 static inline void
117 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
118 {
119         ad->supported_ops |= (1 << op_type);
120 }
121
122 static inline bool
123 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
124 {
125         return ad->supported_ops & (1 << op_type);
126 }
127
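/* Check that every op flag requested by the test vector is advertised
 * by the device capabilities.
 */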
128 static inline bool
129 flags_match(uint32_t flags_req, uint32_t flags_present)
130 {
131         return (flags_req & flags_present) == flags_req;
132 }
133
134 static void
135 clear_soft_out_cap(uint32_t *op_flags)
136 {
137         *op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
138         *op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
139         *op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
140 }
141
142 static int
143 check_dev_cap(const struct rte_bbdev_info *dev_info)
144 {
145         unsigned int i;
146         unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs;
147         const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
148
149         nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
150         nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
151         nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
152
153         for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
154                 if (op_cap->type != test_vector.op_type)
155                         continue;
156
157                 if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
158                         const struct rte_bbdev_op_cap_turbo_dec *cap =
159                                         &op_cap->cap.turbo_dec;
160                         /* Ignore lack of soft output capability, just skip
161                          * checking if soft output is valid.
162                          */
163                         if ((test_vector.turbo_dec.op_flags &
164                                         RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
165                                         !(cap->capability_flags &
166                                         RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
167                                 printf(
168                                         "WARNING: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
169                                         dev_info->dev_name);
170                                 clear_soft_out_cap(
171                                         &test_vector.turbo_dec.op_flags);
172                         }
173
174                         if (!flags_match(test_vector.turbo_dec.op_flags,
175                                         cap->capability_flags))
176                                 return TEST_FAILED;
177                         if (nb_inputs > cap->num_buffers_src) {
178                                 printf("Too many inputs defined: %u, max: %u\n",
179                                         nb_inputs, cap->num_buffers_src);
180                                 return TEST_FAILED;
181                         }
182                         if (nb_soft_outputs > cap->num_buffers_soft_out &&
183                                         (test_vector.turbo_dec.op_flags &
184                                         RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
185                                 printf(
186                                         "Too many soft outputs defined: %u, max: %u\n",
187                                                 nb_soft_outputs,
188                                                 cap->num_buffers_soft_out);
189                                 return TEST_FAILED;
190                         }
191                         if (nb_hard_outputs > cap->num_buffers_hard_out) {
192                                 printf(
193                                         "Too many hard outputs defined: %u, max: %u\n",
194                                                 nb_hard_outputs,
195                                                 cap->num_buffers_hard_out);
196                                 return TEST_FAILED;
197                         }
198                         if (intr_enabled && !(cap->capability_flags &
199                                         RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
200                                 printf(
201                                         "Dequeue interrupts are not supported!\n");
202                                 return TEST_FAILED;
203                         }
204
205                         return TEST_SUCCESS;
206                 } else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
207                         const struct rte_bbdev_op_cap_turbo_enc *cap =
208                                         &op_cap->cap.turbo_enc;
209
210                         if (!flags_match(test_vector.turbo_enc.op_flags,
211                                         cap->capability_flags))
212                                 return TEST_FAILED;
213                         if (nb_inputs > cap->num_buffers_src) {
214                                 printf("Too many inputs defined: %u, max: %u\n",
215                                         nb_inputs, cap->num_buffers_src);
216                                 return TEST_FAILED;
217                         }
218                         if (nb_hard_outputs > cap->num_buffers_dst) {
219                                 printf(
220                                         "Too many hard outputs defined: %u, max: %u\n",
221                                         nb_hard_outputs, cap->num_buffers_dst);
222                                 return TEST_FAILED;
223                         }
224                         if (intr_enabled && !(cap->capability_flags &
225                                         RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
226                                 printf(
227                                         "Dequeue interrupts are not supported!\n");
228                                 return TEST_FAILED;
229                         }
230
231                         return TEST_SUCCESS;
232                 }
233         }
234
235         if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
236                 return TEST_SUCCESS; /* Special case for NULL device */
237
238         return TEST_FAILED;
239 }
240
241 /* Calculates the optimal (2^q - 1) mempool size not smaller than val */
242 static unsigned int
243 optimal_mempool_size(unsigned int val)
244 {
245         return rte_align32pow2(val + 1) - 1;
246 }
247
248 /* allocates mbuf mempool for inputs and outputs */
249 static struct rte_mempool *
250 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
251                 int socket_id, unsigned int mbuf_pool_size,
252                 const char *op_type_str)
253 {
254         unsigned int i;
255         uint32_t max_seg_sz = 0;
256         char pool_name[RTE_MEMPOOL_NAMESIZE];
257
258         /* find max input segment size */
259         for (i = 0; i < entries->nb_segments; ++i)
260                 if (entries->segments[i].length > max_seg_sz)
261                         max_seg_sz = entries->segments[i].length;
262
263         snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
264                         dev_id);
265         return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
266                         RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM,
267                         (unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
268 }
269
270 static int
271 create_mempools(struct active_device *ad, int socket_id,
272                 enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
273 {
274         struct rte_mempool *mp;
275         unsigned int ops_pool_size, mbuf_pool_size = 0;
276         char pool_name[RTE_MEMPOOL_NAMESIZE];
277         const char *op_type_str;
278         enum rte_bbdev_op_type op_type = org_op_type;
279
280         struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
281         struct op_data_entries *hard_out =
282                         &test_vector.entries[DATA_HARD_OUTPUT];
283         struct op_data_entries *soft_out =
284                         &test_vector.entries[DATA_SOFT_OUTPUT];
285
286         /* allocate ops mempool */
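        /* The pool must cover all queues' in-flight ops as well as the
         * per-lcore caches, since objects held in an lcore cache are not
         * available to other lcores; the cache share is over-provisioned
         * by a factor of 1.5.
         */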
287         ops_pool_size = optimal_mempool_size(RTE_MAX(
288                         /* Ops used plus 1 reference op */
289                         RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
290                         /* Minimal cache size plus 1 reference op */
291                         (unsigned int)(1.5 * rte_lcore_count() *
292                                         OPS_CACHE_SIZE + 1)),
293                         OPS_POOL_SIZE_MIN));
294
295         if (org_op_type == RTE_BBDEV_OP_NONE)
296                 op_type = RTE_BBDEV_OP_TURBO_ENC;
297
298         op_type_str = rte_bbdev_op_type_str(op_type);
299         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
300
301         snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
302                         ad->dev_id);
303         mp = rte_bbdev_op_pool_create(pool_name, op_type,
304                         ops_pool_size, OPS_CACHE_SIZE, socket_id);
305         TEST_ASSERT_NOT_NULL(mp,
306                         "ERROR Failed to create %u items ops pool for dev %u on socket %u.",
307                         ops_pool_size,
308                         ad->dev_id,
309                         socket_id);
310         ad->ops_mempool = mp;
311
312         /* Do not create input and output mbufs for the BaseBand Null Device */
313         if (org_op_type == RTE_BBDEV_OP_NONE)
314                 return TEST_SUCCESS;
315
316         /* Inputs */
317         mbuf_pool_size = optimal_mempool_size(ops_pool_size * in->nb_segments);
318         mp = create_mbuf_pool(in, ad->dev_id, socket_id, mbuf_pool_size, "in");
319         TEST_ASSERT_NOT_NULL(mp,
320                         "ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
321                         mbuf_pool_size,
322                         ad->dev_id,
323                         socket_id);
324         ad->in_mbuf_pool = mp;
325
326         /* Hard outputs */
327         mbuf_pool_size = optimal_mempool_size(ops_pool_size *
328                         hard_out->nb_segments);
329         mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id, mbuf_pool_size,
330                         "hard_out");
331         TEST_ASSERT_NOT_NULL(mp,
332                         "ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
333                         mbuf_pool_size,
334                         ad->dev_id,
335                         socket_id);
336         ad->hard_out_mbuf_pool = mp;
337
338         if (soft_out->nb_segments == 0)
339                 return TEST_SUCCESS;
340
341         /* Soft outputs */
342         mbuf_pool_size = optimal_mempool_size(ops_pool_size *
343                         soft_out->nb_segments);
344         mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id, mbuf_pool_size,
345                         "soft_out");
346         TEST_ASSERT_NOT_NULL(mp,
347                         "ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
348                         mbuf_pool_size,
349                         ad->dev_id,
350                         socket_id);
351         ad->soft_out_mbuf_pool = mp;
352
353         return 0;
354 }
355
356 static int
357 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
358                 struct test_bbdev_vector *vector)
359 {
360         int ret;
361         unsigned int queue_id;
362         struct rte_bbdev_queue_conf qconf;
363         struct active_device *ad = &active_devs[nb_active_devs];
364         unsigned int nb_queues;
365         enum rte_bbdev_op_type op_type = vector->op_type;
366
367         nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
368         /* setup device */
369         ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
370         if (ret < 0) {
371                 printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
372                                 dev_id, nb_queues, info->socket_id, ret);
373                 return TEST_FAILED;
374         }
375
376         /* configure interrupts if needed */
377         if (intr_enabled) {
378                 ret = rte_bbdev_intr_enable(dev_id);
379                 if (ret < 0) {
380                         printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
381                                         ret);
382                         return TEST_FAILED;
383                 }
384         }
385
386         /* setup device queues */
387         qconf.socket = info->socket_id;
388         qconf.queue_size = info->drv.default_queue_conf.queue_size;
389         qconf.priority = 0;
390         qconf.deferred_start = 0;
391         qconf.op_type = op_type;
392
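        /* Configure one queue per lcore; if configuration fails at the
         * current priority, retry the same queue once at the next priority
         * level before giving up.
         */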
393         for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
394                 ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
395                 if (ret != 0) {
396                         printf(
397                                         "Allocated all queues (id=%u) at prio%u on dev%u\n",
398                                         queue_id, qconf.priority, dev_id);
399                         qconf.priority++;
400                         ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
401                                         &qconf);
402                 }
403                 if (ret != 0) {
404                         printf("All queues on dev %u allocated: %u\n",
405                                         dev_id, queue_id);
406                         break;
407                 }
408                 ad->queue_ids[queue_id] = queue_id;
409         }
410         TEST_ASSERT(queue_id != 0,
411                         "ERROR Failed to configure any queues on dev %u",
412                         dev_id);
413         ad->nb_queues = queue_id;
414
415         set_avail_op(ad, op_type);
416
417         return TEST_SUCCESS;
418 }
419
420 static int
421 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
422                 struct test_bbdev_vector *vector)
423 {
424         int ret;
425
426         active_devs[nb_active_devs].driver_name = info->drv.driver_name;
427         active_devs[nb_active_devs].dev_id = dev_id;
428
429         ret = add_bbdev_dev(dev_id, info, vector);
430         if (ret == TEST_SUCCESS)
431                 ++nb_active_devs;
432         return ret;
433 }
434
435 static uint8_t
436 populate_active_devices(void)
437 {
438         int ret;
439         uint8_t dev_id;
440         uint8_t nb_devs_added = 0;
441         struct rte_bbdev_info info;
442
443         RTE_BBDEV_FOREACH(dev_id) {
444                 rte_bbdev_info_get(dev_id, &info);
445
446                 if (check_dev_cap(&info)) {
447                         printf(
448                                 "Device %d (%s) does not support specified capabilities\n",
449                                         dev_id, info.dev_name);
450                         continue;
451                 }
452
453                 ret = add_active_device(dev_id, &info, &test_vector);
454                 if (ret != 0) {
455                         printf("Adding active bbdev %s skipped\n",
456                                         info.dev_name);
457                         continue;
458                 }
459                 nb_devs_added++;
460         }
461
462         return nb_devs_added;
463 }
464
465 static int
466 read_test_vector(void)
467 {
468         int ret;
469
470         memset(&test_vector, 0, sizeof(test_vector));
471         printf("Test vector file = %s\n", get_vector_filename());
472         ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
473         TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
474                         get_vector_filename());
475
476         return TEST_SUCCESS;
477 }
478
479 static int
480 testsuite_setup(void)
481 {
482         TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
483
484         if (populate_active_devices() == 0) {
485                 printf("No suitable devices found!\n");
486                 return TEST_SKIPPED;
487         }
488
489         return TEST_SUCCESS;
490 }
491
492 static int
493 interrupt_testsuite_setup(void)
494 {
495         TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
496
497         /* Enable interrupts */
498         intr_enabled = true;
499
500         /* Special case for NULL device (RTE_BBDEV_OP_NONE) */
501         if (populate_active_devices() == 0 ||
502                         test_vector.op_type == RTE_BBDEV_OP_NONE) {
503                 intr_enabled = false;
504                 printf("No suitable devices found!\n");
505                 return TEST_SKIPPED;
506         }
507
508         return TEST_SUCCESS;
509 }
510
511 static void
512 testsuite_teardown(void)
513 {
514         uint8_t dev_id;
515
516         /* Unconfigure devices */
517         RTE_BBDEV_FOREACH(dev_id)
518                 rte_bbdev_close(dev_id);
519
520         /* Clear active devices structs. */
521         memset(active_devs, 0, sizeof(active_devs));
522         nb_active_devs = 0;
523 }
524
525 static int
526 ut_setup(void)
527 {
528         uint8_t i, dev_id;
529
530         for (i = 0; i < nb_active_devs; i++) {
531                 dev_id = active_devs[i].dev_id;
532                 /* reset bbdev stats */
533                 TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
534                                 "Failed to reset stats of bbdev %u", dev_id);
535                 /* start the device */
536                 TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
537                                 "Failed to start bbdev %u", dev_id);
538         }
539
540         return TEST_SUCCESS;
541 }
542
543 static void
544 ut_teardown(void)
545 {
546         uint8_t i, dev_id;
547         struct rte_bbdev_stats stats;
548
549         for (i = 0; i < nb_active_devs; i++) {
550                 dev_id = active_devs[i].dev_id;
551                 /* read stats and print */
552                 rte_bbdev_stats_get(dev_id, &stats);
553                 /* Stop the device */
554                 rte_bbdev_stop(dev_id);
555         }
556 }
557
558 static int
559 init_op_data_objs(struct rte_bbdev_op_data *bufs,
560                 struct op_data_entries *ref_entries,
561                 struct rte_mempool *mbuf_pool, const uint16_t n,
562                 enum op_data_type op_type, uint16_t min_alignment)
563 {
564         int ret;
565         unsigned int i, j;
566
567         for (i = 0; i < n; ++i) {
568                 char *data;
569                 struct op_data_buf *seg = &ref_entries->segments[0];
570                 struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
571                 TEST_ASSERT_NOT_NULL(m_head,
572                                 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
573                                 op_type, n * ref_entries->nb_segments,
574                                 mbuf_pool->size);
575
576                 bufs[i].data = m_head;
577                 bufs[i].offset = 0;
578                 bufs[i].length = 0;
579
580                 if (op_type == DATA_INPUT) {
581                         data = rte_pktmbuf_append(m_head, seg->length);
582                         TEST_ASSERT_NOT_NULL(data,
583                                         "Couldn't append %u bytes to mbuf from %d data type mbuf pool",
584                                         seg->length, op_type);
585
586                         TEST_ASSERT(data == RTE_PTR_ALIGN(data, min_alignment),
587                                         "Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
588                                         data, min_alignment);
589                         rte_memcpy(data, seg->addr, seg->length);
590                         bufs[i].length += seg->length;
591
592
593                         for (j = 1; j < ref_entries->nb_segments; ++j) {
594                                 struct rte_mbuf *m_tail =
595                                                 rte_pktmbuf_alloc(mbuf_pool);
596                                 TEST_ASSERT_NOT_NULL(m_tail,
597                                                 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
598                                                 op_type,
599                                                 n * ref_entries->nb_segments,
600                                                 mbuf_pool->size);
601                                 seg += 1;
602
603                                 data = rte_pktmbuf_append(m_tail, seg->length);
604                                 TEST_ASSERT_NOT_NULL(data,
605                                                 "Couldn't append %u bytes to mbuf from %d data type mbuf pool",
606                                                 seg->length, op_type);
607
608                                 TEST_ASSERT(data == RTE_PTR_ALIGN(data,
609                                                 min_alignment),
610                                                 "Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
611                                                 data, min_alignment);
612                                 rte_memcpy(data, seg->addr, seg->length);
613                                 bufs[i].length += seg->length;
614
615                                 ret = rte_pktmbuf_chain(m_head, m_tail);
616                                 TEST_ASSERT_SUCCESS(ret,
617                                                 "Couldn't chain mbufs from %d data type mbuf pool",
618                                                 op_type);
619                         }
620                 }
621         }
622
623         return 0;
624 }
625
626 static int
627 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
628                 const int socket)
629 {
630         int i;
631
632         *buffers = rte_zmalloc_socket(NULL, len, 0, socket);
633         if (*buffers == NULL) {
634                 printf("WARNING: Failed to allocate op_data on socket %d\n",
635                                 socket);
636                 /* try to allocate memory on other detected sockets */
637                 for (i = 0; i < socket; i++) {
638                         *buffers = rte_zmalloc_socket(NULL, len, 0, i);
639                         if (*buffers != NULL)
640                                 break;
641                 }
642         }
643
644         return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
645 }
646
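/* Scale the input LLR values from the full int8_t range down to the
 * device's max_llr_modulus so the decoder input matches the driver's LLR
 * representation.
 */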
647 static void
648 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
649                 uint16_t n, int8_t max_llr_modulus)
650 {
651         uint16_t i, byte_idx;
652
653         for (i = 0; i < n; ++i) {
654                 struct rte_mbuf *m = input_ops[i].data;
655                 while (m != NULL) {
656                         int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
657                                         input_ops[i].offset);
658                         for (byte_idx = 0; byte_idx < input_ops[i].length;
659                                         ++byte_idx)
660                                 llr[byte_idx] = round((double)max_llr_modulus *
661                                                 llr[byte_idx] / INT8_MAX);
662
663                         m = m->next;
664                 }
665         }
666 }
667
668 static int
669 fill_queue_buffers(struct test_op_params *op_params,
670                 struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
671                 struct rte_mempool *soft_out_mp, uint16_t queue_id,
672                 const struct rte_bbdev_op_cap *capabilities,
673                 uint16_t min_alignment, const int socket_id)
674 {
675         int ret;
676         enum op_data_type type;
677         const uint16_t n = op_params->num_to_process;
678
679         struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
680                 in_mp,
681                 soft_out_mp,
682                 hard_out_mp,
683         };
684
685         struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
686                 &op_params->q_bufs[socket_id][queue_id].inputs,
687                 &op_params->q_bufs[socket_id][queue_id].soft_outputs,
688                 &op_params->q_bufs[socket_id][queue_id].hard_outputs,
689         };
690
691         for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
692                 struct op_data_entries *ref_entries =
693                                 &test_vector.entries[type];
694                 if (ref_entries->nb_segments == 0)
695                         continue;
696
697                 ret = allocate_buffers_on_socket(queue_ops[type],
698                                 n * sizeof(struct rte_bbdev_op_data),
699                                 socket_id);
700                 TEST_ASSERT_SUCCESS(ret,
701                                 "Couldn't allocate memory for rte_bbdev_op_data structs");
702
703                 ret = init_op_data_objs(*queue_ops[type], ref_entries,
704                                 mbuf_pools[type], n, type, min_alignment);
705                 TEST_ASSERT_SUCCESS(ret,
706                                 "Couldn't init rte_bbdev_op_data structs");
707         }
708
709         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
710                 limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
711                         capabilities->cap.turbo_dec.max_llr_modulus);
712
713         return 0;
714 }
715
716 static void
717 free_buffers(struct active_device *ad, struct test_op_params *op_params)
718 {
719         unsigned int i, j;
720
721         rte_mempool_free(ad->ops_mempool);
722         rte_mempool_free(ad->in_mbuf_pool);
723         rte_mempool_free(ad->hard_out_mbuf_pool);
724         rte_mempool_free(ad->soft_out_mbuf_pool);
725
726         for (i = 0; i < rte_lcore_count(); ++i) {
727                 for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
728                         rte_free(op_params->q_bufs[j][i].inputs);
729                         rte_free(op_params->q_bufs[j][i].hard_outputs);
730                         rte_free(op_params->q_bufs[j][i].soft_outputs);
731                 }
732         }
733 }
734
735 static void
736 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
737                 unsigned int start_idx,
738                 struct rte_bbdev_op_data *inputs,
739                 struct rte_bbdev_op_data *hard_outputs,
740                 struct rte_bbdev_op_data *soft_outputs,
741                 struct rte_bbdev_dec_op *ref_op)
742 {
743         unsigned int i;
744         struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
745
746         for (i = 0; i < n; ++i) {
747                 if (turbo_dec->code_block_mode == 0) {
748                         ops[i]->turbo_dec.tb_params.ea =
749                                         turbo_dec->tb_params.ea;
750                         ops[i]->turbo_dec.tb_params.eb =
751                                         turbo_dec->tb_params.eb;
752                         ops[i]->turbo_dec.tb_params.k_pos =
753                                         turbo_dec->tb_params.k_pos;
754                         ops[i]->turbo_dec.tb_params.k_neg =
755                                         turbo_dec->tb_params.k_neg;
756                         ops[i]->turbo_dec.tb_params.c =
757                                         turbo_dec->tb_params.c;
758                         ops[i]->turbo_dec.tb_params.c_neg =
759                                         turbo_dec->tb_params.c_neg;
760                         ops[i]->turbo_dec.tb_params.cab =
761                                         turbo_dec->tb_params.cab;
762                         ops[i]->turbo_dec.tb_params.r =
763                                         turbo_dec->tb_params.r;
764                 } else {
765                         ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
766                         ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
767                 }
768
769                 ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
770                 ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
771                 ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
772                 ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
773                 ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
774                 ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
775                 ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
776
777                 ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
778                 ops[i]->turbo_dec.input = inputs[start_idx + i];
779                 if (soft_outputs != NULL)
780                         ops[i]->turbo_dec.soft_output =
781                                 soft_outputs[start_idx + i];
782         }
783 }
784
785 static void
786 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
787                 unsigned int start_idx,
788                 struct rte_bbdev_op_data *inputs,
789                 struct rte_bbdev_op_data *outputs,
790                 struct rte_bbdev_enc_op *ref_op)
791 {
792         unsigned int i;
793         struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
794         for (i = 0; i < n; ++i) {
795                 if (turbo_enc->code_block_mode == 0) {
796                         ops[i]->turbo_enc.tb_params.ea =
797                                         turbo_enc->tb_params.ea;
798                         ops[i]->turbo_enc.tb_params.eb =
799                                         turbo_enc->tb_params.eb;
800                         ops[i]->turbo_enc.tb_params.k_pos =
801                                         turbo_enc->tb_params.k_pos;
802                         ops[i]->turbo_enc.tb_params.k_neg =
803                                         turbo_enc->tb_params.k_neg;
804                         ops[i]->turbo_enc.tb_params.c =
805                                         turbo_enc->tb_params.c;
806                         ops[i]->turbo_enc.tb_params.c_neg =
807                                         turbo_enc->tb_params.c_neg;
808                         ops[i]->turbo_enc.tb_params.cab =
809                                         turbo_enc->tb_params.cab;
810                         ops[i]->turbo_enc.tb_params.ncb_pos =
811                                         turbo_enc->tb_params.ncb_pos;
812                         ops[i]->turbo_enc.tb_params.ncb_neg =
813                                         turbo_enc->tb_params.ncb_neg;
814                         ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
815                 } else {
816                         ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
817                         ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
818                         ops[i]->turbo_enc.cb_params.ncb =
819                                         turbo_enc->cb_params.ncb;
820                 }
821                 ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
822                 ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
823                 ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
824
825                 ops[i]->turbo_enc.output = outputs[start_idx + i];
826                 ops[i]->turbo_enc.input = inputs[start_idx + i];
827         }
828 }
829
830 static int
831 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
832                 unsigned int order_idx, const int expected_status)
833 {
834         TEST_ASSERT(op->status == expected_status,
835                         "op_status (%d) != expected_status (%d)",
836                         op->status, expected_status);
837
838         TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
839                         "Ordering error, expected %p, got %p",
840                         (void *)(uintptr_t)order_idx, op->opaque_data);
841
842         return TEST_SUCCESS;
843 }
844
845 static int
846 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
847                 unsigned int order_idx, const int expected_status)
848 {
849         TEST_ASSERT(op->status == expected_status,
850                         "op_status (%d) != expected_status (%d)",
851                         op->status, expected_status);
852
853         TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
854                         "Ordering error, expected %p, got %p",
855                         (void *)(uintptr_t)order_idx, op->opaque_data);
856
857         return TEST_SUCCESS;
858 }
859
860 static inline int
861 validate_op_chain(struct rte_bbdev_op_data *op,
862                 struct op_data_entries *orig_op)
863 {
864         uint8_t i;
865         struct rte_mbuf *m = op->data;
866         uint8_t nb_dst_segments = orig_op->nb_segments;
867
868         TEST_ASSERT(nb_dst_segments == m->nb_segs,
869                         "Number of segments differ in original (%u) and filled (%u) op",
870                         nb_dst_segments, m->nb_segs);
871
872         for (i = 0; i < nb_dst_segments; ++i) {
873                 /* Apply offset to the first mbuf segment */
874                 uint16_t offset = (i == 0) ? op->offset : 0;
875                 uint16_t data_len = m->data_len - offset;
876
877                 TEST_ASSERT(orig_op->segments[i].length == data_len,
878                                 "Length of segment differ in original (%u) and filled (%u) op",
879                                 orig_op->segments[i].length, data_len);
880                 TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
881                                 rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
882                                 data_len,
883                                 "Output buffers (CB=%u) are not equal", i);
884                 m = m->next;
885         }
886
887         return TEST_SUCCESS;
888 }
889
890 static int
891 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
892                 struct rte_bbdev_dec_op *ref_op, const int vector_mask)
893 {
894         unsigned int i;
895         int ret;
896         struct op_data_entries *hard_data_orig =
897                         &test_vector.entries[DATA_HARD_OUTPUT];
898         struct op_data_entries *soft_data_orig =
899                         &test_vector.entries[DATA_SOFT_OUTPUT];
900         struct rte_bbdev_op_turbo_dec *ops_td;
901         struct rte_bbdev_op_data *hard_output;
902         struct rte_bbdev_op_data *soft_output;
903         struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
904
905         for (i = 0; i < n; ++i) {
906                 ops_td = &ops[i]->turbo_dec;
907                 hard_output = &ops_td->hard_output;
908                 soft_output = &ops_td->soft_output;
909
910                 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
911                         TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
912                                         "Returned iter_count (%d) > expected iter_count (%d)",
913                                         ops_td->iter_count, ref_td->iter_count);
914                 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
915                 TEST_ASSERT_SUCCESS(ret,
916                                 "Checking status and ordering for decoder failed");
917
918                 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
919                                 hard_data_orig),
920                                 "Hard output buffers (CB=%u) are not equal",
921                                 i);
922
923                 if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
924                         TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
925                                         soft_data_orig),
926                                         "Soft output buffers (CB=%u) are not equal",
927                                         i);
928         }
929
930         return TEST_SUCCESS;
931 }
932
933 static int
934 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
935                 struct rte_bbdev_enc_op *ref_op)
936 {
937         unsigned int i;
938         int ret;
939         struct op_data_entries *hard_data_orig =
940                         &test_vector.entries[DATA_HARD_OUTPUT];
941
942         for (i = 0; i < n; ++i) {
943                 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
944                 TEST_ASSERT_SUCCESS(ret,
945                                 "Checking status and ordering for encoder failed");
946                 TEST_ASSERT_SUCCESS(validate_op_chain(
947                                 &ops[i]->turbo_enc.output,
948                                 hard_data_orig),
949                                 "Output buffers (CB=%u) are not equal",
950                                 i);
951         }
952
953         return TEST_SUCCESS;
954 }
955
956 static void
957 create_reference_dec_op(struct rte_bbdev_dec_op *op)
958 {
959         unsigned int i;
960         struct op_data_entries *entry;
961
962         op->turbo_dec = test_vector.turbo_dec;
963         entry = &test_vector.entries[DATA_INPUT];
964         for (i = 0; i < entry->nb_segments; ++i)
965                 op->turbo_dec.input.length +=
966                                 entry->segments[i].length;
967 }
968
969 static void
970 create_reference_enc_op(struct rte_bbdev_enc_op *op)
971 {
972         unsigned int i;
973         struct op_data_entries *entry;
974
975         op->turbo_enc = test_vector.turbo_enc;
976         entry = &test_vector.entries[DATA_INPUT];
977         for (i = 0; i < entry->nb_segments; ++i)
978                 op->turbo_enc.input.length +=
979                                 entry->segments[i].length;
980 }
981
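/* Transport block size (in bits) of the decode reference op; used to
 * convert processed ops into Mbps in the throughput calculation.
 */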
982 static uint32_t
983 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
984 {
985         uint8_t i;
986         uint32_t c, r, tb_size = 0;
987
988         if (op->turbo_dec.code_block_mode) {
989                 tb_size = op->turbo_dec.tb_params.k_neg;
990         } else {
991                 c = op->turbo_dec.tb_params.c;
992                 r = op->turbo_dec.tb_params.r;
993                 for (i = 0; i < c-r; i++)
994                         tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
995                                 op->turbo_dec.tb_params.k_neg :
996                                 op->turbo_dec.tb_params.k_pos;
997         }
998         return tb_size;
999 }
1000
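/* Encode counterpart of calc_dec_TB_size(): transport block size in bits. */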
1001 static uint32_t
1002 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
1003 {
1004         uint8_t i;
1005         uint32_t c, r, tb_size = 0;
1006
1007         if (op->turbo_enc.code_block_mode) {
1008                 tb_size = op->turbo_enc.tb_params.k_neg;
1009         } else {
1010                 c = op->turbo_enc.tb_params.c;
1011                 r = op->turbo_enc.tb_params.r;
1012                 for (i = 0; i < c-r; i++)
1013                         tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
1014                                 op->turbo_enc.tb_params.k_neg :
1015                                 op->turbo_enc.tb_params.k_pos;
1016         }
1017         return tb_size;
1018 }
1019
1020 static int
1021 init_test_op_params(struct test_op_params *op_params,
1022                 enum rte_bbdev_op_type op_type, const int expected_status,
1023                 const int vector_mask, struct rte_mempool *ops_mp,
1024                 uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
1025 {
1026         int ret = 0;
1027         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
1028                 ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
1029                                 &op_params->ref_dec_op, 1);
1030         else
1031                 ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
1032                                 &op_params->ref_enc_op, 1);
1033
1034         TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
1035
1036         op_params->mp = ops_mp;
1037         op_params->burst_sz = burst_sz;
1038         op_params->num_to_process = num_to_process;
1039         op_params->num_lcores = num_lcores;
1040         op_params->vector_mask = vector_mask;
1041         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
1042                 op_params->ref_dec_op->status = expected_status;
1043         else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
1044                 op_params->ref_enc_op->status = expected_status;
1045
1046         return 0;
1047 }
1048
1049 static int
1050 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
1051                 struct test_op_params *op_params)
1052 {
1053         int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
1054         unsigned int i;
1055         struct active_device *ad;
1056         unsigned int burst_sz = get_burst_sz();
1057         enum rte_bbdev_op_type op_type = test_vector.op_type;
1058         const struct rte_bbdev_op_cap *capabilities = NULL;
1059
1060         ad = &active_devs[dev_id];
1061
1062         /* Check if device supports op_type */
1063         if (!is_avail_op(ad, test_vector.op_type))
1064                 return TEST_SUCCESS;
1065
1066         struct rte_bbdev_info info;
1067         rte_bbdev_info_get(ad->dev_id, &info);
1068         socket_id = GET_SOCKET(info.socket_id);
1069
1070         f_ret = create_mempools(ad, socket_id, op_type,
1071                         get_num_ops());
1072         if (f_ret != TEST_SUCCESS) {
1073                 printf("Couldn't create mempools");
1074                 goto fail;
1075         }
1076         if (op_type == RTE_BBDEV_OP_NONE)
1077                 op_type = RTE_BBDEV_OP_TURBO_ENC;
1078
1079         f_ret = init_test_op_params(op_params, test_vector.op_type,
1080                         test_vector.expected_status,
1081                         test_vector.mask,
1082                         ad->ops_mempool,
1083                         burst_sz,
1084                         get_num_ops(),
1085                         get_num_lcores());
1086         if (f_ret != TEST_SUCCESS) {
1087                 printf("Couldn't init test op params");
1088                 goto fail;
1089         }
1090
1091         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1092                 /* Find Decoder capabilities */
1093                 const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
1094                 while (cap->type != RTE_BBDEV_OP_NONE) {
1095                         if (cap->type == RTE_BBDEV_OP_TURBO_DEC) {
1096                                 capabilities = cap;
1097                                 break;
1098                         }
                             cap++;
1099                 }
1100                 TEST_ASSERT_NOT_NULL(capabilities,
1101                                 "Couldn't find Decoder capabilities");
1102
1103                 create_reference_dec_op(op_params->ref_dec_op);
1104         } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
1105                 create_reference_enc_op(op_params->ref_enc_op);
1106
1107         for (i = 0; i < ad->nb_queues; ++i) {
1108                 f_ret = fill_queue_buffers(op_params,
1109                                 ad->in_mbuf_pool,
1110                                 ad->hard_out_mbuf_pool,
1111                                 ad->soft_out_mbuf_pool,
1112                                 ad->queue_ids[i],
1113                                 capabilities,
1114                                 info.drv.min_alignment,
1115                                 socket_id);
1116                 if (f_ret != TEST_SUCCESS) {
1117                         printf("Couldn't init queue buffers");
1118                         goto fail;
1119                 }
1120         }
1121
1122         /* Run test case function */
1123         t_ret = test_case_func(ad, op_params);
1124
1125         /* Free active device resources and return */
1126         free_buffers(ad, op_params);
1127         return t_ret;
1128
1129 fail:
1130         free_buffers(ad, op_params);
1131         return TEST_FAILED;
1132 }
1133
1134 /* Run given test function per active device per supported op type
1135  * per burst size.
1136  */
1137 static int
1138 run_test_case(test_case_function *test_case_func)
1139 {
1140         int ret = 0;
1141         uint8_t dev;
1142
1143         /* Alloc op_params */
1144         struct test_op_params *op_params = rte_zmalloc(NULL,
1145                         sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
1146         TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
1147                         RTE_ALIGN(sizeof(struct test_op_params),
1148                                 RTE_CACHE_LINE_SIZE));
1149
1150         /* For each device run test case function */
1151         for (dev = 0; dev < nb_active_devs; ++dev)
1152                 ret |= run_test_case_on_device(test_case_func, dev, op_params);
1153
1154         rte_free(op_params);
1155
1156         return ret;
1157 }
1158
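/* Interrupt-mode dequeue handler: drains up to burst_sz ops on each
 * RTE_BBDEV_EVENT_DEQUEUE event, validates the results once all ops have
 * been dequeued, and derives the per-queue throughput from tp->start_time.
 */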
1159 static void
1160 dequeue_event_callback(uint16_t dev_id,
1161                 enum rte_bbdev_event_type event, void *cb_arg,
1162                 void *ret_param)
1163 {
1164         int ret;
1165         uint16_t i;
1166         uint64_t total_time;
1167         uint16_t deq, burst_sz, num_ops;
1168         uint16_t queue_id = INVALID_QUEUE_ID;
1169         struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
1170         struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
1171         struct rte_bbdev_info info;
1172
1173         double tb_len_bits;
1174
1175         struct thread_params *tp = cb_arg;
1176         RTE_SET_USED(ret_param);
1177         queue_id = tp->queue_id;
1178
1179         /* Find matching thread params using queue_id */
1180         for (i = 0; i < MAX_QUEUES; ++i, ++tp)
1181                 if (tp->queue_id == queue_id)
1182                         break;
1183
1184         if (i == MAX_QUEUES) {
1185                 printf("%s: Queue_id from interrupt details was not found!\n",
1186                                 __func__);
1187                 return;
1188         }
1189
1190         if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
1191                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1192                 printf(
1193                         "Dequeue interrupt handler called for incorrect event!\n");
1194                 return;
1195         }
1196
1197         burst_sz = tp->op_params->burst_sz;
1198         num_ops = tp->op_params->num_to_process;
1199
1200         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1201                 deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id, dec_ops,
1202                                 burst_sz);
1203                 rte_bbdev_dec_op_free_bulk(dec_ops, deq);
1204         } else {
1205                 deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id, enc_ops,
1206                                 burst_sz);
1207                 rte_bbdev_enc_op_free_bulk(enc_ops, deq);
1208         }
1209
1210         if (deq < burst_sz) {
1211                 printf(
1212                         "After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
1213                         burst_sz, deq);
1214                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1215                 return;
1216         }
1217
1218         if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
1219                 rte_atomic16_add(&tp->nb_dequeued, deq);
1220                 return;
1221         }
1222
1223         total_time = rte_rdtsc_precise() - tp->start_time;
1224
1225         rte_bbdev_info_get(dev_id, &info);
1226
1227         ret = TEST_SUCCESS;
1228
1229         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1230                 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
1231                 ret = validate_dec_op(dec_ops, num_ops, ref_op,
1232                                 tp->op_params->vector_mask);
1233                 rte_bbdev_dec_op_free_bulk(dec_ops, deq);
1234         } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
1235                 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
1236                 ret = validate_enc_op(enc_ops, num_ops, ref_op);
1237                 rte_bbdev_enc_op_free_bulk(enc_ops, deq);
1238         }
1239
1240         if (ret) {
1241                 printf("Buffers validation failed\n");
1242                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1243         }
1244
1245         switch (test_vector.op_type) {
1246         case RTE_BBDEV_OP_TURBO_DEC:
1247                 tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
1248                 break;
1249         case RTE_BBDEV_OP_TURBO_ENC:
1250                 tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
1251                 break;
1252         case RTE_BBDEV_OP_NONE:
1253                 tb_len_bits = 0.0;
1254                 break;
1255         default:
1256                 printf("Unknown op type: %d\n", test_vector.op_type);
1257                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1258                 return;
1259         }
1260
1261         tp->ops_per_sec = ((double)num_ops) /
1262                         ((double)total_time / (double)rte_get_tsc_hz());
1263         tp->mbps = (((double)(num_ops * tb_len_bits)) / 1000000.0) /
1264                         ((double)total_time / (double)rte_get_tsc_hz());
1265
1266         rte_atomic16_add(&tp->nb_dequeued, deq);
1267 }
1268
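/* Interrupt-mode decode throughput lcore: enqueues all ops and returns;
 * dequeueing, validation and timing complete in dequeue_event_callback().
 */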
1269 static int
1270 throughput_intr_lcore_dec(void *arg)
1271 {
1272         struct thread_params *tp = arg;
1273         unsigned int enqueued;
1274         const uint16_t queue_id = tp->queue_id;
1275         const uint16_t burst_sz = tp->op_params->burst_sz;
1276         const uint16_t num_to_process = tp->op_params->num_to_process;
1277         struct rte_bbdev_dec_op *ops[num_to_process];
1278         struct test_buffers *bufs = NULL;
1279         struct rte_bbdev_info info;
1280         int ret;
1281         uint16_t num_to_enq;
1282
1283         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1284                         "BURST_SIZE should be <= %u", MAX_BURST);
1285
1286         TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
1287                         "Failed to enable interrupts for dev: %u, queue_id: %u",
1288                         tp->dev_id, queue_id);
1289
1290         rte_bbdev_info_get(tp->dev_id, &info);
1291
1292         TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
1293                         "NUM_OPS cannot exceed %u for this device",
1294                         info.drv.queue_size_lim);
1295
1296         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1297
1298         rte_atomic16_clear(&tp->processing_status);
1299         rte_atomic16_clear(&tp->nb_dequeued);
1300
1301         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1302                 rte_pause();
1303
1304         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
1305                                 num_to_process);
1306         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1307                         num_to_process);
1308         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1309                 copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
1310                                 bufs->hard_outputs, bufs->soft_outputs,
1311                                 tp->op_params->ref_dec_op);
1312
1313         tp->start_time = rte_rdtsc_precise();
1314         for (enqueued = 0; enqueued < num_to_process;) {
1315
1316                 num_to_enq = burst_sz;
1317
1318                 if (unlikely(num_to_process - enqueued < num_to_enq))
1319                         num_to_enq = num_to_process - enqueued;
1320
1321                 enqueued += rte_bbdev_enqueue_dec_ops(tp->dev_id, queue_id,
1322                                 &ops[enqueued], num_to_enq);
1323         }
1324
1325         return TEST_SUCCESS;
1326 }
1327
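/* Interrupt-mode encode throughput lcore; see throughput_intr_lcore_dec(). */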
1328 static int
1329 throughput_intr_lcore_enc(void *arg)
1330 {
1331         struct thread_params *tp = arg;
1332         unsigned int enqueued;
1333         const uint16_t queue_id = tp->queue_id;
1334         const uint16_t burst_sz = tp->op_params->burst_sz;
1335         const uint16_t num_to_process = tp->op_params->num_to_process;
1336         struct rte_bbdev_enc_op *ops[num_to_process];
1337         struct test_buffers *bufs = NULL;
1338         struct rte_bbdev_info info;
1339         int ret;
1340         uint16_t num_to_enq;
1341
1342         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1343                         "BURST_SIZE should be <= %u", MAX_BURST);
1344
1345         TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
1346                         "Failed to enable interrupts for dev: %u, queue_id: %u",
1347                         tp->dev_id, queue_id);
1348
1349         rte_bbdev_info_get(tp->dev_id, &info);
1350
1351         TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
1352                         "NUM_OPS cannot exceed %u for this device",
1353                         info.drv.queue_size_lim);
1354
1355         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1356
1357         rte_atomic16_clear(&tp->processing_status);
1358         rte_atomic16_clear(&tp->nb_dequeued);
1359
1360         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1361                 rte_pause();
1362
1363         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
1364                         num_to_process);
1365         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1366                         num_to_process);
1367         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1368                 copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
1369                                 bufs->hard_outputs, tp->op_params->ref_enc_op);
1370
1371         tp->start_time = rte_rdtsc_precise();
1372         for (enqueued = 0; enqueued < num_to_process;) {
1373
1374                 num_to_enq = burst_sz;
1375
1376                 if (unlikely(num_to_process - enqueued < num_to_enq))
1377                         num_to_enq = num_to_process - enqueued;
1378
1379                 enqueued += rte_bbdev_enqueue_enc_ops(tp->dev_id, queue_id,
1380                                 &ops[enqueued], num_to_enq);
1381         }
1382
1383         return TEST_SUCCESS;
1384 }
1385
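/* Polling (PMD) mode decode throughput worker: repeats enqueue/dequeue of the
 * whole set of operations TEST_REPETITIONS times, accumulates the elapsed TSC
 * cycles and reports ops/s, Mbps and the maximum decoder iteration count.
 */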
1386 static int
1387 throughput_pmd_lcore_dec(void *arg)
1388 {
1389         struct thread_params *tp = arg;
1390         uint16_t enq, deq;
1391         uint64_t total_time = 0, start_time;
1392         const uint16_t queue_id = tp->queue_id;
1393         const uint16_t burst_sz = tp->op_params->burst_sz;
1394         const uint16_t num_ops = tp->op_params->num_to_process;
1395         struct rte_bbdev_dec_op *ops_enq[num_ops];
1396         struct rte_bbdev_dec_op *ops_deq[num_ops];
1397         struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
1398         struct test_buffers *bufs = NULL;
1399         int i, j, ret;
1400         struct rte_bbdev_info info;
1401         uint16_t num_to_enq;
1402
1403         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1404                         "BURST_SIZE should be <= %u", MAX_BURST);
1405
1406         rte_bbdev_info_get(tp->dev_id, &info);
1407
1408         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
1409                         "NUM_OPS cannot exceed %u for this device",
1410                         info.drv.queue_size_lim);
1411
1412         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1413
1414         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1415                 rte_pause();
1416
1417         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
1418         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
1419
1420         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1421                 copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
1422                                 bufs->hard_outputs, bufs->soft_outputs, ref_op);
1423
1424         /* Set counter to validate the ordering */
1425         for (j = 0; j < num_ops; ++j)
1426                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1427
1428         for (i = 0; i < TEST_REPETITIONS; ++i) {
1429
1430                 for (j = 0; j < num_ops; ++j) {
1431                         struct rte_bbdev_dec_op *op = ops_enq[j];
1432                         rte_pktmbuf_reset(op->turbo_dec.hard_output.data);
1433                 }
1434
1435                 start_time = rte_rdtsc_precise();
1436
1437                 for (enq = 0, deq = 0; enq < num_ops;) {
1438                         num_to_enq = burst_sz;
1439
1440                         if (unlikely(num_ops - enq < num_to_enq))
1441                                 num_to_enq = num_ops - enq;
1442
1443                         enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
1444                                         queue_id, &ops_enq[enq], num_to_enq);
1445
1446                         deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
1447                                         queue_id, &ops_deq[deq], enq - deq);
1448                 }
1449
1450                 /* Dequeue the remaining operations */
1451                 while (deq < enq) {
1452                         deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
1453                                         queue_id, &ops_deq[deq], enq - deq);
1454                 }
1455
1456                 total_time += rte_rdtsc_precise() - start_time;
1457         }
1458
1459         tp->iter_count = 0;
1460         /* get the max of iter_count for all dequeued ops */
1461         for (i = 0; i < num_ops; ++i) {
1462                 tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
1463                                 tp->iter_count);
1464         }
1465
1466         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1467                 ret = validate_dec_op(ops_deq, num_ops, ref_op,
1468                                 tp->op_params->vector_mask);
1469                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
1470         }
1471
1472         rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
1473
1474         double tb_len_bits = calc_dec_TB_size(ref_op);
1475
1476         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
1477                         ((double)total_time / (double)rte_get_tsc_hz());
1478         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
1479                         1000000.0) / ((double)total_time /
1480                         (double)rte_get_tsc_hz());
1481
1482         return TEST_SUCCESS;
1483 }
1484
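/* Polling (PMD) mode encode throughput worker: repeats enqueue/dequeue of the
 * whole set of operations TEST_REPETITIONS times, accumulates the elapsed TSC
 * cycles and reports ops/s and Mbps.
 */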
1485 static int
1486 throughput_pmd_lcore_enc(void *arg)
1487 {
1488         struct thread_params *tp = arg;
1489         uint16_t enq, deq;
1490         uint64_t total_time = 0, start_time;
1491         const uint16_t queue_id = tp->queue_id;
1492         const uint16_t burst_sz = tp->op_params->burst_sz;
1493         const uint16_t num_ops = tp->op_params->num_to_process;
1494         struct rte_bbdev_enc_op *ops_enq[num_ops];
1495         struct rte_bbdev_enc_op *ops_deq[num_ops];
1496         struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
1497         struct test_buffers *bufs = NULL;
1498         int i, j, ret;
1499         struct rte_bbdev_info info;
1500         uint16_t num_to_enq;
1501
1502         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1503                         "BURST_SIZE should be <= %u", MAX_BURST);
1504
1505         rte_bbdev_info_get(tp->dev_id, &info);
1506
1507         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
1508                         "NUM_OPS cannot exceed %u for this device",
1509                         info.drv.queue_size_lim);
1510
1511         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1512
1513         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1514                 rte_pause();
1515
1516         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
1517                         num_ops);
1518         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1519                         num_ops);
1520         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1521                 copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
1522                                 bufs->hard_outputs, ref_op);
1523
1524         /* Set counter to validate the ordering */
1525         for (j = 0; j < num_ops; ++j)
1526                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1527
1528         for (i = 0; i < TEST_REPETITIONS; ++i) {
1529
1530                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1531                         for (j = 0; j < num_ops; ++j)
1532                                 rte_pktmbuf_reset(
1533                                         ops_enq[j]->turbo_enc.output.data);
1534
1535                 start_time = rte_rdtsc_precise();
1536
1537                 for (enq = 0, deq = 0; enq < num_ops;) {
1538                         num_to_enq = burst_sz;
1539
1540                         if (unlikely(num_ops - enq < num_to_enq))
1541                                 num_to_enq = num_ops - enq;
1542
1543                         enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
1544                                         queue_id, &ops_enq[enq], num_to_enq);
1545
1546                         deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
1547                                         queue_id, &ops_deq[deq], enq - deq);
1548                 }
1549
1550                 /* Dequeue the remaining operations */
1551                 while (deq < enq) {
1552                         deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
1553                                         queue_id, &ops_deq[deq], enq - deq);
1554                 }
1555
1556                 total_time += rte_rdtsc_precise() - start_time;
1557         }
1558
1559         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1560                 ret = validate_enc_op(ops_deq, num_ops, ref_op);
1561                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
1562         }
1563
1564         double tb_len_bits = calc_enc_TB_size(ref_op);
1565
1566         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
1567                         ((double)total_time / (double)rte_get_tsc_hz());
1568         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
1569                         / 1000000.0) / ((double)total_time /
1570                         (double)rte_get_tsc_hz());
1571
1572         return TEST_SUCCESS;
1573 }
1574
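/* Print per-core and aggregate encode throughput. */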
1575 static void
1576 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
1577 {
1578         unsigned int lcore_id, iter = 0;
1579         double total_mops = 0, total_mbps = 0;
1580
1581         RTE_LCORE_FOREACH(lcore_id) {
1582                 if (iter++ >= used_cores)
1583                         break;
1584                 printf(
1585                                 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
1586                                 lcore_id, t_params[lcore_id].ops_per_sec,
1587                                 t_params[lcore_id].mbps);
1588                 total_mops += t_params[lcore_id].ops_per_sec;
1589                 total_mbps += t_params[lcore_id].mbps;
1590         }
1591         printf(
1592                 "\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps\n",
1593                 used_cores, total_mops, total_mbps);
1594 }
1595
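/* Print per-core and aggregate decode throughput, including the maximum
 * number of decoder iterations reported by any core.
 */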
1596 static void
1597 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
1598 {
1599         unsigned int lcore_id, iter = 0;
1600         double total_mops = 0, total_mbps = 0;
1601         uint8_t iter_count = 0;
1602
1603         RTE_LCORE_FOREACH(lcore_id) {
1604                 if (iter++ >= used_cores)
1605                         break;
1606                 printf(
1607                                 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
1608                                 lcore_id, t_params[lcore_id].ops_per_sec,
1609                                 t_params[lcore_id].mbps,
1610                                 t_params[lcore_id].iter_count);
1611                 total_mops += t_params[lcore_id].ops_per_sec;
1612                 total_mbps += t_params[lcore_id].mbps;
1613                 iter_count = RTE_MAX(iter_count, t_params[lcore_id].iter_count);
1614         }
1615         printf(
1616                 "\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
1617                 used_cores, total_mops, total_mbps, iter_count);
1618 }
1619
1620 /*
1621  * Test function that determines how long an enqueue + dequeue of a burst
1622  * takes on available lcores.
1623  */
1624 static int
1625 throughput_test(struct active_device *ad,
1626                 struct test_op_params *op_params)
1627 {
1628         int ret;
1629         unsigned int lcore_id, used_cores = 0;
1630         struct thread_params t_params[MAX_QUEUES];
1631         struct rte_bbdev_info info;
1632         lcore_function_t *throughput_function;
1633         struct thread_params *tp;
1634         uint16_t num_lcores;
1635         const char *op_type_str;
1636
1637         rte_bbdev_info_get(ad->dev_id, &info);
1638
1639         op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
1640         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
1641                         test_vector.op_type);
1642
1643         printf(
1644                 "Throughput test: dev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, int mode: %s, GHz: %lg\n",
1645                         info.dev_name, ad->nb_queues, op_params->burst_sz,
1646                         op_params->num_to_process, op_params->num_lcores,
1647                         op_type_str,
1648                         intr_enabled ? "Interrupt mode" : "PMD mode",
1649                         (double)rte_get_tsc_hz() / 1000000000.0);
1650
1651         /* Set number of lcores */
1652         num_lcores = (ad->nb_queues < (op_params->num_lcores))
1653                         ? ad->nb_queues
1654                         : op_params->num_lcores;
1655
1656         if (intr_enabled) {
1657                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1658                         throughput_function = throughput_intr_lcore_dec;
1659                 else
1660                         throughput_function = throughput_intr_lcore_enc;
1661
1662                 /* Dequeue interrupt callback registration */
1663                 ret = rte_bbdev_callback_register(ad->dev_id,
1664                                 RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
1665                                 &t_params);
1666                 if (ret < 0)
1667                         return ret;
1668         } else {
1669                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1670                         throughput_function = throughput_pmd_lcore_dec;
1671                 else
1672                         throughput_function = throughput_pmd_lcore_enc;
1673         }
1674
1675         rte_atomic16_set(&op_params->sync, SYNC_WAIT);
1676
1677         t_params[rte_lcore_id()].dev_id = ad->dev_id;
1678         t_params[rte_lcore_id()].op_params = op_params;
1679         t_params[rte_lcore_id()].queue_id =
1680                         ad->queue_ids[used_cores++];
1681
1682         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
1683                 if (used_cores >= num_lcores)
1684                         break;
1685
1686                 t_params[lcore_id].dev_id = ad->dev_id;
1687                 t_params[lcore_id].op_params = op_params;
1688                 t_params[lcore_id].queue_id = ad->queue_ids[used_cores++];
1689
1690                 rte_eal_remote_launch(throughput_function, &t_params[lcore_id],
1691                                 lcore_id);
1692         }
1693
1694         rte_atomic16_set(&op_params->sync, SYNC_START);
1695         ret = throughput_function(&t_params[rte_lcore_id()]);
1696
1697         /* Master core is always used */
1698         used_cores = 1;
1699         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
1700                 if (used_cores++ >= num_lcores)
1701                         break;
1702
1703                 ret |= rte_eal_wait_lcore(lcore_id);
1704         }
1705
1706         /* Return if test failed */
1707         if (ret)
1708                 return ret;
1709
1710         /* Print throughput if interrupts are disabled and test passed */
1711         if (!intr_enabled) {
1712                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1713                         print_dec_throughput(t_params, num_lcores);
1714                 else
1715                         print_enc_throughput(t_params, num_lcores);
1716                 return ret;
1717         }
1718
1719         /* In interrupt TC we need to wait for the interrupt callback to dequeue
1720          * all pending operations. Skip waiting for queues which reported an
1721          * error using the processing_status variable.
1722          * Wait for master lcore operations.
1723          */
1724         tp = &t_params[rte_lcore_id()];
1725         while ((rte_atomic16_read(&tp->nb_dequeued) <
1726                         op_params->num_to_process) &&
1727                         (rte_atomic16_read(&tp->processing_status) !=
1728                         TEST_FAILED))
1729                 rte_pause();
1730
1731         ret |= rte_atomic16_read(&tp->processing_status);
1732
1733         /* Wait for slave lcore operations */
1734         used_cores = 1;
1735         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
1736                 tp = &t_params[lcore_id];
1737                 if (used_cores++ >= num_lcores)
1738                         break;
1739
1740                 while ((rte_atomic16_read(&tp->nb_dequeued) <
1741                                 op_params->num_to_process) &&
1742                                 (rte_atomic16_read(&tp->processing_status) !=
1743                                 TEST_FAILED))
1744                         rte_pause();
1745
1746                 ret |= rte_atomic16_read(&tp->processing_status);
1747         }
1748
1749         /* Print throughput if test passed */
1750         if (!ret) {
1751                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1752                         print_dec_throughput(t_params, num_lcores);
1753                 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
1754                         print_enc_throughput(t_params, num_lcores);
1755         }
1756         return ret;
1757 }
1758
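/* Measure decode latency burst by burst: each burst is enqueued at once and
 * the time until the first dequeue returns operations is recorded; returns
 * the number of bursts measured.
 */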
1759 static int
1760 latency_test_dec(struct rte_mempool *mempool,
1761                 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
1762                 int vector_mask, uint16_t dev_id, uint16_t queue_id,
1763                 const uint16_t num_to_process, uint16_t burst_sz,
1764                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
1765 {
1766         int ret = TEST_SUCCESS;
1767         uint16_t i, j, dequeued;
1768         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
1769         uint64_t start_time = 0, last_time = 0;
1770
1771         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
1772                 uint16_t enq = 0, deq = 0;
1773                 bool first_time = true;
1774                 last_time = 0;
1775
1776                 if (unlikely(num_to_process - dequeued < burst_sz))
1777                         burst_sz = num_to_process - dequeued;
1778
1779                 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
1780                 TEST_ASSERT_SUCCESS(ret,
1781                                 "rte_bbdev_dec_op_alloc_bulk() failed");
1782                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1783                         copy_reference_dec_op(ops_enq, burst_sz, dequeued,
1784                                         bufs->inputs,
1785                                         bufs->hard_outputs,
1786                                         bufs->soft_outputs,
1787                                         ref_op);
1788
1789                 /* Set counter to validate the ordering */
1790                 for (j = 0; j < burst_sz; ++j)
1791                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1792
1793                 start_time = rte_rdtsc_precise();
1794
1795                 enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
1796                                 burst_sz);
1797                 TEST_ASSERT(enq == burst_sz,
1798                                 "Error enqueueing burst, expected %u, got %u",
1799                                 burst_sz, enq);
1800
1801                 /* Dequeue */
1802                 do {
1803                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
1804                                         &ops_deq[deq], burst_sz - deq);
1805                         if (likely(first_time && (deq > 0))) {
1806                                 last_time = rte_rdtsc_precise() - start_time;
1807                                 first_time = false;
1808                         }
1809                 } while (unlikely(burst_sz != deq));
1810
1811                 *max_time = RTE_MAX(*max_time, last_time);
1812                 *min_time = RTE_MIN(*min_time, last_time);
1813                 *total_time += last_time;
1814
1815                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1816                         ret = validate_dec_op(ops_deq, burst_sz, ref_op,
1817                                         vector_mask);
1818                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
1819                 }
1820
1821                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
1822                 dequeued += deq;
1823         }
1824
1825         return i;
1826 }
1827
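/* Measure encode latency burst by burst, analogous to latency_test_dec();
 * returns the number of bursts measured.
 */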
1828 static int
1829 latency_test_enc(struct rte_mempool *mempool,
1830                 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
1831                 uint16_t dev_id, uint16_t queue_id,
1832                 const uint16_t num_to_process, uint16_t burst_sz,
1833                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
1834 {
1835         int ret = TEST_SUCCESS;
1836         uint16_t i, j, dequeued;
1837         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
1838         uint64_t start_time = 0, last_time = 0;
1839
1840         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
1841                 uint16_t enq = 0, deq = 0;
1842                 bool first_time = true;
1843                 last_time = 0;
1844
1845                 if (unlikely(num_to_process - dequeued < burst_sz))
1846                         burst_sz = num_to_process - dequeued;
1847
1848                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
1849                 TEST_ASSERT_SUCCESS(ret,
1850                                 "rte_bbdev_enc_op_alloc_bulk() failed");
1851                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1852                         copy_reference_enc_op(ops_enq, burst_sz, dequeued,
1853                                         bufs->inputs,
1854                                         bufs->hard_outputs,
1855                                         ref_op);
1856
1857                 /* Set counter to validate the ordering */
1858                 for (j = 0; j < burst_sz; ++j)
1859                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1860
1861                 start_time = rte_rdtsc_precise();
1862
1863                 enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
1864                                 burst_sz);
1865                 TEST_ASSERT(enq == burst_sz,
1866                                 "Error enqueueing burst, expected %u, got %u",
1867                                 burst_sz, enq);
1868
1869                 /* Dequeue */
1870                 do {
1871                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
1872                                         &ops_deq[deq], burst_sz - deq);
1873                         if (likely(first_time && (deq > 0))) {
1874                                 last_time = rte_rdtsc_precise() - start_time;
1875                                 first_time = false;
1876                         }
1877                 } while (unlikely(burst_sz != deq));
1878
1879                 *max_time = RTE_MAX(*max_time, last_time);
1880                 *min_time = RTE_MIN(*min_time, last_time);
1881                 *total_time += last_time;
1882
1883                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1884                         ret = validate_enc_op(ops_deq, burst_sz, ref_op);
1885                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
1886                 }
1887
1888                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
1889                 dequeued += deq;
1890         }
1891
1892         return i;
1893 }
1894
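/* Validation/latency test case: runs the per-burst latency measurement on a
 * single queue and prints the average, minimum and maximum latency in cycles
 * and microseconds.
 */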
1895 static int
1896 latency_test(struct active_device *ad,
1897                 struct test_op_params *op_params)
1898 {
1899         int iter;
1900         uint16_t burst_sz = op_params->burst_sz;
1901         const uint16_t num_to_process = op_params->num_to_process;
1902         const enum rte_bbdev_op_type op_type = test_vector.op_type;
1903         const uint16_t queue_id = ad->queue_ids[0];
1904         struct test_buffers *bufs = NULL;
1905         struct rte_bbdev_info info;
1906         uint64_t total_time, min_time, max_time;
1907         const char *op_type_str;
1908
1909         total_time = max_time = 0;
1910         min_time = UINT64_MAX;
1911
1912         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1913                         "BURST_SIZE should be <= %u", MAX_BURST);
1914
1915         rte_bbdev_info_get(ad->dev_id, &info);
1916         bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1917
1918         op_type_str = rte_bbdev_op_type_str(op_type);
1919         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
1920
1921         printf(
1922                 "\nValidation/Latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
1923                         info.dev_name, burst_sz, num_to_process, op_type_str);
1924
1925         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
1926                 iter = latency_test_dec(op_params->mp, bufs,
1927                                 op_params->ref_dec_op, op_params->vector_mask,
1928                                 ad->dev_id, queue_id, num_to_process,
1929                                 burst_sz, &total_time, &min_time, &max_time);
1930         else
1931                 iter = latency_test_enc(op_params->mp, bufs,
1932                                 op_params->ref_enc_op, ad->dev_id, queue_id,
1933                                 num_to_process, burst_sz, &total_time,
1934                                 &min_time, &max_time);
1935
1936         if (iter <= 0)
1937                 return TEST_FAILED;
1938
1939         printf("Operation latency:\n"
1940                         "\tavg latency: %lg cycles, %lg us\n"
1941                         "\tmin latency: %lg cycles, %lg us\n"
1942                         "\tmax latency: %lg cycles, %lg us\n",
1943                         (double)total_time / (double)iter,
1944                         (double)(total_time * 1000000) / (double)iter /
1945                         (double)rte_get_tsc_hz(), (double)min_time,
1946                         (double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
1947                         (double)max_time, (double)(max_time * 1000000) /
1948                         (double)rte_get_tsc_hz());
1949
1950         return TEST_SUCCESS;
1951 }
1952
1953 #ifdef RTE_BBDEV_OFFLOAD_COST
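/* Read statistics for a single queue straight from the device's internal
 * per-queue data, including the accelerator offload cycle count.
 */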
1954 static int
1955 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
1956                 struct rte_bbdev_stats *stats)
1957 {
1958         struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
1959         struct rte_bbdev_stats *q_stats;
1960
1961         if (queue_id >= dev->data->num_queues)
1962                 return -1;
1963
1964         q_stats = &dev->data->queues[queue_id].queue_stats;
1965
1966         stats->enqueued_count = q_stats->enqueued_count;
1967         stats->dequeued_count = q_stats->dequeued_count;
1968         stats->enqueue_err_count = q_stats->enqueue_err_count;
1969         stats->dequeue_err_count = q_stats->dequeue_err_count;
1970         stats->acc_offload_cycles = q_stats->acc_offload_cycles;
1971
1972         return 0;
1973 }
1974
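/* Measure decode offload cost: the enqueue time is split into the software
 * (driver) part and the accelerator part reported in acc_offload_cycles, and
 * the cost of dequeuing a single completed operation is measured separately;
 * remaining operations are drained untimed.
 */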
1975 static int
1976 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
1977                 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
1978                 uint16_t queue_id, const uint16_t num_to_process,
1979                 uint16_t burst_sz, struct test_time_stats *time_st)
1980 {
1981         int i, dequeued, ret;
1982         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
1983         uint64_t enq_start_time, deq_start_time;
1984         uint64_t enq_sw_last_time, deq_last_time;
1985         struct rte_bbdev_stats stats;
1986
1987         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
1988                 uint16_t enq = 0, deq = 0;
1989
1990                 if (unlikely(num_to_process - dequeued < burst_sz))
1991                         burst_sz = num_to_process - dequeued;
1992
1993                 rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
1994                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1995                         copy_reference_dec_op(ops_enq, burst_sz, dequeued,
1996                                         bufs->inputs,
1997                                         bufs->hard_outputs,
1998                                         bufs->soft_outputs,
1999                                         ref_op);
2000
2001                 /* Start time measurement for enqueue function offload latency */
2002                 enq_start_time = rte_rdtsc_precise();
2003                 do {
2004                         enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
2005                                         &ops_enq[enq], burst_sz - enq);
2006                 } while (unlikely(burst_sz != enq));
2007
2008                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
2009                 TEST_ASSERT_SUCCESS(ret,
2010                                 "Failed to get stats for queue (%u) of device (%u)",
2011                                 queue_id, dev_id);
2012
2013                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
2014                                 stats.acc_offload_cycles;
2015                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
2016                                 enq_sw_last_time);
2017                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
2018                                 enq_sw_last_time);
2019                 time_st->enq_sw_total_time += enq_sw_last_time;
2020
2021                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
2022                                 stats.acc_offload_cycles);
2023                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
2024                                 stats.acc_offload_cycles);
2025                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
2026
2027                 /* Ensure the enqueue has been completed */
2028                 rte_delay_ms(10);
2029
2030                 /* Start time measurement for dequeue function offload latency */
2031                 deq_start_time = rte_rdtsc_precise();
2032                 /* Dequeue one operation */
2033                 do {
2034                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2035                                         &ops_deq[deq], 1);
2036                 } while (unlikely(deq != 1));
2037
2038                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
2039                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
2040                                 deq_last_time);
2041                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
2042                                 deq_last_time);
2043                 time_st->deq_total_time += deq_last_time;
2044
2045                 /* Dequeue remaining operations if needed */
2046                 while (burst_sz != deq)
2047                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2048                                         &ops_deq[deq], burst_sz - deq);
2049
2050                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
2051                 dequeued += deq;
2052         }
2053
2054         return i;
2055 }
2056
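/* Measure encode offload cost, analogous to offload_latency_test_dec(). */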
2057 static int
2058 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
2059                 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
2060                 uint16_t queue_id, const uint16_t num_to_process,
2061                 uint16_t burst_sz, struct test_time_stats *time_st)
2062 {
2063         int i, dequeued, ret;
2064         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2065         uint64_t enq_start_time, deq_start_time;
2066         uint64_t enq_sw_last_time, deq_last_time;
2067         struct rte_bbdev_stats stats;
2068
2069         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2070                 uint16_t enq = 0, deq = 0;
2071
2072                 if (unlikely(num_to_process - dequeued < burst_sz))
2073                         burst_sz = num_to_process - dequeued;
2074
2075                 rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
2076                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2077                         copy_reference_enc_op(ops_enq, burst_sz, dequeued,
2078                                         bufs->inputs,
2079                                         bufs->hard_outputs,
2080                                         ref_op);
2081
2082                 /* Start time measurement for enqueue function offload latency */
2083                 enq_start_time = rte_rdtsc_precise();
2084                 do {
2085                         enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
2086                                         &ops_enq[enq], burst_sz - enq);
2087                 } while (unlikely(burst_sz != enq));
2088
2089                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
2090                 TEST_ASSERT_SUCCESS(ret,
2091                                 "Failed to get stats for queue (%u) of device (%u)",
2092                                 queue_id, dev_id);
2093
2094                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
2095                                 stats.acc_offload_cycles;
2096                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
2097                                 enq_sw_last_time);
2098                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
2099                                 enq_sw_last_time);
2100                 time_st->enq_sw_total_time += enq_sw_last_time;
2101
2102                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
2103                                 stats.acc_offload_cycles);
2104                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
2105                                 stats.acc_offload_cycles);
2106                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
2107
2108                 /* Ensure the enqueue has been completed */
2109                 rte_delay_ms(10);
2110
2111                 /* Start time measurement for dequeue function offload latency */
2112                 deq_start_time = rte_rdtsc_precise();
2113                 /* Dequeue one operation */
2114                 do {
2115                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2116                                         &ops_deq[deq], 1);
2117                 } while (unlikely(deq != 1));
2118
2119                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
2120                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
2121                                 deq_last_time);
2122                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
2123                                 deq_last_time);
2124                 time_st->deq_total_time += deq_last_time;
2125
2126                 while (burst_sz != deq)
2127                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2128                                         &ops_deq[deq], burst_sz - deq);
2129
2130                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
2131                 dequeued += deq;
2132         }
2133
2134         return i;
2135 }
2136 #endif
2137
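/* Offload cost test case: prints driver and accelerator enqueue costs and the
 * single-operation dequeue cost; only available when RTE_BBDEV_OFFLOAD_COST is
 * enabled.
 */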
2138 static int
2139 offload_cost_test(struct active_device *ad,
2140                 struct test_op_params *op_params)
2141 {
2142 #ifndef RTE_BBDEV_OFFLOAD_COST
2143         RTE_SET_USED(ad);
2144         RTE_SET_USED(op_params);
2145         printf("Offload latency test is disabled.\n");
2146         printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
2147         return TEST_SKIPPED;
2148 #else
2149         int iter;
2150         uint16_t burst_sz = op_params->burst_sz;
2151         const uint16_t num_to_process = op_params->num_to_process;
2152         const enum rte_bbdev_op_type op_type = test_vector.op_type;
2153         const uint16_t queue_id = ad->queue_ids[0];
2154         struct test_buffers *bufs = NULL;
2155         struct rte_bbdev_info info;
2156         const char *op_type_str;
2157         struct test_time_stats time_st;
2158
2159         memset(&time_st, 0, sizeof(struct test_time_stats));
2160         time_st.enq_sw_min_time = UINT64_MAX;
2161         time_st.enq_acc_min_time = UINT64_MAX;
2162         time_st.deq_min_time = UINT64_MAX;
2163
2164         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2165                         "BURST_SIZE should be <= %u", MAX_BURST);
2166
2167         rte_bbdev_info_get(ad->dev_id, &info);
2168         bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2169
2170         op_type_str = rte_bbdev_op_type_str(op_type);
2171         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
2172
2173         printf(
2174                 "\nOffload latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
2175                         info.dev_name, burst_sz, num_to_process, op_type_str);
2176
2177         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
2178                 iter = offload_latency_test_dec(op_params->mp, bufs,
2179                                 op_params->ref_dec_op, ad->dev_id, queue_id,
2180                                 num_to_process, burst_sz, &time_st);
2181         else
2182                 iter = offload_latency_test_enc(op_params->mp, bufs,
2183                                 op_params->ref_enc_op, ad->dev_id, queue_id,
2184                                 num_to_process, burst_sz, &time_st);
2185
2186         if (iter <= 0)
2187                 return TEST_FAILED;
2188
2189         printf("Enqueue offload cost latency:\n"
2190                         "\tDriver offload avg %lg cycles, %lg us\n"
2191                         "\tDriver offload min %lg cycles, %lg us\n"
2192                         "\tDriver offload max %lg cycles, %lg us\n"
2193                         "\tAccelerator offload avg %lg cycles, %lg us\n"
2194                         "\tAccelerator offload min %lg cycles, %lg us\n"
2195                         "\tAccelerator offload max %lg cycles, %lg us\n",
2196                         (double)time_st.enq_sw_total_time / (double)iter,
2197                         (double)(time_st.enq_sw_total_time * 1000000) /
2198                         (double)iter / (double)rte_get_tsc_hz(),
2199                         (double)time_st.enq_sw_min_time,
2200                         (double)(time_st.enq_sw_min_time * 1000000) /
2201                         rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
2202                         (double)(time_st.enq_sw_max_time * 1000000) /
2203                         rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
2204                         (double)iter,
2205                         (double)(time_st.enq_acc_total_time * 1000000) /
2206                         (double)iter / (double)rte_get_tsc_hz(),
2207                         (double)time_st.enq_acc_min_time,
2208                         (double)(time_st.enq_acc_min_time * 1000000) /
2209                         rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
2210                         (double)(time_st.enq_acc_max_time * 1000000) /
2211                         rte_get_tsc_hz());
2212
2213         printf("Dequeue offload cost latency - one op:\n"
2214                         "\tavg %lg cycles, %lg us\n"
2215                         "\tmin %lg cycles, %lg us\n"
2216                         "\tmax %lg cycles, %lg us\n",
2217                         (double)time_st.deq_total_time / (double)iter,
2218                         (double)(time_st.deq_total_time * 1000000) /
2219                         (double)iter / (double)rte_get_tsc_hz(),
2220                         (double)time_st.deq_min_time,
2221                         (double)(time_st.deq_min_time * 1000000) /
2222                         rte_get_tsc_hz(), (double)time_st.deq_max_time,
2223                         (double)(time_st.deq_max_time * 1000000) /
2224                         rte_get_tsc_hz());
2225
2226         return TEST_SUCCESS;
2227 #endif
2228 }
2229
2230 #ifdef RTE_BBDEV_OFFLOAD_COST
2231 static int
2232 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
2233                 const uint16_t num_to_process, uint16_t burst_sz,
2234                 uint64_t *deq_total_time, uint64_t *deq_min_time,
2235                 uint64_t *deq_max_time)
2236 {
2237         int i, deq_total;
2238         struct rte_bbdev_dec_op *ops[MAX_BURST];
2239         uint64_t deq_start_time, deq_last_time;
2240
2241         /* Test dequeue offload latency from an empty queue */
2242
2243         for (i = 0, deq_total = 0; deq_total < num_to_process;
2244                         ++i, deq_total += burst_sz) {
2245                 deq_start_time = rte_rdtsc_precise();
2246
2247                 if (unlikely(num_to_process - deq_total < burst_sz))
2248                         burst_sz = num_to_process - deq_total;
2249                 rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz);
2250
2251                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
2252                 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
2253                 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
2254                 *deq_total_time += deq_last_time;
2255         }
2256
2257         return i;
2258 }
2259
2260 static int
2261 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
2262                 const uint16_t num_to_process, uint16_t burst_sz,
2263                 uint64_t *deq_total_time, uint64_t *deq_min_time,
2264                 uint64_t *deq_max_time)
2265 {
2266         int i, deq_total;
2267         struct rte_bbdev_enc_op *ops[MAX_BURST];
2268         uint64_t deq_start_time, deq_last_time;
2269
2270         /* Test dequeue offload latency from an empty queue */
2271         for (i = 0, deq_total = 0; deq_total < num_to_process;
2272                         ++i, deq_total += burst_sz) {
2273                 deq_start_time = rte_rdtsc_precise();
2274
2275                 if (unlikely(num_to_process - deq_total < burst_sz))
2276                         burst_sz = num_to_process - deq_total;
2277                 rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz);
2278
2279                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
2280                 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
2281                 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
2282                 *deq_total_time += deq_last_time;
2283         }
2284
2285         return i;
2286 }
2287 #endif
2288
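/* Empty-queue dequeue test case: measures the cost of dequeue calls issued
 * against a queue with no pending operations.
 */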
2289 static int
2290 offload_latency_empty_q_test(struct active_device *ad,
2291                 struct test_op_params *op_params)
2292 {
2293 #ifndef RTE_BBDEV_OFFLOAD_COST
2294         RTE_SET_USED(ad);
2295         RTE_SET_USED(op_params);
2296         printf("Offload latency empty dequeue test is disabled.\n");
2297         printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
2298         return TEST_SKIPPED;
2299 #else
2300         int iter;
2301         uint64_t deq_total_time, deq_min_time, deq_max_time;
2302         uint16_t burst_sz = op_params->burst_sz;
2303         const uint16_t num_to_process = op_params->num_to_process;
2304         const enum rte_bbdev_op_type op_type = test_vector.op_type;
2305         const uint16_t queue_id = ad->queue_ids[0];
2306         struct rte_bbdev_info info;
2307         const char *op_type_str;
2308
2309         deq_total_time = deq_max_time = 0;
2310         deq_min_time = UINT64_MAX;
2311
2312         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2313                         "BURST_SIZE should be <= %u", MAX_BURST);
2314
2315         rte_bbdev_info_get(ad->dev_id, &info);
2316
2317         op_type_str = rte_bbdev_op_type_str(op_type);
2318         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
2319
2320         printf(
2321                 "\nOffload latency empty dequeue test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
2322                         info.dev_name, burst_sz, num_to_process, op_type_str);
2323
2324         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
2325                 iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
2326                                 num_to_process, burst_sz, &deq_total_time,
2327                                 &deq_min_time, &deq_max_time);
2328         else
2329                 iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
2330                                 num_to_process, burst_sz, &deq_total_time,
2331                                 &deq_min_time, &deq_max_time);
2332
2333         if (iter <= 0)
2334                 return TEST_FAILED;
2335
2336         printf("Empty dequeue offload\n"
2337                         "\tavg. latency: %lg cycles, %lg us\n"
2338                         "\tmin. latency: %lg cycles, %lg us\n"
2339                         "\tmax. latency: %lg cycles, %lg us\n",
2340                         (double)deq_total_time / (double)iter,
2341                         (double)(deq_total_time * 1000000) / (double)iter /
2342                         (double)rte_get_tsc_hz(), (double)deq_min_time,
2343                         (double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
2344                         (double)deq_max_time, (double)(deq_max_time * 1000000) /
2345                         rte_get_tsc_hz());
2346
2347         return TEST_SUCCESS;
2348 #endif
2349 }
2350
2351 static int
2352 throughput_tc(void)
2353 {
2354         return run_test_case(throughput_test);
2355 }
2356
2357 static int
2358 offload_cost_tc(void)
2359 {
2360         return run_test_case(offload_cost_test);
2361 }
2362
2363 static int
2364 offload_latency_empty_q_tc(void)
2365 {
2366         return run_test_case(offload_latency_empty_q_test);
2367 }
2368
2369 static int
2370 latency_tc(void)
2371 {
2372         return run_test_case(latency_test);
2373 }
2374
2375 static int
2376 interrupt_tc(void)
2377 {
2378         return run_test_case(throughput_test);
2379 }
2380
2381 static struct unit_test_suite bbdev_throughput_testsuite = {
2382         .suite_name = "BBdev Throughput Tests",
2383         .setup = testsuite_setup,
2384         .teardown = testsuite_teardown,
2385         .unit_test_cases = {
2386                 TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
2387                 TEST_CASES_END() /**< NULL terminate unit test array */
2388         }
2389 };
2390
2391 static struct unit_test_suite bbdev_validation_testsuite = {
2392         .suite_name = "BBdev Validation Tests",
2393         .setup = testsuite_setup,
2394         .teardown = testsuite_teardown,
2395         .unit_test_cases = {
2396                 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
2397                 TEST_CASES_END() /**< NULL terminate unit test array */
2398         }
2399 };
2400
2401 static struct unit_test_suite bbdev_latency_testsuite = {
2402         .suite_name = "BBdev Latency Tests",
2403         .setup = testsuite_setup,
2404         .teardown = testsuite_teardown,
2405         .unit_test_cases = {
2406                 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
2407                 TEST_CASES_END() /**< NULL terminate unit test array */
2408         }
2409 };
2410
2411 static struct unit_test_suite bbdev_offload_cost_testsuite = {
2412         .suite_name = "BBdev Offload Cost Tests",
2413         .setup = testsuite_setup,
2414         .teardown = testsuite_teardown,
2415         .unit_test_cases = {
2416                 TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
2417                 TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
2418                 TEST_CASES_END() /**< NULL terminate unit test array */
2419         }
2420 };
2421
2422 static struct unit_test_suite bbdev_interrupt_testsuite = {
2423         .suite_name = "BBdev Interrupt Tests",
2424         .setup = interrupt_testsuite_setup,
2425         .teardown = testsuite_teardown,
2426         .unit_test_cases = {
2427                 TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
2428                 TEST_CASES_END() /**< NULL terminate unit test array */
2429         }
2430 };
2431
2432 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
2433 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
2434 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
2435 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
2436 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);