1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <inttypes.h>
7 #include <math.h>
8
9 #include <rte_eal.h>
10 #include <rte_common.h>
11 #include <rte_dev.h>
12 #include <rte_launch.h>
13 #include <rte_bbdev.h>
14 #include <rte_cycles.h>
15 #include <rte_lcore.h>
16 #include <rte_malloc.h>
17 #include <rte_random.h>
18 #include <rte_hexdump.h>
19
20 #include "main.h"
21 #include "test_bbdev_vector.h"
22
23 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
24
25 #define MAX_QUEUES RTE_MAX_LCORE
26 #define TEST_REPETITIONS 1000
27
28 #define OPS_CACHE_SIZE 256U
29 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */
30
31 #define SYNC_WAIT 0
32 #define SYNC_START 1
33
34 #define INVALID_QUEUE_ID -1
35
36 static struct test_bbdev_vector test_vector;
37
38 /* Switch between polling (PMD) and interrupt mode for the throughput test case */
39 static bool intr_enabled;
40
41 /* Represents tested active devices */
42 static struct active_device {
43         const char *driver_name;
44         uint8_t dev_id;
45         uint16_t supported_ops;
46         uint16_t queue_ids[MAX_QUEUES];
47         uint16_t nb_queues;
48         struct rte_mempool *ops_mempool;
49         struct rte_mempool *in_mbuf_pool;
50         struct rte_mempool *hard_out_mbuf_pool;
51         struct rte_mempool *soft_out_mbuf_pool;
52 } active_devs[RTE_BBDEV_MAX_DEVS];
53
54 static uint8_t nb_active_devs;
55
56 /* Data buffers used by BBDEV ops */
57 struct test_buffers {
58         struct rte_bbdev_op_data *inputs;
59         struct rte_bbdev_op_data *hard_outputs;
60         struct rte_bbdev_op_data *soft_outputs;
61 };
62
63 /* Operation parameters specific to a given test case */
64 struct test_op_params {
65         struct rte_mempool *mp;
66         struct rte_bbdev_dec_op *ref_dec_op;
67         struct rte_bbdev_enc_op *ref_enc_op;
68         uint16_t burst_sz;
69         uint16_t num_to_process;
70         uint16_t num_lcores;
71         int vector_mask;
72         rte_atomic16_t sync;
73         struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
74 };
75
76 /* Contains per lcore params */
77 struct thread_params {
78         uint8_t dev_id;
79         uint16_t queue_id;
80         uint64_t start_time;
81         double ops_per_sec;
82         double mbps;
83         uint8_t iter_count;
84         rte_atomic16_t nb_dequeued;
85         rte_atomic16_t processing_status;
86         struct test_op_params *op_params;
87 };
88
89 #ifdef RTE_BBDEV_OFFLOAD_COST
90 /* Stores time statistics */
91 struct test_time_stats {
92         /* Stores software enqueue total working time */
93         uint64_t enq_sw_total_time;
94         /* Stores minimum value of software enqueue working time */
95         uint64_t enq_sw_min_time;
96         /* Stores maximum value of software enqueue working time */
97         uint64_t enq_sw_max_time;
98         /* Stores accelerator enqueue total working time */
99         uint64_t enq_acc_total_time;
100         /* Stores minimum value of accelerator enqueue working time */
101         uint64_t enq_acc_min_time;
102         /* Stores maximum value of accelerator enqueue working time */
103         uint64_t enq_acc_max_time;
104         /* Stores dequeue total working time */
105         uint64_t deq_total_time;
106         /* Stores minimum value of dequeue working time */
107         uint64_t deq_min_time;
108         /* Stores maximum value of dequeue working time */
109         uint64_t deq_max_time;
110 };
111 #endif
112
113 typedef int (test_case_function)(struct active_device *ad,
114                 struct test_op_params *op_params);
115
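/* Reset pkt_len and the data_len of every segment in a (possibly chained) mbuf */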
116 static inline void
117 mbuf_reset(struct rte_mbuf *m)
118 {
119         m->pkt_len = 0;
120
121         do {
122                 m->data_len = 0;
123                 m = m->next;
124         } while (m != NULL);
125 }
126
127 static inline void
128 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
129 {
130         ad->supported_ops |= (1 << op_type);
131 }
132
133 static inline bool
134 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
135 {
136         return ad->supported_ops & (1 << op_type);
137 }
138
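/* Return true if all requested flags are present in the capability flags */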
139 static inline bool
140 flags_match(uint32_t flags_req, uint32_t flags_present)
141 {
142         return (flags_req & flags_present) == flags_req;
143 }
144
145 static void
146 clear_soft_out_cap(uint32_t *op_flags)
147 {
148         *op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
149         *op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
150         *op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
151 }
152
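/* Verify that the device capabilities cover the requirements of the test vector */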
153 static int
154 check_dev_cap(const struct rte_bbdev_info *dev_info)
155 {
156         unsigned int i;
157         unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs;
158         const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
159
160         nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
161         nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
162         nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
163
164         for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
165                 if (op_cap->type != test_vector.op_type)
166                         continue;
167
168                 if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
169                         const struct rte_bbdev_op_cap_turbo_dec *cap =
170                                         &op_cap->cap.turbo_dec;
171                         /* Ignore lack of soft output capability, just skip
172                          * checking if soft output is valid.
173                          */
174                         if ((test_vector.turbo_dec.op_flags &
175                                         RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
176                                         !(cap->capability_flags &
177                                         RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
178                                 printf(
179                                         "WARNING: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
180                                         dev_info->dev_name);
181                                 clear_soft_out_cap(
182                                         &test_vector.turbo_dec.op_flags);
183                         }
184
185                         if (!flags_match(test_vector.turbo_dec.op_flags,
186                                         cap->capability_flags))
187                                 return TEST_FAILED;
188                         if (nb_inputs > cap->num_buffers_src) {
189                                 printf("Too many inputs defined: %u, max: %u\n",
190                                         nb_inputs, cap->num_buffers_src);
191                                 return TEST_FAILED;
192                         }
193                         if (nb_soft_outputs > cap->num_buffers_soft_out &&
194                                         (test_vector.turbo_dec.op_flags &
195                                         RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
196                                 printf(
197                                         "Too many soft outputs defined: %u, max: %u\n",
198                                                 nb_soft_outputs,
199                                                 cap->num_buffers_soft_out);
200                                 return TEST_FAILED;
201                         }
202                         if (nb_hard_outputs > cap->num_buffers_hard_out) {
203                                 printf(
204                                         "Too many hard outputs defined: %u, max: %u\n",
205                                                 nb_hard_outputs,
206                                                 cap->num_buffers_hard_out);
207                                 return TEST_FAILED;
208                         }
209                         if (intr_enabled && !(cap->capability_flags &
210                                         RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
211                                 printf(
212                                         "Dequeue interrupts are not supported!\n");
213                                 return TEST_FAILED;
214                         }
215
216                         return TEST_SUCCESS;
217                 } else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
218                         const struct rte_bbdev_op_cap_turbo_enc *cap =
219                                         &op_cap->cap.turbo_enc;
220
221                         if (!flags_match(test_vector.turbo_enc.op_flags,
222                                         cap->capability_flags))
223                                 return TEST_FAILED;
224                         if (nb_inputs > cap->num_buffers_src) {
225                                 printf("Too many inputs defined: %u, max: %u\n",
226                                         nb_inputs, cap->num_buffers_src);
227                                 return TEST_FAILED;
228                         }
229                         if (nb_hard_outputs > cap->num_buffers_dst) {
230                                 printf(
231                                         "Too many hard outputs defined: %u, max: %u\n",
232                                         nb_hard_outputs, cap->num_buffers_dst);
233                                 return TEST_FAILED;
234                         }
235                         if (intr_enabled && !(cap->capability_flags &
236                                         RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
237                                 printf(
238                                         "Dequeue interrupts are not supported!\n");
239                                 return TEST_FAILED;
240                         }
241
242                         return TEST_SUCCESS;
243                 }
244         }
245
246         if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
247                 return TEST_SUCCESS; /* Special case for NULL device */
248
249         return TEST_FAILED;
250 }
251
252 /* Calculate the optimal mempool size (of the form 2^n - 1) that is not smaller than val */
253 static unsigned int
254 optimal_mempool_size(unsigned int val)
255 {
256         return rte_align32pow2(val + 1) - 1;
257 }
258
259 /* allocates mbuf mempool for inputs and outputs */
260 static struct rte_mempool *
261 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
262                 int socket_id, unsigned int mbuf_pool_size,
263                 const char *op_type_str)
264 {
265         unsigned int i;
266         uint32_t max_seg_sz = 0;
267         char pool_name[RTE_MEMPOOL_NAMESIZE];
268
269         /* find max input segment size */
270         for (i = 0; i < entries->nb_segments; ++i)
271                 if (entries->segments[i].length > max_seg_sz)
272                         max_seg_sz = entries->segments[i].length;
273
274         snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
275                         dev_id);
276         return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
277                         RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM,
278                         (unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
279 }
280
281 static int
282 create_mempools(struct active_device *ad, int socket_id,
283                 enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
284 {
285         struct rte_mempool *mp;
286         unsigned int ops_pool_size, mbuf_pool_size = 0;
287         char pool_name[RTE_MEMPOOL_NAMESIZE];
288         const char *op_type_str;
289         enum rte_bbdev_op_type op_type = org_op_type;
290
291         struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
292         struct op_data_entries *hard_out =
293                         &test_vector.entries[DATA_HARD_OUTPUT];
294         struct op_data_entries *soft_out =
295                         &test_vector.entries[DATA_SOFT_OUTPUT];
296
297         /* allocate ops mempool */
298         ops_pool_size = optimal_mempool_size(RTE_MAX(
299                         /* Ops used plus 1 reference op */
300                         RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
301                         /* Minimal cache size plus 1 reference op */
302                         (unsigned int)(1.5 * rte_lcore_count() *
303                                         OPS_CACHE_SIZE + 1)),
304                         OPS_POOL_SIZE_MIN));
305
306         if (org_op_type == RTE_BBDEV_OP_NONE)
307                 op_type = RTE_BBDEV_OP_TURBO_ENC;
308
309         op_type_str = rte_bbdev_op_type_str(op_type);
310         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
311
312         snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
313                         ad->dev_id);
314         mp = rte_bbdev_op_pool_create(pool_name, op_type,
315                         ops_pool_size, OPS_CACHE_SIZE, socket_id);
316         TEST_ASSERT_NOT_NULL(mp,
317                         "ERROR Failed to create %u items ops pool for dev %u on socket %u.",
318                         ops_pool_size,
319                         ad->dev_id,
320                         socket_id);
321         ad->ops_mempool = mp;
322
323         /* Do not create input and output mbuf pools for the BaseBand Null device */
324         if (org_op_type == RTE_BBDEV_OP_NONE)
325                 return TEST_SUCCESS;
326
327         /* Inputs */
328         mbuf_pool_size = optimal_mempool_size(ops_pool_size * in->nb_segments);
329         mp = create_mbuf_pool(in, ad->dev_id, socket_id, mbuf_pool_size, "in");
330         TEST_ASSERT_NOT_NULL(mp,
331                         "ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
332                         mbuf_pool_size,
333                         ad->dev_id,
334                         socket_id);
335         ad->in_mbuf_pool = mp;
336
337         /* Hard outputs */
338         mbuf_pool_size = optimal_mempool_size(ops_pool_size *
339                         hard_out->nb_segments);
340         mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id, mbuf_pool_size,
341                         "hard_out");
342         TEST_ASSERT_NOT_NULL(mp,
343                         "ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
344                         mbuf_pool_size,
345                         ad->dev_id,
346                         socket_id);
347         ad->hard_out_mbuf_pool = mp;
348
349         if (soft_out->nb_segments == 0)
350                 return TEST_SUCCESS;
351
352         /* Soft outputs */
353         mbuf_pool_size = optimal_mempool_size(ops_pool_size *
354                         soft_out->nb_segments);
355         mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id, mbuf_pool_size,
356                         "soft_out");
357         TEST_ASSERT_NOT_NULL(mp,
358                         "ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
359                         mbuf_pool_size,
360                         ad->dev_id,
361                         socket_id);
362         ad->soft_out_mbuf_pool = mp;
363
364         return 0;
365 }
366
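/* Set up device queues (and optionally interrupts) and fill in the active_device entry */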
367 static int
368 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
369                 struct test_bbdev_vector *vector)
370 {
371         int ret;
372         unsigned int queue_id;
373         struct rte_bbdev_queue_conf qconf;
374         struct active_device *ad = &active_devs[nb_active_devs];
375         unsigned int nb_queues;
376         enum rte_bbdev_op_type op_type = vector->op_type;
377
378         nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
379         /* setup device */
380         ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
381         if (ret < 0) {
382                 printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
383                                 dev_id, nb_queues, info->socket_id, ret);
384                 return TEST_FAILED;
385         }
386
387         /* configure interrupts if needed */
388         if (intr_enabled) {
389                 ret = rte_bbdev_intr_enable(dev_id);
390                 if (ret < 0) {
391                         printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
392                                         ret);
393                         return TEST_FAILED;
394                 }
395         }
396
397         /* setup device queues */
398         qconf.socket = info->socket_id;
399         qconf.queue_size = info->drv.default_queue_conf.queue_size;
400         qconf.priority = 0;
401         qconf.deferred_start = 0;
402         qconf.op_type = op_type;
403
404         for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
405                 ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
406                 if (ret != 0) {
407                         printf(
408                                         "Allocated all queues (id=%u) at prio%u on dev%u\n",
409                                         queue_id, qconf.priority, dev_id);
410                         qconf.priority++;
411                         ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
412                                         &qconf);
413                 }
414                 if (ret != 0) {
415                         printf("All queues on dev %u allocated: %u\n",
416                                         dev_id, queue_id);
417                         break;
418                 }
419                 ad->queue_ids[queue_id] = queue_id;
420         }
421         TEST_ASSERT(queue_id != 0,
422                         "ERROR Failed to configure any queues on dev %u",
423                         dev_id);
424         ad->nb_queues = queue_id;
425
426         set_avail_op(ad, op_type);
427
428         return TEST_SUCCESS;
429 }
430
431 static int
432 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
433                 struct test_bbdev_vector *vector)
434 {
435         int ret;
436
437         active_devs[nb_active_devs].driver_name = info->drv.driver_name;
438         active_devs[nb_active_devs].dev_id = dev_id;
439
440         ret = add_bbdev_dev(dev_id, info, vector);
441         if (ret == TEST_SUCCESS)
442                 ++nb_active_devs;
443         return ret;
444 }
445
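/* Add every detected bbdev whose capabilities match the test vector to the active list */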
446 static uint8_t
447 populate_active_devices(void)
448 {
449         int ret;
450         uint8_t dev_id;
451         uint8_t nb_devs_added = 0;
452         struct rte_bbdev_info info;
453
454         RTE_BBDEV_FOREACH(dev_id) {
455                 rte_bbdev_info_get(dev_id, &info);
456
457                 if (check_dev_cap(&info)) {
458                         printf(
459                                 "Device %d (%s) does not support specified capabilities\n",
460                                         dev_id, info.dev_name);
461                         continue;
462                 }
463
464                 ret = add_active_device(dev_id, &info, &test_vector);
465                 if (ret != 0) {
466                         printf("Adding active bbdev %s skipped\n",
467                                         info.dev_name);
468                         continue;
469                 }
470                 nb_devs_added++;
471         }
472
473         return nb_devs_added;
474 }
475
476 static int
477 read_test_vector(void)
478 {
479         int ret;
480
481         memset(&test_vector, 0, sizeof(test_vector));
482         printf("Test vector file = %s\n", get_vector_filename());
483         ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
484         TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
485                         get_vector_filename());
486
487         return TEST_SUCCESS;
488 }
489
490 static int
491 testsuite_setup(void)
492 {
493         TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
494
495         if (populate_active_devices() == 0) {
496                 printf("No suitable devices found!\n");
497                 return TEST_SKIPPED;
498         }
499
500         return TEST_SUCCESS;
501 }
502
503 static int
504 interrupt_testsuite_setup(void)
505 {
506         TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");
507
508         /* Enable interrupts */
509         intr_enabled = true;
510
511         /* Special case for NULL device (RTE_BBDEV_OP_NONE) */
512         if (populate_active_devices() == 0 ||
513                         test_vector.op_type == RTE_BBDEV_OP_NONE) {
514                 intr_enabled = false;
515                 printf("No suitable devices found!\n");
516                 return TEST_SKIPPED;
517         }
518
519         return TEST_SUCCESS;
520 }
521
522 static void
523 testsuite_teardown(void)
524 {
525         uint8_t dev_id;
526
527         /* Unconfigure devices */
528         RTE_BBDEV_FOREACH(dev_id)
529                 rte_bbdev_close(dev_id);
530
531         /* Clear active devices structs. */
532         memset(active_devs, 0, sizeof(active_devs));
533         nb_active_devs = 0;
534 }
535
536 static int
537 ut_setup(void)
538 {
539         uint8_t i, dev_id;
540
541         for (i = 0; i < nb_active_devs; i++) {
542                 dev_id = active_devs[i].dev_id;
543                 /* reset bbdev stats */
544                 TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
545                                 "Failed to reset stats of bbdev %u", dev_id);
546                 /* start the device */
547                 TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
548                                 "Failed to start bbdev %u", dev_id);
549         }
550
551         return TEST_SUCCESS;
552 }
553
554 static void
555 ut_teardown(void)
556 {
557         uint8_t i, dev_id;
558         struct rte_bbdev_stats stats;
559
560         for (i = 0; i < nb_active_devs; i++) {
561                 dev_id = active_devs[i].dev_id;
562                 /* read stats and print */
563                 rte_bbdev_stats_get(dev_id, &stats);
564                 /* Stop the device */
565                 rte_bbdev_stop(dev_id);
566         }
567 }
568
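/* Allocate mbuf chains for each op and, for inputs, copy in the reference segment data */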
569 static int
570 init_op_data_objs(struct rte_bbdev_op_data *bufs,
571                 struct op_data_entries *ref_entries,
572                 struct rte_mempool *mbuf_pool, const uint16_t n,
573                 enum op_data_type op_type, uint16_t min_alignment)
574 {
575         int ret;
576         unsigned int i, j;
577
578         for (i = 0; i < n; ++i) {
579                 char *data;
580                 struct op_data_buf *seg = &ref_entries->segments[0];
581                 struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
582                 TEST_ASSERT_NOT_NULL(m_head,
583                                 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
584                                 op_type, n * ref_entries->nb_segments,
585                                 mbuf_pool->size);
586
587                 TEST_ASSERT_SUCCESS(((seg->length + RTE_PKTMBUF_HEADROOM) >
588                                 (uint32_t)UINT16_MAX),
589                                 "Given data is bigger than allowed mbuf segment size");
590
591                 bufs[i].data = m_head;
592                 bufs[i].offset = 0;
593                 bufs[i].length = 0;
594
595                 if (op_type == DATA_INPUT) {
596                         data = rte_pktmbuf_append(m_head, seg->length);
597                         TEST_ASSERT_NOT_NULL(data,
598                                         "Couldn't append %u bytes to mbuf from %d data type mbuf pool",
599                                         seg->length, op_type);
600
601                         TEST_ASSERT(data == RTE_PTR_ALIGN(data, min_alignment),
602                                         "Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
603                                         data, min_alignment);
604                         rte_memcpy(data, seg->addr, seg->length);
605                         bufs[i].length += seg->length;
606
607                         for (j = 1; j < ref_entries->nb_segments; ++j) {
608                                 struct rte_mbuf *m_tail =
609                                                 rte_pktmbuf_alloc(mbuf_pool);
610                                 TEST_ASSERT_NOT_NULL(m_tail,
611                                                 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
612                                                 op_type,
613                                                 n * ref_entries->nb_segments,
614                                                 mbuf_pool->size);
615                                 seg += 1;
616
617                                 data = rte_pktmbuf_append(m_tail, seg->length);
618                                 TEST_ASSERT_NOT_NULL(data,
619                                                 "Couldn't append %u bytes to mbuf from %d data type mbuf pool",
620                                                 seg->length, op_type);
621
622                                 TEST_ASSERT(data == RTE_PTR_ALIGN(data,
623                                                 min_alignment),
624                                                 "Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
625                                                 data, min_alignment);
626                                 rte_memcpy(data, seg->addr, seg->length);
627                                 bufs[i].length += seg->length;
628
629                                 ret = rte_pktmbuf_chain(m_head, m_tail);
630                                 TEST_ASSERT_SUCCESS(ret,
631                                                 "Couldn't chain mbufs from %d data type mbuf pool",
632                                                 op_type);
633                         }
634
635                 } else {
636
637                         /* allocate chained-mbuf for output buffer */
638                         for (j = 1; j < ref_entries->nb_segments; ++j) {
639                                 struct rte_mbuf *m_tail =
640                                                 rte_pktmbuf_alloc(mbuf_pool);
641                                 TEST_ASSERT_NOT_NULL(m_tail,
642                                                 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
643                                                 op_type,
644                                                 n * ref_entries->nb_segments,
645                                                 mbuf_pool->size);
646
647                                 ret = rte_pktmbuf_chain(m_head, m_tail);
648                                 TEST_ASSERT_SUCCESS(ret,
649                                                 "Couldn't chain mbufs from %d data type mbuf pool",
650                                                 op_type);
651                         }
652                 }
653         }
654
655         return 0;
656 }
657
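/* Allocate a zeroed op_data array on the given socket, falling back to other sockets on failure */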
658 static int
659 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
660                 const int socket)
661 {
662         int i;
663
664         *buffers = rte_zmalloc_socket(NULL, len, 0, socket);
665         if (*buffers == NULL) {
666                 printf("WARNING: Failed to allocate op_data on socket %d\n",
667                                 socket);
668                 /* try to allocate memory on other detected sockets */
669                 for (i = 0; i < socket; i++) {
670                         *buffers = rte_zmalloc_socket(NULL, len, 0, i);
671                         if (*buffers != NULL)
672                                 break;
673                 }
674         }
675
676         return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
677 }
678
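/* Rescale input LLR bytes so their magnitude does not exceed the device's max LLR modulus */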
679 static void
680 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
681                 uint16_t n, int8_t max_llr_modulus)
682 {
683         uint16_t i, byte_idx;
684
685         for (i = 0; i < n; ++i) {
686                 struct rte_mbuf *m = input_ops[i].data;
687                 while (m != NULL) {
688                         int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
689                                         input_ops[i].offset);
690                         for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
691                                         ++byte_idx)
692                                 llr[byte_idx] = round((double)max_llr_modulus *
693                                                 llr[byte_idx] / INT8_MAX);
694
695                         m = m->next;
696                 }
697         }
698 }
699
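/* Allocate per-queue op_data arrays and initialize them from the test vector entries */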
700 static int
701 fill_queue_buffers(struct test_op_params *op_params,
702                 struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
703                 struct rte_mempool *soft_out_mp, uint16_t queue_id,
704                 const struct rte_bbdev_op_cap *capabilities,
705                 uint16_t min_alignment, const int socket_id)
706 {
707         int ret;
708         enum op_data_type type;
709         const uint16_t n = op_params->num_to_process;
710
711         struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
712                 in_mp,
713                 soft_out_mp,
714                 hard_out_mp,
715         };
716
717         struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
718                 &op_params->q_bufs[socket_id][queue_id].inputs,
719                 &op_params->q_bufs[socket_id][queue_id].soft_outputs,
720                 &op_params->q_bufs[socket_id][queue_id].hard_outputs,
721         };
722
723         for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
724                 struct op_data_entries *ref_entries =
725                                 &test_vector.entries[type];
726                 if (ref_entries->nb_segments == 0)
727                         continue;
728
729                 ret = allocate_buffers_on_socket(queue_ops[type],
730                                 n * sizeof(struct rte_bbdev_op_data),
731                                 socket_id);
732                 TEST_ASSERT_SUCCESS(ret,
733                                 "Couldn't allocate memory for rte_bbdev_op_data structs");
734
735                 ret = init_op_data_objs(*queue_ops[type], ref_entries,
736                                 mbuf_pools[type], n, type, min_alignment);
737                 TEST_ASSERT_SUCCESS(ret,
738                                 "Couldn't init rte_bbdev_op_data structs");
739         }
740
741         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
742                 limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
743                         capabilities->cap.turbo_dec.max_llr_modulus);
744
745         return 0;
746 }
747
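/* Release the device mempools and all per-queue op_data arrays */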
748 static void
749 free_buffers(struct active_device *ad, struct test_op_params *op_params)
750 {
751         unsigned int i, j;
752
753         rte_mempool_free(ad->ops_mempool);
754         rte_mempool_free(ad->in_mbuf_pool);
755         rte_mempool_free(ad->hard_out_mbuf_pool);
756         rte_mempool_free(ad->soft_out_mbuf_pool);
757
758         for (i = 0; i < rte_lcore_count(); ++i) {
759                 for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
760                         rte_free(op_params->q_bufs[j][i].inputs);
761                         rte_free(op_params->q_bufs[j][i].hard_outputs);
762                         rte_free(op_params->q_bufs[j][i].soft_outputs);
763                 }
764         }
765 }
766
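/* Copy decoder parameters from the reference op into each op and attach its data buffers */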
767 static void
768 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
769                 unsigned int start_idx,
770                 struct rte_bbdev_op_data *inputs,
771                 struct rte_bbdev_op_data *hard_outputs,
772                 struct rte_bbdev_op_data *soft_outputs,
773                 struct rte_bbdev_dec_op *ref_op)
774 {
775         unsigned int i;
776         struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
777
778         for (i = 0; i < n; ++i) {
779                 if (turbo_dec->code_block_mode == 0) {
780                         ops[i]->turbo_dec.tb_params.ea =
781                                         turbo_dec->tb_params.ea;
782                         ops[i]->turbo_dec.tb_params.eb =
783                                         turbo_dec->tb_params.eb;
784                         ops[i]->turbo_dec.tb_params.k_pos =
785                                         turbo_dec->tb_params.k_pos;
786                         ops[i]->turbo_dec.tb_params.k_neg =
787                                         turbo_dec->tb_params.k_neg;
788                         ops[i]->turbo_dec.tb_params.c =
789                                         turbo_dec->tb_params.c;
790                         ops[i]->turbo_dec.tb_params.c_neg =
791                                         turbo_dec->tb_params.c_neg;
792                         ops[i]->turbo_dec.tb_params.cab =
793                                         turbo_dec->tb_params.cab;
794                         ops[i]->turbo_dec.tb_params.r =
795                                         turbo_dec->tb_params.r;
796                 } else {
797                         ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
798                         ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
799                 }
800
801                 ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
802                 ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
803                 ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
804                 ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
805                 ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
806                 ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
807                 ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;
808
809                 ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
810                 ops[i]->turbo_dec.input = inputs[start_idx + i];
811                 if (soft_outputs != NULL)
812                         ops[i]->turbo_dec.soft_output =
813                                 soft_outputs[start_idx + i];
814         }
815 }
816
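/* Copy encoder parameters from the reference op into each op and attach its data buffers */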
817 static void
818 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
819                 unsigned int start_idx,
820                 struct rte_bbdev_op_data *inputs,
821                 struct rte_bbdev_op_data *outputs,
822                 struct rte_bbdev_enc_op *ref_op)
823 {
824         unsigned int i;
825         struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
826         for (i = 0; i < n; ++i) {
827                 if (turbo_enc->code_block_mode == 0) {
828                         ops[i]->turbo_enc.tb_params.ea =
829                                         turbo_enc->tb_params.ea;
830                         ops[i]->turbo_enc.tb_params.eb =
831                                         turbo_enc->tb_params.eb;
832                         ops[i]->turbo_enc.tb_params.k_pos =
833                                         turbo_enc->tb_params.k_pos;
834                         ops[i]->turbo_enc.tb_params.k_neg =
835                                         turbo_enc->tb_params.k_neg;
836                         ops[i]->turbo_enc.tb_params.c =
837                                         turbo_enc->tb_params.c;
838                         ops[i]->turbo_enc.tb_params.c_neg =
839                                         turbo_enc->tb_params.c_neg;
840                         ops[i]->turbo_enc.tb_params.cab =
841                                         turbo_enc->tb_params.cab;
842                         ops[i]->turbo_enc.tb_params.ncb_pos =
843                                         turbo_enc->tb_params.ncb_pos;
844                         ops[i]->turbo_enc.tb_params.ncb_neg =
845                                         turbo_enc->tb_params.ncb_neg;
846                         ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
847                 } else {
848                         ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
849                         ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
850                         ops[i]->turbo_enc.cb_params.ncb =
851                                         turbo_enc->cb_params.ncb;
852                 }
853                 ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
854                 ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
855                 ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;
856
857                 ops[i]->turbo_enc.output = outputs[start_idx + i];
858                 ops[i]->turbo_enc.input = inputs[start_idx + i];
859         }
860 }
861
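/* Check the op status and that opaque_data still carries the expected enqueue order index */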
862 static int
863 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
864                 unsigned int order_idx, const int expected_status)
865 {
866         TEST_ASSERT(op->status == expected_status,
867                         "op_status (%d) != expected_status (%d)",
868                         op->status, expected_status);
869
870         TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
871                         "Ordering error, expected %p, got %p",
872                         (void *)(uintptr_t)order_idx, op->opaque_data);
873
874         return TEST_SUCCESS;
875 }
876
877 static int
878 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
879                 unsigned int order_idx, const int expected_status)
880 {
881         TEST_ASSERT(op->status == expected_status,
882                         "op_status (%d) != expected_status (%d)",
883                         op->status, expected_status);
884
885         TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
886                         "Ordering error, expected %p, got %p",
887                         (void *)(uintptr_t)order_idx, op->opaque_data);
888
889         return TEST_SUCCESS;
890 }
891
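/* Compare an output mbuf chain against the reference segments, both length and content */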
892 static inline int
893 validate_op_chain(struct rte_bbdev_op_data *op,
894                 struct op_data_entries *orig_op)
895 {
896         uint8_t i;
897         struct rte_mbuf *m = op->data;
898         uint8_t nb_dst_segments = orig_op->nb_segments;
899         uint32_t total_data_size = 0;
900
901         TEST_ASSERT(nb_dst_segments == m->nb_segs,
902                         "Number of segments differ in original (%u) and filled (%u) op",
903                         nb_dst_segments, m->nb_segs);
904
905         /* Validate each mbuf segment length */
906         for (i = 0; i < nb_dst_segments; ++i) {
907                 /* Apply offset to the first mbuf segment */
908                 uint16_t offset = (i == 0) ? op->offset : 0;
909                 uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
910                 total_data_size += orig_op->segments[i].length;
911
912                 TEST_ASSERT(orig_op->segments[i].length == data_len,
913                                 "Length of segment differ in original (%u) and filled (%u) op",
914                                 orig_op->segments[i].length, data_len);
915                 TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
916                                 rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
917                                 data_len,
918                                 "Output buffers (CB=%u) are not equal", i);
919                 m = m->next;
920         }
921
922         /* Validate total mbuf pkt length */
923         uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
924         TEST_ASSERT(total_data_size == pkt_len,
925                         "Length of data differ in original (%u) and filled (%u) op",
926                         total_data_size, pkt_len);
927
928         return TEST_SUCCESS;
929 }
930
931 static int
932 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
933                 struct rte_bbdev_dec_op *ref_op, const int vector_mask)
934 {
935         unsigned int i;
936         int ret;
937         struct op_data_entries *hard_data_orig =
938                         &test_vector.entries[DATA_HARD_OUTPUT];
939         struct op_data_entries *soft_data_orig =
940                         &test_vector.entries[DATA_SOFT_OUTPUT];
941         struct rte_bbdev_op_turbo_dec *ops_td;
942         struct rte_bbdev_op_data *hard_output;
943         struct rte_bbdev_op_data *soft_output;
944         struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;
945
946         for (i = 0; i < n; ++i) {
947                 ops_td = &ops[i]->turbo_dec;
948                 hard_output = &ops_td->hard_output;
949                 soft_output = &ops_td->soft_output;
950
951                 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
952                         TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
953                                         "Returned iter_count (%d) > expected iter_count (%d)",
954                                         ops_td->iter_count, ref_td->iter_count);
955                 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
956                 TEST_ASSERT_SUCCESS(ret,
957                                 "Checking status and ordering for decoder failed");
958
959                 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
960                                 hard_data_orig),
961                                 "Hard output buffers (CB=%u) are not equal",
962                                 i);
963
964                 if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
965                         TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
966                                         soft_data_orig),
967                                         "Soft output buffers (CB=%u) are not equal",
968                                         i);
969         }
970
971         return TEST_SUCCESS;
972 }
973
974 static int
975 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
976                 struct rte_bbdev_enc_op *ref_op)
977 {
978         unsigned int i;
979         int ret;
980         struct op_data_entries *hard_data_orig =
981                         &test_vector.entries[DATA_HARD_OUTPUT];
982
983         for (i = 0; i < n; ++i) {
984                 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
985                 TEST_ASSERT_SUCCESS(ret,
986                                 "Checking status and ordering for encoder failed");
987                 TEST_ASSERT_SUCCESS(validate_op_chain(
988                                 &ops[i]->turbo_enc.output,
989                                 hard_data_orig),
990                                 "Output buffers (CB=%u) are not equal",
991                                 i);
992         }
993
994         return TEST_SUCCESS;
995 }
996
997 static void
998 create_reference_dec_op(struct rte_bbdev_dec_op *op)
999 {
1000         unsigned int i;
1001         struct op_data_entries *entry;
1002
1003         op->turbo_dec = test_vector.turbo_dec;
1004         entry = &test_vector.entries[DATA_INPUT];
1005         for (i = 0; i < entry->nb_segments; ++i)
1006                 op->turbo_dec.input.length +=
1007                                 entry->segments[i].length;
1008 }
1009
1010 static void
1011 create_reference_enc_op(struct rte_bbdev_enc_op *op)
1012 {
1013         unsigned int i;
1014         struct op_data_entries *entry;
1015
1016         op->turbo_enc = test_vector.turbo_enc;
1017         entry = &test_vector.entries[DATA_INPUT];
1018         for (i = 0; i < entry->nb_segments; ++i)
1019                 op->turbo_enc.input.length +=
1020                                 entry->segments[i].length;
1021 }
1022
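/* Transport block size in bits, used for the throughput (Mbps) figures */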
1023 static uint32_t
1024 calc_dec_TB_size(struct rte_bbdev_dec_op *op)
1025 {
1026         uint8_t i;
1027         uint32_t c, r, tb_size = 0;
1028
1029         if (op->turbo_dec.code_block_mode) {
1030                 tb_size = op->turbo_dec.tb_params.k_neg;
1031         } else {
1032                 c = op->turbo_dec.tb_params.c;
1033                 r = op->turbo_dec.tb_params.r;
1034                 for (i = r; i < c; i++)
1035                         tb_size += (i < op->turbo_dec.tb_params.c_neg) ?
1036                                 op->turbo_dec.tb_params.k_neg :
1037                                 op->turbo_dec.tb_params.k_pos;
1038         }
1039         return tb_size;
1040 }
1041
1042 static uint32_t
1043 calc_enc_TB_size(struct rte_bbdev_enc_op *op)
1044 {
1045         uint8_t i;
1046         uint32_t c, r, tb_size = 0;
1047
1048         if (op->turbo_enc.code_block_mode) {
1049                 tb_size = op->turbo_enc.tb_params.k_neg;
1050         } else {
1051                 c = op->turbo_enc.tb_params.c;
1052                 r = op->turbo_enc.tb_params.r;
1053                 for (i = r; i < c; i++)
1054                         tb_size += (i < op->turbo_enc.tb_params.c_neg) ?
1055                                 op->turbo_enc.tb_params.k_neg :
1056                                 op->turbo_enc.tb_params.k_pos;
1057         }
1058         return tb_size;
1059 }
1060
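/* Allocate the reference op and fill in the common per-test operation parameters */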
1061 static int
1062 init_test_op_params(struct test_op_params *op_params,
1063                 enum rte_bbdev_op_type op_type, const int expected_status,
1064                 const int vector_mask, struct rte_mempool *ops_mp,
1065                 uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
1066 {
1067         int ret = 0;
1068         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
1069                 ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
1070                                 &op_params->ref_dec_op, 1);
1071         else
1072                 ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
1073                                 &op_params->ref_enc_op, 1);
1074
1075         TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
1076
1077         op_params->mp = ops_mp;
1078         op_params->burst_sz = burst_sz;
1079         op_params->num_to_process = num_to_process;
1080         op_params->num_lcores = num_lcores;
1081         op_params->vector_mask = vector_mask;
1082         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
1083                 op_params->ref_dec_op->status = expected_status;
1084         else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
1085                 op_params->ref_enc_op->status = expected_status;
1086
1087         return 0;
1088 }
1089
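/* Prepare mempools and queue buffers for one device, run the test case, then free resources */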
1090 static int
1091 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
1092                 struct test_op_params *op_params)
1093 {
1094         int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
1095         unsigned int i;
1096         struct active_device *ad;
1097         unsigned int burst_sz = get_burst_sz();
1098         enum rte_bbdev_op_type op_type = test_vector.op_type;
1099         const struct rte_bbdev_op_cap *capabilities = NULL;
1100
1101         ad = &active_devs[dev_id];
1102
1103         /* Check if device supports op_type */
1104         if (!is_avail_op(ad, test_vector.op_type))
1105                 return TEST_SUCCESS;
1106
1107         struct rte_bbdev_info info;
1108         rte_bbdev_info_get(ad->dev_id, &info);
1109         socket_id = GET_SOCKET(info.socket_id);
1110
1111         f_ret = create_mempools(ad, socket_id, op_type,
1112                         get_num_ops());
1113         if (f_ret != TEST_SUCCESS) {
1114                 printf("Couldn't create mempools\n");
1115                 goto fail;
1116         }
1117         if (op_type == RTE_BBDEV_OP_NONE)
1118                 op_type = RTE_BBDEV_OP_TURBO_ENC;
1119
1120         f_ret = init_test_op_params(op_params, test_vector.op_type,
1121                         test_vector.expected_status,
1122                         test_vector.mask,
1123                         ad->ops_mempool,
1124                         burst_sz,
1125                         get_num_ops(),
1126                         get_num_lcores());
1127         if (f_ret != TEST_SUCCESS) {
1128                 printf("Couldn't init test op params\n");
1129                 goto fail;
1130         }
1131
1132         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1133                 /* Find Decoder capabilities */
1134                 const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
1135                 while (cap->type != RTE_BBDEV_OP_NONE) {
1136                         if (cap->type == RTE_BBDEV_OP_TURBO_DEC) {
1137                                 capabilities = cap;
1138                                 break;
1139                         }
                             cap++;
1140                 }
1141                 TEST_ASSERT_NOT_NULL(capabilities,
1142                                 "Couldn't find Decoder capabilities");
1143
1144                 create_reference_dec_op(op_params->ref_dec_op);
1145         } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
1146                 create_reference_enc_op(op_params->ref_enc_op);
1147
1148         for (i = 0; i < ad->nb_queues; ++i) {
1149                 f_ret = fill_queue_buffers(op_params,
1150                                 ad->in_mbuf_pool,
1151                                 ad->hard_out_mbuf_pool,
1152                                 ad->soft_out_mbuf_pool,
1153                                 ad->queue_ids[i],
1154                                 capabilities,
1155                                 info.drv.min_alignment,
1156                                 socket_id);
1157                 if (f_ret != TEST_SUCCESS) {
1158                         printf("Couldn't init queue buffers\n");
1159                         goto fail;
1160                 }
1161         }
1162
1163         /* Run test case function */
1164         t_ret = test_case_func(ad, op_params);
1165
1166         /* Free active device resources and return */
1167         free_buffers(ad, op_params);
1168         return t_ret;
1169
1170 fail:
1171         free_buffers(ad, op_params);
1172         return TEST_FAILED;
1173 }
1174
1175 /* Run given test function per active device per supported op type
1176  * per burst size.
1177  */
1178 static int
1179 run_test_case(test_case_function *test_case_func)
1180 {
1181         int ret = 0;
1182         uint8_t dev;
1183
1184         /* Alloc op_params */
1185         struct test_op_params *op_params = rte_zmalloc(NULL,
1186                         sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
1187         TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
1188                         RTE_ALIGN(sizeof(struct test_op_params),
1189                                 RTE_CACHE_LINE_SIZE));
1190
1191         /* For each device run test case function */
1192         for (dev = 0; dev < nb_active_devs; ++dev)
1193                 ret |= run_test_case_on_device(test_case_func, dev, op_params);
1194
1195         rte_free(op_params);
1196
1197         return ret;
1198 }
1199
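/* Dequeue-interrupt callback: drain completed ops, validate them and compute throughput */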
1200 static void
1201 dequeue_event_callback(uint16_t dev_id,
1202                 enum rte_bbdev_event_type event, void *cb_arg,
1203                 void *ret_param)
1204 {
1205         int ret;
1206         uint16_t i;
1207         uint64_t total_time;
1208         uint16_t deq, burst_sz, num_ops;
1209         uint16_t queue_id = INVALID_QUEUE_ID;
1210         struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
1211         struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
1212         struct rte_bbdev_info info;
1213
1214         double tb_len_bits;
1215
1216         struct thread_params *tp = cb_arg;
1217         RTE_SET_USED(ret_param);
1218         queue_id = tp->queue_id;
1219
1220         /* Find matching thread params using queue_id */
1221         for (i = 0; i < MAX_QUEUES; ++i, ++tp)
1222                 if (tp->queue_id == queue_id)
1223                         break;
1224
1225         if (i == MAX_QUEUES) {
1226                 printf("%s: Queue_id from interrupt details was not found!\n",
1227                                 __func__);
1228                 return;
1229         }
1230
1231         if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
1232                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1233                 printf(
1234                         "Dequeue interrupt handler called for incorrect event!\n");
1235                 return;
1236         }
1237
1238         burst_sz = tp->op_params->burst_sz;
1239         num_ops = tp->op_params->num_to_process;
1240
1241         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1242                 deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id, dec_ops,
1243                                 burst_sz);
1244                 rte_bbdev_dec_op_free_bulk(dec_ops, deq);
1245         } else {
1246                 deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id, enc_ops,
1247                                 burst_sz);
1248                 rte_bbdev_enc_op_free_bulk(enc_ops, deq);
1249         }
1250
1251         if (deq < burst_sz) {
1252                 printf(
1253                         "After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
1254                         burst_sz, deq);
1255                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1256                 return;
1257         }
1258
1259         if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
1260                 rte_atomic16_add(&tp->nb_dequeued, deq);
1261                 return;
1262         }
1263
1264         total_time = rte_rdtsc_precise() - tp->start_time;
1265
1266         rte_bbdev_info_get(dev_id, &info);
1267
1268         ret = TEST_SUCCESS;
1269
1270         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
1271                 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
1272                 ret = validate_dec_op(dec_ops, num_ops, ref_op,
1273                                 tp->op_params->vector_mask);
1274                 rte_bbdev_dec_op_free_bulk(dec_ops, deq);
1275         } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
1276                 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
1277                 ret = validate_enc_op(enc_ops, num_ops, ref_op);
1278                 rte_bbdev_enc_op_free_bulk(enc_ops, deq);
1279         }
1280
1281         if (ret) {
1282                 printf("Buffers validation failed\n");
1283                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1284         }
1285
1286         switch (test_vector.op_type) {
1287         case RTE_BBDEV_OP_TURBO_DEC:
1288                 tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
1289                 break;
1290         case RTE_BBDEV_OP_TURBO_ENC:
1291                 tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
1292                 break;
1293         case RTE_BBDEV_OP_NONE:
1294                 tb_len_bits = 0.0;
1295                 break;
1296         default:
1297                 printf("Unknown op type: %d\n", test_vector.op_type);
1298                 rte_atomic16_set(&tp->processing_status, TEST_FAILED);
1299                 return;
1300         }
1301
1302         tp->ops_per_sec = ((double)num_ops) /
1303                         ((double)total_time / (double)rte_get_tsc_hz());
1304         tp->mbps = (((double)(num_ops * tb_len_bits)) / 1000000.0) /
1305                         ((double)total_time / (double)rte_get_tsc_hz());
1306
1307         rte_atomic16_add(&tp->nb_dequeued, deq);
1308 }
1309
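/* Per-lcore enqueue routine for the decoder throughput test in interrupt mode */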
1310 static int
1311 throughput_intr_lcore_dec(void *arg)
1312 {
1313         struct thread_params *tp = arg;
1314         unsigned int enqueued;
1315         const uint16_t queue_id = tp->queue_id;
1316         const uint16_t burst_sz = tp->op_params->burst_sz;
1317         const uint16_t num_to_process = tp->op_params->num_to_process;
1318         struct rte_bbdev_dec_op *ops[num_to_process];
1319         struct test_buffers *bufs = NULL;
1320         struct rte_bbdev_info info;
1321         int ret;
1322         uint16_t num_to_enq;
1323
1324         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1325                         "BURST_SIZE should be <= %u", MAX_BURST);
1326
1327         TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
1328                         "Failed to enable interrupts for dev: %u, queue_id: %u",
1329                         tp->dev_id, queue_id);
1330
1331         rte_bbdev_info_get(tp->dev_id, &info);
1332
1333         TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
1334                         "NUM_OPS cannot exceed %u for this device",
1335                         info.drv.queue_size_lim);
1336
1337         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1338
1339         rte_atomic16_clear(&tp->processing_status);
1340         rte_atomic16_clear(&tp->nb_dequeued);
1341
1342         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1343                 rte_pause();
1344
1345         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
1346                                 num_to_process);
1347         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1348                         num_to_process);
1349         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1350                 copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
1351                                 bufs->hard_outputs, bufs->soft_outputs,
1352                                 tp->op_params->ref_dec_op);
1353
1354         tp->start_time = rte_rdtsc_precise();
1355         for (enqueued = 0; enqueued < num_to_process;) {
1356
1357                 num_to_enq = burst_sz;
1358
1359                 if (unlikely(num_to_process - enqueued < num_to_enq))
1360                         num_to_enq = num_to_process - enqueued;
1361
1362                 enqueued += rte_bbdev_enqueue_dec_ops(tp->dev_id, queue_id,
1363                                 &ops[enqueued], num_to_enq);
1364         }
1365
1366         return TEST_SUCCESS;
1367 }
1368
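/*
 * Per-lcore worker for the interrupt-mode encode throughput test; enqueue
 * only, results are collected by dequeue_event_callback().
 */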
1369 static int
1370 throughput_intr_lcore_enc(void *arg)
1371 {
1372         struct thread_params *tp = arg;
1373         unsigned int enqueued;
1374         const uint16_t queue_id = tp->queue_id;
1375         const uint16_t burst_sz = tp->op_params->burst_sz;
1376         const uint16_t num_to_process = tp->op_params->num_to_process;
1377         struct rte_bbdev_enc_op *ops[num_to_process];
1378         struct test_buffers *bufs = NULL;
1379         struct rte_bbdev_info info;
1380         int ret;
1381         uint16_t num_to_enq;
1382
1383         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1384                         "BURST_SIZE should be <= %u", MAX_BURST);
1385
1386         TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
1387                         "Failed to enable interrupts for dev: %u, queue_id: %u",
1388                         tp->dev_id, queue_id);
1389
1390         rte_bbdev_info_get(tp->dev_id, &info);
1391
1392         TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
1393                         "NUM_OPS cannot exceed %u for this device",
1394                         info.drv.queue_size_lim);
1395
1396         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1397
1398         rte_atomic16_clear(&tp->processing_status);
1399         rte_atomic16_clear(&tp->nb_dequeued);
1400
1401         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1402                 rte_pause();
1403
1404         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
1405                         num_to_process);
1406         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1407                         num_to_process);
1408         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1409                 copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
1410                                 bufs->hard_outputs, tp->op_params->ref_enc_op);
1411
1412         tp->start_time = rte_rdtsc_precise();
1413         for (enqueued = 0; enqueued < num_to_process;) {
1414
1415                 num_to_enq = burst_sz;
1416
1417                 if (unlikely(num_to_process - enqueued < num_to_enq))
1418                         num_to_enq = num_to_process - enqueued;
1419
1420                 enqueued += rte_bbdev_enqueue_enc_ops(tp->dev_id, queue_id,
1421                                 &ops[enqueued], num_to_enq);
1422         }
1423
1424         return TEST_SUCCESS;
1425 }
1426
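/*
 * Per-lcore worker for the PMD (polling) decode throughput test: the same set
 * of ops is enqueued and dequeued TEST_REPETITIONS times and Ops/s and Mbps
 * are derived from the accumulated cycle count.
 */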
1427 static int
1428 throughput_pmd_lcore_dec(void *arg)
1429 {
1430         struct thread_params *tp = arg;
1431         uint16_t enq, deq;
1432         uint64_t total_time = 0, start_time;
1433         const uint16_t queue_id = tp->queue_id;
1434         const uint16_t burst_sz = tp->op_params->burst_sz;
1435         const uint16_t num_ops = tp->op_params->num_to_process;
1436         struct rte_bbdev_dec_op *ops_enq[num_ops];
1437         struct rte_bbdev_dec_op *ops_deq[num_ops];
1438         struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
1439         struct test_buffers *bufs = NULL;
1440         int i, j, ret;
1441         struct rte_bbdev_info info;
1442         uint16_t num_to_enq;
1443
1444         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1445                         "BURST_SIZE should be <= %u", MAX_BURST);
1446
1447         rte_bbdev_info_get(tp->dev_id, &info);
1448
1449         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
1450                         "NUM_OPS cannot exceed %u for this device",
1451                         info.drv.queue_size_lim);
1452
1453         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1454
1455         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1456                 rte_pause();
1457
1458         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
1459         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
1460
1461         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1462                 copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
1463                                 bufs->hard_outputs, bufs->soft_outputs, ref_op);
1464
1465         /* Set counter to validate the ordering */
1466         for (j = 0; j < num_ops; ++j)
1467                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1468
1469         for (i = 0; i < TEST_REPETITIONS; ++i) {
1470
1471                 for (j = 0; j < num_ops; ++j)
1472                         mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
1473
1474                 start_time = rte_rdtsc_precise();
1475
1476                 for (enq = 0, deq = 0; enq < num_ops;) {
1477                         num_to_enq = burst_sz;
1478
1479                         if (unlikely(num_ops - enq < num_to_enq))
1480                                 num_to_enq = num_ops - enq;
1481
1482                         enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
1483                                         queue_id, &ops_enq[enq], num_to_enq);
1484
1485                         deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
1486                                         queue_id, &ops_deq[deq], enq - deq);
1487                 }
1488
1489                 /* dequeue the remaining */
1490                 while (deq < enq) {
1491                         deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
1492                                         queue_id, &ops_deq[deq], enq - deq);
1493                 }
1494
1495                 total_time += rte_rdtsc_precise() - start_time;
1496         }
1497
1498         tp->iter_count = 0;
1499         /* get the max of iter_count for all dequeued ops */
1500         for (i = 0; i < num_ops; ++i) {
1501                 tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
1502                                 tp->iter_count);
1503         }
1504
1505         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1506                 ret = validate_dec_op(ops_deq, num_ops, ref_op,
1507                                 tp->op_params->vector_mask);
1508                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
1509         }
1510
1511         rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
1512
1513         double tb_len_bits = calc_dec_TB_size(ref_op);
1514
1515         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
1516                         ((double)total_time / (double)rte_get_tsc_hz());
1517         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
1518                         1000000.0) / ((double)total_time /
1519                         (double)rte_get_tsc_hz());
1520
1521         return TEST_SUCCESS;
1522 }
1523
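/*
 * Per-lcore worker for the PMD (polling) encode throughput test; mirrors
 * throughput_pmd_lcore_dec() for the encode direction.
 */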
1524 static int
1525 throughput_pmd_lcore_enc(void *arg)
1526 {
1527         struct thread_params *tp = arg;
1528         uint16_t enq, deq;
1529         uint64_t total_time = 0, start_time;
1530         const uint16_t queue_id = tp->queue_id;
1531         const uint16_t burst_sz = tp->op_params->burst_sz;
1532         const uint16_t num_ops = tp->op_params->num_to_process;
1533         struct rte_bbdev_enc_op *ops_enq[num_ops];
1534         struct rte_bbdev_enc_op *ops_deq[num_ops];
1535         struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
1536         struct test_buffers *bufs = NULL;
1537         int i, j, ret;
1538         struct rte_bbdev_info info;
1539         uint16_t num_to_enq;
1540
1541         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1542                         "BURST_SIZE should be <= %u", MAX_BURST);
1543
1544         rte_bbdev_info_get(tp->dev_id, &info);
1545
1546         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
1547                         "NUM_OPS cannot exceed %u for this device",
1548                         info.drv.queue_size_lim);
1549
1550         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1551
1552         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1553                 rte_pause();
1554
1555         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
1556                         num_ops);
1557         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1558                         num_ops);
1559         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1560                 copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
1561                                 bufs->hard_outputs, ref_op);
1562
1563         /* Set counter to validate the ordering */
1564         for (j = 0; j < num_ops; ++j)
1565                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1566
1567         for (i = 0; i < TEST_REPETITIONS; ++i) {
1568
1569                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1570                         for (j = 0; j < num_ops; ++j)
1571                                 mbuf_reset(ops_enq[j]->turbo_enc.output.data);
1572
1573                 start_time = rte_rdtsc_precise();
1574
1575                 for (enq = 0, deq = 0; enq < num_ops;) {
1576                         num_to_enq = burst_sz;
1577
1578                         if (unlikely(num_ops - enq < num_to_enq))
1579                                 num_to_enq = num_ops - enq;
1580
1581                         enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
1582                                         queue_id, &ops_enq[enq], num_to_enq);
1583
1584                         deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
1585                                         queue_id, &ops_deq[deq], enq - deq);
1586                 }
1587
1588                 /* dequeue the remaining */
1589                 while (deq < enq) {
1590                         deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
1591                                         queue_id, &ops_deq[deq], enq - deq);
1592                 }
1593
1594                 total_time += rte_rdtsc_precise() - start_time;
1595         }
1596
1597         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1598                 ret = validate_enc_op(ops_deq, num_ops, ref_op);
1599                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
1600         }
1601
1602         double tb_len_bits = calc_enc_TB_size(ref_op);
1603
1604         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
1605                         ((double)total_time / (double)rte_get_tsc_hz());
1606         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
1607                         / 1000000.0) / ((double)total_time /
1608                         (double)rte_get_tsc_hz());
1609
1610         return TEST_SUCCESS;
1611 }
1612
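/* Print per-core and aggregated encode throughput figures */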
1613 static void
1614 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
1615 {
1616         unsigned int lcore_id, iter = 0;
1617         double total_mops = 0, total_mbps = 0;
1618
1619         RTE_LCORE_FOREACH(lcore_id) {
1620                 if (iter++ >= used_cores)
1621                         break;
1622                 printf(
1623                                 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
1624                                 lcore_id, t_params[lcore_id].ops_per_sec,
1625                                 t_params[lcore_id].mbps);
1626                 total_mops += t_params[lcore_id].ops_per_sec;
1627                 total_mbps += t_params[lcore_id].mbps;
1628         }
1629         printf(
1630                 "\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps\n",
1631                 used_cores, total_mops, total_mbps);
1632 }
1633
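/* Print per-core and aggregated decode throughput, plus the max iteration count */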
1634 static void
1635 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
1636 {
1637         unsigned int lcore_id, iter = 0;
1638         double total_mops = 0, total_mbps = 0;
1639         uint8_t iter_count = 0;
1640
1641         RTE_LCORE_FOREACH(lcore_id) {
1642                 if (iter++ >= used_cores)
1643                         break;
1644                 printf(
1645                                 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
1646                                 lcore_id, t_params[lcore_id].ops_per_sec,
1647                                 t_params[lcore_id].mbps,
1648                                 t_params[lcore_id].iter_count);
1649                 total_mops += t_params[lcore_id].ops_per_sec;
1650                 total_mbps += t_params[lcore_id].mbps;
1651                 iter_count = RTE_MAX(iter_count, t_params[lcore_id].iter_count);
1652         }
1653         printf(
1654                 "\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
1655                 used_cores, total_mops, total_mbps, iter_count);
1656 }
1657
1658 /*
1659  * Test function that determines how long an enqueue + dequeue of a burst
1660  * takes on available lcores.
1661  */
1662 static int
1663 throughput_test(struct active_device *ad,
1664                 struct test_op_params *op_params)
1665 {
1666         int ret;
1667         unsigned int lcore_id, used_cores = 0;
1668         struct thread_params t_params[MAX_QUEUES];
1669         struct rte_bbdev_info info;
1670         lcore_function_t *throughput_function;
1671         struct thread_params *tp;
1672         uint16_t num_lcores;
1673         const char *op_type_str;
1674
1675         rte_bbdev_info_get(ad->dev_id, &info);
1676
1677         op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
1678         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
1679                         test_vector.op_type);
1680
1681         printf(
1682                 "Throughput test: dev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, int mode: %s, GHz: %lg\n",
1683                         info.dev_name, ad->nb_queues, op_params->burst_sz,
1684                         op_params->num_to_process, op_params->num_lcores,
1685                         op_type_str,
1686                         intr_enabled ? "Interrupt mode" : "PMD mode",
1687                         (double)rte_get_tsc_hz() / 1000000000.0);
1688
1689         /* Set number of lcores */
1690         num_lcores = (ad->nb_queues < (op_params->num_lcores))
1691                         ? ad->nb_queues
1692                         : op_params->num_lcores;
1693
1694         if (intr_enabled) {
1695                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1696                         throughput_function = throughput_intr_lcore_dec;
1697                 else
1698                         throughput_function = throughput_intr_lcore_enc;
1699
1700                 /* Dequeue interrupt callback registration */
1701                 ret = rte_bbdev_callback_register(ad->dev_id,
1702                                 RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
1703                                 &t_params);
1704                 if (ret < 0)
1705                         return ret;
1706         } else {
1707                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1708                         throughput_function = throughput_pmd_lcore_dec;
1709                 else
1710                         throughput_function = throughput_pmd_lcore_enc;
1711         }
1712
1713         rte_atomic16_set(&op_params->sync, SYNC_WAIT);
1714
1715         t_params[rte_lcore_id()].dev_id = ad->dev_id;
1716         t_params[rte_lcore_id()].op_params = op_params;
1717         t_params[rte_lcore_id()].queue_id =
1718                         ad->queue_ids[used_cores++];
1719
1720         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
1721                 if (used_cores >= num_lcores)
1722                         break;
1723
1724                 t_params[lcore_id].dev_id = ad->dev_id;
1725                 t_params[lcore_id].op_params = op_params;
1726                 t_params[lcore_id].queue_id = ad->queue_ids[used_cores++];
1727
1728                 rte_eal_remote_launch(throughput_function, &t_params[lcore_id],
1729                                 lcore_id);
1730         }
1731
1732         rte_atomic16_set(&op_params->sync, SYNC_START);
1733         ret = throughput_function(&t_params[rte_lcore_id()]);
1734
1735         /* Master core is always used */
1736         used_cores = 1;
1737         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
1738                 if (used_cores++ >= num_lcores)
1739                         break;
1740
1741                 ret |= rte_eal_wait_lcore(lcore_id);
1742         }
1743
1744         /* Return if test failed */
1745         if (ret)
1746                 return ret;
1747
1748         /* Print throughput if interrupts are disabled and test passed */
1749         if (!intr_enabled) {
1750                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1751                         print_dec_throughput(t_params, num_lcores);
1752                 else
1753                         print_enc_throughput(t_params, num_lcores);
1754                 return ret;
1755         }
1756
1757         /* In interrupt TC we need to wait for the interrupt callback to dequeue
1758          * all pending operations. Skip waiting for queues which reported an
1759          * error using processing_status variable.
1760          * Wait for master lcore operations.
1761          */
1762         tp = &t_params[rte_lcore_id()];
1763         while ((rte_atomic16_read(&tp->nb_dequeued) <
1764                         op_params->num_to_process) &&
1765                         (rte_atomic16_read(&tp->processing_status) !=
1766                         TEST_FAILED))
1767                 rte_pause();
1768
1769         ret |= rte_atomic16_read(&tp->processing_status);
1770
1771         /* Wait for slave lcores operations */
1772         used_cores = 1;
1773         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
1774                 tp = &t_params[lcore_id];
1775                 if (used_cores++ >= num_lcores)
1776                         break;
1777
1778                 while ((rte_atomic16_read(&tp->nb_dequeued) <
1779                                 op_params->num_to_process) &&
1780                                 (rte_atomic16_read(&tp->processing_status) !=
1781                                 TEST_FAILED))
1782                         rte_pause();
1783
1784                 ret |= rte_atomic16_read(&tp->processing_status);
1785         }
1786
1787         /* Print throughput if test passed */
1788         if (!ret) {
1789                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1790                         print_dec_throughput(t_params, num_lcores);
1791                 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
1792                         print_enc_throughput(t_params, num_lcores);
1793         }
1794         return ret;
1795 }
1796
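/*
 * Measure enqueue-to-first-dequeue latency for decode bursts; returns the
 * number of bursts measured (the caller treats values <= 0 as failure).
 */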
1797 static int
1798 latency_test_dec(struct rte_mempool *mempool,
1799                 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
1800                 int vector_mask, uint16_t dev_id, uint16_t queue_id,
1801                 const uint16_t num_to_process, uint16_t burst_sz,
1802                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
1803 {
1804         int ret = TEST_SUCCESS;
1805         uint16_t i, j, dequeued;
1806         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
1807         uint64_t start_time = 0, last_time = 0;
1808
1809         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
1810                 uint16_t enq = 0, deq = 0;
1811                 bool first_time = true;
1812                 last_time = 0;
1813
1814                 if (unlikely(num_to_process - dequeued < burst_sz))
1815                         burst_sz = num_to_process - dequeued;
1816
1817                 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
1818                 TEST_ASSERT_SUCCESS(ret,
1819                                 "rte_bbdev_dec_op_alloc_bulk() failed");
1820                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1821                         copy_reference_dec_op(ops_enq, burst_sz, dequeued,
1822                                         bufs->inputs,
1823                                         bufs->hard_outputs,
1824                                         bufs->soft_outputs,
1825                                         ref_op);
1826
1827                 /* Set counter to validate the ordering */
1828                 for (j = 0; j < burst_sz; ++j)
1829                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1830
1831                 start_time = rte_rdtsc_precise();
1832
1833                 enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
1834                                 burst_sz);
1835                 TEST_ASSERT(enq == burst_sz,
1836                                 "Error enqueueing burst, expected %u, got %u",
1837                                 burst_sz, enq);
1838
1839                 /* Dequeue */
1840                 do {
1841                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
1842                                         &ops_deq[deq], burst_sz - deq);
1843                         if (likely(first_time && (deq > 0))) {
1844                                 last_time = rte_rdtsc_precise() - start_time;
1845                                 first_time = false;
1846                         }
1847                 } while (unlikely(burst_sz != deq));
1848
1849                 *max_time = RTE_MAX(*max_time, last_time);
1850                 *min_time = RTE_MIN(*min_time, last_time);
1851                 *total_time += last_time;
1852
1853                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1854                         ret = validate_dec_op(ops_deq, burst_sz, ref_op,
1855                                         vector_mask);
1856                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
1857                 }
1858
1859                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
1860                 dequeued += deq;
1861         }
1862
1863         return i;
1864 }
1865
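/* Encode counterpart of latency_test_dec() */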
1866 static int
1867 latency_test_enc(struct rte_mempool *mempool,
1868                 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
1869                 uint16_t dev_id, uint16_t queue_id,
1870                 const uint16_t num_to_process, uint16_t burst_sz,
1871                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
1872 {
1873         int ret = TEST_SUCCESS;
1874         uint16_t i, j, dequeued;
1875         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
1876         uint64_t start_time = 0, last_time = 0;
1877
1878         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
1879                 uint16_t enq = 0, deq = 0;
1880                 bool first_time = true;
1881                 last_time = 0;
1882
1883                 if (unlikely(num_to_process - dequeued < burst_sz))
1884                         burst_sz = num_to_process - dequeued;
1885
1886                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
1887                 TEST_ASSERT_SUCCESS(ret,
1888                                 "rte_bbdev_enc_op_alloc_bulk() failed");
1889                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1890                         copy_reference_enc_op(ops_enq, burst_sz, dequeued,
1891                                         bufs->inputs,
1892                                         bufs->hard_outputs,
1893                                         ref_op);
1894
1895                 /* Set counter to validate the ordering */
1896                 for (j = 0; j < burst_sz; ++j)
1897                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1898
1899                 start_time = rte_rdtsc_precise();
1900
1901                 enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
1902                                 burst_sz);
1903                 TEST_ASSERT(enq == burst_sz,
1904                                 "Error enqueueing burst, expected %u, got %u",
1905                                 burst_sz, enq);
1906
1907                 /* Dequeue */
1908                 do {
1909                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
1910                                         &ops_deq[deq], burst_sz - deq);
1911                         if (likely(first_time && (deq > 0))) {
1912                                 last_time = rte_rdtsc_precise() - start_time;
1913                                 first_time = false;
1914                         }
1915                 } while (unlikely(burst_sz != deq));
1916
1917                 *max_time = RTE_MAX(*max_time, last_time);
1918                 *min_time = RTE_MIN(*min_time, last_time);
1919                 *total_time += last_time;
1920
1921                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1922                         ret = validate_enc_op(ops_deq, burst_sz, ref_op);
1923                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
1924                 }
1925
1926                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
1927                 dequeued += deq;
1928         }
1929
1930         return i;
1931 }
1932
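/*
 * Validation/latency test case: run latency_test_dec()/latency_test_enc() on
 * the first queue and print the average, minimum and maximum burst latency.
 */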
1933 static int
1934 latency_test(struct active_device *ad,
1935                 struct test_op_params *op_params)
1936 {
1937         int iter;
1938         uint16_t burst_sz = op_params->burst_sz;
1939         const uint16_t num_to_process = op_params->num_to_process;
1940         const enum rte_bbdev_op_type op_type = test_vector.op_type;
1941         const uint16_t queue_id = ad->queue_ids[0];
1942         struct test_buffers *bufs = NULL;
1943         struct rte_bbdev_info info;
1944         uint64_t total_time, min_time, max_time;
1945         const char *op_type_str;
1946
1947         total_time = max_time = 0;
1948         min_time = UINT64_MAX;
1949
1950         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1951                         "BURST_SIZE should be <= %u", MAX_BURST);
1952
1953         rte_bbdev_info_get(ad->dev_id, &info);
1954         bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1955
1956         op_type_str = rte_bbdev_op_type_str(op_type);
1957         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
1958
1959         printf(
1960                 "\nValidation/Latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
1961                         info.dev_name, burst_sz, num_to_process, op_type_str);
1962
1963         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
1964                 iter = latency_test_dec(op_params->mp, bufs,
1965                                 op_params->ref_dec_op, op_params->vector_mask,
1966                                 ad->dev_id, queue_id, num_to_process,
1967                                 burst_sz, &total_time, &min_time, &max_time);
1968         else
1969                 iter = latency_test_enc(op_params->mp, bufs,
1970                                 op_params->ref_enc_op, ad->dev_id, queue_id,
1971                                 num_to_process, burst_sz, &total_time,
1972                                 &min_time, &max_time);
1973
1974         if (iter <= 0)
1975                 return TEST_FAILED;
1976
1977         printf("Operation latency:\n"
1978                         "\tavg latency: %lg cycles, %lg us\n"
1979                         "\tmin latency: %lg cycles, %lg us\n"
1980                         "\tmax latency: %lg cycles, %lg us\n",
1981                         (double)total_time / (double)iter,
1982                         (double)(total_time * 1000000) / (double)iter /
1983                         (double)rte_get_tsc_hz(), (double)min_time,
1984                         (double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
1985                         (double)max_time, (double)(max_time * 1000000) /
1986                         (double)rte_get_tsc_hz());
1987
1988         return TEST_SUCCESS;
1989 }
1990
1991 #ifdef RTE_BBDEV_OFFLOAD_COST
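/*
 * Read per-queue statistics directly from the device's internal queue data so
 * that acc_offload_cycles for this particular queue can be sampled.
 */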
1992 static int
1993 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
1994                 struct rte_bbdev_stats *stats)
1995 {
1996         struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
1997         struct rte_bbdev_stats *q_stats;
1998
1999         if (queue_id >= dev->data->num_queues)
2000                 return -1;
2001
2002         q_stats = &dev->data->queues[queue_id].queue_stats;
2003
2004         stats->enqueued_count = q_stats->enqueued_count;
2005         stats->dequeued_count = q_stats->dequeued_count;
2006         stats->enqueue_err_count = q_stats->enqueue_err_count;
2007         stats->dequeue_err_count = q_stats->dequeue_err_count;
2008         stats->acc_offload_cycles = q_stats->acc_offload_cycles;
2009
2010         return 0;
2011 }
2012
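/*
 * Measure the decode offload cost: the software part of the enqueue call
 * (wall-clock time minus the accelerator cycles reported in the queue stats),
 * the accelerator offload time itself, and the time to dequeue a single op.
 */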
2013 static int
2014 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
2015                 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
2016                 uint16_t queue_id, const uint16_t num_to_process,
2017                 uint16_t burst_sz, struct test_time_stats *time_st)
2018 {
2019         int i, dequeued, ret;
2020         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2021         uint64_t enq_start_time, deq_start_time;
2022         uint64_t enq_sw_last_time, deq_last_time;
2023         struct rte_bbdev_stats stats;
2024
2025         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2026                 uint16_t enq = 0, deq = 0;
2027
2028                 if (unlikely(num_to_process - dequeued < burst_sz))
2029                         burst_sz = num_to_process - dequeued;
2030
2031                 rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
2032                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2033                         copy_reference_dec_op(ops_enq, burst_sz, dequeued,
2034                                         bufs->inputs,
2035                                         bufs->hard_outputs,
2036                                         bufs->soft_outputs,
2037                                         ref_op);
2038
2039                 /* Start time measurement for enqueue function offload latency */
2040                 enq_start_time = rte_rdtsc_precise();
2041                 do {
2042                         enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
2043                                         &ops_enq[enq], burst_sz - enq);
2044                 } while (unlikely(burst_sz != enq));
2045
2046                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
2047                 TEST_ASSERT_SUCCESS(ret,
2048                                 "Failed to get stats for queue (%u) of device (%u)",
2049                                 queue_id, dev_id);
2050
2051                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
2052                                 stats.acc_offload_cycles;
2053                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
2054                                 enq_sw_last_time);
2055                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
2056                                 enq_sw_last_time);
2057                 time_st->enq_sw_total_time += enq_sw_last_time;
2058
2059                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
2060                                 stats.acc_offload_cycles);
2061                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
2062                                 stats.acc_offload_cycles);
2063                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
2064
2065                 /* ensure enqueue has been completed */
2066                 rte_delay_us(200);
2067
2068                 /* Start time measurement for dequeue function offload latency */
2069                 deq_start_time = rte_rdtsc_precise();
2070                 /* Dequeue one operation */
2071                 do {
2072                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2073                                         &ops_deq[deq], 1);
2074                 } while (unlikely(deq != 1));
2075
2076                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
2077                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
2078                                 deq_last_time);
2079                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
2080                                 deq_last_time);
2081                 time_st->deq_total_time += deq_last_time;
2082
2083                 /* Dequeue remaining operations if needed */
2084                 while (burst_sz != deq)
2085                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2086                                         &ops_deq[deq], burst_sz - deq);
2087
2088                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
2089                 dequeued += deq;
2090         }
2091
2092         return i;
2093 }
2094
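/* Encode counterpart of offload_latency_test_dec() */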
2095 static int
2096 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
2097                 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
2098                 uint16_t queue_id, const uint16_t num_to_process,
2099                 uint16_t burst_sz, struct test_time_stats *time_st)
2100 {
2101         int i, dequeued, ret;
2102         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2103         uint64_t enq_start_time, deq_start_time;
2104         uint64_t enq_sw_last_time, deq_last_time;
2105         struct rte_bbdev_stats stats;
2106
2107         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2108                 uint16_t enq = 0, deq = 0;
2109
2110                 if (unlikely(num_to_process - dequeued < burst_sz))
2111                         burst_sz = num_to_process - dequeued;
2112
2113                 rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
2114                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2115                         copy_reference_enc_op(ops_enq, burst_sz, dequeued,
2116                                         bufs->inputs,
2117                                         bufs->hard_outputs,
2118                                         ref_op);
2119
2120                 /* Start time measurement for enqueue function offload latency */
2121                 enq_start_time = rte_rdtsc_precise();
2122                 do {
2123                         enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
2124                                         &ops_enq[enq], burst_sz - enq);
2125                 } while (unlikely(burst_sz != enq));
2126
2127                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
2128                 TEST_ASSERT_SUCCESS(ret,
2129                                 "Failed to get stats for queue (%u) of device (%u)",
2130                                 queue_id, dev_id);
2131
2132                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
2133                                 stats.acc_offload_cycles;
2134                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
2135                                 enq_sw_last_time);
2136                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
2137                                 enq_sw_last_time);
2138                 time_st->enq_sw_total_time += enq_sw_last_time;
2139
2140                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
2141                                 stats.acc_offload_cycles);
2142                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
2143                                 stats.acc_offload_cycles);
2144                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
2145
2146                 /* ensure enqueue has been completed */
2147                 rte_delay_us(200);
2148
2149                 /* Start time measurement for dequeue function offload latency */
2150                 deq_start_time = rte_rdtsc_precise();
2151                 /* Dequeue one operation */
2152                 do {
2153                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2154                                         &ops_deq[deq], 1);
2155                 } while (unlikely(deq != 1));
2156
2157                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
2158                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
2159                                 deq_last_time);
2160                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
2161                                 deq_last_time);
2162                 time_st->deq_total_time += deq_last_time;
2163
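                /* Dequeue remaining operations if needed */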
2164                 while (burst_sz != deq)
2165                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2166                                         &ops_deq[deq], burst_sz - deq);
2167
2168                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
2169                 dequeued += deq;
2170         }
2171
2172         return i;
2173 }
2174 #endif
2175
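/*
 * Offload cost test case (requires RTE_BBDEV_OFFLOAD_COST): runs
 * offload_latency_test_dec()/offload_latency_test_enc() and prints the
 * enqueue and dequeue offload statistics.
 */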
2176 static int
2177 offload_cost_test(struct active_device *ad,
2178                 struct test_op_params *op_params)
2179 {
2180 #ifndef RTE_BBDEV_OFFLOAD_COST
2181         RTE_SET_USED(ad);
2182         RTE_SET_USED(op_params);
2183         printf("Offload latency test is disabled.\n");
2184         printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
2185         return TEST_SKIPPED;
2186 #else
2187         int iter;
2188         uint16_t burst_sz = op_params->burst_sz;
2189         const uint16_t num_to_process = op_params->num_to_process;
2190         const enum rte_bbdev_op_type op_type = test_vector.op_type;
2191         const uint16_t queue_id = ad->queue_ids[0];
2192         struct test_buffers *bufs = NULL;
2193         struct rte_bbdev_info info;
2194         const char *op_type_str;
2195         struct test_time_stats time_st;
2196
2197         memset(&time_st, 0, sizeof(struct test_time_stats));
2198         time_st.enq_sw_min_time = UINT64_MAX;
2199         time_st.enq_acc_min_time = UINT64_MAX;
2200         time_st.deq_min_time = UINT64_MAX;
2201
2202         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2203                         "BURST_SIZE should be <= %u", MAX_BURST);
2204
2205         rte_bbdev_info_get(ad->dev_id, &info);
2206         bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2207
2208         op_type_str = rte_bbdev_op_type_str(op_type);
2209         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
2210
2211         printf(
2212                 "\nOffload latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
2213                         info.dev_name, burst_sz, num_to_process, op_type_str);
2214
2215         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
2216                 iter = offload_latency_test_dec(op_params->mp, bufs,
2217                                 op_params->ref_dec_op, ad->dev_id, queue_id,
2218                                 num_to_process, burst_sz, &time_st);
2219         else
2220                 iter = offload_latency_test_enc(op_params->mp, bufs,
2221                                 op_params->ref_enc_op, ad->dev_id, queue_id,
2222                                 num_to_process, burst_sz, &time_st);
2223
2224         if (iter <= 0)
2225                 return TEST_FAILED;
2226
2227         printf("Enqueue offload cost latency:\n"
2228                         "\tDriver offload avg %lg cycles, %lg us\n"
2229                         "\tDriver offload min %lg cycles, %lg us\n"
2230                         "\tDriver offload max %lg cycles, %lg us\n"
2231                         "\tAccelerator offload avg %lg cycles, %lg us\n"
2232                         "\tAccelerator offload min %lg cycles, %lg us\n"
2233                         "\tAccelerator offload max %lg cycles, %lg us\n",
2234                         (double)time_st.enq_sw_total_time / (double)iter,
2235                         (double)(time_st.enq_sw_total_time * 1000000) /
2236                         (double)iter / (double)rte_get_tsc_hz(),
2237                         (double)time_st.enq_sw_min_time,
2238                         (double)(time_st.enq_sw_min_time * 1000000) /
2239                         rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
2240                         (double)(time_st.enq_sw_max_time * 1000000) /
2241                         rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
2242                         (double)iter,
2243                         (double)(time_st.enq_acc_total_time * 1000000) /
2244                         (double)iter / (double)rte_get_tsc_hz(),
2245                         (double)time_st.enq_acc_min_time,
2246                         (double)(time_st.enq_acc_min_time * 1000000) /
2247                         rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
2248                         (double)(time_st.enq_acc_max_time * 1000000) /
2249                         rte_get_tsc_hz());
2250
2251         printf("Dequeue offload cost latency - one op:\n"
2252                         "\tavg %lg cycles, %lg us\n"
2253                         "\tmin %lg cycles, %lg us\n"
2254                         "\tmax %lg cycles, %lg us\n",
2255                         (double)time_st.deq_total_time / (double)iter,
2256                         (double)(time_st.deq_total_time * 1000000) /
2257                         (double)iter / (double)rte_get_tsc_hz(),
2258                         (double)time_st.deq_min_time,
2259                         (double)(time_st.deq_min_time * 1000000) /
2260                         rte_get_tsc_hz(), (double)time_st.deq_max_time,
2261                         (double)(time_st.deq_max_time * 1000000) /
2262                         rte_get_tsc_hz());
2263
2264         return TEST_SUCCESS;
2265 #endif
2266 }
2267
2268 #ifdef RTE_BBDEV_OFFLOAD_COST
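/*
 * Measure the cost of dequeueing from an empty queue (nothing enqueued) to
 * expose the bare polling overhead of the decode dequeue call.
 */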
2269 static int
2270 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
2271                 const uint16_t num_to_process, uint16_t burst_sz,
2272                 uint64_t *deq_total_time, uint64_t *deq_min_time,
2273                 uint64_t *deq_max_time)
2274 {
2275         int i, deq_total;
2276         struct rte_bbdev_dec_op *ops[MAX_BURST];
2277         uint64_t deq_start_time, deq_last_time;
2278
2279         /* Test deq offload latency from an empty queue */
2280
2281         for (i = 0, deq_total = 0; deq_total < num_to_process;
2282                         ++i, deq_total += burst_sz) {
2283                 deq_start_time = rte_rdtsc_precise();
2284
2285                 if (unlikely(num_to_process - deq_total < burst_sz))
2286                         burst_sz = num_to_process - deq_total;
2287                 rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz);
2288
2289                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
2290                 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
2291                 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
2292                 *deq_total_time += deq_last_time;
2293         }
2294
2295         return i;
2296 }
2297
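/* Encode counterpart of offload_latency_empty_q_test_dec() */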
2298 static int
2299 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
2300                 const uint16_t num_to_process, uint16_t burst_sz,
2301                 uint64_t *deq_total_time, uint64_t *deq_min_time,
2302                 uint64_t *deq_max_time)
2303 {
2304         int i, deq_total;
2305         struct rte_bbdev_enc_op *ops[MAX_BURST];
2306         uint64_t deq_start_time, deq_last_time;
2307
2308         /* Test deq offload latency from an empty queue */
2309         for (i = 0, deq_total = 0; deq_total < num_to_process;
2310                         ++i, deq_total += burst_sz) {
2311                 deq_start_time = rte_rdtsc_precise();
2312
2313                 if (unlikely(num_to_process - deq_total < burst_sz))
2314                         burst_sz = num_to_process - deq_total;
2315                 rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz);
2316
2317                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
2318                 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
2319                 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
2320                 *deq_total_time += deq_last_time;
2321         }
2322
2323         return i;
2324 }
2325 #endif
2326
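/*
 * Empty-queue dequeue latency test case (requires RTE_BBDEV_OFFLOAD_COST):
 * prints average, minimum and maximum dequeue latency on an idle queue.
 */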
2327 static int
2328 offload_latency_empty_q_test(struct active_device *ad,
2329                 struct test_op_params *op_params)
2330 {
2331 #ifndef RTE_BBDEV_OFFLOAD_COST
2332         RTE_SET_USED(ad);
2333         RTE_SET_USED(op_params);
2334         printf("Offload latency empty dequeue test is disabled.\n");
2335         printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
2336         return TEST_SKIPPED;
2337 #else
2338         int iter;
2339         uint64_t deq_total_time, deq_min_time, deq_max_time;
2340         uint16_t burst_sz = op_params->burst_sz;
2341         const uint16_t num_to_process = op_params->num_to_process;
2342         const enum rte_bbdev_op_type op_type = test_vector.op_type;
2343         const uint16_t queue_id = ad->queue_ids[0];
2344         struct rte_bbdev_info info;
2345         const char *op_type_str;
2346
2347         deq_total_time = deq_max_time = 0;
2348         deq_min_time = UINT64_MAX;
2349
2350         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2351                         "BURST_SIZE should be <= %u", MAX_BURST);
2352
2353         rte_bbdev_info_get(ad->dev_id, &info);
2354
2355         op_type_str = rte_bbdev_op_type_str(op_type);
2356         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
2357
2358         printf(
2359                 "\nOffload latency empty dequeue test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
2360                         info.dev_name, burst_sz, num_to_process, op_type_str);
2361
2362         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
2363                 iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
2364                                 num_to_process, burst_sz, &deq_total_time,
2365                                 &deq_min_time, &deq_max_time);
2366         else
2367                 iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
2368                                 num_to_process, burst_sz, &deq_total_time,
2369                                 &deq_min_time, &deq_max_time);
2370
2371         if (iter <= 0)
2372                 return TEST_FAILED;
2373
2374         printf("Empty dequeue offload\n"
2375                         "\tavg. latency: %lg cycles, %lg us\n"
2376                         "\tmin. latency: %lg cycles, %lg us\n"
2377                         "\tmax. latency: %lg cycles, %lg us\n",
2378                         (double)deq_total_time / (double)iter,
2379                         (double)(deq_total_time * 1000000) / (double)iter /
2380                         (double)rte_get_tsc_hz(), (double)deq_min_time,
2381                         (double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
2382                         (double)deq_max_time, (double)(deq_max_time * 1000000) /
2383                         rte_get_tsc_hz());
2384
2385         return TEST_SUCCESS;
2386 #endif
2387 }
2388
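/*
 * Test case wrappers invoked through run_test_case() by the suites below.
 * Note that interrupt_tc reuses throughput_test, which switches between PMD
 * and interrupt behaviour based on intr_enabled.
 */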
2389 static int
2390 throughput_tc(void)
2391 {
2392         return run_test_case(throughput_test);
2393 }
2394
2395 static int
2396 offload_cost_tc(void)
2397 {
2398         return run_test_case(offload_cost_test);
2399 }
2400
2401 static int
2402 offload_latency_empty_q_tc(void)
2403 {
2404         return run_test_case(offload_latency_empty_q_test);
2405 }
2406
2407 static int
2408 latency_tc(void)
2409 {
2410         return run_test_case(latency_test);
2411 }
2412
2413 static int
2414 interrupt_tc(void)
2415 {
2416         return run_test_case(throughput_test);
2417 }
2418
2419 static struct unit_test_suite bbdev_throughput_testsuite = {
2420         .suite_name = "BBdev Throughput Tests",
2421         .setup = testsuite_setup,
2422         .teardown = testsuite_teardown,
2423         .unit_test_cases = {
2424                 TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
2425                 TEST_CASES_END() /**< NULL terminate unit test array */
2426         }
2427 };
2428
2429 static struct unit_test_suite bbdev_validation_testsuite = {
2430         .suite_name = "BBdev Validation Tests",
2431         .setup = testsuite_setup,
2432         .teardown = testsuite_teardown,
2433         .unit_test_cases = {
2434                 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
2435                 TEST_CASES_END() /**< NULL terminate unit test array */
2436         }
2437 };
2438
2439 static struct unit_test_suite bbdev_latency_testsuite = {
2440         .suite_name = "BBdev Latency Tests",
2441         .setup = testsuite_setup,
2442         .teardown = testsuite_teardown,
2443         .unit_test_cases = {
2444                 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
2445                 TEST_CASES_END() /**< NULL terminate unit test array */
2446         }
2447 };
2448
2449 static struct unit_test_suite bbdev_offload_cost_testsuite = {
2450         .suite_name = "BBdev Offload Cost Tests",
2451         .setup = testsuite_setup,
2452         .teardown = testsuite_teardown,
2453         .unit_test_cases = {
2454                 TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
2455                 TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
2456                 TEST_CASES_END() /**< NULL terminate unit test array */
2457         }
2458 };
2459
2460 static struct unit_test_suite bbdev_interrupt_testsuite = {
2461         .suite_name = "BBdev Interrupt Tests",
2462         .setup = interrupt_testsuite_setup,
2463         .teardown = testsuite_teardown,
2464         .unit_test_cases = {
2465                 TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
2466                 TEST_CASES_END() /**< NULL terminate unit test array */
2467         }
2468 };
2469
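/*
 * Register the suites with the test application under the "throughput",
 * "validation", "latency", "offload" and "interrupt" commands, selected by
 * name when the bbdev test app is run. For example (exact wrapper options may
 * vary between DPDK versions):
 *   ./test-bbdev.py -c latency -v <test_vector.data>
 */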
2470 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
2471 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
2472 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
2473 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
2474 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);