/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <stdio.h>
#include <inttypes.h>
#include <math.h>

#include <rte_eal.h>
#include <rte_common.h>
#include <rte_dev.h>
#include <rte_launch.h>
#include <rte_bbdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_random.h>
#include <rte_hexdump.h>

#include "main.h"
#include "test_bbdev_vector.h"

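/* Map SOCKET_ID_ANY (-1) to socket 0 so the result can safely index
 * per-socket arrays such as test_op_params.q_bufs.
 */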
#define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))

#define MAX_QUEUES RTE_MAX_LCORE
#define TEST_REPETITIONS 1000

#define OPS_CACHE_SIZE 256U
#define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */

#define SYNC_WAIT 0
#define SYNC_START 1

#define INVALID_QUEUE_ID -1

static struct test_bbdev_vector test_vector;

/* Switch between PMD and Interrupt for throughput TC */
static bool intr_enabled;

/* Represents tested active devices */
static struct active_device {
        const char *driver_name;
        uint8_t dev_id;
        uint16_t supported_ops;
        uint16_t queue_ids[MAX_QUEUES];
        uint16_t nb_queues;
        struct rte_mempool *ops_mempool;
        struct rte_mempool *in_mbuf_pool;
        struct rte_mempool *hard_out_mbuf_pool;
        struct rte_mempool *soft_out_mbuf_pool;
} active_devs[RTE_BBDEV_MAX_DEVS];

static uint8_t nb_active_devs;

/* Data buffers used by BBDEV ops */
struct test_buffers {
        struct rte_bbdev_op_data *inputs;
        struct rte_bbdev_op_data *hard_outputs;
        struct rte_bbdev_op_data *soft_outputs;
};

/* Operation parameters specific for given test case */
struct test_op_params {
        struct rte_mempool *mp;
        struct rte_bbdev_dec_op *ref_dec_op;
        struct rte_bbdev_enc_op *ref_enc_op;
        uint16_t burst_sz;
        uint16_t num_to_process;
        uint16_t num_lcores;
        int vector_mask;
        rte_atomic16_t sync;
        struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
};

/* Contains per lcore params */
struct thread_params {
        uint8_t dev_id;
        uint16_t queue_id;
        uint32_t lcore_id;
        uint64_t start_time;
        double ops_per_sec;
        double mbps;
        uint8_t iter_count;
        rte_atomic16_t nb_dequeued;
        rte_atomic16_t processing_status;
        rte_atomic16_t burst_sz;
        struct test_op_params *op_params;
        struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
        struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
};

#ifdef RTE_BBDEV_OFFLOAD_COST
/* Stores time statistics */
struct test_time_stats {
        /* Stores software enqueue total working time */
        uint64_t enq_sw_total_time;
        /* Stores minimum value of software enqueue working time */
        uint64_t enq_sw_min_time;
        /* Stores maximum value of software enqueue working time */
        uint64_t enq_sw_max_time;
        /* Stores accelerator enqueue total working time */
        uint64_t enq_acc_total_time;
        /* Stores minimum value of accelerator enqueue working time */
        uint64_t enq_acc_min_time;
        /* Stores maximum value of accelerator enqueue working time */
        uint64_t enq_acc_max_time;
        /* Stores dequeue total working time */
        uint64_t deq_total_time;
        /* Stores minimum value of dequeue working time */
        uint64_t deq_min_time;
        /* Stores maximum value of dequeue working time */
        uint64_t deq_max_time;
};
#endif

typedef int (test_case_function)(struct active_device *ad,
                struct test_op_params *op_params);

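/* Reset a whole mbuf chain: clear pkt_len on the head and data_len on
 * every segment. The head mbuf is assumed to be non-NULL.
 */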
static inline void
mbuf_reset(struct rte_mbuf *m)
{
        m->pkt_len = 0;

        do {
                m->data_len = 0;
                m = m->next;
        } while (m != NULL);
}

static inline void
set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
{
        ad->supported_ops |= (1 << op_type);
}

static inline bool
is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
{
        return ad->supported_ops & (1 << op_type);
}

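/* Check that every requested flag is present in the capability mask,
 * e.g. flags_req = 0x3 matches flags_present = 0x7 but not 0x5.
 */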
static inline bool
flags_match(uint32_t flags_req, uint32_t flags_present)
{
        return (flags_req & flags_present) == flags_req;
}

static void
clear_soft_out_cap(uint32_t *op_flags)
{
        *op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
        *op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
        *op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
}

static int
check_dev_cap(const struct rte_bbdev_info *dev_info)
{
        unsigned int i;
        unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs;
        const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;

        nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
        nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
        nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;

        for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
                if (op_cap->type != test_vector.op_type)
                        continue;

                if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
                        const struct rte_bbdev_op_cap_turbo_dec *cap =
                                        &op_cap->cap.turbo_dec;
                        /* Ignore lack of soft output capability, just skip
                         * checking if soft output is valid.
                         */
                        if ((test_vector.turbo_dec.op_flags &
                                        RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
                                        !(cap->capability_flags &
                                        RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
                                printf(
                                        "WARNING: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
                                        dev_info->dev_name);
                                clear_soft_out_cap(
                                        &test_vector.turbo_dec.op_flags);
                        }

                        if (!flags_match(test_vector.turbo_dec.op_flags,
                                        cap->capability_flags))
                                return TEST_FAILED;
                        if (nb_inputs > cap->num_buffers_src) {
                                printf("Too many inputs defined: %u, max: %u\n",
                                        nb_inputs, cap->num_buffers_src);
                                return TEST_FAILED;
                        }
                        if (nb_soft_outputs > cap->num_buffers_soft_out &&
                                        (test_vector.turbo_dec.op_flags &
                                        RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
                                printf(
                                        "Too many soft outputs defined: %u, max: %u\n",
                                        nb_soft_outputs,
                                        cap->num_buffers_soft_out);
                                return TEST_FAILED;
                        }
                        if (nb_hard_outputs > cap->num_buffers_hard_out) {
                                printf(
                                        "Too many hard outputs defined: %u, max: %u\n",
                                        nb_hard_outputs,
                                        cap->num_buffers_hard_out);
                                return TEST_FAILED;
                        }
                        if (intr_enabled && !(cap->capability_flags &
                                        RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
                                printf(
                                        "Dequeue interrupts are not supported!\n");
                                return TEST_FAILED;
                        }

                        return TEST_SUCCESS;
                } else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
                        const struct rte_bbdev_op_cap_turbo_enc *cap =
                                        &op_cap->cap.turbo_enc;

                        if (!flags_match(test_vector.turbo_enc.op_flags,
                                        cap->capability_flags))
                                return TEST_FAILED;
                        if (nb_inputs > cap->num_buffers_src) {
                                printf("Too many inputs defined: %u, max: %u\n",
                                        nb_inputs, cap->num_buffers_src);
                                return TEST_FAILED;
                        }
                        if (nb_hard_outputs > cap->num_buffers_dst) {
                                printf(
                                        "Too many hard outputs defined: %u, max: %u\n",
                                        nb_hard_outputs, cap->num_buffers_dst);
                                return TEST_FAILED;
                        }
                        if (intr_enabled && !(cap->capability_flags &
                                        RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
                                printf(
                                        "Dequeue interrupts are not supported!\n");
                                return TEST_FAILED;
                        }

                        return TEST_SUCCESS;
                }
        }

        if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
                return TEST_SUCCESS; /* Special case for NULL device */

        return TEST_FAILED;
}

/* calculates optimal mempool size not smaller than the val */
static unsigned int
optimal_mempool_size(unsigned int val)
{
        return rte_align32pow2(val + 1) - 1;
}
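/* Illustrative values (not compiled): the helper rounds up to the next
 * power of two minus one, which is the efficient mempool size:
 *      optimal_mempool_size(511)  -> 511   (already 2^9 - 1)
 *      optimal_mempool_size(512)  -> 1023  (2^10 - 1)
 *      optimal_mempool_size(1000) -> 1023
 */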

/* allocates mbuf mempool for inputs and outputs */
static struct rte_mempool *
create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
                int socket_id, unsigned int mbuf_pool_size,
                const char *op_type_str)
{
        unsigned int i;
        uint32_t max_seg_sz = 0;
        char pool_name[RTE_MEMPOOL_NAMESIZE];

        /* find max input segment size */
        for (i = 0; i < entries->nb_segments; ++i)
                if (entries->segments[i].length > max_seg_sz)
                        max_seg_sz = entries->segments[i].length;

        snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
                        dev_id);
        return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
                        RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM,
                        (unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
}
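/* Data room sizing sketch (assuming the default 128 B headroom): a vector
 * whose largest segment is 5000 B gets a 5128 B data room, while small
 * segments fall back to RTE_MBUF_DEFAULT_BUF_SIZE (2176 B), so any one
 * reference segment always fits in a single mbuf.
 */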

static int
create_mempools(struct active_device *ad, int socket_id,
                enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
{
        struct rte_mempool *mp;
        unsigned int ops_pool_size, mbuf_pool_size = 0;
        char pool_name[RTE_MEMPOOL_NAMESIZE];
        const char *op_type_str;
        enum rte_bbdev_op_type op_type = org_op_type;

        struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
        struct op_data_entries *hard_out =
                        &test_vector.entries[DATA_HARD_OUTPUT];
        struct op_data_entries *soft_out =
                        &test_vector.entries[DATA_SOFT_OUTPUT];

        /* allocate ops mempool */
        ops_pool_size = optimal_mempool_size(RTE_MAX(
                        /* Ops used plus 1 reference op */
                        RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
                        /* Minimal cache size plus 1 reference op */
                        (unsigned int)(1.5 * rte_lcore_count() *
                                        OPS_CACHE_SIZE + 1)),
                        OPS_POOL_SIZE_MIN));

        if (org_op_type == RTE_BBDEV_OP_NONE)
                op_type = RTE_BBDEV_OP_TURBO_ENC;

        op_type_str = rte_bbdev_op_type_str(op_type);
        TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

        snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
                        ad->dev_id);
        mp = rte_bbdev_op_pool_create(pool_name, op_type,
                        ops_pool_size, OPS_CACHE_SIZE, socket_id);
        TEST_ASSERT_NOT_NULL(mp,
                        "ERROR Failed to create %u items ops pool for dev %u on socket %u.",
                        ops_pool_size,
                        ad->dev_id,
                        socket_id);
        ad->ops_mempool = mp;

        /* Do not create inputs and outputs mbufs for BaseBand Null Device */
        if (org_op_type == RTE_BBDEV_OP_NONE)
                return TEST_SUCCESS;

        /* Inputs */
        mbuf_pool_size = optimal_mempool_size(ops_pool_size * in->nb_segments);
        mp = create_mbuf_pool(in, ad->dev_id, socket_id, mbuf_pool_size, "in");
        TEST_ASSERT_NOT_NULL(mp,
                        "ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
                        mbuf_pool_size,
                        ad->dev_id,
                        socket_id);
        ad->in_mbuf_pool = mp;

        /* Hard outputs */
        mbuf_pool_size = optimal_mempool_size(ops_pool_size *
                        hard_out->nb_segments);
        mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id, mbuf_pool_size,
                        "hard_out");
        TEST_ASSERT_NOT_NULL(mp,
                        "ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
                        mbuf_pool_size,
                        ad->dev_id,
                        socket_id);
        ad->hard_out_mbuf_pool = mp;

        if (soft_out->nb_segments == 0)
                return TEST_SUCCESS;

        /* Soft outputs */
        mbuf_pool_size = optimal_mempool_size(ops_pool_size *
                        soft_out->nb_segments);
        mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id, mbuf_pool_size,
                        "soft_out");
        TEST_ASSERT_NOT_NULL(mp,
                        "ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
                        mbuf_pool_size,
                        ad->dev_id,
                        socket_id);
        ad->soft_out_mbuf_pool = mp;

        return TEST_SUCCESS;
}
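/* Ops pool sizing sketch with assumed numbers: 2 queues and num_ops = 2047
 * give 2 * 2047 + 1 = 4095 ops; 4 lcores give a cache floor of
 * 1.5 * 4 * 256 + 1 = 1537; the larger value (4095) is already 2^12 - 1,
 * so optimal_mempool_size() returns it unchanged.
 */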

static int
add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
                struct test_bbdev_vector *vector)
{
        int ret;
        unsigned int queue_id;
        struct rte_bbdev_queue_conf qconf;
        struct active_device *ad = &active_devs[nb_active_devs];
        unsigned int nb_queues;
        enum rte_bbdev_op_type op_type = vector->op_type;

        nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
        /* setup device */
        ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
        if (ret < 0) {
                printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
                                dev_id, nb_queues, info->socket_id, ret);
                return TEST_FAILED;
        }

        /* configure interrupts if needed */
        if (intr_enabled) {
                ret = rte_bbdev_intr_enable(dev_id);
                if (ret < 0) {
                        printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
                                        ret);
                        return TEST_FAILED;
                }
        }

        /* setup device queues */
        qconf.socket = info->socket_id;
        qconf.queue_size = info->drv.default_queue_conf.queue_size;
        qconf.priority = 0;
        qconf.deferred_start = 0;
        qconf.op_type = op_type;

        for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
                ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
                if (ret != 0) {
                        printf(
                                        "Allocated all queues (id=%u) at prio%u on dev%u\n",
                                        queue_id, qconf.priority, dev_id);
                        qconf.priority++;
                        ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
                                        &qconf);
                }
                if (ret != 0) {
                        printf("All queues on dev %u allocated: %u\n",
                                        dev_id, queue_id);
                        break;
                }
                ad->queue_ids[queue_id] = queue_id;
        }
        TEST_ASSERT(queue_id != 0,
                        "ERROR Failed to configure any queues on dev %u",
                        dev_id);
        ad->nb_queues = queue_id;

        set_avail_op(ad, op_type);

        return TEST_SUCCESS;
}
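/* Note on the queue loop above: when a queue fails to configure at the
 * current priority, the test bumps qconf.priority once and retries; a
 * second failure means the device is out of queues, and the loop stops
 * with however many queues were configured so far.
 */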

static int
add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
                struct test_bbdev_vector *vector)
{
        int ret;

        active_devs[nb_active_devs].driver_name = info->drv.driver_name;
        active_devs[nb_active_devs].dev_id = dev_id;

        ret = add_bbdev_dev(dev_id, info, vector);
        if (ret == TEST_SUCCESS)
                ++nb_active_devs;
        return ret;
}

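/* Walk every bbdev in the system, skip devices whose capabilities do not
 * match the test vector, and register the rest as active devices.
 * Returns the number of devices added.
 */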
static uint8_t
populate_active_devices(void)
{
        int ret;
        uint8_t dev_id;
        uint8_t nb_devs_added = 0;
        struct rte_bbdev_info info;

        RTE_BBDEV_FOREACH(dev_id) {
                rte_bbdev_info_get(dev_id, &info);

                if (check_dev_cap(&info)) {
                        printf(
                                "Device %d (%s) does not support specified capabilities\n",
                                        dev_id, info.dev_name);
                        continue;
                }

                ret = add_active_device(dev_id, &info, &test_vector);
                if (ret != 0) {
                        printf("Adding active bbdev %s skipped\n",
                                        info.dev_name);
                        continue;
                }
                nb_devs_added++;
        }

        return nb_devs_added;
}

static int
read_test_vector(void)
{
        int ret;

        memset(&test_vector, 0, sizeof(test_vector));
        printf("Test vector file = %s\n", get_vector_filename());
        ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
        TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
                        get_vector_filename());

        return TEST_SUCCESS;
}

static int
testsuite_setup(void)
{
        TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");

        if (populate_active_devices() == 0) {
                printf("No suitable devices found!\n");
                return TEST_SKIPPED;
        }

        return TEST_SUCCESS;
}

static int
interrupt_testsuite_setup(void)
{
        TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");

        /* Enable interrupts */
        intr_enabled = true;

        /* Special case for NULL device (RTE_BBDEV_OP_NONE) */
        if (populate_active_devices() == 0 ||
                        test_vector.op_type == RTE_BBDEV_OP_NONE) {
                intr_enabled = false;
                printf("No suitable devices found!\n");
                return TEST_SKIPPED;
        }

        return TEST_SUCCESS;
}

static void
testsuite_teardown(void)
{
        uint8_t dev_id;

        /* Unconfigure devices */
        RTE_BBDEV_FOREACH(dev_id)
                rte_bbdev_close(dev_id);

        /* Clear active devices structs. */
        memset(active_devs, 0, sizeof(active_devs));
        nb_active_devs = 0;
}

static int
ut_setup(void)
{
        uint8_t i, dev_id;

        for (i = 0; i < nb_active_devs; i++) {
                dev_id = active_devs[i].dev_id;
                /* reset bbdev stats */
                TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
                                "Failed to reset stats of bbdev %u", dev_id);
                /* start the device */
                TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
                                "Failed to start bbdev %u", dev_id);
        }

        return TEST_SUCCESS;
}

static void
ut_teardown(void)
{
        uint8_t i, dev_id;
        struct rte_bbdev_stats stats;

        for (i = 0; i < nb_active_devs; i++) {
                dev_id = active_devs[i].dev_id;
                /* read stats and print */
                rte_bbdev_stats_get(dev_id, &stats);
                /* Stop the device */
                rte_bbdev_stop(dev_id);
        }
}

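/* Build the rte_bbdev_op_data entries for one queue: for each of the n
 * ops, allocate a (possibly chained) mbuf per reference segment. Input
 * entries get the vector data copied in and alignment-checked; output
 * entries are only chained, since the device writes into them.
 */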
static int
init_op_data_objs(struct rte_bbdev_op_data *bufs,
                struct op_data_entries *ref_entries,
                struct rte_mempool *mbuf_pool, const uint16_t n,
                enum op_data_type op_type, uint16_t min_alignment)
{
        int ret;
        unsigned int i, j;

        for (i = 0; i < n; ++i) {
                char *data;
                struct op_data_buf *seg = &ref_entries->segments[0];
                struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
                TEST_ASSERT_NOT_NULL(m_head,
                                "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
                                op_type, n * ref_entries->nb_segments,
                                mbuf_pool->size);

                TEST_ASSERT_SUCCESS(((seg->length + RTE_PKTMBUF_HEADROOM) >
                                (uint32_t)UINT16_MAX),
                                "Given data is bigger than allowed mbuf segment size");

                bufs[i].data = m_head;
                bufs[i].offset = 0;
                bufs[i].length = 0;

                if (op_type == DATA_INPUT) {
                        data = rte_pktmbuf_append(m_head, seg->length);
                        TEST_ASSERT_NOT_NULL(data,
                                        "Couldn't append %u bytes to mbuf from %d data type mbuf pool",
                                        seg->length, op_type);

                        TEST_ASSERT(data == RTE_PTR_ALIGN(data, min_alignment),
                                        "Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
                                        data, min_alignment);
                        rte_memcpy(data, seg->addr, seg->length);
                        bufs[i].length += seg->length;

                        for (j = 1; j < ref_entries->nb_segments; ++j) {
                                struct rte_mbuf *m_tail =
                                                rte_pktmbuf_alloc(mbuf_pool);
                                TEST_ASSERT_NOT_NULL(m_tail,
                                                "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
                                                op_type,
                                                n * ref_entries->nb_segments,
                                                mbuf_pool->size);
                                seg += 1;

                                data = rte_pktmbuf_append(m_tail, seg->length);
                                TEST_ASSERT_NOT_NULL(data,
                                                "Couldn't append %u bytes to mbuf from %d data type mbuf pool",
                                                seg->length, op_type);

                                TEST_ASSERT(data == RTE_PTR_ALIGN(data,
                                                min_alignment),
                                                "Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
                                                data, min_alignment);
                                rte_memcpy(data, seg->addr, seg->length);
                                bufs[i].length += seg->length;

                                ret = rte_pktmbuf_chain(m_head, m_tail);
                                TEST_ASSERT_SUCCESS(ret,
                                                "Couldn't chain mbufs from %d data type mbuf pool",
                                                op_type);
                        }

                } else {

                        /* allocate chained-mbuf for output buffer */
                        for (j = 1; j < ref_entries->nb_segments; ++j) {
                                struct rte_mbuf *m_tail =
                                                rte_pktmbuf_alloc(mbuf_pool);
                                TEST_ASSERT_NOT_NULL(m_tail,
                                                "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
                                                op_type,
                                                n * ref_entries->nb_segments,
                                                mbuf_pool->size);

                                ret = rte_pktmbuf_chain(m_head, m_tail);
                                TEST_ASSERT_SUCCESS(ret,
                                                "Couldn't chain mbufs from %d data type mbuf pool",
                                                op_type);
                        }
                }
        }

        return 0;
}

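/* Allocate a zeroed op_data array on the requested socket; if that socket
 * has no memory left, fall back to any lower-numbered socket.
 */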
static int
allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
                const int socket)
{
        int i;

        *buffers = rte_zmalloc_socket(NULL, len, 0, socket);
        if (*buffers == NULL) {
                printf("WARNING: Failed to allocate op_data on socket %d\n",
                                socket);
                /* try to allocate memory on other detected sockets */
                for (i = 0; i < socket; i++) {
                        *buffers = rte_zmalloc_socket(NULL, len, 0, i);
                        if (*buffers != NULL)
                                break;
                }
        }

        return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
}

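/* Rescale input LLRs from the full int8_t range down to the device's
 * max_llr_modulus. Worked example with an assumed modulus of 16:
 * an LLR of 127 maps to round(16 * 127 / 127) = 16, and an LLR of -64
 * maps to round(16 * -64 / 127) = -8.
 */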
static void
limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
                uint16_t n, int8_t max_llr_modulus)
{
        uint16_t i, byte_idx;

        for (i = 0; i < n; ++i) {
                struct rte_mbuf *m = input_ops[i].data;
                while (m != NULL) {
                        int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
                                        input_ops[i].offset);
                        for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
                                        ++byte_idx)
                                llr[byte_idx] = round((double)max_llr_modulus *
                                                llr[byte_idx] / INT8_MAX);

                        m = m->next;
                }
        }
}

static int
fill_queue_buffers(struct test_op_params *op_params,
                struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
                struct rte_mempool *soft_out_mp, uint16_t queue_id,
                const struct rte_bbdev_op_cap *capabilities,
                uint16_t min_alignment, const int socket_id)
{
        int ret;
        enum op_data_type type;
        const uint16_t n = op_params->num_to_process;

        struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
                in_mp,
                soft_out_mp,
                hard_out_mp,
        };

        struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
                &op_params->q_bufs[socket_id][queue_id].inputs,
                &op_params->q_bufs[socket_id][queue_id].soft_outputs,
                &op_params->q_bufs[socket_id][queue_id].hard_outputs,
        };

        for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
                struct op_data_entries *ref_entries =
                                &test_vector.entries[type];
                if (ref_entries->nb_segments == 0)
                        continue;

                ret = allocate_buffers_on_socket(queue_ops[type],
                                n * sizeof(struct rte_bbdev_op_data),
                                socket_id);
                TEST_ASSERT_SUCCESS(ret,
                                "Couldn't allocate memory for rte_bbdev_op_data structs");

                ret = init_op_data_objs(*queue_ops[type], ref_entries,
                                mbuf_pools[type], n, type, min_alignment);
                TEST_ASSERT_SUCCESS(ret,
                                "Couldn't init rte_bbdev_op_data structs");
        }

        if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
                limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
                        capabilities->cap.turbo_dec.max_llr_modulus);

        return 0;
}

static void
free_buffers(struct active_device *ad, struct test_op_params *op_params)
{
        unsigned int i, j;

        rte_mempool_free(ad->ops_mempool);
        rte_mempool_free(ad->in_mbuf_pool);
        rte_mempool_free(ad->hard_out_mbuf_pool);
        rte_mempool_free(ad->soft_out_mbuf_pool);

        for (i = 0; i < rte_lcore_count(); ++i) {
                for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
                        rte_free(op_params->q_bufs[j][i].inputs);
                        rte_free(op_params->q_bufs[j][i].hard_outputs);
                        rte_free(op_params->q_bufs[j][i].soft_outputs);
                }
        }
}

static void
copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
                unsigned int start_idx,
                struct rte_bbdev_op_data *inputs,
                struct rte_bbdev_op_data *hard_outputs,
                struct rte_bbdev_op_data *soft_outputs,
                struct rte_bbdev_dec_op *ref_op)
{
        unsigned int i;
        struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;

        for (i = 0; i < n; ++i) {
                if (turbo_dec->code_block_mode == 0) {
                        ops[i]->turbo_dec.tb_params.ea =
                                        turbo_dec->tb_params.ea;
                        ops[i]->turbo_dec.tb_params.eb =
                                        turbo_dec->tb_params.eb;
                        ops[i]->turbo_dec.tb_params.k_pos =
                                        turbo_dec->tb_params.k_pos;
                        ops[i]->turbo_dec.tb_params.k_neg =
                                        turbo_dec->tb_params.k_neg;
                        ops[i]->turbo_dec.tb_params.c =
                                        turbo_dec->tb_params.c;
                        ops[i]->turbo_dec.tb_params.c_neg =
                                        turbo_dec->tb_params.c_neg;
                        ops[i]->turbo_dec.tb_params.cab =
                                        turbo_dec->tb_params.cab;
                        ops[i]->turbo_dec.tb_params.r =
                                        turbo_dec->tb_params.r;
                } else {
                        ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
                        ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
                }

                ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
                ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
                ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
                ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
                ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
                ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
                ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;

                ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
                ops[i]->turbo_dec.input = inputs[start_idx + i];
                if (soft_outputs != NULL)
                        ops[i]->turbo_dec.soft_output =
                                soft_outputs[start_idx + i];
        }
}

static void
copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
                unsigned int start_idx,
                struct rte_bbdev_op_data *inputs,
                struct rte_bbdev_op_data *outputs,
                struct rte_bbdev_enc_op *ref_op)
{
        unsigned int i;
        struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;

        for (i = 0; i < n; ++i) {
                if (turbo_enc->code_block_mode == 0) {
                        ops[i]->turbo_enc.tb_params.ea =
                                        turbo_enc->tb_params.ea;
                        ops[i]->turbo_enc.tb_params.eb =
                                        turbo_enc->tb_params.eb;
                        ops[i]->turbo_enc.tb_params.k_pos =
                                        turbo_enc->tb_params.k_pos;
                        ops[i]->turbo_enc.tb_params.k_neg =
                                        turbo_enc->tb_params.k_neg;
                        ops[i]->turbo_enc.tb_params.c =
                                        turbo_enc->tb_params.c;
                        ops[i]->turbo_enc.tb_params.c_neg =
                                        turbo_enc->tb_params.c_neg;
                        ops[i]->turbo_enc.tb_params.cab =
                                        turbo_enc->tb_params.cab;
                        ops[i]->turbo_enc.tb_params.ncb_pos =
                                        turbo_enc->tb_params.ncb_pos;
                        ops[i]->turbo_enc.tb_params.ncb_neg =
                                        turbo_enc->tb_params.ncb_neg;
                        ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
                } else {
                        ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
                        ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
                        ops[i]->turbo_enc.cb_params.ncb =
                                        turbo_enc->cb_params.ncb;
                }
                ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
                ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
                ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;

                ops[i]->turbo_enc.output = outputs[start_idx + i];
                ops[i]->turbo_enc.input = inputs[start_idx + i];
        }
}

static int
check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
                unsigned int order_idx, const int expected_status)
{
        TEST_ASSERT(op->status == expected_status,
                        "op_status (%d) != expected_status (%d)",
                        op->status, expected_status);

        TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
                        "Ordering error, expected %p, got %p",
                        (void *)(uintptr_t)order_idx, op->opaque_data);

        return TEST_SUCCESS;
}

static int
check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
                unsigned int order_idx, const int expected_status)
{
        TEST_ASSERT(op->status == expected_status,
                        "op_status (%d) != expected_status (%d)",
                        op->status, expected_status);

        TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
                        "Ordering error, expected %p, got %p",
                        (void *)(uintptr_t)order_idx, op->opaque_data);

        return TEST_SUCCESS;
}

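/* Compare one dequeued op_data chain against the reference entry: segment
 * count, per-segment length and contents, and total packet length must
 * all match.
 */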
static inline int
validate_op_chain(struct rte_bbdev_op_data *op,
                struct op_data_entries *orig_op)
{
        uint8_t i;
        struct rte_mbuf *m = op->data;
        uint8_t nb_dst_segments = orig_op->nb_segments;
        uint32_t total_data_size = 0;

        TEST_ASSERT(nb_dst_segments == m->nb_segs,
                        "Number of segments differ in original (%u) and filled (%u) op",
                        nb_dst_segments, m->nb_segs);

        /* Validate each mbuf segment length */
        for (i = 0; i < nb_dst_segments; ++i) {
                /* Apply offset to the first mbuf segment */
                uint16_t offset = (i == 0) ? op->offset : 0;
                uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
                total_data_size += orig_op->segments[i].length;

                TEST_ASSERT(orig_op->segments[i].length == data_len,
                                "Length of segment differ in original (%u) and filled (%u) op",
                                orig_op->segments[i].length, data_len);
                TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
                                rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
                                data_len,
                                "Output buffers (CB=%u) are not equal", i);
                m = m->next;
        }

        /* Validate total mbuf pkt length */
        uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
        TEST_ASSERT(total_data_size == pkt_len,
                        "Length of data differ in original (%u) and filled (%u) op",
                        total_data_size, pkt_len);

        return TEST_SUCCESS;
}

static int
validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
                struct rte_bbdev_dec_op *ref_op, const int vector_mask)
{
        unsigned int i;
        int ret;
        struct op_data_entries *hard_data_orig =
                        &test_vector.entries[DATA_HARD_OUTPUT];
        struct op_data_entries *soft_data_orig =
                        &test_vector.entries[DATA_SOFT_OUTPUT];
        struct rte_bbdev_op_turbo_dec *ops_td;
        struct rte_bbdev_op_data *hard_output;
        struct rte_bbdev_op_data *soft_output;
        struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec;

        for (i = 0; i < n; ++i) {
                ops_td = &ops[i]->turbo_dec;
                hard_output = &ops_td->hard_output;
                soft_output = &ops_td->soft_output;

                if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT)
                        TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count,
                                        "Returned iter_count (%d) > expected iter_count (%d)",
                                        ops_td->iter_count, ref_td->iter_count);
                ret = check_dec_status_and_ordering(ops[i], i, ref_op->status);
                TEST_ASSERT_SUCCESS(ret,
                                "Checking status and ordering for decoder failed");

                TEST_ASSERT_SUCCESS(validate_op_chain(hard_output,
                                hard_data_orig),
                                "Hard output buffers (CB=%u) are not equal",
                                i);

                if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT)
                        TEST_ASSERT_SUCCESS(validate_op_chain(soft_output,
                                        soft_data_orig),
                                        "Soft output buffers (CB=%u) are not equal",
                                        i);
        }

        return TEST_SUCCESS;
}

static int
validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n,
                struct rte_bbdev_enc_op *ref_op)
{
        unsigned int i;
        int ret;
        struct op_data_entries *hard_data_orig =
                        &test_vector.entries[DATA_HARD_OUTPUT];

        for (i = 0; i < n; ++i) {
                ret = check_enc_status_and_ordering(ops[i], i, ref_op->status);
                TEST_ASSERT_SUCCESS(ret,
                                "Checking status and ordering for encoder failed");
                TEST_ASSERT_SUCCESS(validate_op_chain(
                                &ops[i]->turbo_enc.output,
                                hard_data_orig),
                                "Output buffers (CB=%u) are not equal",
                                i);
        }

        return TEST_SUCCESS;
}

static void
create_reference_dec_op(struct rte_bbdev_dec_op *op)
{
        unsigned int i;
        struct op_data_entries *entry;

        op->turbo_dec = test_vector.turbo_dec;
        entry = &test_vector.entries[DATA_INPUT];
        for (i = 0; i < entry->nb_segments; ++i)
                op->turbo_dec.input.length +=
                                entry->segments[i].length;
}

static void
create_reference_enc_op(struct rte_bbdev_enc_op *op)
{
        unsigned int i;
        struct op_data_entries *entry;

        op->turbo_enc = test_vector.turbo_enc;
        entry = &test_vector.entries[DATA_INPUT];
        for (i = 0; i < entry->nb_segments; ++i)
                op->turbo_enc.input.length +=
                                entry->segments[i].length;
}

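/* Compute the transport block size in bits used for the Mbps figures:
 * in CB mode it is simply k_neg; in TB mode the remaining c - r code
 * blocks are summed, using k_neg or k_pos depending on whether the
 * starting block index r is below c_neg.
 */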
static uint32_t
calc_dec_TB_size(struct rte_bbdev_dec_op *op)
{
        uint8_t i;
        uint32_t c, r, tb_size = 0;

        if (op->turbo_dec.code_block_mode) {
                tb_size = op->turbo_dec.tb_params.k_neg;
        } else {
                c = op->turbo_dec.tb_params.c;
                r = op->turbo_dec.tb_params.r;
                for (i = 0; i < c - r; i++)
                        tb_size += (r < op->turbo_dec.tb_params.c_neg) ?
                                op->turbo_dec.tb_params.k_neg :
                                op->turbo_dec.tb_params.k_pos;
        }
        return tb_size;
}

static uint32_t
calc_enc_TB_size(struct rte_bbdev_enc_op *op)
{
        uint8_t i;
        uint32_t c, r, tb_size = 0;

        if (op->turbo_enc.code_block_mode) {
                tb_size = op->turbo_enc.tb_params.k_neg;
        } else {
                c = op->turbo_enc.tb_params.c;
                r = op->turbo_enc.tb_params.r;
                for (i = 0; i < c - r; i++)
                        tb_size += (r < op->turbo_enc.tb_params.c_neg) ?
                                op->turbo_enc.tb_params.k_neg :
                                op->turbo_enc.tb_params.k_pos;
        }
        return tb_size;
}

static int
init_test_op_params(struct test_op_params *op_params,
                enum rte_bbdev_op_type op_type, const int expected_status,
                const int vector_mask, struct rte_mempool *ops_mp,
                uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
{
        int ret = 0;

        if (op_type == RTE_BBDEV_OP_TURBO_DEC)
                ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
                                &op_params->ref_dec_op, 1);
        else
                ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
                                &op_params->ref_enc_op, 1);

        TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");

        op_params->mp = ops_mp;
        op_params->burst_sz = burst_sz;
        op_params->num_to_process = num_to_process;
        op_params->num_lcores = num_lcores;
        op_params->vector_mask = vector_mask;
        if (op_type == RTE_BBDEV_OP_TURBO_DEC)
                op_params->ref_dec_op->status = expected_status;
        else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
                op_params->ref_enc_op->status = expected_status;

        return 0;
}

static int
run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
                struct test_op_params *op_params)
{
        int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
        unsigned int i;
        struct active_device *ad;
        unsigned int burst_sz = get_burst_sz();
        enum rte_bbdev_op_type op_type = test_vector.op_type;
        const struct rte_bbdev_op_cap *capabilities = NULL;

        ad = &active_devs[dev_id];

        /* Check if device supports op_type */
        if (!is_avail_op(ad, test_vector.op_type))
                return TEST_SUCCESS;

        struct rte_bbdev_info info;
        rte_bbdev_info_get(ad->dev_id, &info);
        socket_id = GET_SOCKET(info.socket_id);

        f_ret = create_mempools(ad, socket_id, op_type,
                        get_num_ops());
        if (f_ret != TEST_SUCCESS) {
                printf("Couldn't create mempools");
                goto fail;
        }
        if (op_type == RTE_BBDEV_OP_NONE)
                op_type = RTE_BBDEV_OP_TURBO_ENC;

        f_ret = init_test_op_params(op_params, test_vector.op_type,
                        test_vector.expected_status,
                        test_vector.mask,
                        ad->ops_mempool,
                        burst_sz,
                        get_num_ops(),
                        get_num_lcores());
        if (f_ret != TEST_SUCCESS) {
                printf("Couldn't init test op params");
                goto fail;
        }

        if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
                /* Find Decoder capabilities */
                const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
                while (cap->type != RTE_BBDEV_OP_NONE) {
                        if (cap->type == RTE_BBDEV_OP_TURBO_DEC) {
                                capabilities = cap;
                                break;
                        }
                        cap++; /* advance, otherwise the loop never ends */
                }
                TEST_ASSERT_NOT_NULL(capabilities,
                                "Couldn't find Decoder capabilities");

                create_reference_dec_op(op_params->ref_dec_op);
        } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
                create_reference_enc_op(op_params->ref_enc_op);

        for (i = 0; i < ad->nb_queues; ++i) {
                f_ret = fill_queue_buffers(op_params,
                                ad->in_mbuf_pool,
                                ad->hard_out_mbuf_pool,
                                ad->soft_out_mbuf_pool,
                                ad->queue_ids[i],
                                capabilities,
                                info.drv.min_alignment,
                                socket_id);
                if (f_ret != TEST_SUCCESS) {
                        printf("Couldn't init queue buffers");
                        goto fail;
                }
        }

        /* Run test case function */
        t_ret = test_case_func(ad, op_params);

        /* Free active device resources and return */
        free_buffers(ad, op_params);
        return t_ret;

fail:
        free_buffers(ad, op_params);
        return TEST_FAILED;
}

/* Run given test function per active device per supported op type
 * per burst size.
 */
static int
run_test_case(test_case_function *test_case_func)
{
        int ret = 0;
        uint8_t dev;

        /* Alloc op_params */
        struct test_op_params *op_params = rte_zmalloc(NULL,
                        sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE);
        TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params",
                        RTE_ALIGN(sizeof(struct test_op_params),
                                RTE_CACHE_LINE_SIZE));

        /* For each device run test case function */
        for (dev = 0; dev < nb_active_devs; ++dev)
                ret |= run_test_case_on_device(test_case_func, dev, op_params);

        rte_free(op_params);

        return ret;
}

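/* Interrupt-mode dequeue handler: drain the current burst, accumulate
 * until all ops of the batch are back, then validate the results and
 * fold the elapsed TSC cycles into the per-thread ops/s and Mbps totals.
 */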
static void
dequeue_event_callback(uint16_t dev_id,
                enum rte_bbdev_event_type event, void *cb_arg,
                void *ret_param)
{
        int ret;
        uint16_t i;
        uint64_t total_time;
        uint16_t deq, burst_sz, num_ops;
        uint16_t queue_id = *(uint16_t *) ret_param;
        struct rte_bbdev_info info;

        double tb_len_bits;

        struct thread_params *tp = cb_arg;

        /* Find matching thread params using queue_id */
        for (i = 0; i < MAX_QUEUES; ++i, ++tp)
                if (tp->queue_id == queue_id)
                        break;

        if (i == MAX_QUEUES) {
                printf("%s: Queue_id from interrupt details was not found!\n",
                                __func__);
                return;
        }

        if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
                rte_atomic16_set(&tp->processing_status, TEST_FAILED);
                printf(
                        "Dequeue interrupt handler called for incorrect event!\n");
                return;
        }

        burst_sz = rte_atomic16_read(&tp->burst_sz);
        num_ops = tp->op_params->num_to_process;

        if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
                deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
                                &tp->dec_ops[
                                        rte_atomic16_read(&tp->nb_dequeued)],
                                burst_sz);
        else
                deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
                                &tp->enc_ops[
                                        rte_atomic16_read(&tp->nb_dequeued)],
                                burst_sz);

        if (deq < burst_sz) {
                printf(
                        "After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
                        burst_sz, deq);
                rte_atomic16_set(&tp->processing_status, TEST_FAILED);
                return;
        }

        if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
                rte_atomic16_add(&tp->nb_dequeued, deq);
                return;
        }

        total_time = rte_rdtsc_precise() - tp->start_time;

        rte_bbdev_info_get(dev_id, &info);

        ret = TEST_SUCCESS;

        if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
                struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
                ret = validate_dec_op(tp->dec_ops, num_ops, ref_op,
                                tp->op_params->vector_mask);
                /* get the max of iter_count for all dequeued ops */
                for (i = 0; i < num_ops; ++i)
                        tp->iter_count = RTE_MAX(
                                        tp->dec_ops[i]->turbo_dec.iter_count,
                                        tp->iter_count);
                rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq);
        } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) {
                struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
                ret = validate_enc_op(tp->enc_ops, num_ops, ref_op);
                rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq);
        }

        if (ret) {
                printf("Buffers validation failed\n");
                rte_atomic16_set(&tp->processing_status, TEST_FAILED);
        }

        switch (test_vector.op_type) {
        case RTE_BBDEV_OP_TURBO_DEC:
                tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op);
                break;
        case RTE_BBDEV_OP_TURBO_ENC:
                tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op);
                break;
        case RTE_BBDEV_OP_NONE:
                tb_len_bits = 0.0;
                break;
        default:
                printf("Unknown op type: %d\n", test_vector.op_type);
                rte_atomic16_set(&tp->processing_status, TEST_FAILED);
                return;
        }

        tp->ops_per_sec += ((double)num_ops) /
                        ((double)total_time / (double)rte_get_tsc_hz());
        tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
                        ((double)total_time / (double)rte_get_tsc_hz());

        rte_atomic16_add(&tp->nb_dequeued, deq);
}

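/* Per-lcore decoder throughput body for interrupt mode: enqueue bursts
 * and let dequeue_event_callback() drain them; the wait on nb_dequeued
 * after each burst keeps enqueue and interrupt-driven dequeue in step.
 */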
1316 static int
1317 throughput_intr_lcore_dec(void *arg)
1318 {
1319         struct thread_params *tp = arg;
1320         unsigned int enqueued;
1321         const uint16_t queue_id = tp->queue_id;
1322         const uint16_t burst_sz = tp->op_params->burst_sz;
1323         const uint16_t num_to_process = tp->op_params->num_to_process;
1324         struct rte_bbdev_dec_op *ops[num_to_process];
1325         struct test_buffers *bufs = NULL;
1326         struct rte_bbdev_info info;
1327         int ret, i, j;
1328         uint16_t num_to_enq, enq;
1329
1330         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1331                         "BURST_SIZE should be <= %u", MAX_BURST);
1332
1333         TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
1334                         "Failed to enable interrupts for dev: %u, queue_id: %u",
1335                         tp->dev_id, queue_id);
1336
1337         rte_bbdev_info_get(tp->dev_id, &info);
1338
1339         TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
1340                         "NUM_OPS cannot exceed %u for this device",
1341                         info.drv.queue_size_lim);
1342
1343         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1344
1345         rte_atomic16_clear(&tp->processing_status);
1346         rte_atomic16_clear(&tp->nb_dequeued);
1347
1348         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1349                 rte_pause();
1350
1351         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
1352                                 num_to_process);
1353         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1354                         num_to_process);
1355         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1356                 copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs,
1357                                 bufs->hard_outputs, bufs->soft_outputs,
1358                                 tp->op_params->ref_dec_op);
1359
1360         /* Set counter to validate the ordering */
1361         for (j = 0; j < num_to_process; ++j)
1362                 ops[j]->opaque_data = (void *)(uintptr_t)j;
1363
1364         for (j = 0; j < TEST_REPETITIONS; ++j) {
1365                 for (i = 0; i < num_to_process; ++i)
1366                         rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data);
1367
1368                 tp->start_time = rte_rdtsc_precise();
1369                 for (enqueued = 0; enqueued < num_to_process;) {
1370                         num_to_enq = burst_sz;
1371
1372                         if (unlikely(num_to_process - enqueued < num_to_enq))
1373                                 num_to_enq = num_to_process - enqueued;
1374
1375                         enq = 0;
1376                         do {
1377                                 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
1378                                         queue_id, &ops[enqueued + enq],
1379                                         num_to_enq - enq);
1380                         } while (unlikely(num_to_enq != enq));
1381                         enqueued += enq;
1382
1383                         /* Publish the number of descriptors just enqueued
1384                          * in the thread's burst_sz. This ensures that the
1385                          * callback dequeues the matching amount, which is
1386                          * needed for the last batch, where the number of
1387                          * operations may not be a multiple of the burst
1388                          * size.
1389                          */
1390                         rte_atomic16_set(&tp->burst_sz, num_to_enq);
1391
1392                         /* Wait until processing of previous batch is
1393                          * completed.
1394                          */
1395                         while (rte_atomic16_read(&tp->nb_dequeued) !=
1396                                         (int16_t) enqueued)
1397                                 rte_pause();
1398                 }
1399                 if (j != TEST_REPETITIONS - 1)
1400                         rte_atomic16_clear(&tp->nb_dequeued);
1401         }
1402
1403         return TEST_SUCCESS;
1404 }
1405
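/*
 * A minimal sketch of the retry-until-enqueued idiom used by the
 * interrupt-mode lcore functions (hypothetical wrapper, decoder case).
 * On a partial enqueue both the base pointer and the remaining count
 * advance, so no operation is submitted twice.
 */
static inline void __rte_unused
enqueue_all_dec_ops_sketch(uint16_t dev_id, uint16_t queue_id,
                struct rte_bbdev_dec_op **ops, uint16_t num)
{
        uint16_t enq = 0;

        while (enq < num)
                enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
                                &ops[enq], num - enq);
}
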
1406 static int
1407 throughput_intr_lcore_enc(void *arg)
1408 {
1409         struct thread_params *tp = arg;
1410         unsigned int enqueued;
1411         const uint16_t queue_id = tp->queue_id;
1412         const uint16_t burst_sz = tp->op_params->burst_sz;
1413         const uint16_t num_to_process = tp->op_params->num_to_process;
1414         struct rte_bbdev_enc_op *ops[num_to_process];
1415         struct test_buffers *bufs = NULL;
1416         struct rte_bbdev_info info;
1417         int ret, i, j;
1418         uint16_t num_to_enq, enq;
1419
1420         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1421                         "BURST_SIZE should be <= %u", MAX_BURST);
1422
1423         TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id),
1424                         "Failed to enable interrupts for dev: %u, queue_id: %u",
1425                         tp->dev_id, queue_id);
1426
1427         rte_bbdev_info_get(tp->dev_id, &info);
1428
1429         TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim),
1430                         "NUM_OPS cannot exceed %u for this device",
1431                         info.drv.queue_size_lim);
1432
1433         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1434
1435         rte_atomic16_clear(&tp->processing_status);
1436         rte_atomic16_clear(&tp->nb_dequeued);
1437
1438         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1439                 rte_pause();
1440
1441         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
1442                         num_to_process);
1443         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1444                         num_to_process);
1445         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1446                 copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs,
1447                                 bufs->hard_outputs, tp->op_params->ref_enc_op);
1448
1449         /* Set counter to validate the ordering */
1450         for (j = 0; j < num_to_process; ++j)
1451                 ops[j]->opaque_data = (void *)(uintptr_t)j;
1452
1453         for (j = 0; j < TEST_REPETITIONS; ++j) {
1454                 for (i = 0; i < num_to_process; ++i)
1455                         rte_pktmbuf_reset(ops[i]->turbo_enc.output.data);
1456
1457                 tp->start_time = rte_rdtsc_precise();
1458                 for (enqueued = 0; enqueued < num_to_process;) {
1459                         num_to_enq = burst_sz;
1460
1461                         if (unlikely(num_to_process - enqueued < num_to_enq))
1462                                 num_to_enq = num_to_process - enqueued;
1463
1464                         enq = 0;
1465                         do {
1466                                 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
1467                                                 queue_id, &ops[enqueued + enq],
1468                                                 num_to_enq - enq);
1469                         } while (unlikely(enq != num_to_enq));
1470                         enqueued += enq;
1471
1472                         /* Publish the number of descriptors just enqueued
1473                          * in the thread's burst_sz. This ensures that the
1474                          * callback dequeues the matching amount, which is
1475                          * needed for the last batch, where the number of
1476                          * operations may not be a multiple of the burst
1477                          * size.
1478                          */
1479                         rte_atomic16_set(&tp->burst_sz, num_to_enq);
1480
1481                         /* Wait until processing of previous batch is
1482                          * completed.
1483                          */
1484                         while (rte_atomic16_read(&tp->nb_dequeued) !=
1485                                         (int16_t) enqueued)
1486                                 rte_pause();
1487                 }
1488                 if (j != TEST_REPETITIONS - 1)
1489                         rte_atomic16_clear(&tp->nb_dequeued);
1490         }
1491
1492         return TEST_SUCCESS;
1493 }
1494
1495 static int
1496 throughput_pmd_lcore_dec(void *arg)
1497 {
1498         struct thread_params *tp = arg;
1499         uint16_t enq, deq;
1500         uint64_t total_time = 0, start_time;
1501         const uint16_t queue_id = tp->queue_id;
1502         const uint16_t burst_sz = tp->op_params->burst_sz;
1503         const uint16_t num_ops = tp->op_params->num_to_process;
1504         struct rte_bbdev_dec_op *ops_enq[num_ops];
1505         struct rte_bbdev_dec_op *ops_deq[num_ops];
1506         struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op;
1507         struct test_buffers *bufs = NULL;
1508         int i, j, ret;
1509         struct rte_bbdev_info info;
1510         uint16_t num_to_enq;
1511
1512         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1513                         "BURST_SIZE should be <= %u", MAX_BURST);
1514
1515         rte_bbdev_info_get(tp->dev_id, &info);
1516
1517         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
1518                         "NUM_OPS cannot exceed %u for this device",
1519                         info.drv.queue_size_lim);
1520
1521         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1522
1523         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1524                 rte_pause();
1525
1526         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
1527         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
1528
1529         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1530                 copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs,
1531                                 bufs->hard_outputs, bufs->soft_outputs, ref_op);
1532
1533         /* Set counter to validate the ordering */
1534         for (j = 0; j < num_ops; ++j)
1535                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1536
1537         for (i = 0; i < TEST_REPETITIONS; ++i) {
1538
1539                 for (j = 0; j < num_ops; ++j)
1540                         mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
1541
1542                 start_time = rte_rdtsc_precise();
1543
1544                 for (enq = 0, deq = 0; enq < num_ops;) {
1545                         num_to_enq = burst_sz;
1546
1547                         if (unlikely(num_ops - enq < num_to_enq))
1548                                 num_to_enq = num_ops - enq;
1549
1550                         enq += rte_bbdev_enqueue_dec_ops(tp->dev_id,
1551                                         queue_id, &ops_enq[enq], num_to_enq);
1552
1553                         deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
1554                                         queue_id, &ops_deq[deq], enq - deq);
1555                 }
1556
1557                 /* Dequeue the remaining operations */
1558                 while (deq < enq) {
1559                         deq += rte_bbdev_dequeue_dec_ops(tp->dev_id,
1560                                         queue_id, &ops_deq[deq], enq - deq);
1561                 }
1562
1563                 total_time += rte_rdtsc_precise() - start_time;
1564         }
1565
1566         tp->iter_count = 0;
1567         /* get the max of iter_count for all dequeued ops */
1568         for (i = 0; i < num_ops; ++i) {
1569                 tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count,
1570                                 tp->iter_count);
1571         }
1572
1573         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1574                 ret = validate_dec_op(ops_deq, num_ops, ref_op,
1575                                 tp->op_params->vector_mask);
1576                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
1577         }
1578
1579         rte_bbdev_dec_op_free_bulk(ops_enq, num_ops);
1580
1581         double tb_len_bits = calc_dec_TB_size(ref_op);
1582
1583         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
1584                         ((double)total_time / (double)rte_get_tsc_hz());
1585         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) /
1586                         1000000.0) / ((double)total_time /
1587                         (double)rte_get_tsc_hz());
1588
1589         return TEST_SUCCESS;
1590 }
1591
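/*
 * Note: in PMD (polling) mode the loop above interleaves enqueue and
 * dequeue on the same lcore and then drains the queue, so total_time
 * covers the full round trip of all num_ops operations per repetition.
 */
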
1592 static int
1593 throughput_pmd_lcore_enc(void *arg)
1594 {
1595         struct thread_params *tp = arg;
1596         uint16_t enq, deq;
1597         uint64_t total_time = 0, start_time;
1598         const uint16_t queue_id = tp->queue_id;
1599         const uint16_t burst_sz = tp->op_params->burst_sz;
1600         const uint16_t num_ops = tp->op_params->num_to_process;
1601         struct rte_bbdev_enc_op *ops_enq[num_ops];
1602         struct rte_bbdev_enc_op *ops_deq[num_ops];
1603         struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op;
1604         struct test_buffers *bufs = NULL;
1605         int i, j, ret;
1606         struct rte_bbdev_info info;
1607         uint16_t num_to_enq;
1608
1609         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
1610                         "BURST_SIZE should be <= %u", MAX_BURST);
1611
1612         rte_bbdev_info_get(tp->dev_id, &info);
1613
1614         TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim),
1615                         "NUM_OPS cannot exceed %u for this device",
1616                         info.drv.queue_size_lim);
1617
1618         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
1619
1620         while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
1621                 rte_pause();
1622
1623         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
1624                         num_ops);
1625         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
1626                         num_ops);
1627         if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1628                 copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs,
1629                                 bufs->hard_outputs, ref_op);
1630
1631         /* Set counter to validate the ordering */
1632         for (j = 0; j < num_ops; ++j)
1633                 ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1634
1635         for (i = 0; i < TEST_REPETITIONS; ++i) {
1636
1637                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1638                         for (j = 0; j < num_ops; ++j)
1639                                 mbuf_reset(ops_enq[j]->turbo_enc.output.data);
1640
1641                 start_time = rte_rdtsc_precise();
1642
1643                 for (enq = 0, deq = 0; enq < num_ops;) {
1644                         num_to_enq = burst_sz;
1645
1646                         if (unlikely(num_ops - enq < num_to_enq))
1647                                 num_to_enq = num_ops - enq;
1648
1649                         enq += rte_bbdev_enqueue_enc_ops(tp->dev_id,
1650                                         queue_id, &ops_enq[enq], num_to_enq);
1651
1652                         deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
1653                                         queue_id, &ops_deq[deq], enq - deq);
1654                 }
1655
1656                 /* Dequeue the remaining operations */
1657                 while (deq < enq) {
1658                         deq += rte_bbdev_dequeue_enc_ops(tp->dev_id,
1659                                         queue_id, &ops_deq[deq], enq - deq);
1660                 }
1661
1662                 total_time += rte_rdtsc_precise() - start_time;
1663         }
1664
1665         if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1666                 ret = validate_enc_op(ops_deq, num_ops, ref_op);
1667                 TEST_ASSERT_SUCCESS(ret, "Validation failed!");
1668         }
1669
1670         double tb_len_bits = calc_enc_TB_size(ref_op);
1671
1672         tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
1673                         ((double)total_time / (double)rte_get_tsc_hz());
1674         tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
1675                         / 1000000.0) / ((double)total_time /
1676                         (double)rte_get_tsc_hz());
1677
1678         return TEST_SUCCESS;
1679 }
1680
1681 static void
1682 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
1683 {
1684         unsigned int iter = 0;
1685         double total_mops = 0, total_mbps = 0;
1686
1687         for (iter = 0; iter < used_cores; iter++) {
1688                 printf(
1689                         "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
1690                         t_params[iter].lcore_id, t_params[iter].ops_per_sec,
1691                         t_params[iter].mbps);
1692                 total_mops += t_params[iter].ops_per_sec;
1693                 total_mbps += t_params[iter].mbps;
1694         }
1695         printf(
1696                 "\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps\n",
1697                 used_cores, total_mops, total_mbps);
1698 }
1699
1700 static void
1701 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
1702 {
1703         unsigned int iter = 0;
1704         double total_mops = 0, total_mbps = 0;
1705         uint8_t iter_count = 0;
1706
1707         for (iter = 0; iter < used_cores; iter++) {
1708                 printf(
1709                         "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
1710                         t_params[iter].lcore_id, t_params[iter].ops_per_sec,
1711                         t_params[iter].mbps, t_params[iter].iter_count);
1712                 total_mops += t_params[iter].ops_per_sec;
1713                 total_mbps += t_params[iter].mbps;
1714                 iter_count = RTE_MAX(iter_count, t_params[iter].iter_count);
1715         }
1716         printf(
1717                 "\nTotal throughput for %u cores: %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
1718                 used_cores, total_mops, total_mbps, iter_count);
1719 }
1720
1721 /*
1722  * Test function that determines how long an enqueue + dequeue of a burst
1723  * takes on available lcores.
1724  */
1725 static int
1726 throughput_test(struct active_device *ad,
1727                 struct test_op_params *op_params)
1728 {
1729         int ret;
1730         unsigned int lcore_id, used_cores = 0;
1731         struct thread_params *t_params, *tp;
1732         struct rte_bbdev_info info;
1733         lcore_function_t *throughput_function;
1734         uint16_t num_lcores;
1735         const char *op_type_str;
1736
1737         rte_bbdev_info_get(ad->dev_id, &info);
1738
1739         op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
1740         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
1741                         test_vector.op_type);
1742
1743         printf(
1744                 "Throughput test: dev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, int mode: %s, GHz: %lg\n",
1745                         info.dev_name, ad->nb_queues, op_params->burst_sz,
1746                         op_params->num_to_process, op_params->num_lcores,
1747                         op_type_str,
1748                         intr_enabled ? "Interrupt mode" : "PMD mode",
1749                         (double)rte_get_tsc_hz() / 1000000000.0);
1750
1751         /* Set number of lcores */
1752         num_lcores = (ad->nb_queues < (op_params->num_lcores))
1753                         ? ad->nb_queues
1754                         : op_params->num_lcores;
1755
1756         /* Allocate memory for thread parameters structure */
1757         t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
1758                         RTE_CACHE_LINE_SIZE);
1759         TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
1760                         RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
1761                                 RTE_CACHE_LINE_SIZE));
1762
1763         if (intr_enabled) {
1764                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1765                         throughput_function = throughput_intr_lcore_dec;
1766                 else
1767                         throughput_function = throughput_intr_lcore_enc;
1768
1769                 /* Dequeue interrupt callback registration */
1770                 ret = rte_bbdev_callback_register(ad->dev_id,
1771                                 RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
1772                                 t_params);
1773                 if (ret < 0) {
1774                         rte_free(t_params);
1775                         return ret;
1776                 }
1777         } else {
1778                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1779                         throughput_function = throughput_pmd_lcore_dec;
1780                 else
1781                         throughput_function = throughput_pmd_lcore_enc;
1782         }
1783
1784         rte_atomic16_set(&op_params->sync, SYNC_WAIT);
1785
1786         /* Master lcore takes the first entry */
1787         t_params[0].dev_id = ad->dev_id;
1788         t_params[0].lcore_id = rte_lcore_id();
1789         t_params[0].op_params = op_params;
1790         t_params[0].queue_id = ad->queue_ids[used_cores++];
1791         t_params[0].iter_count = 0;
1792
1793         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
1794                 if (used_cores >= num_lcores)
1795                         break;
1796
1797                 t_params[used_cores].dev_id = ad->dev_id;
1798                 t_params[used_cores].lcore_id = lcore_id;
1799                 t_params[used_cores].op_params = op_params;
1800                 t_params[used_cores].queue_id = ad->queue_ids[used_cores];
1801                 t_params[used_cores].iter_count = 0;
1802
1803                 rte_eal_remote_launch(throughput_function,
1804                                 &t_params[used_cores++], lcore_id);
1805         }
1806
1807         rte_atomic16_set(&op_params->sync, SYNC_START);
1808         ret = throughput_function(&t_params[0]);
1809
1810         /* Master core is always used */
1811         for (used_cores = 1; used_cores < num_lcores; used_cores++)
1812                 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
1813
1814         /* Return if test failed */
1815         if (ret) {
1816                 rte_free(t_params);
1817                 return ret;
1818         }
1819
1820         /* Print throughput if interrupts are disabled and test passed */
1821         if (!intr_enabled) {
1822                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1823                         print_dec_throughput(t_params, num_lcores);
1824                 else
1825                         print_enc_throughput(t_params, num_lcores);
1826                 rte_free(t_params);
1827                 return ret;
1828         }
1829
1830         /* In the interrupt TC we need to wait for the interrupt callback
1831          * to dequeue all pending operations. Skip waiting for queues that
1832          * reported an error through the processing_status variable.
1833          * Wait for the master lcore's operations first.
1834          */
1835         tp = &t_params[0];
1836         while ((rte_atomic16_read(&tp->nb_dequeued) <
1837                         op_params->num_to_process) &&
1838                         (rte_atomic16_read(&tp->processing_status) !=
1839                         TEST_FAILED))
1840                 rte_pause();
1841
1842         tp->ops_per_sec /= TEST_REPETITIONS;
1843         tp->mbps /= TEST_REPETITIONS;
1844         ret |= rte_atomic16_read(&tp->processing_status);
1845
1846         /* Wait for the slave lcores' operations */
1847         for (used_cores = 1; used_cores < num_lcores; used_cores++) {
1848                 tp = &t_params[used_cores];
1849
1850                 while ((rte_atomic16_read(&tp->nb_dequeued) <
1851                                 op_params->num_to_process) &&
1852                                 (rte_atomic16_read(&tp->processing_status) !=
1853                                 TEST_FAILED))
1854                         rte_pause();
1855
1856                 tp->ops_per_sec /= TEST_REPETITIONS;
1857                 tp->mbps /= TEST_REPETITIONS;
1858                 ret |= rte_atomic16_read(&tp->processing_status);
1859         }
1860
1861         /* Print throughput if test passed */
1862         if (!ret) {
1863                 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
1864                         print_dec_throughput(t_params, num_lcores);
1865                 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
1866                         print_enc_throughput(t_params, num_lcores);
1867         }
1868
1869         rte_free(t_params);
1870         return ret;
1871 }
1872
1873 static int
1874 latency_test_dec(struct rte_mempool *mempool,
1875                 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
1876                 int vector_mask, uint16_t dev_id, uint16_t queue_id,
1877                 const uint16_t num_to_process, uint16_t burst_sz,
1878                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
1879 {
1880         int ret = TEST_SUCCESS;
1881         uint16_t i, j, dequeued;
1882         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
1883         uint64_t start_time = 0, last_time = 0;
1884
1885         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
1886                 uint16_t enq = 0, deq = 0;
1887                 bool first_time = true;
1888                 last_time = 0;
1889
1890                 if (unlikely(num_to_process - dequeued < burst_sz))
1891                         burst_sz = num_to_process - dequeued;
1892
1893                 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
1894                 TEST_ASSERT_SUCCESS(ret,
1895                                 "rte_bbdev_dec_op_alloc_bulk() failed");
1896                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1897                         copy_reference_dec_op(ops_enq, burst_sz, dequeued,
1898                                         bufs->inputs,
1899                                         bufs->hard_outputs,
1900                                         bufs->soft_outputs,
1901                                         ref_op);
1902
1903                 /* Set counter to validate the ordering */
1904                 for (j = 0; j < burst_sz; ++j)
1905                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1906
1907                 start_time = rte_rdtsc_precise();
1908
1909                 enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
1910                                 burst_sz);
1911                 TEST_ASSERT(enq == burst_sz,
1912                                 "Error enqueueing burst, expected %u, got %u",
1913                                 burst_sz, enq);
1914
1915                 /* Dequeue */
1916                 do {
1917                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
1918                                         &ops_deq[deq], burst_sz - deq);
1919                         if (likely(first_time && (deq > 0))) {
1920                                 last_time = rte_rdtsc_precise() - start_time;
1921                                 first_time = false;
1922                         }
1923                 } while (unlikely(burst_sz != deq));
1924
1925                 *max_time = RTE_MAX(*max_time, last_time);
1926                 *min_time = RTE_MIN(*min_time, last_time);
1927                 *total_time += last_time;
1928
1929                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1930                         ret = validate_dec_op(ops_deq, burst_sz, ref_op,
1931                                         vector_mask);
1932                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
1933                 }
1934
1935                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
1936                 dequeued += deq;
1937         }
1938
1939         return i;
1940 }
1941
1942 static int
1943 latency_test_enc(struct rte_mempool *mempool,
1944                 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
1945                 uint16_t dev_id, uint16_t queue_id,
1946                 const uint16_t num_to_process, uint16_t burst_sz,
1947                 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
1948 {
1949         int ret = TEST_SUCCESS;
1950         uint16_t i, j, dequeued;
1951         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
1952         uint64_t start_time = 0, last_time = 0;
1953
1954         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
1955                 uint16_t enq = 0, deq = 0;
1956                 bool first_time = true;
1957                 last_time = 0;
1958
1959                 if (unlikely(num_to_process - dequeued < burst_sz))
1960                         burst_sz = num_to_process - dequeued;
1961
1962                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
1963                 TEST_ASSERT_SUCCESS(ret,
1964                                 "rte_bbdev_enc_op_alloc_bulk() failed");
1965                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
1966                         copy_reference_enc_op(ops_enq, burst_sz, dequeued,
1967                                         bufs->inputs,
1968                                         bufs->hard_outputs,
1969                                         ref_op);
1970
1971                 /* Set counter to validate the ordering */
1972                 for (j = 0; j < burst_sz; ++j)
1973                         ops_enq[j]->opaque_data = (void *)(uintptr_t)j;
1974
1975                 start_time = rte_rdtsc_precise();
1976
1977                 enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
1978                                 burst_sz);
1979                 TEST_ASSERT(enq == burst_sz,
1980                                 "Error enqueueing burst, expected %u, got %u",
1981                                 burst_sz, enq);
1982
1983                 /* Dequeue */
1984                 do {
1985                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
1986                                         &ops_deq[deq], burst_sz - deq);
1987                         if (likely(first_time && (deq > 0))) {
1988                                 last_time = rte_rdtsc_precise() - start_time;
1989                                 first_time = false;
1990                         }
1991                 } while (unlikely(burst_sz != deq));
1992
1993                 *max_time = RTE_MAX(*max_time, last_time);
1994                 *min_time = RTE_MIN(*min_time, last_time);
1995                 *total_time += last_time;
1996
1997                 if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
1998                         ret = validate_enc_op(ops_deq, burst_sz, ref_op);
1999                         TEST_ASSERT_SUCCESS(ret, "Validation failed!");
2000                 }
2001
2002                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
2003                 dequeued += deq;
2004         }
2005
2006         return i;
2007 }
2008
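/*
 * Note: both latency helpers return the number of measured bursts (i),
 * which latency_test() below uses as the divisor when averaging
 * total_time.
 */
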
2009 static int
2010 latency_test(struct active_device *ad,
2011                 struct test_op_params *op_params)
2012 {
2013         int iter;
2014         uint16_t burst_sz = op_params->burst_sz;
2015         const uint16_t num_to_process = op_params->num_to_process;
2016         const enum rte_bbdev_op_type op_type = test_vector.op_type;
2017         const uint16_t queue_id = ad->queue_ids[0];
2018         struct test_buffers *bufs = NULL;
2019         struct rte_bbdev_info info;
2020         uint64_t total_time, min_time, max_time;
2021         const char *op_type_str;
2022
2023         total_time = max_time = 0;
2024         min_time = UINT64_MAX;
2025
2026         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2027                         "BURST_SIZE should be <= %u", MAX_BURST);
2028
2029         rte_bbdev_info_get(ad->dev_id, &info);
2030         bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2031
2032         op_type_str = rte_bbdev_op_type_str(op_type);
2033         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
2034
2035         printf(
2036                 "\nValidation/Latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
2037                         info.dev_name, burst_sz, num_to_process, op_type_str);
2038
2039         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
2040                 iter = latency_test_dec(op_params->mp, bufs,
2041                                 op_params->ref_dec_op, op_params->vector_mask,
2042                                 ad->dev_id, queue_id, num_to_process,
2043                                 burst_sz, &total_time, &min_time, &max_time);
2044         else
2045                 iter = latency_test_enc(op_params->mp, bufs,
2046                                 op_params->ref_enc_op, ad->dev_id, queue_id,
2047                                 num_to_process, burst_sz, &total_time,
2048                                 &min_time, &max_time);
2049
2050         if (iter <= 0)
2051                 return TEST_FAILED;
2052
2053         printf("Operation latency:\n"
2054                         "\tavg latency: %lg cycles, %lg us\n"
2055                         "\tmin latency: %lg cycles, %lg us\n"
2056                         "\tmax latency: %lg cycles, %lg us\n",
2057                         (double)total_time / (double)iter,
2058                         (double)(total_time * 1000000) / (double)iter /
2059                         (double)rte_get_tsc_hz(), (double)min_time,
2060                         (double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
2061                         (double)max_time, (double)(max_time * 1000000) /
2062                         (double)rte_get_tsc_hz());
2063
2064         return TEST_SUCCESS;
2065 }
2066
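/*
 * Worked example for the cycles-to-microseconds conversion above
 * (illustrative numbers): with a 2 GHz TSC, a 5000-cycle latency prints
 * as 5000 * 1000000 / 2e9 = 2.5 us.
 */
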
2067 #ifdef RTE_BBDEV_OFFLOAD_COST
2068 static int
2069 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
2070                 struct rte_bbdev_stats *stats)
2071 {
2072         struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
2073         struct rte_bbdev_stats *q_stats;
2074
2075         if (queue_id >= dev->data->num_queues)
2076                 return -1;
2077
2078         q_stats = &dev->data->queues[queue_id].queue_stats;
2079
2080         stats->enqueued_count = q_stats->enqueued_count;
2081         stats->dequeued_count = q_stats->dequeued_count;
2082         stats->enqueue_err_count = q_stats->enqueue_err_count;
2083         stats->dequeue_err_count = q_stats->dequeue_err_count;
2084         stats->acc_offload_cycles = q_stats->acc_offload_cycles;
2085
2086         return 0;
2087 }
2088
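/*
 * Note: this helper reads the counters straight from the queue's private
 * data instead of going through the public rte_bbdev_stats_get() API,
 * presumably because the test needs per-queue rather than device-wide
 * statistics.
 */
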
2089 static int
2090 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
2091                 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
2092                 uint16_t queue_id, const uint16_t num_to_process,
2093                 uint16_t burst_sz, struct test_time_stats *time_st)
2094 {
2095         int i, dequeued, ret;
2096         struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2097         uint64_t enq_start_time, deq_start_time;
2098         uint64_t enq_sw_last_time, deq_last_time;
2099         struct rte_bbdev_stats stats;
2100
2101         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2102                 uint16_t enq = 0, deq = 0;
2103
2104                 if (unlikely(num_to_process - dequeued < burst_sz))
2105                         burst_sz = num_to_process - dequeued;
2106
2107                 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
2108                 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2109                                 burst_sz);
2110
2111                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2112                         copy_reference_dec_op(ops_enq, burst_sz, dequeued,
2113                                         bufs->inputs,
2114                                         bufs->hard_outputs,
2115                                         bufs->soft_outputs,
2116                                         ref_op);
2117
2118                 /* Start time meas for enqueue function offload latency */
2119                 enq_start_time = rte_rdtsc_precise();
2120                 do {
2121                         enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
2122                                         &ops_enq[enq], burst_sz - enq);
2123                 } while (unlikely(burst_sz != enq));
2124
2125                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
2126                 TEST_ASSERT_SUCCESS(ret,
2127                                 "Failed to get stats for queue (%u) of device (%u)",
2128                                 queue_id, dev_id);
2129
2130                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
2131                                 stats.acc_offload_cycles;
2132                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
2133                                 enq_sw_last_time);
2134                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
2135                                 enq_sw_last_time);
2136                 time_st->enq_sw_total_time += enq_sw_last_time;
2137
2138                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
2139                                 stats.acc_offload_cycles);
2140                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
2141                                 stats.acc_offload_cycles);
2142                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
2143
2144                 /* Ensure enqueued ops are processed before timing dequeue */
2145                 rte_delay_us(200);
2146
2147                 /* Start time meas for dequeue function offload latency */
2148                 deq_start_time = rte_rdtsc_precise();
2149                 /* Dequeue one operation */
2150                 do {
2151                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2152                                         &ops_deq[deq], 1);
2153                 } while (unlikely(deq != 1));
2154
2155                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
2156                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
2157                                 deq_last_time);
2158                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
2159                                 deq_last_time);
2160                 time_st->deq_total_time += deq_last_time;
2161
2162                 /* Dequeue remaining operations if needed */
2163                 while (burst_sz != deq)
2164                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
2165                                         &ops_deq[deq], burst_sz - deq);
2166
2167                 rte_bbdev_dec_op_free_bulk(ops_enq, deq);
2168                 dequeued += deq;
2169         }
2170
2171         return i;
2172 }
2173
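/*
 * The decomposition used above, spelled out per burst:
 *
 *     enq_sw_time  = (tsc after enqueue - tsc before enqueue)
 *                     - stats.acc_offload_cycles
 *     enq_acc_time = stats.acc_offload_cycles
 *
 * i.e. the driver's software cost is the wall-clock enqueue time minus
 * the cycles the PMD reports as spent on the accelerator.
 */
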
2174 static int
2175 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
2176                 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
2177                 uint16_t queue_id, const uint16_t num_to_process,
2178                 uint16_t burst_sz, struct test_time_stats *time_st)
2179 {
2180         int i, dequeued, ret;
2181         struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
2182         uint64_t enq_start_time, deq_start_time;
2183         uint64_t enq_sw_last_time, deq_last_time;
2184         struct rte_bbdev_stats stats;
2185
2186         for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
2187                 uint16_t enq = 0, deq = 0;
2188
2189                 if (unlikely(num_to_process - dequeued < burst_sz))
2190                         burst_sz = num_to_process - dequeued;
2191
2192                 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
2193                 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
2194                                 burst_sz);
2195
2196                 if (test_vector.op_type != RTE_BBDEV_OP_NONE)
2197                         copy_reference_enc_op(ops_enq, burst_sz, dequeued,
2198                                         bufs->inputs,
2199                                         bufs->hard_outputs,
2200                                         ref_op);
2201
2202                 /* Start time meas for enqueue function offload latency */
2203                 enq_start_time = rte_rdtsc_precise();
2204                 do {
2205                         enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
2206                                         &ops_enq[enq], burst_sz - enq);
2207                 } while (unlikely(burst_sz != enq));
2208
2209                 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
2210                 TEST_ASSERT_SUCCESS(ret,
2211                                 "Failed to get stats for queue (%u) of device (%u)",
2212                                 queue_id, dev_id);
2213
2214                 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
2215                                 stats.acc_offload_cycles;
2216                 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
2217                                 enq_sw_last_time);
2218                 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
2219                                 enq_sw_last_time);
2220                 time_st->enq_sw_total_time += enq_sw_last_time;
2221
2222                 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
2223                                 stats.acc_offload_cycles);
2224                 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
2225                                 stats.acc_offload_cycles);
2226                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
2227
2228                 /* Ensure enqueued ops are processed before timing dequeue */
2229                 rte_delay_us(200);
2230
2231                 /* Start time meas for dequeue function offload latency */
2232                 deq_start_time = rte_rdtsc_precise();
2233                 /* Dequeue one operation */
2234                 do {
2235                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2236                                         &ops_deq[deq], 1);
2237                 } while (unlikely(deq != 1));
2238
2239                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
2240                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
2241                                 deq_last_time);
2242                 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
2243                                 deq_last_time);
2244                 time_st->deq_total_time += deq_last_time;
2245
2246                 while (burst_sz != deq)
2247                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
2248                                         &ops_deq[deq], burst_sz - deq);
2249
2250                 rte_bbdev_enc_op_free_bulk(ops_enq, deq);
2251                 dequeued += deq;
2252         }
2253
2254         return i;
2255 }
2256 #endif
2257
2258 static int
2259 offload_cost_test(struct active_device *ad,
2260                 struct test_op_params *op_params)
2261 {
2262 #ifndef RTE_BBDEV_OFFLOAD_COST
2263         RTE_SET_USED(ad);
2264         RTE_SET_USED(op_params);
2265         printf("Offload latency test is disabled.\n");
2266         printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
2267         return TEST_SKIPPED;
2268 #else
2269         int iter;
2270         uint16_t burst_sz = op_params->burst_sz;
2271         const uint16_t num_to_process = op_params->num_to_process;
2272         const enum rte_bbdev_op_type op_type = test_vector.op_type;
2273         const uint16_t queue_id = ad->queue_ids[0];
2274         struct test_buffers *bufs = NULL;
2275         struct rte_bbdev_info info;
2276         const char *op_type_str;
2277         struct test_time_stats time_st;
2278
2279         memset(&time_st, 0, sizeof(struct test_time_stats));
2280         time_st.enq_sw_min_time = UINT64_MAX;
2281         time_st.enq_acc_min_time = UINT64_MAX;
2282         time_st.deq_min_time = UINT64_MAX;
2283
2284         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2285                         "BURST_SIZE should be <= %u", MAX_BURST);
2286
2287         rte_bbdev_info_get(ad->dev_id, &info);
2288         bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
2289
2290         op_type_str = rte_bbdev_op_type_str(op_type);
2291         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
2292
2293         printf(
2294                 "\nOffload latency test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
2295                         info.dev_name, burst_sz, num_to_process, op_type_str);
2296
2297         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
2298                 iter = offload_latency_test_dec(op_params->mp, bufs,
2299                                 op_params->ref_dec_op, ad->dev_id, queue_id,
2300                                 num_to_process, burst_sz, &time_st);
2301         else
2302                 iter = offload_latency_test_enc(op_params->mp, bufs,
2303                                 op_params->ref_enc_op, ad->dev_id, queue_id,
2304                                 num_to_process, burst_sz, &time_st);
2305
2306         if (iter <= 0)
2307                 return TEST_FAILED;
2308
2309         printf("Enqueue offload cost latency:\n"
2310                         "\tDriver offload avg %lg cycles, %lg us\n"
2311                         "\tDriver offload min %lg cycles, %lg us\n"
2312                         "\tDriver offload max %lg cycles, %lg us\n"
2313                         "\tAccelerator offload avg %lg cycles, %lg us\n"
2314                         "\tAccelerator offload min %lg cycles, %lg us\n"
2315                         "\tAccelerator offload max %lg cycles, %lg us\n",
2316                         (double)time_st.enq_sw_total_time / (double)iter,
2317                         (double)(time_st.enq_sw_total_time * 1000000) /
2318                         (double)iter / (double)rte_get_tsc_hz(),
2319                         (double)time_st.enq_sw_min_time,
2320                         (double)(time_st.enq_sw_min_time * 1000000) /
2321                         rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
2322                         (double)(time_st.enq_sw_max_time * 1000000) /
2323                         rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
2324                         (double)iter,
2325                         (double)(time_st.enq_acc_total_time * 1000000) /
2326                         (double)iter / (double)rte_get_tsc_hz(),
2327                         (double)time_st.enq_acc_min_time,
2328                         (double)(time_st.enq_acc_min_time * 1000000) /
2329                         rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
2330                         (double)(time_st.enq_acc_max_time * 1000000) /
2331                         rte_get_tsc_hz());
2332
2333         printf("Dequeue offload cost latency - one op:\n"
2334                         "\tavg %lg cycles, %lg us\n"
2335                         "\tmin %lg cycles, %lg us\n"
2336                         "\tmax %lg cycles, %lg us\n",
2337                         (double)time_st.deq_total_time / (double)iter,
2338                         (double)(time_st.deq_total_time * 1000000) /
2339                         (double)iter / (double)rte_get_tsc_hz(),
2340                         (double)time_st.deq_min_time,
2341                         (double)(time_st.deq_min_time * 1000000) /
2342                         rte_get_tsc_hz(), (double)time_st.deq_max_time,
2343                         (double)(time_st.deq_max_time * 1000000) /
2344                         rte_get_tsc_hz());
2345
2346         return TEST_SUCCESS;
2347 #endif
2348 }
2349
2350 #ifdef RTE_BBDEV_OFFLOAD_COST
2351 static int
2352 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
2353                 const uint16_t num_to_process, uint16_t burst_sz,
2354                 uint64_t *deq_total_time, uint64_t *deq_min_time,
2355                 uint64_t *deq_max_time)
2356 {
2357         int i, deq_total;
2358         struct rte_bbdev_dec_op *ops[MAX_BURST];
2359         uint64_t deq_start_time, deq_last_time;
2360
2361         /* Test deq offload latency from an empty queue */
2362
2363         for (i = 0, deq_total = 0; deq_total < num_to_process;
2364                         ++i, deq_total += burst_sz) {
2365                 deq_start_time = rte_rdtsc_precise();
2366
2367                 if (unlikely(num_to_process - deq_total < burst_sz))
2368                         burst_sz = num_to_process - deq_total;
2369                 rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, burst_sz);
2370
2371                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
2372                 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
2373                 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
2374                 *deq_total_time += deq_last_time;
2375         }
2376
2377         return i;
2378 }
2379
2380 static int
2381 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
2382                 const uint16_t num_to_process, uint16_t burst_sz,
2383                 uint64_t *deq_total_time, uint64_t *deq_min_time,
2384                 uint64_t *deq_max_time)
2385 {
2386         int i, deq_total;
2387         struct rte_bbdev_enc_op *ops[MAX_BURST];
2388         uint64_t deq_start_time, deq_last_time;
2389
2390         /* Test deq offload latency from an empty queue */
2391         for (i = 0, deq_total = 0; deq_total < num_to_process;
2392                         ++i, deq_total += burst_sz) {
2393                 deq_start_time = rte_rdtsc_precise();
2394
2395                 if (unlikely(num_to_process - deq_total < burst_sz))
2396                         burst_sz = num_to_process - deq_total;
2397                 rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, burst_sz);
2398
2399                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
2400                 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
2401                 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
2402                 *deq_total_time += deq_last_time;
2403         }
2404
2405         return i;
2406 }
2407 #endif
2408
2409 static int
2410 offload_latency_empty_q_test(struct active_device *ad,
2411                 struct test_op_params *op_params)
2412 {
2413 #ifndef RTE_BBDEV_OFFLOAD_COST
2414         RTE_SET_USED(ad);
2415         RTE_SET_USED(op_params);
2416         printf("Offload latency empty dequeue test is disabled.\n");
2417         printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
2418         return TEST_SKIPPED;
2419 #else
2420         int iter;
2421         uint64_t deq_total_time, deq_min_time, deq_max_time;
2422         uint16_t burst_sz = op_params->burst_sz;
2423         const uint16_t num_to_process = op_params->num_to_process;
2424         const enum rte_bbdev_op_type op_type = test_vector.op_type;
2425         const uint16_t queue_id = ad->queue_ids[0];
2426         struct rte_bbdev_info info;
2427         const char *op_type_str;
2428
2429         deq_total_time = deq_max_time = 0;
2430         deq_min_time = UINT64_MAX;
2431
2432         TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
2433                         "BURST_SIZE should be <= %u", MAX_BURST);
2434
2435         rte_bbdev_info_get(ad->dev_id, &info);
2436
2437         op_type_str = rte_bbdev_op_type_str(op_type);
2438         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
2439
2440         printf(
2441                 "\nOffload latency empty dequeue test: dev: %s, burst size: %u, num ops: %u, op type: %s\n",
2442                         info.dev_name, burst_sz, num_to_process, op_type_str);
2443
2444         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
2445                 iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
2446                                 num_to_process, burst_sz, &deq_total_time,
2447                                 &deq_min_time, &deq_max_time);
2448         else
2449                 iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
2450                                 num_to_process, burst_sz, &deq_total_time,
2451                                 &deq_min_time, &deq_max_time);
2452
2453         if (iter <= 0)
2454                 return TEST_FAILED;
2455
2456         printf("Empty dequeue offload\n"
2457                         "\tavg. latency: %lg cycles, %lg us\n"
2458                         "\tmin. latency: %lg cycles, %lg us\n"
2459                         "\tmax. latency: %lg cycles, %lg us\n",
2460                         (double)deq_total_time / (double)iter,
2461                         (double)(deq_total_time * 1000000) / (double)iter /
2462                         (double)rte_get_tsc_hz(), (double)deq_min_time,
2463                         (double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
2464                         (double)deq_max_time, (double)(deq_max_time * 1000000) /
2465                         rte_get_tsc_hz());
2466
2467         return TEST_SUCCESS;
2468 #endif
2469 }
2470
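/*
 * Note: dequeuing from a queue with nothing in flight measures the pure
 * polling overhead of the dequeue path, which gives a baseline for the
 * loaded-queue dequeue figures reported by offload_cost_test().
 */
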
2471 static int
2472 throughput_tc(void)
2473 {
2474         return run_test_case(throughput_test);
2475 }
2476
2477 static int
2478 offload_cost_tc(void)
2479 {
2480         return run_test_case(offload_cost_test);
2481 }
2482
2483 static int
2484 offload_latency_empty_q_tc(void)
2485 {
2486         return run_test_case(offload_latency_empty_q_test);
2487 }
2488
2489 static int
2490 latency_tc(void)
2491 {
2492         return run_test_case(latency_test);
2493 }
2494
2495 static int
2496 interrupt_tc(void)
2497 {
2498         return run_test_case(throughput_test);
2499 }
2500
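/*
 * interrupt_tc() reuses throughput_test(); the interrupt-mode code paths
 * are selected there through the intr_enabled flag, which the interrupt
 * suite's setup (interrupt_testsuite_setup) is expected to turn on.
 */
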
2501 static struct unit_test_suite bbdev_throughput_testsuite = {
2502         .suite_name = "BBdev Throughput Tests",
2503         .setup = testsuite_setup,
2504         .teardown = testsuite_teardown,
2505         .unit_test_cases = {
2506                 TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
2507                 TEST_CASES_END() /**< NULL terminate unit test array */
2508         }
2509 };
2510
2511 static struct unit_test_suite bbdev_validation_testsuite = {
2512         .suite_name = "BBdev Validation Tests",
2513         .setup = testsuite_setup,
2514         .teardown = testsuite_teardown,
2515         .unit_test_cases = {
2516                 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
2517                 TEST_CASES_END() /**< NULL terminate unit test array */
2518         }
2519 };
2520
2521 static struct unit_test_suite bbdev_latency_testsuite = {
2522         .suite_name = "BBdev Latency Tests",
2523         .setup = testsuite_setup,
2524         .teardown = testsuite_teardown,
2525         .unit_test_cases = {
2526                 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
2527                 TEST_CASES_END() /**< NULL terminate unit test array */
2528         }
2529 };
2530
2531 static struct unit_test_suite bbdev_offload_cost_testsuite = {
2532         .suite_name = "BBdev Offload Cost Tests",
2533         .setup = testsuite_setup,
2534         .teardown = testsuite_teardown,
2535         .unit_test_cases = {
2536                 TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
2537                 TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
2538                 TEST_CASES_END() /**< NULL terminate unit test array */
2539         }
2540 };
2541
2542 static struct unit_test_suite bbdev_interrupt_testsuite = {
2543         .suite_name = "BBdev Interrupt Tests",
2544         .setup = interrupt_testsuite_setup,
2545         .teardown = testsuite_teardown,
2546         .unit_test_cases = {
2547                 TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
2548                 TEST_CASES_END() /**< NULL terminate unit test array */
2549         }
2550 };
2551
2552 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
2553 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
2554 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
2555 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
2556 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);