doc: add GRE option flow item to feature list

[dpdk.git] / app / test-bbdev / test_bbdev_perf.c
diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c

index 6ddf012..0fa119a 100644 (file)
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -24,9 +24,10 @@
  #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
  
  #define MAX_QUEUES RTE_MAX_LCORE
-#define TEST_REPETITIONS 1000
+#define TEST_REPETITIONS 100
+#define WAIT_OFFLOAD_US 1000
  
-#ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
+#ifdef RTE_BASEBAND_FPGA_LTE_FEC
  #include <fpga_lte_fec.h>
  #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
  #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
@@ -39,7 +40,7 @@
  #define FLR_4G_TIMEOUT 610
  #endif
  
-#ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_5GNR_FEC
+#ifdef RTE_BASEBAND_FPGA_5GNR_FEC
  #include <rte_pmd_fpga_5gnr_fec.h>
  #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
  #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
@@ -52,7 +53,7 @@
  #define FLR_5G_TIMEOUT 610
  #endif
  
-#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
+#ifdef RTE_BASEBAND_ACC100
  #include <rte_acc100_cfg.h>
  #define ACC100PF_DRIVER_NAME   ("intel_acc100_pf")
  #define ACC100VF_DRIVER_NAME   ("intel_acc100_vf")
@@ -132,7 +133,7 @@ struct test_op_params {
         uint16_t num_to_process;
         uint16_t num_lcores;
         int vector_mask;
-       rte_atomic16_t sync;
+       uint16_t sync;
         struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
  };
  
@@ -147,9 +148,9 @@ struct thread_params {
         uint8_t iter_count;
         double iter_average;
         double bler;
-       rte_atomic16_t nb_dequeued;
-       rte_atomic16_t processing_status;
-       rte_atomic16_t burst_sz;
+       uint16_t nb_dequeued;
+       int16_t processing_status;
+       uint16_t burst_sz;
         struct test_op_params *op_params;
         struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
         struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
@@ -226,6 +227,45 @@ clear_soft_out_cap(uint32_t *op_flags)
         *op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
  }
  
+/* Byte-swap, in place, every segment of every test vector op data entry:
+ * each full 32-bit word via rte_bswap32(), then the trailing len % 4 bytes
+ * reversed. Used when the device expects its data in big endian format.
+ */
+static inline void
+convert_op_data_to_be(void)
+{
+       struct op_data_entries *op;
+       enum op_data_type type;
+       uint8_t nb_segs, *rem_data, temp;
+       uint32_t *data, len;
+       int complete, rem, i, j;
+
+       for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
+               nb_segs = test_vector.entries[type].nb_segments;
+               op = &test_vector.entries[type];
+
+               /* Invert byte endianness of each segment of this entry */
+               for (i = 0; i < nb_segs; ++i) {
+                       len = op->segments[i].length;
+                       data = op->segments[i].addr;
+
+                       /* Byte-swap each complete 32-bit word */
+                       complete = len / 4;
+                       for (j = 0; j < complete; j++)
+                               data[j] = rte_bswap32(data[j]);
+
+                       /* Reverse the leftover bytes (j == complete) */
+                       rem = len % 4;
+                       rem_data = (uint8_t *)&data[j];
+                       for (j = 0; j < rem/2; j++) {
+                               temp = rem_data[j];
+                               rem_data[j] = rem_data[rem - j - 1];
+                               rem_data[rem - j - 1] = temp;
+                       }
+               }
+       }
+}
+
  static int
  check_dev_cap(const struct rte_bbdev_info *dev_info)
  {
@@ -233,6 +273,7 @@ check_dev_cap(const struct rte_bbdev_info *dev_info)
         unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
                 nb_harq_inputs, nb_harq_outputs;
         const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
+       uint8_t dev_data_endianness = dev_info->drv.data_endianness;
  
         nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
         nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
@@ -244,6 +285,9 @@ check_dev_cap(const struct rte_bbdev_info *dev_info)
                 if (op_cap->type != test_vector.op_type)
                         continue;
  
+               if (dev_data_endianness == RTE_BIG_ENDIAN)
+                       convert_op_data_to_be();
+
                 if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
                         const struct rte_bbdev_op_cap_turbo_dec *cap =
                                         &op_cap->cap.turbo_dec;
@@ -371,14 +415,14 @@ check_dev_cap(const struct rte_bbdev_info *dev_info)
                         if (nb_harq_inputs > cap->num_buffers_hard_out) {
                                 printf(
                                         "Too many HARQ inputs defined: %u, max: %u\n",
-                                       nb_hard_outputs,
+                                       nb_harq_inputs,
                                         cap->num_buffers_hard_out);
                                 return TEST_FAILED;
                         }
                         if (nb_harq_outputs > cap->num_buffers_hard_out) {
                                 printf(
                                         "Too many HARQ outputs defined: %u, max: %u\n",
-                                       nb_hard_outputs,
+                                       nb_harq_outputs,
                                         cap->num_buffers_hard_out);
                                 return TEST_FAILED;
                         }
@@ -577,17 +621,17 @@ add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
  /* Configure fpga lte fec with PF & VF values
   * if '-i' flag is set and using fpga device
   */
-#ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
+#ifdef RTE_BASEBAND_FPGA_LTE_FEC
         if ((get_init_device() == true) &&
                 (!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
-               struct fpga_lte_fec_conf conf;
+               struct rte_fpga_lte_fec_conf conf;
                 unsigned int i;
  
                 printf("Configure FPGA LTE FEC Driver %s with default values\n",
                                 info->drv.driver_name);
  
                 /* clear default configuration before initialization */
-               memset(&conf, 0, sizeof(struct fpga_lte_fec_conf));
+               memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf));
  
                 /* Set PF mode :
                  * true if PF is used for data plane
@@ -615,23 +659,23 @@ add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
                 conf.flr_time_out = FLR_4G_TIMEOUT;
  
                 /* setup FPGA PF with configuration information */
-               ret = fpga_lte_fec_configure(info->dev_name, &conf);
+               ret = rte_fpga_lte_fec_configure(info->dev_name, &conf);
                 TEST_ASSERT_SUCCESS(ret,
                                 "Failed to configure 4G FPGA PF for bbdev %s",
                                 info->dev_name);
         }
  #endif
-#ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_5GNR_FEC
+#ifdef RTE_BASEBAND_FPGA_5GNR_FEC
         if ((get_init_device() == true) &&
                 (!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
-               struct fpga_5gnr_fec_conf conf;
+               struct rte_fpga_5gnr_fec_conf conf;
                 unsigned int i;
  
                 printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
                                 info->drv.driver_name);
  
                 /* clear default configuration before initialization */
-               memset(&conf, 0, sizeof(struct fpga_5gnr_fec_conf));
+               memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf));
  
                 /* Set PF mode :
                  * true if PF is used for data plane
@@ -659,13 +703,13 @@ add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
                 conf.flr_time_out = FLR_5G_TIMEOUT;
  
                 /* setup FPGA PF with configuration information */
-               ret = fpga_5gnr_fec_configure(info->dev_name, &conf);
+               ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf);
                 TEST_ASSERT_SUCCESS(ret,
                                 "Failed to configure 5G FPGA PF for bbdev %s",
                                 info->dev_name);
         }
  #endif
-#ifdef RTE_LIBRTE_PMD_BBDEV_ACC100
+#ifdef RTE_BASEBAND_ACC100
         if ((get_init_device() == true) &&
                 (!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
                 struct rte_acc100_conf conf;
@@ -956,6 +1000,9 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs,
                         if ((op_type == DATA_INPUT) && large_input) {
                                 /* Allocate a fake overused mbuf */
                                 data = rte_malloc(NULL, seg->length, 0);
+                               TEST_ASSERT_NOT_NULL(data,
+                                       "rte malloc failed with %u bytes",
+                                       seg->length);
                                 memcpy(data, seg->addr, seg->length);
                                 m_head->buf_addr = data;
                                 m_head->buf_iova = rte_malloc_virt2iova(data);
@@ -1257,7 +1304,7 @@ copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
         struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
  
         for (i = 0; i < n; ++i) {
-               if (turbo_dec->code_block_mode == 0) {
+               if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
                         ops[i]->turbo_dec.tb_params.ea =
                                         turbo_dec->tb_params.ea;
                         ops[i]->turbo_dec.tb_params.eb =
@@ -1305,7 +1352,7 @@ copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
         unsigned int i;
         struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
         for (i = 0; i < n; ++i) {
-               if (turbo_enc->code_block_mode == 0) {
+               if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
                         ops[i]->turbo_enc.tb_params.ea =
                                         turbo_enc->tb_params.ea;
                         ops[i]->turbo_enc.tb_params.eb =
@@ -1660,7 +1707,7 @@ copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
         struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
  
         for (i = 0; i < n; ++i) {
-               if (ldpc_dec->code_block_mode == 0) {
+               if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
                         ops[i]->ldpc_dec.tb_params.ea =
                                         ldpc_dec->tb_params.ea;
                         ops[i]->ldpc_dec.tb_params.eb =
@@ -1714,7 +1761,7 @@ copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
         unsigned int i;
         struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
         for (i = 0; i < n; ++i) {
-               if (ldpc_enc->code_block_mode == 0) {
+               if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
                         ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
                         ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
                         ops[i]->ldpc_enc.tb_params.cab =
@@ -2238,7 +2285,7 @@ calc_dec_TB_size(struct rte_bbdev_dec_op *op)
         uint8_t i;
         uint32_t c, r, tb_size = 0;
  
-       if (op->turbo_dec.code_block_mode) {
+       if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
                 tb_size = op->turbo_dec.tb_params.k_neg;
         } else {
                 c = op->turbo_dec.tb_params.c;
@@ -2258,7 +2305,7 @@ calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
         uint32_t c, r, tb_size = 0;
         uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
  
-       if (op->ldpc_dec.code_block_mode) {
+       if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
                 tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
         } else {
                 c = op->ldpc_dec.tb_params.c;
@@ -2276,7 +2323,7 @@ calc_enc_TB_size(struct rte_bbdev_enc_op *op)
         uint8_t i;
         uint32_t c, r, tb_size = 0;
  
-       if (op->turbo_enc.code_block_mode) {
+       if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
                 tb_size = op->turbo_enc.tb_params.k_neg;
         } else {
                 c = op->turbo_enc.tb_params.c;
@@ -2296,7 +2343,7 @@ calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
         uint32_t c, r, tb_size = 0;
         uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
  
-       if (op->turbo_enc.code_block_mode) {
+       if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
                 tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
         } else {
                 c = op->turbo_enc.tb_params.c;
@@ -2467,7 +2514,7 @@ retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
  {
         uint16_t j;
         int save_status, ret;
-       uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
+       uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
         struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
         uint32_t flags = ops[0]->ldpc_dec.op_flags;
         bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
@@ -2513,20 +2560,20 @@ preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
                 bool preload)
  {
         uint16_t j;
-       int ret;
-       uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
-       struct rte_bbdev_op_data save_hc_in, save_hc_out;
-       struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
+       int deq;
+       uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
+       struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
+       struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
         uint32_t flags = ops[0]->ldpc_dec.op_flags;
         bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
         bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
         bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
         bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
         bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
-       for (j = 0; j < n; ++j) {
-               if ((mem_in || hc_in) && preload) {
-                       save_hc_in = ops[j]->ldpc_dec.harq_combined_input;
-                       save_hc_out = ops[j]->ldpc_dec.harq_combined_output;
+       if ((mem_in || hc_in) && preload) {
+               for (j = 0; j < n; ++j) {
+                       save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
+                       save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
                         ops[j]->ldpc_dec.op_flags =
                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
@@ -2536,16 +2583,23 @@ preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
                         ops[j]->ldpc_dec.harq_combined_output.offset =
                                         harq_offset;
                         ops[j]->ldpc_dec.harq_combined_input.offset = 0;
-                       rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
-                                       &ops[j], 1);
-                       ret = 0;
-                       while (ret == 0)
-                               ret = rte_bbdev_dequeue_ldpc_dec_ops(
-                                       dev_id, queue_id, &ops_deq[j], 1);
+                       harq_offset += HARQ_INCR;
+               }
+               rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
+               deq = 0;
+               while (deq != n)
+                       deq += rte_bbdev_dequeue_ldpc_dec_ops(
+                                       dev_id, queue_id, &ops_deq[deq],
+                                       n - deq);
+               /* Restore the operations */
+               for (j = 0; j < n; ++j) {
                         ops[j]->ldpc_dec.op_flags = flags;
-                       ops[j]->ldpc_dec.harq_combined_input = save_hc_in;
-                       ops[j]->ldpc_dec.harq_combined_output = save_hc_out;
+                       ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
+                       ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
                 }
+       }
+       harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
+       for (j = 0; j < n; ++j) {
                 /* Adjust HARQ offset when we reach external DDR */
                 if (mem_in || hc_in)
                         ops[j]->ldpc_dec.harq_combined_input.offset
@@ -2583,46 +2637,46 @@ dequeue_event_callback(uint16_t dev_id,
         }
  
         if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
-               rte_atomic16_set(&tp->processing_status, TEST_FAILED);
+               __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
                 printf(
                         "Dequeue interrupt handler called for incorrect event!\n");
                 return;
         }
  
-       burst_sz = rte_atomic16_read(&tp->burst_sz);
+       burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED);
         num_ops = tp->op_params->num_to_process;
  
         if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
                 deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
                                 &tp->dec_ops[
-                                       rte_atomic16_read(&tp->nb_dequeued)],
+                                       __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
                                 burst_sz);
         else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
                 deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
                                 &tp->dec_ops[
-                                       rte_atomic16_read(&tp->nb_dequeued)],
+                                       __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
                                 burst_sz);
         else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
                 deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
                                 &tp->enc_ops[
-                                       rte_atomic16_read(&tp->nb_dequeued)],
+                                       __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
                                 burst_sz);
         else /*RTE_BBDEV_OP_TURBO_ENC*/
                 deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
                                 &tp->enc_ops[
-                                       rte_atomic16_read(&tp->nb_dequeued)],
+                                       __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
                                 burst_sz);
  
         if (deq < burst_sz) {
                 printf(
                         "After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
                         burst_sz, deq);
-               rte_atomic16_set(&tp->processing_status, TEST_FAILED);
+               __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
                 return;
         }
  
-       if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
-               rte_atomic16_add(&tp->nb_dequeued, deq);
+       if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq < num_ops) {
+               __atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
                 return;
         }
  
@@ -2659,7 +2713,7 @@ dequeue_event_callback(uint16_t dev_id,
  
         if (ret) {
                 printf("Buffers validation failed\n");
-               rte_atomic16_set(&tp->processing_status, TEST_FAILED);
+               __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
         }
  
         switch (test_vector.op_type) {
@@ -2680,7 +2734,7 @@ dequeue_event_callback(uint16_t dev_id,
                 break;
         default:
                 printf("Unknown op type: %d\n", test_vector.op_type);
-               rte_atomic16_set(&tp->processing_status, TEST_FAILED);
+               __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
                 return;
         }
  
@@ -2689,7 +2743,7 @@ dequeue_event_callback(uint16_t dev_id,
         tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
                         ((double)total_time / (double)rte_get_tsc_hz());
  
-       rte_atomic16_add(&tp->nb_dequeued, deq);
+       __atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
  }
  
  static int
@@ -2727,11 +2781,10 @@ throughput_intr_lcore_ldpc_dec(void *arg)
  
         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
  
-       rte_atomic16_clear(&tp->processing_status);
-       rte_atomic16_clear(&tp->nb_dequeued);
+       __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
+       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
  
-       while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-               rte_pause();
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
  
         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
                                 num_to_process);
@@ -2779,17 +2832,15 @@ throughput_intr_lcore_ldpc_dec(void *arg)
                          * the number of operations is not a multiple of
                          * burst size.
                          */
-                       rte_atomic16_set(&tp->burst_sz, num_to_enq);
+                       __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
  
                         /* Wait until processing of previous batch is
                          * completed
                          */
-                       while (rte_atomic16_read(&tp->nb_dequeued) !=
-                                       (int16_t) enqueued)
-                               rte_pause();
+                       rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
                 }
                 if (j != TEST_REPETITIONS - 1)
-                       rte_atomic16_clear(&tp->nb_dequeued);
+                       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
         }
  
         return TEST_SUCCESS;
@@ -2824,11 +2875,10 @@ throughput_intr_lcore_dec(void *arg)
  
         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
  
-       rte_atomic16_clear(&tp->processing_status);
-       rte_atomic16_clear(&tp->nb_dequeued);
+       __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
+       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
  
-       while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-               rte_pause();
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
  
         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
                                 num_to_process);
@@ -2869,17 +2919,15 @@ throughput_intr_lcore_dec(void *arg)
                          * the number of operations is not a multiple of
                          * burst size.
                          */
-                       rte_atomic16_set(&tp->burst_sz, num_to_enq);
+                       __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
  
                         /* Wait until processing of previous batch is
                          * completed
                          */
-                       while (rte_atomic16_read(&tp->nb_dequeued) !=
-                                       (int16_t) enqueued)
-                               rte_pause();
+                       rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
                 }
                 if (j != TEST_REPETITIONS - 1)
-                       rte_atomic16_clear(&tp->nb_dequeued);
+                       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
         }
  
         return TEST_SUCCESS;
@@ -2914,11 +2962,10 @@ throughput_intr_lcore_enc(void *arg)
  
         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
  
-       rte_atomic16_clear(&tp->processing_status);
-       rte_atomic16_clear(&tp->nb_dequeued);
+       __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
+       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
  
-       while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-               rte_pause();
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
  
         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
                         num_to_process);
@@ -2958,17 +3005,15 @@ throughput_intr_lcore_enc(void *arg)
                          * the number of operations is not a multiple of
                          * burst size.
                          */
-                       rte_atomic16_set(&tp->burst_sz, num_to_enq);
+                       __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
  
                         /* Wait until processing of previous batch is
                          * completed
                          */
-                       while (rte_atomic16_read(&tp->nb_dequeued) !=
-                                       (int16_t) enqueued)
-                               rte_pause();
+                       rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
                 }
                 if (j != TEST_REPETITIONS - 1)
-                       rte_atomic16_clear(&tp->nb_dequeued);
+                       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
         }
  
         return TEST_SUCCESS;
@@ -3004,11 +3049,10 @@ throughput_intr_lcore_ldpc_enc(void *arg)
  
         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
  
-       rte_atomic16_clear(&tp->processing_status);
-       rte_atomic16_clear(&tp->nb_dequeued);
+       __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
+       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
  
-       while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-               rte_pause();
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
  
         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
                         num_to_process);
@@ -3050,17 +3094,15 @@ throughput_intr_lcore_ldpc_enc(void *arg)
                          * the number of operations is not a multiple of
                          * burst size.
                          */
-                       rte_atomic16_set(&tp->burst_sz, num_to_enq);
+                       __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
  
                         /* Wait until processing of previous batch is
                          * completed
                          */
-                       while (rte_atomic16_read(&tp->nb_dequeued) !=
-                                       (int16_t) enqueued)
-                               rte_pause();
+                       rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
                 }
                 if (j != TEST_REPETITIONS - 1)
-                       rte_atomic16_clear(&tp->nb_dequeued);
+                       __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
         }
  
         return TEST_SUCCESS;
@@ -3094,8 +3136,7 @@ throughput_pmd_lcore_dec(void *arg)
  
         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
  
-       while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-               rte_pause();
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
  
         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -3198,8 +3239,7 @@ bler_pmd_lcore_ldpc_dec(void *arg)
  
         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
  
-       while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-               rte_pause();
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
  
         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -3231,11 +3271,9 @@ bler_pmd_lcore_ldpc_dec(void *arg)
                                 mbuf_reset(
                                 ops_enq[j]->ldpc_dec.harq_combined_output.data);
                 }
-               if (extDdr) {
-                       bool preload = i == (TEST_REPETITIONS - 1);
+               if (extDdr)
                         preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
-                                       num_ops, preload);
-               }
+                                       num_ops, true);
                 start_time = rte_rdtsc_precise();
  
                 for (enq = 0, deq = 0; enq < num_ops;) {
@@ -3330,8 +3368,7 @@ throughput_pmd_lcore_ldpc_dec(void *arg)
  
         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
  
-       while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-               rte_pause();
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
  
         ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
         TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -3362,11 +3399,9 @@ throughput_pmd_lcore_ldpc_dec(void *arg)
                                 mbuf_reset(
                                 ops_enq[j]->ldpc_dec.harq_combined_output.data);
                 }
-               if (extDdr) {
-                       bool preload = i == (TEST_REPETITIONS - 1);
+               if (extDdr)
                         preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
-                                       num_ops, preload);
-               }
+                                       num_ops, true);
                 start_time = rte_rdtsc_precise();
  
                 for (enq = 0, deq = 0; enq < num_ops;) {
@@ -3449,8 +3484,7 @@ throughput_pmd_lcore_enc(void *arg)
  
         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
  
-       while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-               rte_pause();
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
  
         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
                         num_ops);
@@ -3540,8 +3574,7 @@ throughput_pmd_lcore_ldpc_enc(void *arg)
  
         bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
  
-       while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-               rte_pause();
+       rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
  
         ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
                         num_ops);
@@ -3715,21 +3748,25 @@ bler_test(struct active_device *ad,
                         RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
                                 RTE_CACHE_LINE_SIZE));
  
-       if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
+       if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) &&
+                       !check_bit(test_vector.ldpc_dec.op_flags,
+                       RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
+                       && !check_bit(test_vector.ldpc_dec.op_flags,
+                       RTE_BBDEV_LDPC_LLR_COMPRESSION))
                 bler_function = bler_pmd_lcore_ldpc_dec;
         else
                 return TEST_SKIPPED;
  
-       rte_atomic16_set(&op_params->sync, SYNC_WAIT);
+       __atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
  
-       /* Master core is set at first entry */
+       /* Main core is set at first entry */
         t_params[0].dev_id = ad->dev_id;
         t_params[0].lcore_id = rte_lcore_id();
         t_params[0].op_params = op_params;
         t_params[0].queue_id = ad->queue_ids[used_cores++];
         t_params[0].iter_count = 0;
  
-       RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+       RTE_LCORE_FOREACH_WORKER(lcore_id) {
                 if (used_cores >= num_lcores)
                         break;
  
@@ -3743,10 +3780,10 @@ bler_test(struct active_device *ad,
                                 &t_params[used_cores++], lcore_id);
         }
  
-       rte_atomic16_set(&op_params->sync, SYNC_START);
+       __atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
         ret = bler_function(&t_params[0]);
  
-       /* Master core is always used */
+       /* Main core is always used */
         for (used_cores = 1; used_cores < num_lcores; used_cores++)
                 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
  
@@ -3838,16 +3875,16 @@ throughput_test(struct active_device *ad,
                         throughput_function = throughput_pmd_lcore_enc;
         }
  
-       rte_atomic16_set(&op_params->sync, SYNC_WAIT);
+       __atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
  
-       /* Master core is set at first entry */
+       /* Main core is set at first entry */
         t_params[0].dev_id = ad->dev_id;
         t_params[0].lcore_id = rte_lcore_id();
         t_params[0].op_params = op_params;
         t_params[0].queue_id = ad->queue_ids[used_cores++];
         t_params[0].iter_count = 0;
  
-       RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+       RTE_LCORE_FOREACH_WORKER(lcore_id) {
                 if (used_cores >= num_lcores)
                         break;
  
@@ -3861,10 +3898,10 @@ throughput_test(struct active_device *ad,
                                 &t_params[used_cores++], lcore_id);
         }
  
-       rte_atomic16_set(&op_params->sync, SYNC_START);
+       __atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
         ret = throughput_function(&t_params[0]);
  
-       /* Master core is always used */
+       /* Main core is always used */
         for (used_cores = 1; used_cores < num_lcores; used_cores++)
                 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);
  
@@ -3888,32 +3925,32 @@ throughput_test(struct active_device *ad,
         /* In interrupt TC we need to wait for the interrupt callback to deqeue
          * all pending operations. Skip waiting for queues which reported an
          * error using processing_status variable.
-        * Wait for master lcore operations.
+        * Wait for main lcore operations.
          */
         tp = &t_params[0];
-       while ((rte_atomic16_read(&tp->nb_dequeued) <
-                       op_params->num_to_process) &&
-                       (rte_atomic16_read(&tp->processing_status) !=
-                       TEST_FAILED))
+       while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
+               op_params->num_to_process) &&
+               (__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
+               TEST_FAILED))
                 rte_pause();
  
         tp->ops_per_sec /= TEST_REPETITIONS;
         tp->mbps /= TEST_REPETITIONS;
-       ret |= (int)rte_atomic16_read(&tp->processing_status);
+       ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
  
-       /* Wait for slave lcores operations */
+       /* Wait for worker lcores operations */
         for (used_cores = 1; used_cores < num_lcores; used_cores++) {
                 tp = &t_params[used_cores];
  
-               while ((rte_atomic16_read(&tp->nb_dequeued) <
-                               op_params->num_to_process) &&
-                               (rte_atomic16_read(&tp->processing_status) !=
-                               TEST_FAILED))
+               while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
+                       op_params->num_to_process) &&
+                       (__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
+                       TEST_FAILED))
                         rte_pause();
  
                 tp->ops_per_sec /= TEST_REPETITIONS;
                 tp->mbps /= TEST_REPETITIONS;
-               ret |= (int)rte_atomic16_read(&tp->processing_status);
+               ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
         }
  
         /* Print throughput if test passed */
@@ -3999,12 +4036,14 @@ latency_test_dec(struct rte_mempool *mempool,
         return i;
  }
  
+/* Test case for latency/validation for LDPC Decoder */
  static int
  latency_test_ldpc_dec(struct rte_mempool *mempool,
                 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
                 int vector_mask, uint16_t dev_id, uint16_t queue_id,
                 const uint16_t num_to_process, uint16_t burst_sz,
-               uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
+               uint64_t *total_time, uint64_t *min_time, uint64_t *max_time,
+               bool disable_et)
  {
         int ret = TEST_SUCCESS;
         uint16_t i, j, dequeued;
@@ -4026,7 +4065,7 @@ latency_test_ldpc_dec(struct rte_mempool *mempool,
                                 "rte_bbdev_dec_op_alloc_bulk() failed");
  
                 /* For latency tests we need to disable early termination */
-               if (check_bit(ref_op->ldpc_dec.op_flags,
+               if (disable_et && check_bit(ref_op->ldpc_dec.op_flags,
                                 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
                         ref_op->ldpc_dec.op_flags -=
                                         RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
@@ -4221,9 +4260,10 @@ latency_test_ldpc_enc(struct rte_mempool *mempool,
         return i;
  }
  
+/* Common function for running validation and latency test cases */
  static int
-latency_test(struct active_device *ad,
-               struct test_op_params *op_params)
+validation_latency_test(struct active_device *ad,
+               struct test_op_params *op_params, bool latency_flag)
  {
         int iter;
         uint16_t burst_sz = op_params->burst_sz;
@@ -4248,7 +4288,11 @@ latency_test(struct active_device *ad,
         TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
  
         printf("+ ------------------------------------------------------- +\n");
-       printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
+       if (latency_flag)
+               printf("== test: latency\ndev: ");
+       else
+               printf("== test: validation\ndev: ");
+       printf("%s, burst size: %u, num ops: %u, op type: %s\n",
                         info.dev_name, burst_sz, num_to_process, op_type_str);
  
         if (op_type == RTE_BBDEV_OP_TURBO_DEC)
@@ -4256,11 +4300,6 @@ latency_test(struct active_device *ad,
                                 op_params->ref_dec_op, op_params->vector_mask,
                                 ad->dev_id, queue_id, num_to_process,
                                 burst_sz, &total_time, &min_time, &max_time);
-       else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
-               iter = latency_test_enc(op_params->mp, bufs,
-                               op_params->ref_enc_op, ad->dev_id, queue_id,
-                               num_to_process, burst_sz, &total_time,
-                               &min_time, &max_time);
         else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
                 iter = latency_test_ldpc_enc(op_params->mp, bufs,
                                 op_params->ref_enc_op, ad->dev_id, queue_id,
@@ -4270,13 +4309,14 @@ latency_test(struct active_device *ad,
                 iter = latency_test_ldpc_dec(op_params->mp, bufs,
                                 op_params->ref_dec_op, op_params->vector_mask,
                                 ad->dev_id, queue_id, num_to_process,
-                               burst_sz, &total_time, &min_time, &max_time);
-       else
+                               burst_sz, &total_time, &min_time, &max_time,
+                               latency_flag);
+       else /* RTE_BBDEV_OP_TURBO_ENC */
                 iter = latency_test_enc(op_params->mp, bufs,
-                                       op_params->ref_enc_op,
-                                       ad->dev_id, queue_id,
-                                       num_to_process, burst_sz, &total_time,
-                                       &min_time, &max_time);
+                               op_params->ref_enc_op,
+                               ad->dev_id, queue_id,
+                               num_to_process, burst_sz, &total_time,
+                               &min_time, &max_time);
  
         if (iter <= 0)
                 return TEST_FAILED;
@@ -4295,6 +4335,18 @@ latency_test(struct active_device *ad,
         return TEST_SUCCESS;
  }
  
+static int
+latency_test(struct active_device *ad, struct test_op_params *op_params)
+{
+       return validation_latency_test(ad, op_params, true);
+}
+
+static int
+validation_test(struct active_device *ad, struct test_op_params *op_params)
+{
+       return validation_latency_test(ad, op_params, false);
+}
+
  #ifdef RTE_BBDEV_OFFLOAD_COST
  static int
  get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
@@ -4370,15 +4422,15 @@ offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
  
                 /* give time for device to process ops */
-               rte_delay_us(200);
+               rte_delay_us(WAIT_OFFLOAD_US);
  
                 /* Start time meas for dequeue function offload latency */
                 deq_start_time = rte_rdtsc_precise();
                 /* Dequeue one operation */
                 do {
                         deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
-                                       &ops_deq[deq], 1);
-               } while (unlikely(deq != 1));
+                                       &ops_deq[deq], enq);
+               } while (unlikely(deq == 0));
  
                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
@@ -4461,15 +4513,15 @@ offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
  
                 /* give time for device to process ops */
-               rte_delay_us(200);
+               rte_delay_us(WAIT_OFFLOAD_US);
  
                 /* Start time meas for dequeue function offload latency */
                 deq_start_time = rte_rdtsc_precise();
                 /* Dequeue one operation */
                 do {
                         deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
-                                       &ops_deq[deq], 1);
-               } while (unlikely(deq != 1));
+                                       &ops_deq[deq], enq);
+               } while (unlikely(deq == 0));
  
                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
@@ -4549,15 +4601,15 @@ offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
  
                 /* give time for device to process ops */
-               rte_delay_us(200);
+               rte_delay_us(WAIT_OFFLOAD_US);
  
                 /* Start time meas for dequeue function offload latency */
                 deq_start_time = rte_rdtsc_precise();
                 /* Dequeue one operation */
                 do {
                         deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
-                                       &ops_deq[deq], 1);
-               } while (unlikely(deq != 1));
+                                       &ops_deq[deq], enq);
+               } while (unlikely(deq == 0));
  
                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
@@ -4632,15 +4684,15 @@ offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
                 time_st->enq_acc_total_time += stats.acc_offload_cycles;
  
                 /* give time for device to process ops */
-               rte_delay_us(200);
+               rte_delay_us(WAIT_OFFLOAD_US);
  
                 /* Start time meas for dequeue function offload latency */
                 deq_start_time = rte_rdtsc_precise();
                 /* Dequeue one operation */
                 do {
                         deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
-                                       &ops_deq[deq], 1);
-               } while (unlikely(deq != 1));
+                                       &ops_deq[deq], enq);
+               } while (unlikely(deq == 0));
  
                 deq_last_time = rte_rdtsc_precise() - deq_start_time;
                 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
@@ -4762,6 +4814,23 @@ offload_cost_test(struct active_device *ad,
                         (double)(time_st.deq_max_time * 1000000) /
                         rte_get_tsc_hz());
  
+       struct rte_bbdev_stats stats = {0};
+       get_bbdev_queue_stats(ad->dev_id, queue_id, &stats);
+       if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
+               TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
+                               "Mismatch in enqueue count %10"PRIu64" %d",
+                               stats.enqueued_count, num_to_process);
+               TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
+                               "Mismatch in dequeue count %10"PRIu64" %d",
+                               stats.dequeued_count, num_to_process);
+       }
+       TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
+                       "Enqueue count Error %10"PRIu64"",
+                       stats.enqueue_err_count);
+       TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
+                       "Dequeue count Error %10"PRIu64"",
+                       stats.dequeue_err_count);
+
         return TEST_SUCCESS;
  #endif
  }
@@ -4929,6 +4998,12 @@ latency_tc(void)
         return run_test_case(latency_test);
  }
  
+static int
+validation_tc(void)
+{
+       return run_test_case(validation_test);
+}
+
  static int
  interrupt_tc(void)
  {
@@ -4960,7 +5035,7 @@ static struct unit_test_suite bbdev_validation_testsuite = {
         .setup = testsuite_setup,
         .teardown = testsuite_teardown,
         .unit_test_cases = {
-               TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
+               TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
                 TEST_CASES_END() /**< NULL terminate unit test array */
         }
  };