X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=app%2Ftest-bbdev%2Ftest_bbdev_perf.c;h=0fa119a5028b921dd8684cb80dec67f616531a8d;hb=81c2337e044dc16f1d93745d2a1668cdebc37c81;hp=de148b15de70a929440a930a95b1424244fc536b;hpb=c25604355a15c96e096b9441730a3d38831c9f5a;p=dpdk.git

diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index de148b15de..0fa119a502 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -24,7 +24,8 @@
 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))
 
 #define MAX_QUEUES RTE_MAX_LCORE
-#define TEST_REPETITIONS 1000
+#define TEST_REPETITIONS 100
+#define WAIT_OFFLOAD_US 1000
 
 #ifdef RTE_BASEBAND_FPGA_LTE_FEC
 #include <fpga_lte_fec.h>
@@ -132,7 +133,7 @@ struct test_op_params {
 	uint16_t num_to_process;
 	uint16_t num_lcores;
 	int vector_mask;
-	rte_atomic16_t sync;
+	uint16_t sync;
 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
 };
 
@@ -147,9 +148,9 @@ struct thread_params {
 	uint8_t iter_count;
 	double iter_average;
 	double bler;
-	rte_atomic16_t nb_dequeued;
-	rte_atomic16_t processing_status;
-	rte_atomic16_t burst_sz;
+	uint16_t nb_dequeued;
+	int16_t processing_status;
+	uint16_t burst_sz;
 	struct test_op_params *op_params;
 	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
 	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
@@ -226,6 +227,45 @@ clear_soft_out_cap(uint32_t *op_flags)
 	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
 }
 
+/* This API is to convert all the test vector op data entries
+ * to big endian format. It is used when the device supports
+ * the input in the big endian format.
+ */
+static inline void
+convert_op_data_to_be(void)
+{
+	struct op_data_entries *op;
+	enum op_data_type type;
+	uint8_t nb_segs, *rem_data, temp;
+	uint32_t *data, len;
+	int complete, rem, i, j;
+
+	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
+		nb_segs = test_vector.entries[type].nb_segments;
+		op = &test_vector.entries[type];
+
+		/* Invert byte endianness for all the segments */
+		for (i = 0; i < nb_segs; ++i) {
+			len = op->segments[i].length;
+			data = op->segments[i].addr;
+
+			/* Swap complete u32 bytes */
+			complete = len / 4;
+			for (j = 0; j < complete; j++)
+				data[j] = rte_bswap32(data[j]);
+
+			/* Swap any remaining bytes */
+			rem = len % 4;
+			rem_data = (uint8_t *)&data[j];
+			for (j = 0; j < rem/2; j++) {
+				temp = rem_data[j];
+				rem_data[j] = rem_data[rem - j - 1];
+				rem_data[rem - j - 1] = temp;
+			}
+		}
+	}
+}
+
 static int
 check_dev_cap(const struct rte_bbdev_info *dev_info)
 {
@@ -233,6 +273,7 @@ check_dev_cap(const struct rte_bbdev_info *dev_info)
 	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
 		nb_harq_inputs, nb_harq_outputs;
 	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;
+	uint8_t dev_data_endianness = dev_info->drv.data_endianness;
 
 	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
 	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
@@ -244,6 +285,9 @@ check_dev_cap(const struct rte_bbdev_info *dev_info)
 		if (op_cap->type != test_vector.op_type)
 			continue;
 
+		if (dev_data_endianness == RTE_BIG_ENDIAN)
+			convert_op_data_to_be();
+
 		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
 			const struct rte_bbdev_op_cap_turbo_dec *cap =
 					&op_cap->cap.turbo_dec;
@@ -371,14 +415,14 @@ check_dev_cap(const struct rte_bbdev_info *dev_info)
 			if (nb_harq_inputs > cap->num_buffers_hard_out) {
 				printf(
 					"Too many HARQ inputs defined: %u, max: %u\n",
-					nb_hard_outputs,
+					nb_harq_inputs,
 					cap->num_buffers_hard_out);
 				return TEST_FAILED;
 			}
 			if (nb_harq_outputs > cap->num_buffers_hard_out) {
 				printf(
 					"Too many HARQ outputs defined: %u, max: %u\n",
-					nb_hard_outputs,
+					nb_harq_outputs,
 					cap->num_buffers_hard_out);
 				return TEST_FAILED;
 			}
@@ -956,6 +1000,9 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs,
 		if ((op_type == DATA_INPUT) && large_input) {
 			/* Allocate a fake overused mbuf */
 			data = rte_malloc(NULL, seg->length, 0);
+			TEST_ASSERT_NOT_NULL(data,
+					"rte malloc failed with %u bytes",
+					seg->length);
 			memcpy(data, seg->addr, seg->length);
 			m_head->buf_addr = data;
 			m_head->buf_iova = rte_malloc_virt2iova(data);
@@ -1257,7 +1304,7 @@ copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
 	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;
 
 	for (i = 0; i < n; ++i) {
-		if (turbo_dec->code_block_mode == 0) {
+		if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
 			ops[i]->turbo_dec.tb_params.ea =
 					turbo_dec->tb_params.ea;
 			ops[i]->turbo_dec.tb_params.eb =
@@ -1305,7 +1352,7 @@ copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
 	unsigned int i;
 	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
 	for (i = 0; i < n; ++i) {
-		if (turbo_enc->code_block_mode == 0) {
+		if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
 			ops[i]->turbo_enc.tb_params.ea =
 					turbo_enc->tb_params.ea;
 			ops[i]->turbo_enc.tb_params.eb =
@@ -1660,7 +1707,7 @@ copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
 	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;
 
 	for (i = 0; i < n; ++i) {
-		if (ldpc_dec->code_block_mode == 0) {
+		if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
 			ops[i]->ldpc_dec.tb_params.ea =
 					ldpc_dec->tb_params.ea;
 			ops[i]->ldpc_dec.tb_params.eb =
@@ -1714,7 +1761,7 @@ copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
 	unsigned int i;
 	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
 	for (i = 0; i < n; ++i) {
-		if (ldpc_enc->code_block_mode == 0) {
+		if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
 			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
 			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
 			ops[i]->ldpc_enc.tb_params.cab =
@@ -2238,7 +2285,7 @@ calc_dec_TB_size(struct rte_bbdev_dec_op *op)
 	uint8_t i;
 	uint32_t c, r, tb_size = 0;
 
-	if (op->turbo_dec.code_block_mode) {
+	if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
 		tb_size = op->turbo_dec.tb_params.k_neg;
 	} else {
 		c = op->turbo_dec.tb_params.c;
@@ -2258,7 +2305,7 @@ calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op)
 	uint32_t c, r, tb_size = 0;
 	uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10;
 
-	if (op->ldpc_dec.code_block_mode) {
+	if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
 		tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
 	} else {
 		c = op->ldpc_dec.tb_params.c;
@@ -2276,7 +2323,7 @@ calc_enc_TB_size(struct rte_bbdev_enc_op *op)
 	uint8_t i;
 	uint32_t c, r, tb_size = 0;
 
-	if (op->turbo_enc.code_block_mode) {
+	if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
 		tb_size = op->turbo_enc.tb_params.k_neg;
 	} else {
 		c = op->turbo_enc.tb_params.c;
@@ -2296,7 +2343,7 @@ calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op)
 	uint8_t i;
 	uint32_t c, r, tb_size = 0;
 	uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 22 : 10;
-	if (op->turbo_enc.code_block_mode) {
+	if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
 		tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
 	} else {
 		c = op->turbo_enc.tb_params.c;
@@ -2467,7 +2514,7 @@ retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id,
 {
 	uint16_t j;
 	int save_status, ret;
-	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
+	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
 	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
 	bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
@@ -2513,20 +2560,20 @@ preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
 		bool preload)
 {
 	uint16_t j;
-	int ret;
-	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
-	struct rte_bbdev_op_data save_hc_in, save_hc_out;
-	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
+	int deq;
+	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
+	struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
+	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
-	for (j = 0; j < n; ++j) {
-		if ((mem_in || hc_in) && preload) {
-			save_hc_in = ops[j]->ldpc_dec.harq_combined_input;
-			save_hc_out = ops[j]->ldpc_dec.harq_combined_output;
+	if ((mem_in || hc_in) && preload) {
+		for (j = 0; j < n; ++j) {
+			save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
+			save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
 			ops[j]->ldpc_dec.op_flags =
 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
@@ -2536,16 +2583,23 @@ preload_harq_ddr(uint16_t dev_id, uint16_t queue_id,
 			ops[j]->ldpc_dec.harq_combined_output.offset =
 					harq_offset;
 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
-			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
-					&ops[j], 1);
-			ret = 0;
-			while (ret == 0)
-				ret = rte_bbdev_dequeue_ldpc_dec_ops(
-						dev_id, queue_id, &ops_deq[j], 1);
+			harq_offset += HARQ_INCR;
+		}
+		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
+		deq = 0;
+		while (deq != n)
+			deq += rte_bbdev_dequeue_ldpc_dec_ops(
+					dev_id, queue_id, &ops_deq[deq],
+					n - deq);
+		/* Restore the operations */
+		for (j = 0; j < n; ++j) {
 			ops[j]->ldpc_dec.op_flags = flags;
-			ops[j]->ldpc_dec.harq_combined_input = save_hc_in;
-			ops[j]->ldpc_dec.harq_combined_output = save_hc_out;
+			ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
+			ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
 		}
+	}
+	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
+	for (j = 0; j < n; ++j) {
 		/* Adjust HARQ offset when we reach external DDR */
 		if (mem_in || hc_in)
 			ops[j]->ldpc_dec.harq_combined_input.offset
@@ -2583,46 +2637,46 @@ dequeue_event_callback(uint16_t dev_id,
 	}
 
 	if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) {
-		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
+		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
 		printf(
 			"Dequeue interrupt handler called for incorrect event!\n");
 		return;
 	}
 
-	burst_sz = rte_atomic16_read(&tp->burst_sz);
+	burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED);
 	num_ops = tp->op_params->num_to_process;
 
 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
 				&tp->dec_ops[
-					rte_atomic16_read(&tp->nb_dequeued)],
+					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
 				burst_sz);
 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
 				&tp->dec_ops[
-					rte_atomic16_read(&tp->nb_dequeued)],
+					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
 				burst_sz);
 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
 				&tp->enc_ops[
-					rte_atomic16_read(&tp->nb_dequeued)],
+					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
 				burst_sz);
 	else /*RTE_BBDEV_OP_TURBO_ENC*/
 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
 				&tp->enc_ops[
-					rte_atomic16_read(&tp->nb_dequeued)],
+					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
 				burst_sz);
 
 	if (deq < burst_sz) {
 		printf(
 			"After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n",
 			burst_sz, deq);
-		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
+		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
 		return;
 	}
 
-	if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) {
-		rte_atomic16_add(&tp->nb_dequeued, deq);
+	if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq < num_ops) {
+		__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
 		return;
 	}
 
@@ -2659,7 +2713,7 @@ dequeue_event_callback(uint16_t dev_id,
 
 	if (ret) {
 		printf("Buffers validation failed\n");
-		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
+		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
 	}
 
 	switch (test_vector.op_type) {
@@ -2680,7 +2734,7 @@ dequeue_event_callback(uint16_t dev_id,
 		break;
 	default:
 		printf("Unknown op type: %d\n", test_vector.op_type);
-		rte_atomic16_set(&tp->processing_status, TEST_FAILED);
+		__atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED);
 		return;
 	}
 
@@ -2689,7 +2743,7 @@ dequeue_event_callback(uint16_t dev_id,
 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
 			((double)total_time / (double)rte_get_tsc_hz());
 
-	rte_atomic16_add(&tp->nb_dequeued, deq);
+	__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
 }
 
 static int
@@ -2727,11 +2781,10 @@ throughput_intr_lcore_ldpc_dec(void *arg)
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	rte_atomic16_clear(&tp->processing_status);
-	rte_atomic16_clear(&tp->nb_dequeued);
+	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
 
-	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-		rte_pause();
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
 
 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
 			num_to_process);
@@ -2779,17 +2832,15 @@ throughput_intr_lcore_ldpc_dec(void *arg)
 			 * the number of operations is not a multiple of
 			 * burst size.
 			 */
-			rte_atomic16_set(&tp->burst_sz, num_to_enq);
+			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
 
 			/* Wait until processing of previous batch is
 			 * completed
 			 */
-			while (rte_atomic16_read(&tp->nb_dequeued) !=
-					(int16_t) enqueued)
-				rte_pause();
+			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
 		}
 		if (j != TEST_REPETITIONS - 1)
-			rte_atomic16_clear(&tp->nb_dequeued);
+			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
 	}
 
 	return TEST_SUCCESS;
@@ -2824,11 +2875,10 @@ throughput_intr_lcore_dec(void *arg)
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	rte_atomic16_clear(&tp->processing_status);
-	rte_atomic16_clear(&tp->nb_dequeued);
+	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
 
-	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-		rte_pause();
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
 
 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
 			num_to_process);
@@ -2869,17 +2919,15 @@ throughput_intr_lcore_dec(void *arg)
 			 * the number of operations is not a multiple of
 			 * burst size.
 			 */
-			rte_atomic16_set(&tp->burst_sz, num_to_enq);
+			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
 
 			/* Wait until processing of previous batch is
 			 * completed
 			 */
-			while (rte_atomic16_read(&tp->nb_dequeued) !=
-					(int16_t) enqueued)
-				rte_pause();
+			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
 		}
 		if (j != TEST_REPETITIONS - 1)
-			rte_atomic16_clear(&tp->nb_dequeued);
+			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
 	}
 
 	return TEST_SUCCESS;
@@ -2914,11 +2962,10 @@ throughput_intr_lcore_enc(void *arg)
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	rte_atomic16_clear(&tp->processing_status);
-	rte_atomic16_clear(&tp->nb_dequeued);
+	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
 
-	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-		rte_pause();
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
 
 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
 			num_to_process);
@@ -2958,17 +3005,15 @@ throughput_intr_lcore_enc(void *arg)
 			 * the number of operations is not a multiple of
 			 * burst size.
 			 */
-			rte_atomic16_set(&tp->burst_sz, num_to_enq);
+			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
 
 			/* Wait until processing of previous batch is
 			 * completed
 			 */
-			while (rte_atomic16_read(&tp->nb_dequeued) !=
-					(int16_t) enqueued)
-				rte_pause();
+			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
 		}
 		if (j != TEST_REPETITIONS - 1)
-			rte_atomic16_clear(&tp->nb_dequeued);
+			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
 	}
 
 	return TEST_SUCCESS;
@@ -3004,11 +3049,10 @@ throughput_intr_lcore_ldpc_enc(void *arg)
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	rte_atomic16_clear(&tp->processing_status);
-	rte_atomic16_clear(&tp->nb_dequeued);
+	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
 
-	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-		rte_pause();
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
 
 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
 			num_to_process);
@@ -3050,17 +3094,15 @@ throughput_intr_lcore_ldpc_enc(void *arg)
 			 * the number of operations is not a multiple of
 			 * burst size.
 			 */
-			rte_atomic16_set(&tp->burst_sz, num_to_enq);
+			__atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED);
 
 			/* Wait until processing of previous batch is
 			 * completed
 			 */
-			while (rte_atomic16_read(&tp->nb_dequeued) !=
-					(int16_t) enqueued)
-				rte_pause();
+			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
 		}
 		if (j != TEST_REPETITIONS - 1)
-			rte_atomic16_clear(&tp->nb_dequeued);
+			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
 	}
 
 	return TEST_SUCCESS;
@@ -3094,8 +3136,7 @@ throughput_pmd_lcore_dec(void *arg)
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-		rte_pause();
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
 
 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -3198,8 +3239,7 @@ bler_pmd_lcore_ldpc_dec(void *arg)
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-		rte_pause();
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
 
 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -3231,11 +3271,9 @@ bler_pmd_lcore_ldpc_dec(void *arg)
 				mbuf_reset(
 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
 		}
-		if (extDdr) {
-			bool preload = i == (TEST_REPETITIONS - 1);
+		if (extDdr)
 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
-					num_ops, preload);
-		}
+					num_ops, true);
 		start_time = rte_rdtsc_precise();
 
 		for (enq = 0, deq = 0; enq < num_ops;) {
@@ -3330,8 +3368,7 @@ throughput_pmd_lcore_ldpc_dec(void *arg)
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-		rte_pause();
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
 
 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -3362,11 +3399,9 @@ throughput_pmd_lcore_ldpc_dec(void *arg)
 				mbuf_reset(
 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
 		}
-		if (extDdr) {
-			bool preload = i == (TEST_REPETITIONS - 1);
+		if (extDdr)
 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
-					num_ops, preload);
-		}
+					num_ops, true);
 		start_time = rte_rdtsc_precise();
 
 		for (enq = 0, deq = 0; enq < num_ops;) {
@@ -3449,8 +3484,7 @@ throughput_pmd_lcore_enc(void *arg)
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-		rte_pause();
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
 
 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
 			num_ops);
@@ -3540,8 +3574,7 @@ throughput_pmd_lcore_ldpc_enc(void *arg)
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT)
-		rte_pause();
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
 
 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
 			num_ops);
@@ -3715,12 +3748,16 @@ bler_test(struct active_device *ad,
 			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
 				RTE_CACHE_LINE_SIZE));
 
-	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
+	if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) &&
+			!check_bit(test_vector.ldpc_dec.op_flags,
+			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
+			&& !check_bit(test_vector.ldpc_dec.op_flags,
+			RTE_BBDEV_LDPC_LLR_COMPRESSION))
 		bler_function = bler_pmd_lcore_ldpc_dec;
 	else
 		return TEST_SKIPPED;
 
-	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
+	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
 
 	/* Main core is set at first entry */
 	t_params[0].dev_id = ad->dev_id;
@@ -3743,7 +3780,7 @@ bler_test(struct active_device *ad,
 				&t_params[used_cores++], lcore_id);
 	}
 
-	rte_atomic16_set(&op_params->sync, SYNC_START);
+	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
 	ret = bler_function(&t_params[0]);
 
 	/* Main core is always used */
@@ -3838,7 +3875,7 @@ throughput_test(struct active_device *ad,
 			throughput_function = throughput_pmd_lcore_enc;
 	}
 
-	rte_atomic16_set(&op_params->sync, SYNC_WAIT);
+	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
 
 	/* Main core is set at first entry */
 	t_params[0].dev_id = ad->dev_id;
@@ -3861,7 +3898,7 @@ throughput_test(struct active_device *ad,
 				&t_params[used_cores++], lcore_id);
 	}
 
-	rte_atomic16_set(&op_params->sync, SYNC_START);
+	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
 	ret = throughput_function(&t_params[0]);
 
 	/* Main core is always used */
@@ -3891,29 +3928,29 @@ throughput_test(struct active_device *ad,
 	 * Wait for main lcore operations.
 	 */
 	tp = &t_params[0];
-	while ((rte_atomic16_read(&tp->nb_dequeued) <
-			op_params->num_to_process) &&
-			(rte_atomic16_read(&tp->processing_status) !=
-			TEST_FAILED))
+	while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
+			op_params->num_to_process) &&
+			(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
+			TEST_FAILED))
 		rte_pause();
 
 	tp->ops_per_sec /= TEST_REPETITIONS;
 	tp->mbps /= TEST_REPETITIONS;
-	ret |= (int)rte_atomic16_read(&tp->processing_status);
+	ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
 
 	/* Wait for worker lcores operations */
 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
 		tp = &t_params[used_cores];
 
-		while ((rte_atomic16_read(&tp->nb_dequeued) <
-				op_params->num_to_process) &&
-				(rte_atomic16_read(&tp->processing_status) !=
-				TEST_FAILED))
+		while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
+				op_params->num_to_process) &&
+				(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
+				TEST_FAILED))
 			rte_pause();
 
 		tp->ops_per_sec /= TEST_REPETITIONS;
 		tp->mbps /= TEST_REPETITIONS;
-		ret |= (int)rte_atomic16_read(&tp->processing_status);
+		ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED);
 	}
 
 	/* Print throughput if test passed */
@@ -4385,15 +4422,15 @@ offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
 
 		/* give time for device to process ops */
-		rte_delay_us(200);
+		rte_delay_us(WAIT_OFFLOAD_US);
 
 		/* Start time meas for dequeue function offload latency */
 		deq_start_time = rte_rdtsc_precise();
 		/* Dequeue one operation */
 		do {
 			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
-					&ops_deq[deq], 1);
-		} while (unlikely(deq != 1));
+					&ops_deq[deq], enq);
+		} while (unlikely(deq == 0));
 
 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
@@ -4476,15 +4513,15 @@ offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
 
 		/* give time for device to process ops */
-		rte_delay_us(200);
+		rte_delay_us(WAIT_OFFLOAD_US);
 
 		/* Start time meas for dequeue function offload latency */
 		deq_start_time = rte_rdtsc_precise();
 		/* Dequeue one operation */
 		do {
 			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
-					&ops_deq[deq], 1);
-		} while (unlikely(deq != 1));
+					&ops_deq[deq], enq);
+		} while (unlikely(deq == 0));
 
 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
@@ -4564,15 +4601,15 @@ offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
 
 		/* give time for device to process ops */
-		rte_delay_us(200);
+		rte_delay_us(WAIT_OFFLOAD_US);
 
 		/* Start time meas for dequeue function offload latency */
 		deq_start_time = rte_rdtsc_precise();
 		/* Dequeue one operation */
 		do {
 			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
-					&ops_deq[deq], 1);
-		} while (unlikely(deq != 1));
+					&ops_deq[deq], enq);
+		} while (unlikely(deq == 0));
 
 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
@@ -4647,15 +4684,15 @@ offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
 		time_st->enq_acc_total_time += stats.acc_offload_cycles;
 
 		/* give time for device to process ops */
-		rte_delay_us(200);
+		rte_delay_us(WAIT_OFFLOAD_US);
 
 		/* Start time meas for dequeue function offload latency */
 		deq_start_time = rte_rdtsc_precise();
 		/* Dequeue one operation */
 		do {
 			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
-					&ops_deq[deq], 1);
-		} while (unlikely(deq != 1));
+					&ops_deq[deq], enq);
+		} while (unlikely(deq == 0));
 
 		deq_last_time = rte_rdtsc_precise() - deq_start_time;
 		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,