From: Nicolas Chautru
Date: Wed, 3 Jul 2019 15:24:07 +0000 (-0700)
Subject: baseband/turbo_sw: extend for 5G
X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=c769c711757a;p=dpdk.git

baseband/turbo_sw: extend for 5G

The implementation is still based on the Intel SDK libraries, optimized
for the AVX512 instruction set and 5GNR.
The driver can also be built for AVX2 for 4G capability,
or without the SDK dependency for maintenance.

Signed-off-by: Nicolas Chautru
Acked-by: Amr Mokhtar
---

diff --git a/config/common_base b/config/common_base
index a104b5f8ad..8ef75c2039 100644
--- a/config/common_base
+++ b/config/common_base
@@ -541,6 +541,7 @@ CONFIG_RTE_LIBRTE_BBDEV_DEBUG=n
 CONFIG_RTE_BBDEV_MAX_DEVS=128
 CONFIG_RTE_BBDEV_OFFLOAD_COST=y
 CONFIG_RTE_BBDEV_SDK_AVX2=n
+CONFIG_RTE_BBDEV_SDK_AVX512=n
 
 #
 # Compile PMD for NULL bbdev device
diff --git a/doc/guides/rel_notes/release_19_08.rst b/doc/guides/rel_notes/release_19_08.rst
index 0c7d76200d..6fdb83718b 100644
--- a/doc/guides/rel_notes/release_19_08.rst
+++ b/doc/guides/rel_notes/release_19_08.rst
@@ -139,6 +139,14 @@ New Features
   (Programmable Acceleration Card) N3000.  See the
   :doc:`../bbdevs/fpga_lte_fec` BBDEV guide for more details on this new driver.
 
+* **Updated TURBO_SW bbdev PMD.**
+
+  Updated the ``turbo_sw`` bbdev driver with changes including:
+
+  * Added option to build the driver with or without dependency on external
+    SDK libraries.
+  * Added support for 5GNR encode/decode operations.
+
 * **Updated the QuickAssist Technology (QAT) symmetric crypto PMD.**
 
   Added support for digest-encrypted cases where digest is appended
diff --git a/drivers/baseband/turbo_sw/Makefile b/drivers/baseband/turbo_sw/Makefile
index 414d0d921d..4aa05d2db6 100644
--- a/drivers/baseband/turbo_sw/Makefile
+++ b/drivers/baseband/turbo_sw/Makefile
@@ -3,7 +3,6 @@
 
 include $(RTE_SDK)/mk/rte.vars.mk
 
-
 # library name
 LIB = librte_pmd_bbdev_turbo_sw.a
 
@@ -34,6 +33,20 @@ LDLIBS += -L$(FLEXRAN_SDK)/lib_common -lcommon
 LDLIBS += -lstdc++ -lirc -limf -lipps -lsvml
 endif
 
+ifeq ($(CONFIG_RTE_BBDEV_SDK_AVX512),y)
+ifeq ($(CONFIG_RTE_BBDEV_SDK_AVX2),n)
+$(error "CONFIG_RTE_BBDEV_SDK_AVX512 requires CONFIG_RTE_BBDEV_SDK_AVX2 set")
+endif
+CFLAGS += -I$(FLEXRAN_SDK)/lib_ldpc_encoder_5gnr
+CFLAGS += -I$(FLEXRAN_SDK)/lib_ldpc_decoder_5gnr
+CFLAGS += -I$(FLEXRAN_SDK)/lib_LDPC_ratematch_5gnr
+CFLAGS += -I$(FLEXRAN_SDK)/lib_rate_dematching_5gnr
+LDLIBS += -L$(FLEXRAN_SDK)/lib_ldpc_encoder_5gnr -lldpc_encoder_5gnr
+LDLIBS += -L$(FLEXRAN_SDK)/lib_ldpc_decoder_5gnr -lldpc_decoder_5gnr
+LDLIBS += -L$(FLEXRAN_SDK)/lib_LDPC_ratematch_5gnr -lLDPC_ratematch_5gnr
+LDLIBS += -L$(FLEXRAN_SDK)/lib_rate_dematching_5gnr -lrate_dematching_5gnr
+endif
+
 # library version
 LIBABIVER := 1
diff --git a/drivers/baseband/turbo_sw/bbdev_turbo_software.c b/drivers/baseband/turbo_sw/bbdev_turbo_software.c
index 5551f8484d..acb63a4478 100644
--- a/drivers/baseband/turbo_sw/bbdev_turbo_software.c
+++ b/drivers/baseband/turbo_sw/bbdev_turbo_software.c
@@ -14,11 +14,23 @@
 #include 
 #include 
+#include 
+#include 
+
 #ifdef RTE_BBDEV_SDK_AVX2
+#include 
+#include 
 #include 
 #include 
 #include 
 #endif
+#ifdef RTE_BBDEV_SDK_AVX512
+#include 
+#include 
+#include 
+#include 
+#include 
+#endif
 
 #define DRIVER_NAME baseband_turbo_sw
 
@@ -84,6 +96,7 @@ struct turbo_sw_queue {
 	enum rte_bbdev_op_type type;
 } __rte_cache_aligned;
 
+
 #ifdef RTE_BBDEV_SDK_AVX2
 static inline char *
 mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
@@ -179,6 +192,41 @@ info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, } }, +#endif +#ifdef RTE_BBDEV_SDK_AVX512 + { + .type = RTE_BBDEV_OP_LDPC_ENC, + .cap.ldpc_enc = { + .capability_flags = + RTE_BBDEV_LDPC_RATE_MATCH | + RTE_BBDEV_LDPC_CRC_24A_ATTACH | + RTE_BBDEV_LDPC_CRC_24B_ATTACH, + .num_buffers_src = + RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, + .num_buffers_dst = + RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, + } + }, + { + .type = RTE_BBDEV_OP_LDPC_DEC, + .cap.ldpc_dec = { + .capability_flags = + RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK | + RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK | + RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP | + RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE | + RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | + RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE, + .llr_size = 8, + .llr_decimals = 2, + .harq_memory_size = 0, + .num_buffers_src = + RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, + .num_buffers_hard_out = + RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, + .num_buffers_soft_out = 0, + } + }, #endif RTE_BBDEV_END_OF_CAPABILITIES_LIST() }; @@ -186,14 +234,12 @@ info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info) static struct rte_bbdev_queue_conf default_queue_conf = { .queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT, }; - #ifdef RTE_BBDEV_SDK_AVX2 static const enum rte_cpu_flag_t cpu_flag = RTE_CPUFLAG_SSE4_2; dev_info->cpu_flag_reqs = &cpu_flag; #else dev_info->cpu_flag_reqs = NULL; #endif - default_queue_conf.socket = dev->data->socket_id; dev_info->driver_name = RTE_STR(DRIVER_NAME); @@ -280,7 +326,7 @@ q_setup(struct rte_bbdev *dev, uint16_t q_id, return -ENAMETOOLONG; } q->enc_in = rte_zmalloc_socket(name, - (RTE_BBDEV_TURBO_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in), + (RTE_BBDEV_LDPC_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in), RTE_CACHE_LINE_SIZE, queue_conf->socket); if (q->enc_in == NULL) { rte_bbdev_log(ERR, @@ -288,7 +334,7 @@ q_setup(struct rte_bbdev *dev, uint16_t q_id, goto free_q; } - /* Allocate memory for Aplha Gamma temp buffer. */ + /* Allocate memory for Alpha Gamma temp buffer. */ ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_ag%u:%u", dev->data->dev_id, q_id); if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { @@ -423,6 +469,7 @@ static const struct rte_bbdev_ops pmd_ops = { }; #ifdef RTE_BBDEV_SDK_AVX2 +#ifdef RTE_LIBRTE_BBDEV_DEBUG /* Checks if the encoder input buffer is correct. * Returns 0 if it's valid, -1 otherwise. 
*/ @@ -478,16 +525,21 @@ is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length) return 0; } #endif +#endif static inline void process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, uint8_t r, uint8_t c, uint16_t k, uint16_t ncb, uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head, - struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset, + struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset, uint16_t in_length, struct rte_bbdev_stats *q_stats) { #ifdef RTE_BBDEV_SDK_AVX2 +#ifdef RTE_LIBRTE_BBDEV_DEBUG int ret; +#else + RTE_SET_USED(in_length); +#endif int16_t k_idx; uint16_t m; uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out; @@ -511,11 +563,14 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, /* CRC24A (for TB) */ if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) && (enc->code_block_mode == 1)) { +#ifdef RTE_LIBRTE_BBDEV_DEBUG ret = is_enc_input_valid(k - 24, k_idx, in_length); if (ret != 0) { op->status |= 1 << RTE_BBDEV_DATA_ERROR; return; } +#endif + crc_req.data = in; crc_req.len = k - 24; /* Check if there is a room for CRC bits if not use @@ -544,11 +599,14 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, #endif } else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) { /* CRC24B */ +#ifdef RTE_LIBRTE_BBDEV_DEBUG ret = is_enc_input_valid(k - 24, k_idx, in_length); if (ret != 0) { op->status |= 1 << RTE_BBDEV_DATA_ERROR; return; } +#endif + crc_req.data = in; crc_req.len = k - 24; /* Check if there is a room for CRC bits if this is the last @@ -575,13 +633,16 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, #ifdef RTE_BBDEV_OFFLOAD_COST q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; #endif - } else { + } +#ifdef RTE_LIBRTE_BBDEV_DEBUG + else { ret = is_enc_input_valid(k, k_idx, in_length); if (ret != 0) { op->status |= 1 << RTE_BBDEV_DATA_ERROR; return; } } +#endif /* Turbo encoder */ @@ -757,6 +818,143 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, #endif } + +static inline void +process_ldpc_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, + uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head, + struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset, + uint16_t seg_total_left, struct rte_bbdev_stats *q_stats) +{ +#ifdef RTE_BBDEV_SDK_AVX512 + RTE_SET_USED(seg_total_left); + uint8_t *in, *rm_out; + struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; + struct bblib_ldpc_encoder_5gnr_request ldpc_req; + struct bblib_ldpc_encoder_5gnr_response ldpc_resp; + struct bblib_LDPC_ratematch_5gnr_request rm_req; + struct bblib_LDPC_ratematch_5gnr_response rm_resp; + struct bblib_crc_request crc_req; + struct bblib_crc_response crc_resp; + uint16_t msgLen, puntBits, parity_offset, out_len; + uint16_t K = (enc->basegraph == 1 ? 
22 : 10) * enc->z_c; + uint16_t in_length_in_bits = K - enc->n_filler; + uint16_t in_length_in_bytes = (in_length_in_bits + 7) >> 3; + +#ifdef RTE_BBDEV_OFFLOAD_COST + uint64_t start_time = rte_rdtsc_precise(); +#else + RTE_SET_USED(q_stats); +#endif + + in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); + + /* Masking the Filler bits explicitly */ + memset(q->enc_in + (in_length_in_bytes - 3), 0, + ((K + 7) >> 3) - (in_length_in_bytes - 3)); + /* CRC Generation */ + if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) { + rte_memcpy(q->enc_in, in, in_length_in_bytes - 3); + crc_req.data = in; + crc_req.len = in_length_in_bits - 24; + crc_resp.data = q->enc_in; + bblib_lte_crc24a_gen(&crc_req, &crc_resp); + } else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) { + rte_memcpy(q->enc_in, in, in_length_in_bytes - 3); + crc_req.data = in; + crc_req.len = in_length_in_bits - 24; + crc_resp.data = q->enc_in; + bblib_lte_crc24b_gen(&crc_req, &crc_resp); + } else + rte_memcpy(q->enc_in, in, in_length_in_bytes); + + /* LDPC Encoding */ + ldpc_req.Zc = enc->z_c; + ldpc_req.baseGraph = enc->basegraph; + /* Number of rows set to maximum */ + ldpc_req.nRows = ldpc_req.baseGraph == 1 ? 46 : 42; + ldpc_req.numberCodeblocks = 1; + ldpc_req.input[0] = (int8_t *) q->enc_in; + ldpc_resp.output[0] = (int8_t *) q->enc_out; + + bblib_bit_reverse(ldpc_req.input[0], in_length_in_bytes << 3); + + if (bblib_ldpc_encoder_5gnr(&ldpc_req, &ldpc_resp) != 0) { + op->status |= 1 << RTE_BBDEV_DRV_ERROR; + rte_bbdev_log(ERR, "LDPC Encoder failed"); + return; + } + + /* + * Systematic + Parity : Recreating stream with filler bits, ideally + * the bit select could handle this in the RM SDK + */ + msgLen = (ldpc_req.baseGraph == 1 ? 22 : 10) * ldpc_req.Zc; + puntBits = 2 * ldpc_req.Zc; + parity_offset = msgLen - puntBits; + ippsCopyBE_1u(((uint8_t *) ldpc_req.input[0]) + (puntBits / 8), + puntBits%8, q->adapter_output, 0, parity_offset); + ippsCopyBE_1u(q->enc_out, 0, q->adapter_output + (parity_offset / 8), + parity_offset % 8, ldpc_req.nRows * ldpc_req.Zc); + + out_len = (e + 7) >> 3; + /* get output data starting address */ + rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len); + if (rm_out == NULL) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, + "Too little space in output mbuf"); + return; + } + /* + * rte_bbdev_op_data.offset can be different than the offset + * of the appended bytes + */ + rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); + + /* Rate-Matching */ + rm_req.E = e; + rm_req.Ncb = enc->n_cb; + rm_req.Qm = enc->q_m; + rm_req.Zc = enc->z_c; + rm_req.baseGraph = enc->basegraph; + rm_req.input = q->adapter_output; + rm_req.nLen = enc->n_filler; + rm_req.nullIndex = parity_offset - enc->n_filler; + rm_req.rvidx = enc->rv_index; + rm_resp.output = q->deint_output; + + if (bblib_LDPC_ratematch_5gnr(&rm_req, &rm_resp) != 0) { + op->status |= 1 << RTE_BBDEV_DRV_ERROR; + rte_bbdev_log(ERR, "Rate matching failed"); + return; + } + + /* RM SDK may provide non zero bits on last byte */ + if ((e % 8) != 0) + q->deint_output[out_len-1] &= (1 << (e % 8)) - 1; + + bblib_bit_reverse((int8_t *) q->deint_output, out_len << 3); + + rte_memcpy(rm_out, q->deint_output, out_len); + enc->output.length += out_len; + +#ifdef RTE_BBDEV_OFFLOAD_COST + q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; +#endif +#else + RTE_SET_USED(q); + RTE_SET_USED(op); + RTE_SET_USED(e); + RTE_SET_USED(m_in); + RTE_SET_USED(m_out_head); + RTE_SET_USED(m_out); + RTE_SET_USED(in_offset); 
+ RTE_SET_USED(out_offset); + RTE_SET_USED(seg_total_left); + RTE_SET_USED(q_stats); +#endif +} + static inline void enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, struct rte_bbdev_stats *queue_stats) @@ -850,6 +1048,93 @@ enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, } } + +static inline void +enqueue_ldpc_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, + struct rte_bbdev_stats *queue_stats) +{ + uint8_t c, r, crc24_bits = 0; + uint32_t e; + struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; + uint16_t in_offset = enc->input.offset; + uint16_t out_offset = enc->output.offset; + struct rte_mbuf *m_in = enc->input.data; + struct rte_mbuf *m_out = enc->output.data; + struct rte_mbuf *m_out_head = enc->output.data; + uint32_t in_length, mbuf_total_left = enc->input.length; + + uint16_t seg_total_left; + + /* Clear op status */ + op->status = 0; + + if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) { + rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d", + mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE); + op->status = 1 << RTE_BBDEV_DATA_ERROR; + return; + } + + if (m_in == NULL || m_out == NULL) { + rte_bbdev_log(ERR, "Invalid mbuf pointer"); + op->status = 1 << RTE_BBDEV_DATA_ERROR; + return; + } + + if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) || + (enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH)) + crc24_bits = 24; + + if (enc->code_block_mode == 0) { /* For Transport Block mode */ + c = enc->tb_params.c; + r = enc->tb_params.r; + } else { /* For Code Block mode */ + c = 1; + r = 0; + } + + while (mbuf_total_left > 0 && r < c) { + + seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; + + if (enc->code_block_mode == 0) { + e = (r < enc->tb_params.cab) ? + enc->tb_params.ea : enc->tb_params.eb; + } else { + e = enc->cb_params.e; + } + + process_ldpc_enc_cb(q, op, e, m_in, m_out_head, + m_out, in_offset, out_offset, seg_total_left, + queue_stats); + /* Update total_left */ + in_length = (enc->basegraph == 1 ? 
22 : 10) * enc->z_c; + in_length = ((in_length - crc24_bits - enc->n_filler) >> 3); + mbuf_total_left -= in_length; + /* Update offsets for next CBs (if exist) */ + in_offset += in_length; + out_offset += (e + 7) >> 3; + + /* Update offsets */ + if (seg_total_left == in_length) { + /* Go to the next mbuf */ + m_in = m_in->next; + m_out = m_out->next; + in_offset = 0; + out_offset = 0; + } + r++; + } + + /* check if all input data was processed */ + if (mbuf_total_left != 0) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, + "Mismatch between mbuf length and included CBs sizes %d", + mbuf_total_left); + } +} + static inline uint16_t enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops, uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) @@ -866,6 +1151,23 @@ enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops, NULL); } +static inline uint16_t +enqueue_ldpc_enc_all_ops(struct turbo_sw_queue *q, + struct rte_bbdev_enc_op **ops, + uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) +{ + uint16_t i; +#ifdef RTE_BBDEV_OFFLOAD_COST + queue_stats->acc_offload_cycles = 0; +#endif + + for (i = 0; i < nb_ops; ++i) + enqueue_ldpc_enc_one_op(q, ops[i], queue_stats); + + return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, + NULL); +} + #ifdef RTE_BBDEV_SDK_AVX2 static inline void move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k, @@ -890,7 +1192,11 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, struct rte_bbdev_stats *q_stats) { #ifdef RTE_BBDEV_SDK_AVX2 +#ifdef RTE_LIBRTE_BBDEV_DEBUG int ret; +#else + RTE_SET_USED(in_length); +#endif int32_t k_idx; int32_t iter_cnt; uint8_t *in, *out, *adapter_input; @@ -908,11 +1214,13 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, k_idx = compute_idx(k); +#ifdef RTE_LIBRTE_BBDEV_DEBUG ret = is_dec_input_valid(k_idx, kw, in_length); if (ret != 0) { op->status |= 1 << RTE_BBDEV_DATA_ERROR; return; } +#endif in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); ncb = kw; @@ -928,11 +1236,12 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, deint_resp.pinteleavebuffer = q->deint_output; #ifdef RTE_BBDEV_OFFLOAD_COST - start_time = rte_rdtsc_precise(); + start_time = rte_rdtsc_precise(); #endif + /* Sub-block De-Interleaving */ bblib_deinterleave_ul(&deint_req, &deint_resp); #ifdef RTE_BBDEV_OFFLOAD_COST - q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; + q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; #endif } else move_padding_bytes(in, q->deint_output, k, ncb); @@ -1024,6 +1333,202 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, #endif } +static inline void +process_ldpc_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, + uint8_t c, uint16_t out_length, uint16_t e, + struct rte_mbuf *m_in, + struct rte_mbuf *m_out_head, struct rte_mbuf *m_out, + struct rte_mbuf *m_harq_in, + struct rte_mbuf *m_harq_out_head, struct rte_mbuf *m_harq_out, + uint16_t in_offset, uint16_t out_offset, + uint16_t harq_in_offset, uint16_t harq_out_offset, + bool check_crc_24b, + uint16_t crc24_overlap, uint16_t in_length, + struct rte_bbdev_stats *q_stats) +{ +#ifdef RTE_BBDEV_SDK_AVX512 + RTE_SET_USED(in_length); + RTE_SET_USED(c); + uint8_t *in, *out, *harq_in, *harq_out, *adapter_input; + struct bblib_rate_dematching_5gnr_request derm_req; + struct bblib_rate_dematching_5gnr_response derm_resp; + struct bblib_ldpc_decoder_5gnr_request dec_req; + 
struct bblib_ldpc_decoder_5gnr_response dec_resp; + struct bblib_crc_request crc_req; + struct bblib_crc_response crc_resp; + struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec; + uint16_t K, parity_offset, sys_cols, outLenWithCrc; + int16_t deRmOutSize, numRows; + + /* Compute some LDPC BG lengths */ + outLenWithCrc = out_length + (crc24_overlap >> 3); + sys_cols = (dec->basegraph == 1) ? 22 : 10; + K = sys_cols * dec->z_c; + parity_offset = K - 2 * dec->z_c; + +#ifdef RTE_BBDEV_OFFLOAD_COST + uint64_t start_time = rte_rdtsc_precise(); +#else + RTE_SET_USED(q_stats); +#endif + + in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); + + if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) { + /** + * Single contiguous block from the first LLR of the + * circular buffer. + */ + harq_in = NULL; + if (m_harq_in != NULL) + harq_in = rte_pktmbuf_mtod_offset(m_harq_in, + uint8_t *, harq_in_offset); + if (harq_in == NULL) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, "No space in harq input mbuf"); + return; + } + uint16_t harq_in_length = RTE_MIN( + dec->harq_combined_input.length, + (uint32_t) dec->n_cb); + memset(q->ag + harq_in_length, 0, + dec->n_cb - harq_in_length); + rte_memcpy(q->ag, harq_in, harq_in_length); + } + + derm_req.p_in = (int8_t *) in; + derm_req.p_harq = q->ag; /* This doesn't include the filler bits */ + derm_req.base_graph = dec->basegraph; + derm_req.zc = dec->z_c; + derm_req.ncb = dec->n_cb; + derm_req.e = e; + derm_req.k0 = 0; /* Actual output from SDK */ + derm_req.isretx = check_bit(dec->op_flags, + RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE); + derm_req.rvid = dec->rv_index; + derm_req.modulation_order = dec->q_m; + derm_req.start_null_index = parity_offset - dec->n_filler; + derm_req.num_of_null = dec->n_filler; + + bblib_rate_dematching_5gnr(&derm_req, &derm_resp); + + /* Compute RM out size and number of rows */ + deRmOutSize = RTE_MIN( + derm_req.k0 + derm_req.e - + ((derm_req.k0 < derm_req.start_null_index) ? 
+ 0 : dec->n_filler), + dec->n_cb - dec->n_filler); + if (m_harq_in != NULL) + deRmOutSize = RTE_MAX(deRmOutSize, + RTE_MIN(dec->n_cb - dec->n_filler, + m_harq_in->data_len)); + numRows = ((deRmOutSize + dec->n_filler + dec->z_c - 1) / dec->z_c) + - sys_cols + 2; + numRows = RTE_MAX(4, numRows); + + /* get output data starting address */ + out = (uint8_t *)mbuf_append(m_out_head, m_out, out_length); + if (out == NULL) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, + "Too little space in LDPC decoder output mbuf"); + return; + } + + /* rte_bbdev_op_data.offset can be different than the offset + * of the appended bytes + */ + out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); + adapter_input = q->enc_out; + + dec_req.Zc = dec->z_c; + dec_req.baseGraph = dec->basegraph; + dec_req.nRows = numRows; + dec_req.numChannelLlrs = deRmOutSize; + dec_req.varNodes = derm_req.p_harq; + dec_req.numFillerBits = dec->n_filler; + dec_req.maxIterations = dec->iter_max; + dec_req.enableEarlyTermination = check_bit(dec->op_flags, + RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE); + dec_resp.varNodes = (int16_t *) q->adapter_output; + dec_resp.compactedMessageBytes = q->enc_out; + + bblib_ldpc_decoder_5gnr(&dec_req, &dec_resp); + + dec->iter_count = RTE_MAX(dec_resp.iterationAtTermination, + dec->iter_count); + if (!dec_resp.parityPassedAtTermination) + op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR; + + bblib_bit_reverse((int8_t *) q->enc_out, outLenWithCrc << 3); + + if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK) || + check_bit(dec->op_flags, + RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK)) { + crc_req.data = adapter_input; + crc_req.len = K - dec->n_filler - 24; + crc_resp.check_passed = false; + crc_resp.data = adapter_input; + if (check_crc_24b) + bblib_lte_crc24b_check(&crc_req, &crc_resp); + else + bblib_lte_crc24a_check(&crc_req, &crc_resp); + if (!crc_resp.check_passed) + op->status |= 1 << RTE_BBDEV_CRC_ERROR; + } + +#ifdef RTE_BBDEV_OFFLOAD_COST + q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; +#endif + if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) { + harq_out = NULL; + if (m_harq_out != NULL) { + /* Initialize HARQ data length since we overwrite */ + m_harq_out->data_len = 0; + /* Check there is enough space + * in the HARQ outbound buffer + */ + harq_out = (uint8_t *)mbuf_append(m_harq_out_head, + m_harq_out, deRmOutSize); + } + if (harq_out == NULL) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, "No space in HARQ output mbuf"); + return; + } + /* get output data starting address and overwrite the data */ + harq_out = rte_pktmbuf_mtod_offset(m_harq_out, uint8_t *, + harq_out_offset); + rte_memcpy(harq_out, derm_req.p_harq, deRmOutSize); + dec->harq_combined_output.length += deRmOutSize; + } + + rte_memcpy(out, adapter_input, out_length); + dec->hard_output.length += out_length; +#else + RTE_SET_USED(q); + RTE_SET_USED(op); + RTE_SET_USED(c); + RTE_SET_USED(out_length); + RTE_SET_USED(e); + RTE_SET_USED(m_in); + RTE_SET_USED(m_out_head); + RTE_SET_USED(m_out); + RTE_SET_USED(m_harq_in); + RTE_SET_USED(m_harq_out_head); + RTE_SET_USED(m_harq_out); + RTE_SET_USED(harq_in_offset); + RTE_SET_USED(harq_out_offset); + RTE_SET_USED(in_offset); + RTE_SET_USED(out_offset); + RTE_SET_USED(check_crc_24b); + RTE_SET_USED(crc24_overlap); + RTE_SET_USED(in_length); + RTE_SET_USED(q_stats); +#endif +} + + static inline void enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, struct rte_bbdev_stats *queue_stats) 
@@ -1083,6 +1588,7 @@ enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, in_offset, out_offset, check_bit(dec->op_flags, RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap, seg_total_left, queue_stats); + /* To keep CRC24 attached to end of Code block, use * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it * removed by default once verified. @@ -1104,6 +1610,103 @@ enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, } r++; } + + if (mbuf_total_left != 0) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, + "Mismatch between mbuf length and included Circular buffer sizes"); + } +} + +static inline void +enqueue_ldpc_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, + struct rte_bbdev_stats *queue_stats) +{ + uint8_t c, r = 0; + uint16_t e, out_length; + uint16_t crc24_overlap = 0; + struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec; + struct rte_mbuf *m_in = dec->input.data; + struct rte_mbuf *m_harq_in = dec->harq_combined_input.data; + struct rte_mbuf *m_harq_out = dec->harq_combined_output.data; + struct rte_mbuf *m_harq_out_head = dec->harq_combined_output.data; + struct rte_mbuf *m_out = dec->hard_output.data; + struct rte_mbuf *m_out_head = dec->hard_output.data; + uint16_t in_offset = dec->input.offset; + uint16_t harq_in_offset = dec->harq_combined_input.offset; + uint16_t harq_out_offset = dec->harq_combined_output.offset; + uint16_t out_offset = dec->hard_output.offset; + uint32_t mbuf_total_left = dec->input.length; + uint16_t seg_total_left; + + /* Clear op status */ + op->status = 0; + + if (m_in == NULL || m_out == NULL) { + rte_bbdev_log(ERR, "Invalid mbuf pointer"); + op->status = 1 << RTE_BBDEV_DATA_ERROR; + return; + } + + if (dec->code_block_mode == 0) { /* For Transport Block mode */ + c = dec->tb_params.c; + e = dec->tb_params.ea; + } else { /* For Code Block mode */ + c = 1; + e = dec->cb_params.e; + } + + if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP)) + crc24_overlap = 24; + + out_length = (dec->basegraph == 1 ? 22 : 10) * dec->z_c; /* K */ + out_length = ((out_length - crc24_overlap - dec->n_filler) >> 3); + + while (mbuf_total_left > 0) { + if (dec->code_block_mode == 0) + e = (r < dec->tb_params.cab) ? + dec->tb_params.ea : dec->tb_params.eb; + + seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; + + process_ldpc_dec_cb(q, op, c, out_length, e, + m_in, m_out_head, m_out, + m_harq_in, m_harq_out_head, m_harq_out, + in_offset, out_offset, harq_in_offset, + harq_out_offset, + check_bit(dec->op_flags, + RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK), + crc24_overlap, + seg_total_left, queue_stats); + + /* To keep CRC24 attached to end of Code block, use + * RTE_BBDEV_LDPC_DEC_TB_CRC_24B_KEEP flag as it + * removed by default once verified. 
+ */ + + mbuf_total_left -= e; + + /* Update offsets */ + if (seg_total_left == e) { + /* Go to the next mbuf */ + m_in = m_in->next; + m_out = m_out->next; + if (m_harq_in != NULL) + m_harq_in = m_harq_in->next; + if (m_harq_out != NULL) + m_harq_out = m_harq_out->next; + in_offset = 0; + out_offset = 0; + harq_in_offset = 0; + harq_out_offset = 0; + } else { + /* Update offsets for next CBs (if exist) */ + in_offset += e; + out_offset += out_length; + } + r++; + } + if (mbuf_total_left != 0) { op->status |= 1 << RTE_BBDEV_DATA_ERROR; rte_bbdev_log(ERR, @@ -1127,6 +1730,23 @@ enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops, NULL); } +static inline uint16_t +enqueue_ldpc_dec_all_ops(struct turbo_sw_queue *q, + struct rte_bbdev_dec_op **ops, + uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) +{ + uint16_t i; +#ifdef RTE_BBDEV_OFFLOAD_COST + queue_stats->acc_offload_cycles = 0; +#endif + + for (i = 0; i < nb_ops; ++i) + enqueue_ldpc_dec_one_op(q, ops[i], queue_stats); + + return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, + NULL); +} + /* Enqueue burst */ static uint16_t enqueue_enc_ops(struct rte_bbdev_queue_data *q_data, @@ -1144,6 +1764,24 @@ enqueue_enc_ops(struct rte_bbdev_queue_data *q_data, return nb_enqueued; } +/* Enqueue burst */ +static uint16_t +enqueue_ldpc_enc_ops(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_enc_op **ops, uint16_t nb_ops) +{ + void *queue = q_data->queue_private; + struct turbo_sw_queue *q = queue; + uint16_t nb_enqueued = 0; + + nb_enqueued = enqueue_ldpc_enc_all_ops( + q, ops, nb_ops, &q_data->queue_stats); + + q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; + q_data->queue_stats.enqueued_count += nb_enqueued; + + return nb_enqueued; +} + /* Enqueue burst */ static uint16_t enqueue_dec_ops(struct rte_bbdev_queue_data *q_data, @@ -1161,6 +1799,24 @@ enqueue_dec_ops(struct rte_bbdev_queue_data *q_data, return nb_enqueued; } +/* Enqueue burst */ +static uint16_t +enqueue_ldpc_dec_ops(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_dec_op **ops, uint16_t nb_ops) +{ + void *queue = q_data->queue_private; + struct turbo_sw_queue *q = queue; + uint16_t nb_enqueued = 0; + + nb_enqueued = enqueue_ldpc_dec_all_ops(q, ops, nb_ops, + &q_data->queue_stats); + + q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; + q_data->queue_stats.enqueued_count += nb_enqueued; + + return nb_enqueued; +} + /* Dequeue decode burst */ static uint16_t dequeue_dec_ops(struct rte_bbdev_queue_data *q_data, @@ -1273,6 +1929,10 @@ turbo_sw_bbdev_create(struct rte_vdev_device *vdev, bbdev->dequeue_dec_ops = dequeue_dec_ops; bbdev->enqueue_enc_ops = enqueue_enc_ops; bbdev->enqueue_dec_ops = enqueue_dec_ops; + bbdev->dequeue_ldpc_enc_ops = dequeue_enc_ops; + bbdev->dequeue_ldpc_dec_ops = dequeue_dec_ops; + bbdev->enqueue_ldpc_enc_ops = enqueue_ldpc_enc_ops; + bbdev->enqueue_ldpc_dec_ops = enqueue_ldpc_dec_ops; ((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues = init_params->queues_num; diff --git a/drivers/baseband/turbo_sw/meson.build b/drivers/baseband/turbo_sw/meson.build index 438b5a72dd..33345aa010 100644 --- a/drivers/baseband/turbo_sw/meson.build +++ b/drivers/baseband/turbo_sw/meson.build @@ -23,6 +23,16 @@ if dpdk_conf.has('RTE_BBDEV_SDK_AVX2') includes += include_directories(path + '/lib_common') endif endif +if dpdk_conf.has('RTE_BBDEV_SDK_AVX512') + ext_deps += cc.find_library('libldpc_encoder_5gnr', dirs: [path + '/lib_ldpc_encoder_5gnr'], required: true) + ext_deps += 
cc.find_library('libldpc_decoder_5gnr', dirs: [path + '/lib_ldpc_decoder_5gnr'], required: true) + ext_deps += cc.find_library('libLDPC_ratematch_5gnr', dirs: [path + '/lib_LDPC_ratematch_5gnr'], required: true) + ext_deps += cc.find_library('librate_dematching_5gnr', dirs: [path + '/lib_rate_dematching_5gnr'], required: true) + includes += include_directories(path + '/lib_ldpc_encoder_5gnr') + includes += include_directories(path + '/lib_ldpc_decoder_5gnr') + includes += include_directories(path + '/lib_LDPC_ratematch_5gnr') + includes += include_directories(path + '/lib_rate_dematching_5gnr') +endif deps += ['bbdev', 'bus_vdev', 'ring'] name = 'bbdev_turbo_sw' diff --git a/mk/rte.app.mk b/mk/rte.app.mk index 8bda05ab0e..a277c808ed 100644 --- a/mk/rte.app.mk +++ b/mk/rte.app.mk @@ -236,7 +236,13 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_crc -lcr _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_turbo -lturbo _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_rate_matching -lrate_matching _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_common -lcommon -_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -lirc -limf -lstdc++ -lipps +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -lirc -limf -lstdc++ -lipps -lsvml +ifeq ($(CONFIG_RTE_BBDEV_SDK_AVX512),y) +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_LDPC_ratematch_5gnr -lLDPC_ratematch_5gnr +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_ldpc_encoder_5gnr -lldpc_encoder_5gnr +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_ldpc_decoder_5gnr -lldpc_decoder_5gnr +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_rate_dematching_5gnr -lrate_dematching_5gnr +endif # CONFIG_RTE_BBDEV_SDK_AVX512 endif # CONFIG_RTE_BBDEV_SDK_AVX2 endif # CONFIG_RTE_LIBRTE_BBDEV
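
For reference, the new enqueue_ldpc_enc_ops/enqueue_ldpc_dec_ops handlers registered above are reached through the generic bbdev API rather than called directly. The following minimal sketch is not part of the patch: it assumes a probed baseband_turbo_sw vdev visible as device 0, and shows how an application could bring up a single queue of type RTE_BBDEV_OP_LDPC_ENC; the device id and queue count are illustrative only.

#include <string.h>
#include <rte_bbdev.h>
#include <rte_bbdev_op.h>

/* Illustrative bring-up of one LDPC-encode queue on bbdev device 'dev_id',
 * e.g. after probing the PMD with --vdev=baseband_turbo_sw.
 */
static int
setup_ldpc_enc_queue(uint16_t dev_id)
{
	struct rte_bbdev_info info;
	struct rte_bbdev_queue_conf qconf;

	if (rte_bbdev_info_get(dev_id, &info) != 0)
		return -1;

	/* One queue on the device's own socket */
	if (rte_bbdev_setup_queues(dev_id, 1, info.socket_id) != 0)
		return -1;

	memset(&qconf, 0, sizeof(qconf));
	qconf.socket = info.socket_id;
	qconf.queue_size = info.drv.queue_size_lim;
	qconf.op_type = RTE_BBDEV_OP_LDPC_ENC;

	if (rte_bbdev_queue_configure(dev_id, 0, &qconf) != 0)
		return -1;

	return rte_bbdev_start(dev_id);
}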
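
Once a queue is configured, a 5GNR LDPC encode operation can be submitted and reaped through the LDPC-specific burst calls used by this release. The sketch below is again illustrative only: it assumes the caller has created an op mempool of type RTE_BBDEV_OP_LDPC_ENC and prepared the input/output mbufs (ops_mp, m_in and m_out are placeholder names), and the parameter values such as base graph 1, Zc = 384 and E = 10000 are examples rather than requirements of the driver.

#include <rte_bbdev.h>
#include <rte_bbdev_op.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Illustrative only: submit one code-block LDPC encode op on device 0,
 * queue 0, and poll until it completes.
 */
static int
ldpc_enc_one(struct rte_mempool *ops_mp, struct rte_mbuf *m_in,
		struct rte_mbuf *m_out)
{
	struct rte_bbdev_enc_op *op, *deq_op;
	uint16_t nb;

	if (rte_bbdev_enc_op_alloc_bulk(ops_mp, &op, 1) != 0)
		return -1;

	/* Code-block mode, base graph 1, Zc = 384, rate-matched to E bits */
	op->ldpc_enc.code_block_mode = 1;
	op->ldpc_enc.basegraph = 1;
	op->ldpc_enc.z_c = 384;
	op->ldpc_enc.n_cb = 66 * 384;		/* full circular buffer */
	op->ldpc_enc.n_filler = 0;
	op->ldpc_enc.q_m = 4;			/* 16-QAM */
	op->ldpc_enc.rv_index = 0;
	op->ldpc_enc.cb_params.e = 10000;
	op->ldpc_enc.op_flags = RTE_BBDEV_LDPC_RATE_MATCH;

	op->ldpc_enc.input.data = m_in;
	op->ldpc_enc.input.offset = 0;
	op->ldpc_enc.input.length = (22 * 384) / 8;	/* K/8, no filler */
	op->ldpc_enc.output.data = m_out;
	op->ldpc_enc.output.offset = 0;
	op->ldpc_enc.output.length = 0;		/* filled in by the driver */

	if (rte_bbdev_enqueue_ldpc_enc_ops(0, 0, &op, 1) != 1) {
		rte_bbdev_enc_op_free_bulk(&op, 1);
		return -1;
	}

	/* Poll for completion; a real application would batch and retry */
	do {
		nb = rte_bbdev_dequeue_ldpc_enc_ops(0, 0, &deq_op, 1);
	} while (nb == 0);

	return (deq_op->status == 0) ? 0 : -1;
}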