baseband/turbo_sw: extend for 5G
authorNicolas Chautru <nicolas.chautru@intel.com>
Wed, 3 Jul 2019 15:24:07 +0000 (08:24 -0700)
committerAkhil Goyal <akhil.goyal@nxp.com>
Fri, 5 Jul 2019 13:28:14 +0000 (15:28 +0200)
The implementation is still based on the Intel SDK libraries
optimized for the AVX512 instruction set and 5GNR.
It can also be built for AVX2 for 4G capability, or
without the SDK dependency for maintenance.

Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
Acked-by: Amr Mokhtar <amr.mokhtar@intel.com>
config/common_base
doc/guides/rel_notes/release_19_08.rst
drivers/baseband/turbo_sw/Makefile
drivers/baseband/turbo_sw/bbdev_turbo_software.c
drivers/baseband/turbo_sw/meson.build
mk/rte.app.mk

index a104b5f..8ef75c2 100644 (file)
@@ -541,6 +541,7 @@ CONFIG_RTE_LIBRTE_BBDEV_DEBUG=n
 CONFIG_RTE_BBDEV_MAX_DEVS=128
 CONFIG_RTE_BBDEV_OFFLOAD_COST=y
 CONFIG_RTE_BBDEV_SDK_AVX2=n
+CONFIG_RTE_BBDEV_SDK_AVX512=n
 
 #
 # Compile PMD for NULL bbdev device
index 0c7d762..6fdb837 100644 (file)
@@ -139,6 +139,14 @@ New Features
   (Programmable  Acceleration Card) N3000.  See the
   :doc:`../bbdevs/fpga_lte_fec` BBDEV guide for more details on this new driver.
 
+* **Updated TURBO_SW bbdev PMD.**
+
+  Updated the ``turbo_sw`` bbdev driver with changes including:
+
+  * Added an option to build the driver with or without a dependency on
+    external SDK libraries.
+  * Added support for 5GNR encode/decode operations.
+
 * **Updated the QuickAssist Technology (QAT) symmetric crypto PMD.**
 
   Added support for digest-encrypted cases where digest is appended
index 414d0d9..4aa05d2 100644 (file)
@@ -3,7 +3,6 @@
 
 include $(RTE_SDK)/mk/rte.vars.mk
 
-
 # library name
 LIB = librte_pmd_bbdev_turbo_sw.a
 
@@ -34,6 +33,20 @@ LDLIBS += -L$(FLEXRAN_SDK)/lib_common -lcommon
 LDLIBS += -lstdc++ -lirc -limf -lipps -lsvml
 endif
 
+ifeq ($(CONFIG_RTE_BBDEV_SDK_AVX512),y)
+ifeq ($(CONFIG_RTE_BBDEV_SDK_AVX2),n)
+$(error "CONFIG_RTE_BBDEV_SDK_AVX512 requires CONFIG_RTE_BBDEV_SDK_AVX2 set")
+endif
+CFLAGS += -I$(FLEXRAN_SDK)/lib_ldpc_encoder_5gnr
+CFLAGS += -I$(FLEXRAN_SDK)/lib_ldpc_decoder_5gnr
+CFLAGS += -I$(FLEXRAN_SDK)/lib_LDPC_ratematch_5gnr
+CFLAGS += -I$(FLEXRAN_SDK)/lib_rate_dematching_5gnr
+LDLIBS += -L$(FLEXRAN_SDK)/lib_ldpc_encoder_5gnr -lldpc_encoder_5gnr
+LDLIBS += -L$(FLEXRAN_SDK)/lib_ldpc_decoder_5gnr -lldpc_decoder_5gnr
+LDLIBS += -L$(FLEXRAN_SDK)/lib_LDPC_ratematch_5gnr -lLDPC_ratematch_5gnr
+LDLIBS += -L$(FLEXRAN_SDK)/lib_rate_dematching_5gnr -lrate_dematching_5gnr
+endif
+
 # library version
 LIBABIVER := 1
 
index 5551f84..acb63a4 100644 (file)
 #include <rte_bbdev.h>
 #include <rte_bbdev_pmd.h>
 
+#include <rte_hexdump.h>
+#include <rte_log.h>
+
 #ifdef RTE_BBDEV_SDK_AVX2
+#include <ipp.h>
+#include <ipps.h>
 #include <phy_turbo.h>
 #include <phy_crc.h>
 #include <phy_rate_match.h>
 #endif
+#ifdef RTE_BBDEV_SDK_AVX512
+#include <bit_reverse.h>
+#include <phy_ldpc_encoder_5gnr.h>
+#include <phy_ldpc_decoder_5gnr.h>
+#include <phy_LDPC_ratematch_5gnr.h>
+#include <phy_rate_dematching_5gnr.h>
+#endif
 
 #define DRIVER_NAME baseband_turbo_sw
 
@@ -84,6 +96,7 @@ struct turbo_sw_queue {
        enum rte_bbdev_op_type type;
 } __rte_cache_aligned;
 
+
 #ifdef RTE_BBDEV_SDK_AVX2
 static inline char *
 mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
@@ -179,6 +192,41 @@ info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
                                                RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
                        }
                },
+#endif
+#ifdef RTE_BBDEV_SDK_AVX512
+               {
+                       .type   = RTE_BBDEV_OP_LDPC_ENC,
+                       .cap.ldpc_enc = {
+                               .capability_flags =
+                                               RTE_BBDEV_LDPC_RATE_MATCH |
+                                               RTE_BBDEV_LDPC_CRC_24A_ATTACH |
+                                               RTE_BBDEV_LDPC_CRC_24B_ATTACH,
+                               .num_buffers_src =
+                                               RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
+                               .num_buffers_dst =
+                                               RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
+                       }
+               },
+               {
+               .type   = RTE_BBDEV_OP_LDPC_DEC,
+               .cap.ldpc_dec = {
+                       .capability_flags =
+                                       RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
+                                       RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK |
+                                       RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
+                                       RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
+                                       RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
+                                       RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE,
+                       .llr_size = 8,
+                       .llr_decimals = 2,
+                       .harq_memory_size = 0,
+                       .num_buffers_src =
+                                       RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
+                       .num_buffers_hard_out =
+                                       RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
+                       .num_buffers_soft_out = 0,
+               }
+               },
 #endif
                RTE_BBDEV_END_OF_CAPABILITIES_LIST()
        };
@@ -186,14 +234,12 @@ info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
        static struct rte_bbdev_queue_conf default_queue_conf = {
                .queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT,
        };
-
 #ifdef RTE_BBDEV_SDK_AVX2
        static const enum rte_cpu_flag_t cpu_flag = RTE_CPUFLAG_SSE4_2;
        dev_info->cpu_flag_reqs = &cpu_flag;
 #else
        dev_info->cpu_flag_reqs = NULL;
 #endif
-
        default_queue_conf.socket = dev->data->socket_id;
 
        dev_info->driver_name = RTE_STR(DRIVER_NAME);
@@ -280,7 +326,7 @@ q_setup(struct rte_bbdev *dev, uint16_t q_id,
                return -ENAMETOOLONG;
        }
        q->enc_in = rte_zmalloc_socket(name,
-                       (RTE_BBDEV_TURBO_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in),
+                       (RTE_BBDEV_LDPC_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in),
                        RTE_CACHE_LINE_SIZE, queue_conf->socket);
        if (q->enc_in == NULL) {
                rte_bbdev_log(ERR,
@@ -288,7 +334,7 @@ q_setup(struct rte_bbdev *dev, uint16_t q_id,
                goto free_q;
        }
 
-       /* Allocate memory for Aplha Gamma temp buffer. */
+       /* Allocate memory for Alpha Gamma temp buffer. */
        ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_ag%u:%u",
                        dev->data->dev_id, q_id);
        if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
@@ -423,6 +469,7 @@ static const struct rte_bbdev_ops pmd_ops = {
 };
 
 #ifdef RTE_BBDEV_SDK_AVX2
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
 /* Checks if the encoder input buffer is correct.
  * Returns 0 if it's valid, -1 otherwise.
  */
@@ -478,16 +525,21 @@ is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length)
        return 0;
 }
 #endif
+#endif
 
 static inline void
 process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
                uint8_t r, uint8_t c, uint16_t k, uint16_t ncb,
                uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
-               struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
+               struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
                uint16_t in_length, struct rte_bbdev_stats *q_stats)
 {
 #ifdef RTE_BBDEV_SDK_AVX2
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
        int ret;
+#else
+       RTE_SET_USED(in_length);
+#endif
        int16_t k_idx;
        uint16_t m;
        uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out;
@@ -511,11 +563,14 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
        /* CRC24A (for TB) */
        if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) &&
                (enc->code_block_mode == 1)) {
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
                ret = is_enc_input_valid(k - 24, k_idx, in_length);
                if (ret != 0) {
                        op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                        return;
                }
+#endif
+
                crc_req.data = in;
                crc_req.len = k - 24;
                /* Check if there is a room for CRC bits if not use
@@ -544,11 +599,14 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
 #endif
        } else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
                /* CRC24B */
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
                ret = is_enc_input_valid(k - 24, k_idx, in_length);
                if (ret != 0) {
                        op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                        return;
                }
+#endif
+
                crc_req.data = in;
                crc_req.len = k - 24;
                /* Check if there is a room for CRC bits if this is the last
@@ -575,13 +633,16 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
 #ifdef RTE_BBDEV_OFFLOAD_COST
                q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
 #endif
-       } else {
+       }
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+       else {
                ret = is_enc_input_valid(k, k_idx, in_length);
                if (ret != 0) {
                        op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                        return;
                }
        }
+#endif
 
        /* Turbo encoder */
 
@@ -757,6 +818,143 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
 #endif
 }
 
+
+/*
+ * Encode a single LDPC code block.
+ *
+ * Steps, in order: optional CRC24A/CRC24B generation, LDPC encoding,
+ * rebuild of the systematic + parity stream, rate matching to e bits and
+ * copy of the result into the output mbuf.
+ *
+ * The input is read from m_in at in_offset. Room for (e + 7) / 8 bytes is
+ * requested through mbuf_append(m_out_head, m_out, ...) and the result is
+ * written to m_out at out_offset; enc->output.length grows by that amount.
+ * On failure an error bit is set in op->status and the function returns
+ * before producing any output.
+ *
+ * seg_total_left is not used. q_stats is only updated when
+ * RTE_BBDEV_OFFLOAD_COST is defined. When RTE_BBDEV_SDK_AVX512 is not
+ * defined the function does nothing.
+ */
+static inline void
+process_ldpc_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
+               uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
+               struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
+               uint16_t seg_total_left, struct rte_bbdev_stats *q_stats)
+{
+#ifdef RTE_BBDEV_SDK_AVX512
+       RTE_SET_USED(seg_total_left);
+       uint8_t *in, *rm_out;
+       struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
+       struct bblib_ldpc_encoder_5gnr_request ldpc_req;
+       struct bblib_ldpc_encoder_5gnr_response ldpc_resp;
+       struct bblib_LDPC_ratematch_5gnr_request rm_req;
+       struct bblib_LDPC_ratematch_5gnr_response rm_resp;
+       struct bblib_crc_request crc_req;
+       struct bblib_crc_response crc_resp;
+       uint16_t msgLen, puntBits, parity_offset, out_len;
+       /* K is 22 * z_c for base graph 1 and 10 * z_c otherwise */
+       uint16_t K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
+       /* Encoder input length once the filler bits are excluded */
+       uint16_t in_length_in_bits = K - enc->n_filler;
+       uint16_t in_length_in_bytes = (in_length_in_bits + 7) >> 3;
+
+#ifdef RTE_BBDEV_OFFLOAD_COST
+       uint64_t start_time = rte_rdtsc_precise();
+#else
+       RTE_SET_USED(q_stats);
+#endif
+
+       in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
+
+       /* Masking the Filler bits explicitly */
+       /*
+        * The cleared range starts 3 bytes (24 bits) before the end of the
+        * input and runs to the end of the K-bit buffer.
+        * NOTE(review): presumably those 3 bytes are where the CRC24 lands
+        * when one is attached - confirm against the SDK CRC routine.
+        */
+       memset(q->enc_in  + (in_length_in_bytes - 3), 0,
+                       ((K + 7) >> 3) - (in_length_in_bytes - 3));
+       /* CRC Generation */
+       /*
+        * With a CRC flag set only the input minus its last 3 bytes is copied
+        * and the CRC routine is handed q->enc_in as its response buffer;
+        * otherwise the whole input is copied unchanged.
+        */
+       if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) {
+               rte_memcpy(q->enc_in, in, in_length_in_bytes - 3);
+               crc_req.data = in;
+               crc_req.len = in_length_in_bits - 24;
+               crc_resp.data = q->enc_in;
+               bblib_lte_crc24a_gen(&crc_req, &crc_resp);
+       } else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) {
+               rte_memcpy(q->enc_in, in, in_length_in_bytes - 3);
+               crc_req.data = in;
+               crc_req.len = in_length_in_bits - 24;
+               crc_resp.data = q->enc_in;
+               bblib_lte_crc24b_gen(&crc_req, &crc_resp);
+       } else
+               rte_memcpy(q->enc_in, in, in_length_in_bytes);
+
+       /* LDPC Encoding */
+       ldpc_req.Zc = enc->z_c;
+       ldpc_req.baseGraph = enc->basegraph;
+       /* Number of rows set to maximum */
+       ldpc_req.nRows = ldpc_req.baseGraph == 1 ? 46 : 42;
+       ldpc_req.numberCodeblocks = 1;
+       ldpc_req.input[0] = (int8_t *) q->enc_in;
+       ldpc_resp.output[0] = (int8_t *) q->enc_out;
+
+       /* The encoder input is bit-reversed in place before the SDK call */
+       bblib_bit_reverse(ldpc_req.input[0], in_length_in_bytes << 3);
+
+       if (bblib_ldpc_encoder_5gnr(&ldpc_req, &ldpc_resp) != 0) {
+               op->status |= 1 << RTE_BBDEV_DRV_ERROR;
+               rte_bbdev_log(ERR, "LDPC Encoder failed");
+               return;
+       }
+
+       /*
+        * Systematic + Parity : Recreating stream with filler bits, ideally
+        * the bit select could handle this in the RM SDK
+        */
+       msgLen = (ldpc_req.baseGraph == 1 ? 22 : 10) * ldpc_req.Zc;
+       /* The first 2 * Zc input bits are skipped when rebuilding the stream */
+       puntBits = 2 * ldpc_req.Zc;
+       parity_offset = msgLen - puntBits;
+       /* Input bits after the skipped ones go first in q->adapter_output */
+       ippsCopyBE_1u(((uint8_t *) ldpc_req.input[0]) + (puntBits / 8),
+                       puntBits%8, q->adapter_output, 0, parity_offset);
+       /* Encoder output (nRows * Zc bits) follows at bit parity_offset */
+       ippsCopyBE_1u(q->enc_out, 0, q->adapter_output + (parity_offset / 8),
+                       parity_offset % 8, ldpc_req.nRows * ldpc_req.Zc);
+
+       /* Output size in bytes: e bits rounded up */
+       out_len = (e + 7) >> 3;
+       /* get output data starting address */
+       rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
+       if (rm_out == NULL) {
+               op->status |= 1 << RTE_BBDEV_DATA_ERROR;
+               rte_bbdev_log(ERR,
+                               "Too little space in output mbuf");
+               return;
+       }
+       /*
+        * rte_bbdev_op_data.offset can be different than the offset
+        * of the appended bytes
+        */
+       rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
+
+       /* Rate-Matching */
+       rm_req.E = e;
+       rm_req.Ncb = enc->n_cb;
+       rm_req.Qm = enc->q_m;
+       rm_req.Zc = enc->z_c;
+       rm_req.baseGraph = enc->basegraph;
+       rm_req.input = q->adapter_output;
+       rm_req.nLen = enc->n_filler;
+       rm_req.nullIndex = parity_offset - enc->n_filler;
+       rm_req.rvidx = enc->rv_index;
+       rm_resp.output = q->deint_output;
+
+       if (bblib_LDPC_ratematch_5gnr(&rm_req, &rm_resp) != 0) {
+               op->status |= 1 << RTE_BBDEV_DRV_ERROR;
+               rte_bbdev_log(ERR, "Rate matching failed");
+               return;
+       }
+
+       /* RM SDK may provide non zero bits on last byte */
+       /* Keep only the low (e % 8) bits of the last output byte */
+       if ((e % 8) != 0)
+               q->deint_output[out_len-1] &= (1 << (e % 8)) - 1;
+
+       /* The rate-matched output is bit-reversed in place before the copy */
+       bblib_bit_reverse((int8_t *) q->deint_output, out_len << 3);
+
+       rte_memcpy(rm_out, q->deint_output, out_len);
+       enc->output.length += out_len;
+
+#ifdef RTE_BBDEV_OFFLOAD_COST
+       q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
+#endif
+#else
+       /* Built without the AVX512 SDK: nothing to do, mark arguments used */
+       RTE_SET_USED(q);
+       RTE_SET_USED(op);
+       RTE_SET_USED(e);
+       RTE_SET_USED(m_in);
+       RTE_SET_USED(m_out_head);
+       RTE_SET_USED(m_out);
+       RTE_SET_USED(in_offset);
+       RTE_SET_USED(out_offset);
+       RTE_SET_USED(seg_total_left);
+       RTE_SET_USED(q_stats);
+#endif
+}
+
 static inline void
 enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
                struct rte_bbdev_stats *queue_stats)
@@ -850,6 +1048,93 @@ enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
        }
 }
 
+
+/*
+ * Process one LDPC encode operation, code block by code block.
+ *
+ * op->status is cleared first. The operation is rejected with
+ * RTE_BBDEV_DATA_ERROR when the input is larger than the maximum TB size or
+ * when the input/output mbuf pointer is NULL. In transport block mode the
+ * code blocks tb_params.r .. tb_params.c - 1 are processed, using
+ * tb_params.ea for the first tb_params.cab blocks and tb_params.eb for the
+ * others; in code block mode a single block of cb_params.e bits is
+ * processed. Each block is handed to process_ldpc_enc_cb(), after which the
+ * input/output offsets (or mbuf segments) are advanced.
+ *
+ * RTE_BBDEV_DATA_ERROR is also raised when input bytes remain unconsumed at
+ * the end, including when an mbuf chain ends before all blocks are processed.
+ *
+ * @param q            Queue providing the scratch buffers.
+ * @param op           Encode operation; its ldpc_enc member is used.
+ * @param queue_stats  Statistics forwarded to process_ldpc_enc_cb().
+ */
+static inline void
+enqueue_ldpc_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
+               struct rte_bbdev_stats *queue_stats)
+{
+       uint8_t c, r, crc24_bits = 0;
+       uint32_t e;
+       struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
+       uint16_t in_offset = enc->input.offset;
+       uint16_t out_offset = enc->output.offset;
+       struct rte_mbuf *m_in = enc->input.data;
+       struct rte_mbuf *m_out = enc->output.data;
+       struct rte_mbuf *m_out_head = enc->output.data;
+       uint32_t in_length, mbuf_total_left = enc->input.length;
+
+       uint16_t seg_total_left;
+
+       /* Clear op status */
+       op->status = 0;
+
+       /* input.length is in bytes while the TB size limit is in bits */
+       if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) {
+               rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d",
+                               mbuf_total_left,
+                               RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3);
+               op->status = 1 << RTE_BBDEV_DATA_ERROR;
+               return;
+       }
+
+       if (m_in == NULL || m_out == NULL) {
+               rte_bbdev_log(ERR, "Invalid mbuf pointer");
+               op->status = 1 << RTE_BBDEV_DATA_ERROR;
+               return;
+       }
+
+       /*
+        * Use the LDPC flag set, as process_ldpc_enc_cb() does, so that the
+        * number of input bytes consumed per block matches what was encoded.
+        */
+       if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) ||
+               (enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH))
+               crc24_bits = 24;
+
+       if (enc->code_block_mode == 0) { /* For Transport Block mode */
+               c = enc->tb_params.c;
+               r = enc->tb_params.r;
+       } else { /* For Code Block mode */
+               c = 1;
+               r = 0;
+       }
+
+       while (mbuf_total_left > 0 && r < c) {
+
+               /*
+                * A chain that ends early leaves mbuf_total_left != 0 and is
+                * reported below instead of dereferencing a NULL segment.
+                */
+               if (m_in == NULL || m_out == NULL)
+                       break;
+
+               seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
+
+               if (enc->code_block_mode == 0) {
+                       e = (r < enc->tb_params.cab) ?
+                               enc->tb_params.ea : enc->tb_params.eb;
+               } else {
+                       e = enc->cb_params.e;
+               }
+
+               process_ldpc_enc_cb(q, op, e, m_in, m_out_head,
+                               m_out, in_offset, out_offset, seg_total_left,
+                               queue_stats);
+               /* Update total_left */
+               in_length = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
+               in_length = ((in_length - crc24_bits - enc->n_filler) >> 3);
+               mbuf_total_left -= in_length;
+               /* Update offsets for next CBs (if exist) */
+               in_offset += in_length;
+               out_offset += (e + 7) >> 3;
+
+               /* Update offsets */
+               if (seg_total_left == in_length) {
+                       /* Go to the next mbuf */
+                       m_in = m_in->next;
+                       m_out = m_out->next;
+                       in_offset = 0;
+                       out_offset = 0;
+               }
+               r++;
+       }
+
+       /* check if all input data was processed */
+       if (mbuf_total_left != 0) {
+               op->status |= 1 << RTE_BBDEV_DATA_ERROR;
+               rte_bbdev_log(ERR,
+                               "Mismatch between mbuf length and included CBs sizes %u",
+                               mbuf_total_left);
+       }
+}
+
 static inline uint16_t
 enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops,
                uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
@@ -866,6 +1151,23 @@ enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops,
                        NULL);
 }
 
+/*
+ * Run every LDPC encode operation of a burst, then push the whole burst onto
+ * the queue's processed_pkts ring.
+ *
+ * With RTE_BBDEV_OFFLOAD_COST defined, the accumulated offload cycle counter
+ * in queue_stats is reset before the burst is processed.
+ *
+ * Returns the value reported by rte_ring_enqueue_burst().
+ */
+static inline uint16_t
+enqueue_ldpc_enc_all_ops(struct turbo_sw_queue *q,
+               struct rte_bbdev_enc_op **ops,
+               uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
+{
+       uint16_t idx = 0;
+       unsigned int nb_enqueued;
+
+#ifdef RTE_BBDEV_OFFLOAD_COST
+       queue_stats->acc_offload_cycles = 0;
+#endif
+
+       while (idx < nb_ops) {
+               enqueue_ldpc_enc_one_op(q, ops[idx], queue_stats);
+               ++idx;
+       }
+
+       nb_enqueued = rte_ring_enqueue_burst(q->processed_pkts, (void **)ops,
+                       nb_ops, NULL);
+       return nb_enqueued;
+}
+
 #ifdef RTE_BBDEV_SDK_AVX2
 static inline void
 move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k,
@@ -890,7 +1192,11 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
                struct rte_bbdev_stats *q_stats)
 {
 #ifdef RTE_BBDEV_SDK_AVX2
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
        int ret;
+#else
+       RTE_SET_USED(in_length);
+#endif
        int32_t k_idx;
        int32_t iter_cnt;
        uint8_t *in, *out, *adapter_input;
@@ -908,11 +1214,13 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
 
        k_idx = compute_idx(k);
 
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
        ret = is_dec_input_valid(k_idx, kw, in_length);
        if (ret != 0) {
                op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                return;
        }
+#endif
 
        in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
        ncb = kw;
@@ -928,11 +1236,12 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
                deint_resp.pinteleavebuffer = q->deint_output;
 
 #ifdef RTE_BBDEV_OFFLOAD_COST
-               start_time = rte_rdtsc_precise();
+       start_time = rte_rdtsc_precise();
 #endif
+               /* Sub-block De-Interleaving */
                bblib_deinterleave_ul(&deint_req, &deint_resp);
 #ifdef RTE_BBDEV_OFFLOAD_COST
-               q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
+       q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
 #endif
        } else
                move_padding_bytes(in, q->deint_output, k, ncb);
@@ -1024,6 +1333,202 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
 #endif
 }
 
+/*
+ * Decode a single LDPC code block.
+ *
+ * Steps, in order: optional load of the HARQ combined input into q->ag,
+ * rate de-matching of e input values from m_in at in_offset, LDPC decoding,
+ * optional CRC24A/CRC24B check, optional write of the HARQ combined output,
+ * and copy of out_length bytes of hard output to m_out at out_offset.
+ *
+ * Status bits that may be set in op->status: RTE_BBDEV_DATA_ERROR (missing
+ * HARQ buffer or no room in an output mbuf), RTE_BBDEV_SYNDROME_ERROR
+ * (decoder reports parity not passed) and RTE_BBDEV_CRC_ERROR (CRC check
+ * failed). A data error returns immediately; note that a failure on the
+ * HARQ output path returns before the hard output is copied.
+ *
+ * check_crc_24b selects the CRC24B checker over CRC24A when a CRC check flag
+ * is set. crc24_overlap (in bits) only extends the length that is
+ * bit-reversed after decoding. c and in_length are not used. q_stats is only
+ * updated when RTE_BBDEV_OFFLOAD_COST is defined. When RTE_BBDEV_SDK_AVX512
+ * is not defined the function does nothing.
+ */
+static inline void
+process_ldpc_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
+               uint8_t c, uint16_t out_length, uint16_t e,
+               struct rte_mbuf *m_in,
+               struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
+               struct rte_mbuf *m_harq_in,
+               struct rte_mbuf *m_harq_out_head, struct rte_mbuf *m_harq_out,
+               uint16_t in_offset, uint16_t out_offset,
+               uint16_t harq_in_offset, uint16_t harq_out_offset,
+               bool check_crc_24b,
+               uint16_t crc24_overlap, uint16_t in_length,
+               struct rte_bbdev_stats *q_stats)
+{
+#ifdef RTE_BBDEV_SDK_AVX512
+       RTE_SET_USED(in_length);
+       RTE_SET_USED(c);
+       uint8_t *in, *out, *harq_in, *harq_out, *adapter_input;
+       struct bblib_rate_dematching_5gnr_request derm_req;
+       struct bblib_rate_dematching_5gnr_response derm_resp;
+       struct bblib_ldpc_decoder_5gnr_request dec_req;
+       struct bblib_ldpc_decoder_5gnr_response dec_resp;
+       struct bblib_crc_request crc_req;
+       struct bblib_crc_response crc_resp;
+       struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
+       uint16_t K, parity_offset, sys_cols, outLenWithCrc;
+       int16_t deRmOutSize, numRows;
+
+       /* Compute some LDPC BG lengths */
+       /* Output length in bytes plus the CRC overlap converted to bytes */
+       outLenWithCrc = out_length + (crc24_overlap >> 3);
+       sys_cols = (dec->basegraph == 1) ? 22 : 10;
+       K = sys_cols * dec->z_c;
+       parity_offset = K - 2 * dec->z_c;
+
+#ifdef RTE_BBDEV_OFFLOAD_COST
+       uint64_t start_time = rte_rdtsc_precise();
+#else
+       RTE_SET_USED(q_stats);
+#endif
+
+       in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
+
+       if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
+               /**
+                *  Single contiguous block from the first LLR of the
+                *  circular buffer.
+                */
+               harq_in = NULL;
+               if (m_harq_in != NULL)
+                       harq_in = rte_pktmbuf_mtod_offset(m_harq_in,
+                               uint8_t *, harq_in_offset);
+               if (harq_in == NULL) {
+                       op->status |= 1 << RTE_BBDEV_DATA_ERROR;
+                       rte_bbdev_log(ERR, "No space in harq input mbuf");
+                       return;
+               }
+               /*
+                * Copy at most n_cb bytes of HARQ input into q->ag and zero
+                * whatever remains of the n_cb-byte area.
+                */
+               uint16_t harq_in_length = RTE_MIN(
+                               dec->harq_combined_input.length,
+                               (uint32_t) dec->n_cb);
+               memset(q->ag + harq_in_length, 0,
+                               dec->n_cb - harq_in_length);
+               rte_memcpy(q->ag, harq_in, harq_in_length);
+       }
+
+       /* Rate de-matching request; q->ag is passed as the HARQ buffer */
+       derm_req.p_in = (int8_t *) in;
+       derm_req.p_harq = q->ag; /* This doesn't include the filler bits */
+       derm_req.base_graph = dec->basegraph;
+       derm_req.zc = dec->z_c;
+       derm_req.ncb = dec->n_cb;
+       derm_req.e = e;
+       derm_req.k0 = 0; /* Actual output from SDK */
+       /* isretx mirrors the HARQ combine-input flag */
+       derm_req.isretx = check_bit(dec->op_flags,
+                       RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
+       derm_req.rvid = dec->rv_index;
+       derm_req.modulation_order = dec->q_m;
+       derm_req.start_null_index = parity_offset - dec->n_filler;
+       derm_req.num_of_null = dec->n_filler;
+
+       bblib_rate_dematching_5gnr(&derm_req, &derm_resp);
+
+       /* Compute RM out size and number of rows */
+       /*
+        * Size is k0 + e (less the filler count when k0 is at or beyond the
+        * null region start), capped at n_cb - n_filler.
+        */
+       deRmOutSize = RTE_MIN(
+                       derm_req.k0 + derm_req.e -
+                       ((derm_req.k0 < derm_req.start_null_index) ?
+                                       0 : dec->n_filler),
+                       dec->n_cb - dec->n_filler);
+       /* With a HARQ input mbuf, never go below its (capped) data length */
+       if (m_harq_in != NULL)
+               deRmOutSize = RTE_MAX(deRmOutSize,
+                               RTE_MIN(dec->n_cb - dec->n_filler,
+                                               m_harq_in->data_len));
+       /* Rows needed to cover the de-rate-matched data, with a floor of 4 */
+       numRows = ((deRmOutSize + dec->n_filler + dec->z_c - 1) / dec->z_c)
+                       - sys_cols + 2;
+       numRows = RTE_MAX(4, numRows);
+
+       /* get output data starting address */
+       out = (uint8_t *)mbuf_append(m_out_head, m_out, out_length);
+       if (out == NULL) {
+               op->status |= 1 << RTE_BBDEV_DATA_ERROR;
+               rte_bbdev_log(ERR,
+                               "Too little space in LDPC decoder output mbuf");
+               return;
+       }
+
+       /* rte_bbdev_op_data.offset can be different than the offset
+        * of the appended bytes
+        */
+       out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
+       /* q->enc_out is reused here to hold the decoder's compacted output */
+       adapter_input = q->enc_out;
+
+       dec_req.Zc = dec->z_c;
+       dec_req.baseGraph = dec->basegraph;
+       dec_req.nRows = numRows;
+       dec_req.numChannelLlrs = deRmOutSize;
+       dec_req.varNodes = derm_req.p_harq;
+       dec_req.numFillerBits = dec->n_filler;
+       dec_req.maxIterations = dec->iter_max;
+       dec_req.enableEarlyTermination = check_bit(dec->op_flags,
+                       RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
+       dec_resp.varNodes = (int16_t *) q->adapter_output;
+       dec_resp.compactedMessageBytes = q->enc_out;
+
+       bblib_ldpc_decoder_5gnr(&dec_req, &dec_resp);
+
+       /* Keep the largest iteration count seen so far for this operation */
+       dec->iter_count = RTE_MAX(dec_resp.iterationAtTermination,
+                       dec->iter_count);
+       if (!dec_resp.parityPassedAtTermination)
+               op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR;
+
+       /* Decoded bytes (CRC overlap included) are bit-reversed in place */
+       bblib_bit_reverse((int8_t *) q->enc_out, outLenWithCrc << 3);
+
+       if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK) ||
+                       check_bit(dec->op_flags,
+                                       RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK)) {
+               crc_req.data = adapter_input;
+               /* Length handed to the checker excludes filler and 24 bits */
+               crc_req.len  = K - dec->n_filler - 24;
+               crc_resp.check_passed = false;
+               crc_resp.data = adapter_input;
+               if (check_crc_24b)
+                       bblib_lte_crc24b_check(&crc_req, &crc_resp);
+               else
+                       bblib_lte_crc24a_check(&crc_req, &crc_resp);
+               if (!crc_resp.check_passed)
+                       op->status |= 1 << RTE_BBDEV_CRC_ERROR;
+       }
+
+       /* Cycle accounting stops here; the copies below are not measured */
+#ifdef RTE_BBDEV_OFFLOAD_COST
+       q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
+#endif
+       if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
+               harq_out = NULL;
+               if (m_harq_out != NULL) {
+                       /* Initialize HARQ data length since we overwrite */
+                       m_harq_out->data_len = 0;
+                       /* Check there is enough space
+                        * in the HARQ outbound buffer
+                        */
+                       harq_out = (uint8_t *)mbuf_append(m_harq_out_head,
+                                       m_harq_out, deRmOutSize);
+               }
+               if (harq_out == NULL) {
+                       op->status |= 1 << RTE_BBDEV_DATA_ERROR;
+                       rte_bbdev_log(ERR, "No space in HARQ output mbuf");
+                       return;
+               }
+               /* get output data starting address and overwrite the data */
+               harq_out = rte_pktmbuf_mtod_offset(m_harq_out, uint8_t *,
+                               harq_out_offset);
+               rte_memcpy(harq_out, derm_req.p_harq, deRmOutSize);
+               dec->harq_combined_output.length += deRmOutSize;
+       }
+
+       /* Hard output: out_length bytes of the decoded data */
+       rte_memcpy(out, adapter_input, out_length);
+       dec->hard_output.length += out_length;
+#else
+       /* Built without the AVX512 SDK: nothing to do, mark arguments used */
+       RTE_SET_USED(q);
+       RTE_SET_USED(op);
+       RTE_SET_USED(c);
+       RTE_SET_USED(out_length);
+       RTE_SET_USED(e);
+       RTE_SET_USED(m_in);
+       RTE_SET_USED(m_out_head);
+       RTE_SET_USED(m_out);
+       RTE_SET_USED(m_harq_in);
+       RTE_SET_USED(m_harq_out_head);
+       RTE_SET_USED(m_harq_out);
+       RTE_SET_USED(harq_in_offset);
+       RTE_SET_USED(harq_out_offset);
+       RTE_SET_USED(in_offset);
+       RTE_SET_USED(out_offset);
+       RTE_SET_USED(check_crc_24b);
+       RTE_SET_USED(crc24_overlap);
+       RTE_SET_USED(in_length);
+       RTE_SET_USED(q_stats);
+#endif
+}
+
+
 static inline void
 enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
                struct rte_bbdev_stats *queue_stats)
@@ -1083,6 +1588,7 @@ enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
                                in_offset, out_offset, check_bit(dec->op_flags,
                                RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap,
                                seg_total_left, queue_stats);
+
                /* To keep CRC24 attached to end of Code block, use
                 * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it
                 * removed by default once verified.
@@ -1104,6 +1610,103 @@ enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
                }
                r++;
        }
+
+       if (mbuf_total_left != 0) {
+               op->status |= 1 << RTE_BBDEV_DATA_ERROR;
+               rte_bbdev_log(ERR,
+                               "Mismatch between mbuf length and included Circular buffer sizes");
+       }
+}
+
+/*
+ * Run one LDPC decode operation, one code block per loop iteration.
+ *
+ * Clears op->status, then walks the input mbuf chain and calls
+ * process_ldpc_dec_cb() for each code block of e input units. When the
+ * current input segment is exactly consumed, every chain (input, hard
+ * output, HARQ input/output) moves to its next mbuf and all offsets restart
+ * at zero. RTE_BBDEV_DATA_ERROR is set when the input or hard output mbuf
+ * is missing on entry.
+ *
+ * The loop stops early, leaving mbuf_total_left non-zero for the length
+ * check that follows it, when the input or hard output chain ends before
+ * the declared input length is consumed, or when e is zero or larger than
+ * the remaining input length.
+ */
+static inline void
+enqueue_ldpc_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
+               struct rte_bbdev_stats *queue_stats)
+{
+       uint8_t c, r = 0;
+       uint16_t e, out_length;
+       uint16_t crc24_overlap = 0;
+       struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
+       struct rte_mbuf *m_in = dec->input.data;
+       struct rte_mbuf *m_harq_in = dec->harq_combined_input.data;
+       struct rte_mbuf *m_harq_out = dec->harq_combined_output.data;
+       struct rte_mbuf *m_harq_out_head = dec->harq_combined_output.data;
+       struct rte_mbuf *m_out = dec->hard_output.data;
+       struct rte_mbuf *m_out_head = dec->hard_output.data;
+       uint16_t in_offset = dec->input.offset;
+       uint16_t harq_in_offset = dec->harq_combined_input.offset;
+       uint16_t harq_out_offset = dec->harq_combined_output.offset;
+       uint16_t out_offset = dec->hard_output.offset;
+       uint32_t mbuf_total_left = dec->input.length;
+       uint16_t seg_total_left;
+
+       /* Clear op status */
+       op->status = 0;
+
+       if (m_in == NULL || m_out == NULL) {
+               rte_bbdev_log(ERR, "Invalid mbuf pointer");
+               op->status = 1 << RTE_BBDEV_DATA_ERROR;
+               return;
+       }
+
+       if (dec->code_block_mode == 0) { /* For Transport Block mode */
+               c = dec->tb_params.c;
+               e = dec->tb_params.ea;
+       } else { /* For Code Block mode */
+               c = 1;
+               e = dec->cb_params.e;
+       }
+
+       if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
+               crc24_overlap = 24;
+
+       /* K: 22 * z_c for base graph 1, 10 * z_c otherwise */
+       out_length = (dec->basegraph == 1 ? 22 : 10) * dec->z_c; /* K */
+       /* Remove dropped CRC and filler bits, then divide by 8 */
+       out_length = ((out_length - crc24_overlap - dec->n_filler) >> 3);
+
+       while (mbuf_total_left > 0) {
+               /*
+                * Both pointers become NULL once their chain is exhausted;
+                * stop here instead of dereferencing them below.
+                */
+               if (m_in == NULL || m_out == NULL)
+                       break;
+
+               /* In TB mode the first cab code blocks use ea, the rest eb */
+               if (dec->code_block_mode == 0)
+                       e = (r < dec->tb_params.cab) ?
+                               dec->tb_params.ea : dec->tb_params.eb;
+
+               /*
+                * e == 0 would never make progress, and e larger than the
+                * remaining length would wrap the unsigned counter below.
+                */
+               if (e == 0 || e > mbuf_total_left)
+                       break;
+
+               seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
+
+               process_ldpc_dec_cb(q, op, c, out_length, e,
+                               m_in, m_out_head, m_out,
+                               m_harq_in, m_harq_out_head, m_harq_out,
+                               in_offset, out_offset, harq_in_offset,
+                               harq_out_offset,
+                               check_bit(dec->op_flags,
+                               RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK),
+                               crc24_overlap,
+                               seg_total_left, queue_stats);
+
+               /* To keep CRC24 attached to end of Code block, use
+                * RTE_BBDEV_LDPC_DEC_TB_CRC_24B_KEEP flag as it is
+                * removed by default once verified.
+                */
+
+               mbuf_total_left -= e;
+
+               /* Update offsets */
+               if (seg_total_left == e) {
+                       /* Go to the next mbuf */
+                       m_in = m_in->next;
+                       m_out = m_out->next;
+                       if (m_harq_in != NULL)
+                               m_harq_in = m_harq_in->next;
+                       if (m_harq_out != NULL)
+                               m_harq_out = m_harq_out->next;
+                       in_offset = 0;
+                       out_offset = 0;
+                       harq_in_offset = 0;
+                       harq_out_offset = 0;
+               } else {
+                       /* Update offsets for next CBs (if exist) */
+                       in_offset += e;
+                       out_offset += out_length;
+                       /*
+                        * NOTE(review): the HARQ offsets are not advanced
+                        * on this path - confirm this is intended.
+                        */
+               }
+               r++;
+       }
+
        if (mbuf_total_left != 0) {
                op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                rte_bbdev_log(ERR,
@@ -1127,6 +1730,23 @@ enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops,
                        NULL);
 }
 
+/*
+ * Process nb_ops LDPC decode operations in order, then push the whole burst
+ * onto the queue's processed_pkts ring.
+ *
+ * Returns the value of rte_ring_enqueue_burst(), i.e. the number of ops the
+ * ring accepted. When RTE_BBDEV_OFFLOAD_COST is defined, acc_offload_cycles
+ * in queue_stats is zeroed before any op is processed.
+ */
+static inline uint16_t
+enqueue_ldpc_dec_all_ops(struct turbo_sw_queue *q,
+               struct rte_bbdev_dec_op **ops,
+               uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
+{
+       struct rte_bbdev_dec_op **next_op = ops;
+       uint16_t pending = nb_ops;
+
+#ifdef RTE_BBDEV_OFFLOAD_COST
+       queue_stats->acc_offload_cycles = 0;
+#endif
+
+       /* Every op is processed before any of them is offered to the ring */
+       while (pending != 0) {
+               enqueue_ldpc_dec_one_op(q, *next_op, queue_stats);
+               next_op++;
+               pending--;
+       }
+
+       return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops,
+                       nb_ops, NULL);
+}
+
 /* Enqueue burst */
 static uint16_t
 enqueue_enc_ops(struct rte_bbdev_queue_data *q_data,
@@ -1144,6 +1764,24 @@ enqueue_enc_ops(struct rte_bbdev_queue_data *q_data,
        return nb_enqueued;
 }
 
+/*
+ * Enqueue a burst of LDPC encode operations on a queue.
+ *
+ * Hands the burst to enqueue_ldpc_enc_all_ops() and updates the queue
+ * statistics: the ops that were taken are added to enqueued_count, the
+ * remainder of nb_ops to enqueue_err_count. Returns the number taken.
+ */
+static uint16_t
+enqueue_ldpc_enc_ops(struct rte_bbdev_queue_data *q_data,
+               struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
+{
+       struct rte_bbdev_stats *stats = &q_data->queue_stats;
+       struct turbo_sw_queue *q = q_data->queue_private;
+       uint16_t accepted;
+
+       accepted = enqueue_ldpc_enc_all_ops(q, ops, nb_ops, stats);
+
+       stats->enqueued_count += accepted;
+       stats->enqueue_err_count += nb_ops - accepted;
+
+       return accepted;
+}
+
 /* Enqueue burst */
 static uint16_t
 enqueue_dec_ops(struct rte_bbdev_queue_data *q_data,
@@ -1161,6 +1799,24 @@ enqueue_dec_ops(struct rte_bbdev_queue_data *q_data,
        return nb_enqueued;
 }
 
+/*
+ * Enqueue a burst of LDPC decode operations on a queue.
+ *
+ * Hands the burst to enqueue_ldpc_dec_all_ops() and updates the queue
+ * statistics: the ops that were taken are added to enqueued_count, the
+ * remainder of nb_ops to enqueue_err_count. Returns the number taken.
+ */
+static uint16_t
+enqueue_ldpc_dec_ops(struct rte_bbdev_queue_data *q_data,
+               struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
+{
+       struct rte_bbdev_stats *stats = &q_data->queue_stats;
+       struct turbo_sw_queue *q = q_data->queue_private;
+       uint16_t accepted;
+
+       accepted = enqueue_ldpc_dec_all_ops(q, ops, nb_ops, stats);
+
+       stats->enqueued_count += accepted;
+       stats->enqueue_err_count += nb_ops - accepted;
+
+       return accepted;
+}
+
 /* Dequeue decode burst */
 static uint16_t
 dequeue_dec_ops(struct rte_bbdev_queue_data *q_data,
@@ -1273,6 +1929,10 @@ turbo_sw_bbdev_create(struct rte_vdev_device *vdev,
        bbdev->dequeue_dec_ops = dequeue_dec_ops;
        bbdev->enqueue_enc_ops = enqueue_enc_ops;
        bbdev->enqueue_dec_ops = enqueue_dec_ops;
+       bbdev->dequeue_ldpc_enc_ops = dequeue_enc_ops;
+       bbdev->dequeue_ldpc_dec_ops = dequeue_dec_ops;
+       bbdev->enqueue_ldpc_enc_ops = enqueue_ldpc_enc_ops;
+       bbdev->enqueue_ldpc_dec_ops = enqueue_ldpc_dec_ops;
        ((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues =
                        init_params->queues_num;
 
index 438b5a7..33345aa 100644 (file)
@@ -23,6 +23,16 @@ if dpdk_conf.has('RTE_BBDEV_SDK_AVX2')
                includes += include_directories(path + '/lib_common')
        endif
 endif
+# With RTE_BBDEV_SDK_AVX512 set, require the four 5GNR SDK libraries. Each one
+# is looked up as 'lib<name>' in <path>/lib_<name>, and that same directory is
+# added to the include path.
+if dpdk_conf.has('RTE_BBDEV_SDK_AVX512')
+       foreach sdk_5gnr_lib: ['ldpc_encoder_5gnr', 'ldpc_decoder_5gnr',
+                       'LDPC_ratematch_5gnr', 'rate_dematching_5gnr']
+               sdk_5gnr_dir = path + '/lib_' + sdk_5gnr_lib
+               ext_deps += cc.find_library('lib' + sdk_5gnr_lib,
+                               dirs: [sdk_5gnr_dir], required: true)
+               includes += include_directories(sdk_5gnr_dir)
+       endforeach
+endif
 
 deps += ['bbdev', 'bus_vdev', 'ring']
 name = 'bbdev_turbo_sw'
index 8bda05a..a277c80 100644 (file)
@@ -236,7 +236,13 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_crc -lcr
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_turbo -lturbo
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_rate_matching -lrate_matching
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -L$(FLEXRAN_SDK)/lib_common -lcommon
-_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -lirc -limf -lstdc++ -lipps
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += -lirc -limf -lstdc++ -lipps -lsvml
+ifeq ($(CONFIG_RTE_BBDEV_SDK_AVX512),y)
+# Link the 5GNR SDK libraries; each one is found in $(FLEXRAN_SDK)/lib_<name>.
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += $(foreach sdklib,\
+    LDPC_ratematch_5gnr ldpc_encoder_5gnr \
+    ldpc_decoder_5gnr rate_dematching_5gnr,\
+    -L$(FLEXRAN_SDK)/lib_$(sdklib) -l$(sdklib))
+endif # CONFIG_RTE_BBDEV_SDK_AVX512
 endif # CONFIG_RTE_BBDEV_SDK_AVX2
 endif # CONFIG_RTE_LIBRTE_BBDEV