bbdev: support bigger transport block
author Kamil Chalupnik <kamilx.chalupnik@intel.com>
Fri, 7 Dec 2018 15:15:33 +0000 (16:15 +0100)
committer Akhil Goyal <akhil.goyal@nxp.com>
Wed, 19 Dec 2018 10:19:10 +0000 (11:19 +0100)
Test application and Turbo Software driver were adapted
to support chained-mbuf for bigger TB sizes.

Signed-off-by: Kamil Chalupnik <kamilx.chalupnik@intel.com>
Acked-by: Amr Mokhtar <amr.mokhtar@intel.com>
app/test-bbdev/test_bbdev_perf.c
drivers/baseband/turbo_sw/bbdev_turbo_software.c

index a25e3a7..5bec70d 100644 (file)
@@ -113,6 +113,17 @@ struct test_time_stats {
 typedef int (test_case_function)(struct active_device *ad,
                struct test_op_params *op_params);
 
+static inline void /* Zero the recorded lengths of an mbuf and of every segment linked after it. */
+mbuf_reset(struct rte_mbuf *m) /* m: first mbuf of the chain; dereferenced unconditionally, so it must not be NULL */
+{
+       m->pkt_len = 0; /* pkt_len is cleared only on the mbuf passed in */
+
+       do { /* do/while: the first mbuf is always processed before the NULL check */
+               m->data_len = 0; /* per-segment length; no other field of the segment is written */
+               m = m->next; /* the next links are only followed, never modified */
+       } while (m != NULL);
+}
+
 static inline void
 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
 {
@@ -573,6 +584,10 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs,
                                op_type, n * ref_entries->nb_segments,
                                mbuf_pool->size);
 
+               TEST_ASSERT_SUCCESS(((seg->length + RTE_PKTMBUF_HEADROOM) >
+                               (uint32_t)UINT16_MAX),
+                               "Given data is bigger than allowed mbuf segment size");
+
                bufs[i].data = m_head;
                bufs[i].offset = 0;
                bufs[i].length = 0;
@@ -589,7 +604,6 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs,
                        rte_memcpy(data, seg->addr, seg->length);
                        bufs[i].length += seg->length;
 
-
                        for (j = 1; j < ref_entries->nb_segments; ++j) {
                                struct rte_mbuf *m_tail =
                                                rte_pktmbuf_alloc(mbuf_pool);
@@ -617,6 +631,24 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs,
                                                "Couldn't chain mbufs from %d data type mbuf pool",
                                                op_type);
                        }
+
+               } else {
+
+                       /* allocate chained-mbuf for output buffer */
+                       for (j = 1; j < ref_entries->nb_segments; ++j) {
+                               struct rte_mbuf *m_tail =
+                                               rte_pktmbuf_alloc(mbuf_pool);
+                               TEST_ASSERT_NOT_NULL(m_tail,
+                                               "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
+                                               op_type,
+                                               n * ref_entries->nb_segments,
+                                               mbuf_pool->size);
+
+                               ret = rte_pktmbuf_chain(m_head, m_tail);
+                               TEST_ASSERT_SUCCESS(ret,
+                                               "Couldn't chain mbufs from %d data type mbuf pool",
+                                               op_type);
+                       }
                }
        }
 
@@ -655,7 +687,7 @@ limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
                while (m != NULL) {
                        int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
                                        input_ops[i].offset);
-                       for (byte_idx = 0; byte_idx < input_ops[i].length;
+                       for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
                                        ++byte_idx)
                                llr[byte_idx] = round((double)max_llr_modulus *
                                                llr[byte_idx] / INT8_MAX);
@@ -864,15 +896,18 @@ validate_op_chain(struct rte_bbdev_op_data *op,
        uint8_t i;
        struct rte_mbuf *m = op->data;
        uint8_t nb_dst_segments = orig_op->nb_segments;
+       uint32_t total_data_size = 0;
 
        TEST_ASSERT(nb_dst_segments == m->nb_segs,
                        "Number of segments differ in original (%u) and filled (%u) op",
                        nb_dst_segments, m->nb_segs);
 
+       /* Validate each mbuf segment length */
        for (i = 0; i < nb_dst_segments; ++i) {
                /* Apply offset to the first mbuf segment */
                uint16_t offset = (i == 0) ? op->offset : 0;
-               uint16_t data_len = m->data_len - offset;
+               uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
+               total_data_size += orig_op->segments[i].length;
 
                TEST_ASSERT(orig_op->segments[i].length == data_len,
                                "Length of segment differ in original (%u) and filled (%u) op",
@@ -884,6 +919,12 @@ validate_op_chain(struct rte_bbdev_op_data *op,
                m = m->next;
        }
 
+       /* Validate total mbuf pkt length */
+       uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
+       TEST_ASSERT(total_data_size == pkt_len,
+                       "Length of data differ in original (%u) and filled (%u) op",
+                       total_data_size, pkt_len);
+
        return TEST_SUCCESS;
 }
 
@@ -1427,10 +1468,8 @@ throughput_pmd_lcore_dec(void *arg)
 
        for (i = 0; i < TEST_REPETITIONS; ++i) {
 
-               for (j = 0; j < num_ops; ++j) {
-                       struct rte_bbdev_dec_op *op = ops_enq[j];
-                       rte_pktmbuf_reset(op->turbo_dec.hard_output.data);
-               }
+               for (j = 0; j < num_ops; ++j)
+                       mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data);
 
                start_time = rte_rdtsc_precise();
 
@@ -1529,8 +1568,7 @@ throughput_pmd_lcore_enc(void *arg)
 
                if (test_vector.op_type != RTE_BBDEV_OP_NONE)
                        for (j = 0; j < num_ops; ++j)
-                               rte_pktmbuf_reset(
-                                       ops_enq[j]->turbo_enc.output.data);
+                               mbuf_reset(ops_enq[j]->turbo_enc.output.data);
 
                start_time = rte_rdtsc_precise();
 
@@ -2025,7 +2063,7 @@ offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
                time_st->enq_acc_total_time += stats.acc_offload_cycles;
 
                /* ensure enqueue has been completed */
-               rte_delay_ms(10);
+               rte_delay_us(200);
 
                /* Start time meas for dequeue function offload latency */
                deq_start_time = rte_rdtsc_precise();
@@ -2106,7 +2144,7 @@ offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
                time_st->enq_acc_total_time += stats.acc_offload_cycles;
 
                /* ensure enqueue has been completed */
-               rte_delay_ms(10);
+               rte_delay_us(200);
 
                /* Start time meas for dequeue function offload latency */
                deq_start_time = rte_rdtsc_precise();
index 57f6ba1..19fbb55 100644 (file)
@@ -83,6 +83,18 @@ struct turbo_sw_queue {
        enum rte_bbdev_op_type type;
 } __rte_cache_aligned;
 
+static inline char * /* Reserve len bytes at the end of segment m; returns the start of the reserved area, or NULL. */
+mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len) /* m_head: mbuf whose pkt_len is updated; m: segment that grows */
+{
+       if (unlikely(len > rte_pktmbuf_tailroom(m))) /* request exceeds the tailroom reported for m */
+               return NULL; /* neither m nor m_head has been modified at this point */
+
+       char *tail = (char *)m->buf_addr + m->data_off + m->data_len; /* first byte past m's current data */
+       m->data_len = (uint16_t)(m->data_len + len); /* grow only the segment being written */
+       m_head->pkt_len  = (m_head->pkt_len + len); /* length accounted on m_head; callers may pass the same mbuf for both */
+       return tail;
+}
+
 /* Calculate index based on Table 5.1.3-3 from TS34.212 */
 static inline int32_t
 compute_idx(uint16_t k)
@@ -437,7 +449,7 @@ is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length)
                return -1;
        }
 
-       if (in_length - kw < 0) {
+       if (in_length < kw) {
                rte_bbdev_log(ERR,
                                "Mismatch between input length (%u) and kw (%u)",
                                in_length, kw);
@@ -456,9 +468,9 @@ is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length)
 static inline void
 process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
                uint8_t r, uint8_t c, uint16_t k, uint16_t ncb,
-               uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out,
-               uint16_t in_offset, uint16_t out_offset, uint16_t total_left,
-               struct rte_bbdev_stats *q_stats)
+               uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
+               struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
+               uint16_t in_length, struct rte_bbdev_stats *q_stats)
 {
        int ret;
        int16_t k_idx;
@@ -484,7 +496,7 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
        /* CRC24A (for TB) */
        if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) &&
                (enc->code_block_mode == 1)) {
-               ret = is_enc_input_valid(k - 24, k_idx, total_left);
+               ret = is_enc_input_valid(k - 24, k_idx, in_length);
                if (ret != 0) {
                        op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                        return;
@@ -494,7 +506,7 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
                /* Check if there is a room for CRC bits if not use
                 * the temporary buffer.
                 */
-               if (rte_pktmbuf_append(m_in, 3) == NULL) {
+               if (mbuf_append(m_in, m_in, 3) == NULL) {
                        rte_memcpy(q->enc_in, in, (k - 24) >> 3);
                        in = q->enc_in;
                } else {
@@ -517,7 +529,7 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
 #endif
        } else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
                /* CRC24B */
-               ret = is_enc_input_valid(k - 24, k_idx, total_left);
+               ret = is_enc_input_valid(k - 24, k_idx, in_length);
                if (ret != 0) {
                        op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                        return;
@@ -527,7 +539,7 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
                /* Check if there is a room for CRC bits if this is the last
                 * CB in TB. If not use temporary buffer.
                 */
-               if ((c - r == 1) && (rte_pktmbuf_append(m_in, 3) == NULL)) {
+               if ((c - r == 1) && (mbuf_append(m_in, m_in, 3) == NULL)) {
                        rte_memcpy(q->enc_in, in, (k - 24) >> 3);
                        in = q->enc_in;
                } else if (c - r > 1) {
@@ -549,7 +561,7 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
                q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
 #endif
        } else {
-               ret = is_enc_input_valid(k, k_idx, total_left);
+               ret = is_enc_input_valid(k, k_idx, in_length);
                if (ret != 0) {
                        op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                        return;
@@ -570,7 +582,8 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
                out1 = RTE_PTR_ADD(out0, (k >> 3) + 1);
                out2 = RTE_PTR_ADD(out1, (k >> 3) + 1);
        } else {
-               out0 = (uint8_t *)rte_pktmbuf_append(m_out, (k >> 3) * 3 + 2);
+               out0 = (uint8_t *)mbuf_append(m_out_head, m_out,
+                               (k >> 3) * 3 + 2);
                if (out0 == NULL) {
                        op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                        rte_bbdev_log(ERR,
@@ -623,7 +636,7 @@ process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
                const uint8_t mask_out[] = {0xFF, 0xC0, 0xF0, 0xFC};
 
                /* get output data starting address */
-               rm_out = (uint8_t *)rte_pktmbuf_append(m_out, out_len);
+               rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
                if (rm_out == NULL) {
                        op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                        rte_bbdev_log(ERR,
@@ -725,14 +738,16 @@ enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
        uint16_t out_offset = enc->output.offset;
        struct rte_mbuf *m_in = enc->input.data;
        struct rte_mbuf *m_out = enc->output.data;
-       uint16_t total_left = enc->input.length;
+       struct rte_mbuf *m_out_head = enc->output.data;
+       uint32_t in_length, mbuf_total_left = enc->input.length;
+       uint16_t seg_total_left;
 
        /* Clear op status */
        op->status = 0;
 
-       if (total_left > RTE_BBDEV_MAX_TB_SIZE >> 3) {
+       if (mbuf_total_left > RTE_BBDEV_MAX_TB_SIZE >> 3) {
                rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d",
-                               total_left, RTE_BBDEV_MAX_TB_SIZE);
+                               mbuf_total_left, RTE_BBDEV_MAX_TB_SIZE);
                op->status = 1 << RTE_BBDEV_DATA_ERROR;
                return;
        }
@@ -755,7 +770,10 @@ enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
                r = 0;
        }
 
-       while (total_left > 0 && r < c) {
+       while (mbuf_total_left > 0 && r < c) {
+
+               seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
+
                if (enc->code_block_mode == 0) {
                        k = (r < enc->tb_params.c_neg) ?
                                enc->tb_params.k_neg : enc->tb_params.k_pos;
@@ -769,22 +787,32 @@ enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
                        e = enc->cb_params.e;
                }
 
-               process_enc_cb(q, op, r, c, k, ncb, e, m_in,
-                               m_out, in_offset, out_offset, total_left,
+               process_enc_cb(q, op, r, c, k, ncb, e, m_in, m_out_head,
+                               m_out, in_offset, out_offset, seg_total_left,
                                queue_stats);
                /* Update total_left */
-               total_left -= (k - crc24_bits) >> 3;
+               in_length = ((k - crc24_bits) >> 3);
+               mbuf_total_left -= in_length;
                /* Update offsets for next CBs (if exist) */
                in_offset += (k - crc24_bits) >> 3;
                if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH)
                        out_offset += e >> 3;
                else
                        out_offset += (k >> 3) * 3 + 2;
+
+               /* Update offsets */
+               if (seg_total_left == in_length) {
+                       /* Go to the next mbuf */
+                       m_in = m_in->next;
+                       m_out = m_out->next;
+                       in_offset = 0;
+                       out_offset = 0;
+               }
                r++;
        }
 
        /* check if all input data was processed */
-       if (total_left != 0) {
+       if (mbuf_total_left != 0) {
                op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                rte_bbdev_log(ERR,
                                "Mismatch between mbuf length and included CBs sizes");
@@ -903,8 +931,9 @@ move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k,
 static inline void
 process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
                uint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in,
-               struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
-               bool check_crc_24b, uint16_t crc24_overlap, uint16_t total_left,
+               struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
+               uint16_t in_offset, uint16_t out_offset, bool check_crc_24b,
+               uint16_t crc24_overlap, uint16_t in_length,
                struct rte_bbdev_stats *q_stats)
 {
        int ret;
@@ -925,7 +954,7 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
 
        k_idx = compute_idx(k);
 
-       ret = is_dec_input_valid(k_idx, kw, total_left);
+       ret = is_dec_input_valid(k_idx, kw, in_length);
        if (ret != 0) {
                op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                return;
@@ -983,7 +1012,8 @@ process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
        q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
 #endif
 
-       out = (uint8_t *)rte_pktmbuf_append(m_out, ((k - crc24_overlap) >> 3));
+       out = (uint8_t *)mbuf_append(m_out_head, m_out,
+                       ((k - crc24_overlap) >> 3));
        if (out == NULL) {
                op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                rte_bbdev_log(ERR, "Too little space in output mbuf");
@@ -1038,9 +1068,11 @@ enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
        struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
        struct rte_mbuf *m_in = dec->input.data;
        struct rte_mbuf *m_out = dec->hard_output.data;
+       struct rte_mbuf *m_out_head = dec->hard_output.data;
        uint16_t in_offset = dec->input.offset;
-       uint16_t total_left = dec->input.length;
        uint16_t out_offset = dec->hard_output.offset;
+       uint32_t mbuf_total_left = dec->input.length;
+       uint16_t seg_total_left;
 
        /* Clear op status */
        op->status = 0;
@@ -1062,11 +1094,13 @@ enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
                RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
                crc24_overlap = 24;
 
-       while (total_left > 0) {
+       while (mbuf_total_left > 0) {
                if (dec->code_block_mode == 0)
                        k = (r < dec->tb_params.c_neg) ?
                                dec->tb_params.k_neg : dec->tb_params.k_pos;
 
+               seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
+
                /* Calculates circular buffer size (Kw).
                 * According to 3gpp 36.212 section 5.1.4.2
                 *   Kw = 3 * Kpi,
@@ -1079,23 +1113,32 @@ enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
                 */
                kw = RTE_ALIGN_CEIL(k + 4, RTE_BBDEV_C_SUBBLOCK) * 3;
 
-               process_dec_cb(q, op, c, k, kw, m_in, m_out, in_offset,
-                               out_offset, check_bit(dec->op_flags,
+               process_dec_cb(q, op, c, k, kw, m_in, m_out_head, m_out,
+                               in_offset, out_offset, check_bit(dec->op_flags,
                                RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap,
-                               total_left, queue_stats);
+                               seg_total_left, queue_stats);
                /* To keep CRC24 attached to end of Code block, use
                 * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it
                 * removed by default once verified.
                 */
 
-               /* Update total_left */
-               total_left -= kw;
-               /* Update offsets for next CBs (if exist) */
-               in_offset += kw;
-               out_offset += ((k - crc24_overlap) >> 3);
+               mbuf_total_left -= kw;
+
+               /* Update offsets */
+               if (seg_total_left == kw) {
+                       /* Go to the next mbuf */
+                       m_in = m_in->next;
+                       m_out = m_out->next;
+                       in_offset = 0;
+                       out_offset = 0;
+               } else {
+                       /* Update offsets for next CBs (if exist) */
+                       in_offset += kw;
+                       out_offset += ((k - crc24_overlap) >> 3);
+               }
                r++;
        }
-       if (total_left != 0) {
+       if (mbuf_total_left != 0) {
                op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                rte_bbdev_log(ERR,
                                "Mismatch between mbuf length and included Circular buffer sizes");