1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2017 Intel Corporation
7 #include <rte_common.h>
8 #include <rte_bus_vdev.h>
9 #include <rte_malloc.h>
11 #include <rte_kvargs.h>
12 #include <rte_cycles.h>
14 #include <rte_bbdev.h>
15 #include <rte_bbdev_pmd.h>
17 #ifdef RTE_BBDEV_SDK_AVX2
18 #include <phy_turbo.h>
20 #include <phy_rate_match.h>
23 #define DRIVER_NAME baseband_turbo_sw
25 /* Turbo SW PMD logging ID */
26 static int bbdev_turbo_sw_logtype;
28 /* Helper macro for logging */
29 #define rte_bbdev_log(level, fmt, ...) \
30 rte_log(RTE_LOG_ ## level, bbdev_turbo_sw_logtype, fmt "\n", \
33 #define rte_bbdev_log_debug(fmt, ...) \
34 rte_bbdev_log(DEBUG, RTE_STR(__LINE__) ":%s() " fmt, __func__, \
37 #define DEINT_INPUT_BUF_SIZE (((RTE_BBDEV_TURBO_MAX_CB_SIZE >> 3) + 1) * 48)
38 #define DEINT_OUTPUT_BUF_SIZE (DEINT_INPUT_BUF_SIZE * 6)
39 #define ADAPTER_OUTPUT_BUF_SIZE ((RTE_BBDEV_TURBO_MAX_CB_SIZE + 4) * 48)
41 /* Per-device private data; an instance is stored in
 * bbdev->data->dev_private when the device is created.
 */
42 struct bbdev_private {
43 unsigned int max_nb_queues; /**< Max number of queues */
46 /* Initialisation parameters of a Turbo SW device. The probe routine fills
 * them with defaults and parse_turbo_sw_params() may override them from the
 * vdev argument string.
 */
47 struct turbo_sw_params {
48 int socket_id; /**< Turbo SW device socket */
49 uint16_t queues_num; /**< Turbo SW device queues number */
52 /* Acceptable params for Turbo SW devices */
53 #define TURBO_SW_MAX_NB_QUEUES_ARG "max_nb_queues"
54 #define TURBO_SW_SOCKET_ID_ARG "socket_id"
/* Keys handed to rte_kvargs_parse(). parse_turbo_sw_params() indexes this
 * array positionally: [0] is the queue-count key, [1] the socket-id key.
 */
56 static const char * const turbo_sw_valid_params[] = {
57 TURBO_SW_MAX_NB_QUEUES_ARG,
58 TURBO_SW_SOCKET_ID_ARG
/* Per-queue state: the ring of finished operations plus the scratch buffers
 * that q_setup() allocates and q_release() frees.
 * NOTE(review): several member declarations are not visible in this listing.
 */
62 struct turbo_sw_queue {
63 /* Ring for processed (encoded/decoded) operations which are ready to
66 struct rte_ring *processed_pkts;
67 /* Stores input for turbo encoder (used when CRC attachment is
71 /* Stores output from turbo encoder */
73 /* Alpha gamma buf for bblib_turbo_decoder() function */
75 /* Temp buf for bblib_turbo_decoder() function */
77 /* Input buf for bblib_rate_dematching_lte() function */
79 /* Deinterleaver output; handed to bblib_deinterleave_ul() as its response buffer */
80 uint8_t *deint_output;
81 /* Adapter output; handed to bblib_turbo_adapter_ul() and then used as decoder input */
82 uint8_t *adapter_output;
83 /* Operation type of this queue */
84 enum rte_bbdev_op_type type;
85 } __rte_cache_aligned;
87 #ifdef RTE_BBDEV_SDK_AVX2
89 mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
91 if (unlikely(len > rte_pktmbuf_tailroom(m)))
94 char *tail = (char *)m->buf_addr + m->data_off + m->data_len;
95 m->data_len = (uint16_t)(m->data_len + len);
96 m_head->pkt_len = (m_head->pkt_len + len);
100 /* Calculate index based on Table 5.1.3-3 from TS34.212 */
101 static inline int32_t
102 compute_idx(uint16_t k)
106 if (k < RTE_BBDEV_TURBO_MIN_CB_SIZE || k > RTE_BBDEV_TURBO_MAX_CB_SIZE)
110 if ((k - 2048) % 64 != 0)
113 result = 124 + (k - 2048) / 64;
114 } else if (k <= 512) {
115 if ((k - 40) % 8 != 0)
118 result = (k - 40) / 8 + 1;
119 } else if (k <= 1024) {
120 if ((k - 512) % 16 != 0)
123 result = 60 + (k - 512) / 16;
124 } else { /* 1024 < k <= 2048 */
125 if ((k - 1024) % 32 != 0)
128 result = 92 + (k - 1024) / 32;
/* Tell whether any bit selected by bitmask is set in bitmap */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
	const uint32_t selected = bitmap & bitmask;

	return selected != 0;
}
142 /* Get device info: fills *dev_info with the driver name, queue limits,
 * CPU flag requirement, default queue configuration and capability table.
 * NOTE(review): parts of the capability table are not visible in this
 * listing.
 */
144 info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
146 struct bbdev_private *internals = dev->data->dev_private;
/* Capability entries are only compiled in when the SDK is available;
 * the list terminator is always present.
 */
148 static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
149 #ifdef RTE_BBDEV_SDK_AVX2
151 .type = RTE_BBDEV_OP_TURBO_DEC,
154 RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
155 RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN |
156 RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
157 RTE_BBDEV_TURBO_CRC_TYPE_24B |
158 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
159 RTE_BBDEV_TURBO_EARLY_TERMINATION,
160 .max_llr_modulus = 16,
162 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
163 .num_buffers_hard_out =
164 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
165 .num_buffers_soft_out = 0,
169 .type = RTE_BBDEV_OP_TURBO_ENC,
172 RTE_BBDEV_TURBO_CRC_24B_ATTACH |
173 RTE_BBDEV_TURBO_CRC_24A_ATTACH |
174 RTE_BBDEV_TURBO_RATE_MATCH |
175 RTE_BBDEV_TURBO_RV_INDEX_BYPASS,
177 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
179 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
183 RTE_BBDEV_END_OF_CAPABILITIES_LIST()
/* Not const: its socket member is overwritten from the device on every
 * call before the structure is copied into dev_info.
 */
186 static struct rte_bbdev_queue_conf default_queue_conf = {
187 .queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT,
190 #ifdef RTE_BBDEV_SDK_AVX2
191 static const enum rte_cpu_flag_t cpu_flag = RTE_CPUFLAG_SSE4_2;
192 dev_info->cpu_flag_reqs = &cpu_flag;
194 dev_info->cpu_flag_reqs = NULL;
197 default_queue_conf.socket = dev->data->socket_id;
199 dev_info->driver_name = RTE_STR(DRIVER_NAME);
200 dev_info->max_num_queues = internals->max_nb_queues;
201 dev_info->queue_size_lim = RTE_BBDEV_QUEUE_SIZE_LIMIT;
202 dev_info->hardware_accelerated = false;
203 dev_info->max_dl_queue_priority = 0;
204 dev_info->max_ul_queue_priority = 0;
205 dev_info->default_queue_conf = default_queue_conf;
206 dev_info->capabilities = bbdev_capabilities;
207 dev_info->min_alignment = 64;
/* NOTE(review): rte_bbdev_log() already appends "\n" to the format, so the
 * explicit "\n" below produces an empty line in the log.
 */
209 rte_bbdev_log_debug("got device info from %u\n", dev->data->dev_id);
214 q_release(struct rte_bbdev *dev, uint16_t q_id)
216 struct turbo_sw_queue *q = dev->data->queues[q_id].queue_private;
219 rte_ring_free(q->processed_pkts);
220 rte_free(q->enc_out);
223 rte_free(q->code_block);
224 rte_free(q->deint_input);
225 rte_free(q->deint_output);
226 rte_free(q->adapter_output);
228 dev->data->queues[q_id].queue_private = NULL;
231 rte_bbdev_log_debug("released device queue %u:%u",
232 dev->data->dev_id, q_id);
238 q_setup(struct rte_bbdev *dev, uint16_t q_id,
239 const struct rte_bbdev_queue_conf *queue_conf)
242 struct turbo_sw_queue *q;
243 char name[RTE_RING_NAMESIZE];
245 /* Allocate the queue data structure. */
246 q = rte_zmalloc_socket(RTE_STR(DRIVER_NAME), sizeof(*q),
247 RTE_CACHE_LINE_SIZE, queue_conf->socket);
249 rte_bbdev_log(ERR, "Failed to allocate queue memory");
253 /* Allocate memory for encoder output. */
254 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_enc_o%u:%u",
255 dev->data->dev_id, q_id);
256 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
258 "Creating queue name for device %u queue %u failed",
259 dev->data->dev_id, q_id);
260 return -ENAMETOOLONG;
262 q->enc_out = rte_zmalloc_socket(name,
263 ((RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) + 3) *
264 sizeof(*q->enc_out) * 3,
265 RTE_CACHE_LINE_SIZE, queue_conf->socket);
266 if (q->enc_out == NULL) {
268 "Failed to allocate queue memory for %s", name);
272 /* Allocate memory for rate matching output. */
273 ret = snprintf(name, RTE_RING_NAMESIZE,
274 RTE_STR(DRIVER_NAME)"_enc_i%u:%u", dev->data->dev_id,
276 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
278 "Creating queue name for device %u queue %u failed",
279 dev->data->dev_id, q_id);
280 return -ENAMETOOLONG;
282 q->enc_in = rte_zmalloc_socket(name,
283 (RTE_BBDEV_TURBO_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in),
284 RTE_CACHE_LINE_SIZE, queue_conf->socket);
285 if (q->enc_in == NULL) {
287 "Failed to allocate queue memory for %s", name);
291 /* Allocate memory for Aplha Gamma temp buffer. */
292 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_ag%u:%u",
293 dev->data->dev_id, q_id);
294 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
296 "Creating queue name for device %u queue %u failed",
297 dev->data->dev_id, q_id);
298 return -ENAMETOOLONG;
300 q->ag = rte_zmalloc_socket(name,
301 RTE_BBDEV_TURBO_MAX_CB_SIZE * 10 * sizeof(*q->ag),
302 RTE_CACHE_LINE_SIZE, queue_conf->socket);
305 "Failed to allocate queue memory for %s", name);
309 /* Allocate memory for code block temp buffer. */
310 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_cb%u:%u",
311 dev->data->dev_id, q_id);
312 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
314 "Creating queue name for device %u queue %u failed",
315 dev->data->dev_id, q_id);
316 return -ENAMETOOLONG;
318 q->code_block = rte_zmalloc_socket(name,
319 RTE_BBDEV_TURBO_MAX_CB_SIZE * sizeof(*q->code_block),
320 RTE_CACHE_LINE_SIZE, queue_conf->socket);
321 if (q->code_block == NULL) {
323 "Failed to allocate queue memory for %s", name);
327 /* Allocate memory for Deinterleaver input. */
328 ret = snprintf(name, RTE_RING_NAMESIZE,
329 RTE_STR(DRIVER_NAME)"_de_i%u:%u",
330 dev->data->dev_id, q_id);
331 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
333 "Creating queue name for device %u queue %u failed",
334 dev->data->dev_id, q_id);
335 return -ENAMETOOLONG;
337 q->deint_input = rte_zmalloc_socket(name,
338 DEINT_INPUT_BUF_SIZE * sizeof(*q->deint_input),
339 RTE_CACHE_LINE_SIZE, queue_conf->socket);
340 if (q->deint_input == NULL) {
342 "Failed to allocate queue memory for %s", name);
346 /* Allocate memory for Deinterleaver output. */
347 ret = snprintf(name, RTE_RING_NAMESIZE,
348 RTE_STR(DRIVER_NAME)"_de_o%u:%u",
349 dev->data->dev_id, q_id);
350 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
352 "Creating queue name for device %u queue %u failed",
353 dev->data->dev_id, q_id);
354 return -ENAMETOOLONG;
356 q->deint_output = rte_zmalloc_socket(NULL,
357 DEINT_OUTPUT_BUF_SIZE * sizeof(*q->deint_output),
358 RTE_CACHE_LINE_SIZE, queue_conf->socket);
359 if (q->deint_output == NULL) {
361 "Failed to allocate queue memory for %s", name);
365 /* Allocate memory for Adapter output. */
366 ret = snprintf(name, RTE_RING_NAMESIZE,
367 RTE_STR(DRIVER_NAME)"_ada_o%u:%u",
368 dev->data->dev_id, q_id);
369 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
371 "Creating queue name for device %u queue %u failed",
372 dev->data->dev_id, q_id);
373 return -ENAMETOOLONG;
375 q->adapter_output = rte_zmalloc_socket(NULL,
376 ADAPTER_OUTPUT_BUF_SIZE * sizeof(*q->adapter_output),
377 RTE_CACHE_LINE_SIZE, queue_conf->socket);
378 if (q->adapter_output == NULL) {
380 "Failed to allocate queue memory for %s", name);
384 /* Create ring for packets awaiting to be dequeued. */
385 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"%u:%u",
386 dev->data->dev_id, q_id);
387 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
389 "Creating queue name for device %u queue %u failed",
390 dev->data->dev_id, q_id);
391 return -ENAMETOOLONG;
393 q->processed_pkts = rte_ring_create(name, queue_conf->queue_size,
394 queue_conf->socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
395 if (q->processed_pkts == NULL) {
396 rte_bbdev_log(ERR, "Failed to create ring for %s", name);
400 q->type = queue_conf->op_type;
402 dev->data->queues[q_id].queue_private = q;
403 rte_bbdev_log_debug("setup device queue %s", name);
407 rte_ring_free(q->processed_pkts);
408 rte_free(q->enc_out);
411 rte_free(q->code_block);
412 rte_free(q->deint_input);
413 rte_free(q->deint_output);
414 rte_free(q->adapter_output);
/* Control-path callbacks of this driver; installed as bbdev->dev_ops when
 * the device is created.
 */
419 static const struct rte_bbdev_ops pmd_ops = {
420 .info_get = info_get,
421 .queue_setup = q_setup,
422 .queue_release = q_release
425 #ifdef RTE_BBDEV_SDK_AVX2
426 /* Checks if the encoder input buffer is correct.
427 * Returns 0 if it's valid, -1 otherwise.
430 is_enc_input_valid(const uint16_t k, const int32_t k_idx,
431 const uint16_t in_length)
434 rte_bbdev_log(ERR, "K Index is invalid");
438 if (in_length - (k >> 3) < 0) {
440 "Mismatch between input length (%u bytes) and K (%u bits)",
445 if (k > RTE_BBDEV_TURBO_MAX_CB_SIZE) {
446 rte_bbdev_log(ERR, "CB size (%u) is too big, max: %d",
447 k, RTE_BBDEV_TURBO_MAX_CB_SIZE);
454 /* Checks if the decoder input buffer is correct.
455 * Returns 0 if it's valid, -1 otherwise.
458 is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length)
461 rte_bbdev_log(ERR, "K index is invalid");
465 if (in_length < kw) {
467 "Mismatch between input length (%u) and kw (%u)",
472 if (kw > RTE_BBDEV_TURBO_MAX_KW) {
473 rte_bbdev_log(ERR, "Input length (%u) is too big, max: %d",
474 kw, RTE_BBDEV_TURBO_MAX_KW);
/* Encode one code block (index r of c) of an encode operation.
 * Visible steps: compute the K index, optionally attach CRC24A (TB CRC) or
 * CRC24B to the input, run bblib_turbo_encoder(), then either rate-match
 * with bblib_rate_match_dl() into the output mbuf or, when rate matching is
 * bypassed, repack the out1/out2 streams by 4 and 8 bits as the inline
 * comments describe. Failures are reported by OR-ing RTE_BBDEV_DATA_ERROR
 * or RTE_BBDEV_DRV_ERROR bits into op->status.
 * The RTE_SET_USED() lines at the end presumably belong to the build branch
 * without RTE_BBDEV_SDK_AVX2 - the preprocessor lines are not visible here.
 */
483 process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
484 uint8_t r, uint8_t c, uint16_t k, uint16_t ncb,
485 uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
486 struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
487 uint16_t in_length, struct rte_bbdev_stats *q_stats)
489 #ifdef RTE_BBDEV_SDK_AVX2
493 uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out;
494 uint64_t first_3_bytes = 0;
495 struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
496 struct bblib_crc_request crc_req;
497 struct bblib_crc_response crc_resp;
498 struct bblib_turbo_encoder_request turbo_req;
499 struct bblib_turbo_encoder_response turbo_resp;
500 struct bblib_rate_match_dl_request rm_req;
501 struct bblib_rate_match_dl_response rm_resp;
502 #ifdef RTE_BBDEV_OFFLOAD_COST
505 RTE_SET_USED(q_stats);
508 k_idx = compute_idx(k);
509 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
511 /* CRC24A (for TB) */
512 if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) &&
513 (enc->code_block_mode == 1)) {
514 ret = is_enc_input_valid(k - 24, k_idx, in_length);
516 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
520 crc_req.len = k - 24;
521 /* Check if there is a room for CRC bits if not use
522 * the temporary buffer.
524 if (mbuf_append(m_in, m_in, 3) == NULL) {
525 rte_memcpy(q->enc_in, in, (k - 24) >> 3);
528 /* Store 3 first bytes of next CB as they will be
529 * overwritten by CRC bytes. If it is the last CB then
530 * there is no point to store 3 next bytes and this
531 * if..else branch will be omitted.
533 first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
537 #ifdef RTE_BBDEV_OFFLOAD_COST
538 start_time = rte_rdtsc_precise();
540 /* CRC24A generation */
541 bblib_lte_crc24a_gen(&crc_req, &crc_resp);
542 #ifdef RTE_BBDEV_OFFLOAD_COST
543 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
545 } else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
547 ret = is_enc_input_valid(k - 24, k_idx, in_length);
549 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
553 crc_req.len = k - 24;
554 /* Check if there is a room for CRC bits if this is the last
555 * CB in TB. If not use temporary buffer.
557 if ((c - r == 1) && (mbuf_append(m_in, m_in, 3) == NULL)) {
558 rte_memcpy(q->enc_in, in, (k - 24) >> 3);
560 } else if (c - r > 1) {
561 /* Store 3 first bytes of next CB as they will be
562 * overwritten by CRC bytes. If it is the last CB then
563 * there is no point to store 3 next bytes and this
564 * if..else branch will be omitted.
566 first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
570 #ifdef RTE_BBDEV_OFFLOAD_COST
571 start_time = rte_rdtsc_precise();
573 /* CRC24B generation */
574 bblib_lte_crc24b_gen(&crc_req, &crc_resp);
575 #ifdef RTE_BBDEV_OFFLOAD_COST
576 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
579 ret = is_enc_input_valid(k, k_idx, in_length);
581 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
588 /* Each bit layer output from turbo encoder is (k+4) bits long, i.e.
589 * input length + 4 tail bits. That's (k/8) + 1 bytes after rounding up.
590 * So dst_data's length should be 3*(k/8) + 3 bytes.
591 * In Rate-matching bypass case outputs pointers passed to encoder
592 * (out0, out1 and out2) can directly point to addresses of output from
595 if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
597 out1 = RTE_PTR_ADD(out0, (k >> 3) + 1);
598 out2 = RTE_PTR_ADD(out1, (k >> 3) + 1);
600 out0 = (uint8_t *)mbuf_append(m_out_head, m_out,
603 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
605 "Too little space in output mbuf");
608 enc->output.length += (k >> 3) * 3 + 2;
609 /* rte_bbdev_op_data.offset can be different than the
610 * offset of the appended bytes
612 out0 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
613 out1 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
614 out_offset + (k >> 3) + 1);
615 out2 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
616 out_offset + 2 * ((k >> 3) + 1));
619 turbo_req.case_id = k_idx;
620 turbo_req.input_win = in;
621 turbo_req.length = k >> 3;
622 turbo_resp.output_win_0 = out0;
623 turbo_resp.output_win_1 = out1;
624 turbo_resp.output_win_2 = out2;
626 #ifdef RTE_BBDEV_OFFLOAD_COST
627 start_time = rte_rdtsc_precise();
630 if (bblib_turbo_encoder(&turbo_req, &turbo_resp) != 0) {
631 op->status |= 1 << RTE_BBDEV_DRV_ERROR;
632 rte_bbdev_log(ERR, "Turbo Encoder failed");
635 #ifdef RTE_BBDEV_OFFLOAD_COST
636 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
639 /* Restore 3 first bytes of next CB if they were overwritten by CRC*/
/* NOTE(review): 0 doubles as the "nothing saved" marker, so a saved word
 * whose value is 0 is not written back. The save/restore also moves a
 * whole uint64_t through a cast byte pointer - confirm alignment and
 * buffer bounds.
 */
640 if (first_3_bytes != 0)
641 *((uint64_t *)&in[(k - 32) >> 3]) = first_3_bytes;
644 if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
646 /* Integer round up division by 8 */
647 uint16_t out_len = (e + 7) >> 3;
648 /* The mask array is indexed using E%8. E is an even number so
649 * there are only 4 possible values.
651 const uint8_t mask_out[] = {0xFF, 0xC0, 0xF0, 0xFC};
653 /* get output data starting address */
654 rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
655 if (rm_out == NULL) {
656 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
658 "Too little space in output mbuf");
661 /* rte_bbdev_op_data.offset can be different than the offset
662 * of the appended bytes
664 rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
666 /* index of current code block */
668 /* total number of code block */
670 /* For DL - 1, UL - 0 */
671 rm_req.direction = 1;
672 /* According to 3ggp 36.212 Spec 5.1.4.1.2 section Nsoft, KMIMO
673 * and MDL_HARQ are used for Ncb calculation. As Ncb is already
674 * known we can adjust those parameters
676 rm_req.Nsoft = ncb * rm_req.C;
679 /* According to 3ggp 36.212 Spec 5.1.4.1.2 section Nl, Qm and G
680 * are used for E calculation. As E is already known we can
681 * adjust those parameters
685 rm_req.G = rm_req.NL * rm_req.Qm * rm_req.C;
687 rm_req.rvidx = enc->rv_index;
688 rm_req.Kidx = k_idx - 1;
693 rm_resp.output = rm_out;
694 rm_resp.OutputLen = out_len;
695 if (enc->op_flags & RTE_BBDEV_TURBO_RV_INDEX_BYPASS)
696 rm_req.bypass_rvidx = 1;
698 rm_req.bypass_rvidx = 0;
700 #ifdef RTE_BBDEV_OFFLOAD_COST
701 start_time = rte_rdtsc_precise();
704 if (bblib_rate_match_dl(&rm_req, &rm_resp) != 0) {
705 op->status |= 1 << RTE_BBDEV_DRV_ERROR;
706 rte_bbdev_log(ERR, "Rate matching failed");
709 #ifdef RTE_BBDEV_OFFLOAD_COST
710 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
713 /* SW fills an entire last byte even if E%8 != 0. Clear the
714 * superfluous data bits for consistency with HW device.
716 mask_id = (e & 7) >> 1;
717 rm_out[out_len - 1] &= mask_out[mask_id];
718 enc->output.length += rm_resp.OutputLen;
720 /* Rate matching is bypassed */
722 /* Completing last byte of out0 (where 4 tail bits are stored)
723 * by moving first 4 bits from out1
725 tmp_out = (uint8_t *) --out1;
726 *tmp_out = *tmp_out | ((*(tmp_out + 1) & 0xF0) >> 4);
728 /* Shifting out1 data by 4 bits to the left */
729 for (m = 0; m < k >> 3; ++m) {
730 uint8_t *first = tmp_out;
731 uint8_t second = *(tmp_out + 1);
732 *first = (*first << 4) | ((second & 0xF0) >> 4);
735 /* Shifting out2 data by 8 bits to the left */
736 for (m = 0; m < (k >> 3) + 1; ++m) {
737 *tmp_out = *(tmp_out + 1);
751 RTE_SET_USED(m_out_head);
753 RTE_SET_USED(in_offset);
754 RTE_SET_USED(out_offset);
755 RTE_SET_USED(in_length);
756 RTE_SET_USED(q_stats);
/* Encode one operation. Rejects an input longer than
 * RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3 bytes or a NULL input/output mbuf, then
 * loops over the code blocks (while input is left and r < c), calling
 * process_enc_cb() for each one and advancing the input/output offsets.
 * Sets RTE_BBDEV_DATA_ERROR in op->status if input bytes remain afterwards.
 */
761 enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
762 struct rte_bbdev_stats *queue_stats)
764 uint8_t c, r, crc24_bits = 0;
767 struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
768 uint16_t in_offset = enc->input.offset;
769 uint16_t out_offset = enc->output.offset;
770 struct rte_mbuf *m_in = enc->input.data;
771 struct rte_mbuf *m_out = enc->output.data;
772 struct rte_mbuf *m_out_head = enc->output.data;
773 uint32_t in_length, mbuf_total_left = enc->input.length;
774 uint16_t seg_total_left;
776 /* Clear op status */
/* NOTE(review): the limit compared is in bytes (MAX_TB_SIZE >> 3) while the
 * message prints RTE_BBDEV_TURBO_MAX_TB_SIZE itself.
 */
779 if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) {
780 rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d",
781 mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE);
782 op->status = 1 << RTE_BBDEV_DATA_ERROR;
786 if (m_in == NULL || m_out == NULL) {
787 rte_bbdev_log(ERR, "Invalid mbuf pointer");
788 op->status = 1 << RTE_BBDEV_DATA_ERROR;
792 if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) ||
793 (enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH))
796 if (enc->code_block_mode == 0) { /* For Transport Block mode */
797 c = enc->tb_params.c;
798 r = enc->tb_params.r;
799 } else {/* For Code Block mode */
804 while (mbuf_total_left > 0 && r < c) {
806 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
/* Per-CB parameters: TB mode picks the neg/pos (and ea/eb) variant by
 * the CB index r, CB mode uses the single cb_params set.
 */
808 if (enc->code_block_mode == 0) {
809 k = (r < enc->tb_params.c_neg) ?
810 enc->tb_params.k_neg : enc->tb_params.k_pos;
811 ncb = (r < enc->tb_params.c_neg) ?
812 enc->tb_params.ncb_neg : enc->tb_params.ncb_pos;
813 e = (r < enc->tb_params.cab) ?
814 enc->tb_params.ea : enc->tb_params.eb;
816 k = enc->cb_params.k;
817 ncb = enc->cb_params.ncb;
818 e = enc->cb_params.e;
821 process_enc_cb(q, op, r, c, k, ncb, e, m_in, m_out_head,
822 m_out, in_offset, out_offset, seg_total_left,
824 /* Update total_left */
825 in_length = ((k - crc24_bits) >> 3);
826 mbuf_total_left -= in_length;
827 /* Update offsets for next CBs (if exist) */
828 in_offset += (k - crc24_bits) >> 3;
/* NOTE(review): process_enc_cb() appends (e + 7) >> 3 bytes per CB while
 * the offset advances by e >> 3 - confirm this is intended when e is not
 * a multiple of 8.
 */
829 if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH)
830 out_offset += e >> 3;
832 out_offset += (k >> 3) * 3 + 2;
835 if (seg_total_left == in_length) {
836 /* Go to the next mbuf */
845 /* check if all input data was processed */
846 if (mbuf_total_left != 0) {
847 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
849 "Mismatch between mbuf length and included CBs sizes");
853 static inline uint16_t
854 enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops,
855 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
858 #ifdef RTE_BBDEV_OFFLOAD_COST
859 queue_stats->acc_offload_cycles = 0;
862 for (i = 0; i < nb_ops; ++i)
863 enqueue_enc_one_op(q, ops[i], queue_stats);
865 return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
869 #ifdef RTE_BBDEV_SDK_AVX2
/* Copy three d-byte runs from in (offsets 0, kpi and 2 * kpi) to out at
 * offsets nd, nd + kpi + 64 and (nd - 1) + 2 * (kpi + 64), where
 * kpi = ncb / 3 and nd = kpi - d.
 * NOTE(review): the definition of d and the last parameter line are not
 * visible in this listing.
 */
871 move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k,
875 uint16_t kpi = ncb / 3;
876 uint16_t nd = kpi - d;
878 rte_memcpy(&out[nd], in, d);
879 rte_memcpy(&out[nd + kpi + 64], &in[kpi], d);
880 rte_memcpy(&out[(nd - 1) + 2 * (kpi + 64)], &in[2 * kpi], d);
/* Decode one code block of a decode operation.
 * Visible steps: compute the K index and validate it together with kw and
 * in_length; feed the input through bblib_deinterleave_ul() when
 * RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE is set, or through
 * move_padding_bytes() (presumably the else branch - the branch lines are
 * not visible here); adapt the LLRs with bblib_turbo_adapter_ul(); append
 * (k - crc24_overlap) >> 3 bytes to the output mbuf and run
 * bblib_turbo_decoder(). Failures are reported by OR-ing
 * RTE_BBDEV_DATA_ERROR or RTE_BBDEV_DRV_ERROR bits into op->status.
 */
885 process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
886 uint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in,
887 struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
888 uint16_t in_offset, uint16_t out_offset, bool check_crc_24b,
889 uint16_t crc24_overlap, uint16_t in_length,
890 struct rte_bbdev_stats *q_stats)
892 #ifdef RTE_BBDEV_SDK_AVX2
896 uint8_t *in, *out, *adapter_input;
897 int32_t ncb, ncb_without_null;
898 struct bblib_turbo_adapter_ul_response adapter_resp;
899 struct bblib_turbo_adapter_ul_request adapter_req;
900 struct bblib_turbo_decoder_request turbo_req;
901 struct bblib_turbo_decoder_response turbo_resp;
902 struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
903 #ifdef RTE_BBDEV_OFFLOAD_COST
906 RTE_SET_USED(q_stats);
909 k_idx = compute_idx(k);
911 ret = is_dec_input_valid(k_idx, kw, in_length);
913 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
917 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
919 ncb_without_null = (k + 4) * 3;
921 if (check_bit(dec->op_flags, RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE)) {
922 struct bblib_deinterleave_ul_request deint_req;
923 struct bblib_deinterleave_ul_response deint_resp;
925 deint_req.circ_buffer = BBLIB_FULL_CIRCULAR_BUFFER;
926 deint_req.pharqbuffer = in;
928 deint_resp.pinteleavebuffer = q->deint_output;
930 #ifdef RTE_BBDEV_OFFLOAD_COST
931 start_time = rte_rdtsc_precise();
933 bblib_deinterleave_ul(&deint_req, &deint_resp);
934 #ifdef RTE_BBDEV_OFFLOAD_COST
935 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
938 move_padding_bytes(in, q->deint_output, k, ncb);
940 adapter_input = q->deint_output;
942 if (dec->op_flags & RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN)
943 adapter_req.isinverted = 1;
944 else if (dec->op_flags & RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN)
945 adapter_req.isinverted = 0;
947 op->status |= 1 << RTE_BBDEV_DRV_ERROR;
948 rte_bbdev_log(ERR, "LLR format wasn't specified");
952 adapter_req.ncb = ncb_without_null;
953 adapter_req.pinteleavebuffer = adapter_input;
954 adapter_resp.pharqout = q->adapter_output;
956 #ifdef RTE_BBDEV_OFFLOAD_COST
957 start_time = rte_rdtsc_precise();
959 /* Turbo decode adaptation */
960 bblib_turbo_adapter_ul(&adapter_req, &adapter_resp);
961 #ifdef RTE_BBDEV_OFFLOAD_COST
962 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
965 out = (uint8_t *)mbuf_append(m_out_head, m_out,
966 ((k - crc24_overlap) >> 3));
968 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
969 rte_bbdev_log(ERR, "Too little space in output mbuf");
972 /* rte_bbdev_op_data.offset can be different than the offset of the
975 out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
980 turbo_req.input = (int8_t *)q->adapter_output;
982 turbo_req.k_idx = k_idx;
983 turbo_req.max_iter_num = dec->iter_max;
984 turbo_req.early_term_disable = !check_bit(dec->op_flags,
985 RTE_BBDEV_TURBO_EARLY_TERMINATION);
986 turbo_resp.ag_buf = q->ag;
987 turbo_resp.cb_buf = q->code_block;
988 turbo_resp.output = out;
990 #ifdef RTE_BBDEV_OFFLOAD_COST
991 start_time = rte_rdtsc_precise();
994 iter_cnt = bblib_turbo_decoder(&turbo_req, &turbo_resp);
995 #ifdef RTE_BBDEV_OFFLOAD_COST
996 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
998 dec->hard_output.length += (k >> 3);
1001 /* Temporary solution for returned iter_count from SDK */
1002 iter_cnt = (iter_cnt - 1) >> 1;
1003 dec->iter_count = RTE_MAX(iter_cnt, dec->iter_count);
1005 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
1006 rte_bbdev_log(ERR, "Turbo Decoder failed");
1016 RTE_SET_USED(m_out_head);
1017 RTE_SET_USED(m_out);
1018 RTE_SET_USED(in_offset);
1019 RTE_SET_USED(out_offset);
1020 RTE_SET_USED(check_crc_24b);
1021 RTE_SET_USED(crc24_overlap);
1022 RTE_SET_USED(in_length);
1023 RTE_SET_USED(q_stats);
/* Decode one operation. Rejects a NULL input/output mbuf, then loops while
 * input bytes remain: picks K for the current code block, derives the
 * circular buffer size kw = RTE_ALIGN_CEIL(k + 4,
 * RTE_BBDEV_TURBO_C_SUBBLOCK) * 3, calls process_dec_cb() and consumes kw
 * input bytes. Sets RTE_BBDEV_DATA_ERROR in op->status if the input length
 * is not an exact sum of the circular buffer sizes.
 */
1028 enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
1029 struct rte_bbdev_stats *queue_stats)
1033 uint16_t crc24_overlap = 0;
1034 struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
1035 struct rte_mbuf *m_in = dec->input.data;
1036 struct rte_mbuf *m_out = dec->hard_output.data;
1037 struct rte_mbuf *m_out_head = dec->hard_output.data;
1038 uint16_t in_offset = dec->input.offset;
1039 uint16_t out_offset = dec->hard_output.offset;
1040 uint32_t mbuf_total_left = dec->input.length;
1041 uint16_t seg_total_left;
1043 /* Clear op status */
1046 if (m_in == NULL || m_out == NULL) {
1047 rte_bbdev_log(ERR, "Invalid mbuf pointer");
1048 op->status = 1 << RTE_BBDEV_DATA_ERROR;
1052 if (dec->code_block_mode == 0) { /* For Transport Block mode */
1053 c = dec->tb_params.c;
1054 } else { /* For Code Block mode */
1055 k = dec->cb_params.k;
1059 if ((c > 1) && !check_bit(dec->op_flags,
1060 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
1063 while (mbuf_total_left > 0) {
1064 if (dec->code_block_mode == 0)
1065 k = (r < dec->tb_params.c_neg) ?
1066 dec->tb_params.k_neg : dec->tb_params.k_pos;
1068 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
1070 /* Calculates circular buffer size (Kw).
1071 * According to 3gpp 36.212 section 5.1.4.2
1075 * where nCol is 32 and nRow can be calculated from:
1077 * where D is the size of each output from turbo encoder block
1080 kw = RTE_ALIGN_CEIL(k + 4, RTE_BBDEV_TURBO_C_SUBBLOCK) * 3;
1082 process_dec_cb(q, op, c, k, kw, m_in, m_out_head, m_out,
1083 in_offset, out_offset, check_bit(dec->op_flags,
1084 RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap,
1085 seg_total_left, queue_stats);
1086 /* To keep CRC24 attached to end of Code block, use
1087 * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it
1088 * removed by default once verified.
1091 mbuf_total_left -= kw;
1093 /* Update offsets */
1094 if (seg_total_left == kw) {
1095 /* Go to the next mbuf */
1097 m_out = m_out->next;
1101 /* Update offsets for next CBs (if exist) */
1103 out_offset += ((k - crc24_overlap) >> 3);
1107 if (mbuf_total_left != 0) {
1108 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
1110 "Mismatch between mbuf length and included Circular buffer sizes");
1114 static inline uint16_t
1115 enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops,
1116 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
1119 #ifdef RTE_BBDEV_OFFLOAD_COST
1120 queue_stats->acc_offload_cycles = 0;
1123 for (i = 0; i < nb_ops; ++i)
1124 enqueue_dec_one_op(q, ops[i], queue_stats);
1126 return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
1132 enqueue_enc_ops(struct rte_bbdev_queue_data *q_data,
1133 struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
1135 void *queue = q_data->queue_private;
1136 struct turbo_sw_queue *q = queue;
1137 uint16_t nb_enqueued = 0;
1139 nb_enqueued = enqueue_enc_all_ops(q, ops, nb_ops, &q_data->queue_stats);
1141 q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
1142 q_data->queue_stats.enqueued_count += nb_enqueued;
1149 enqueue_dec_ops(struct rte_bbdev_queue_data *q_data,
1150 struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
1152 void *queue = q_data->queue_private;
1153 struct turbo_sw_queue *q = queue;
1154 uint16_t nb_enqueued = 0;
1156 nb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops, &q_data->queue_stats);
1158 q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
1159 q_data->queue_stats.enqueued_count += nb_enqueued;
1164 /* Dequeue decode burst */
1166 dequeue_dec_ops(struct rte_bbdev_queue_data *q_data,
1167 struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
1169 struct turbo_sw_queue *q = q_data->queue_private;
1170 uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
1171 (void **)ops, nb_ops, NULL);
1172 q_data->queue_stats.dequeued_count += nb_dequeued;
1177 /* Dequeue encode burst */
1179 dequeue_enc_ops(struct rte_bbdev_queue_data *q_data,
1180 struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
1182 struct turbo_sw_queue *q = q_data->queue_private;
1183 uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
1184 (void **)ops, nb_ops, NULL);
1185 q_data->queue_stats.dequeued_count += nb_dequeued;
1190 /* Parse 16bit integer from string argument */
1192 parse_u16_arg(const char *key, const char *value, void *extra_args)
1194 uint16_t *u16 = extra_args;
1195 unsigned int long result;
1197 if ((value == NULL) || (extra_args == NULL))
1200 result = strtoul(value, NULL, 0);
1201 if ((result >= (1 << 16)) || (errno != 0)) {
1202 rte_bbdev_log(ERR, "Invalid value %lu for %s", result, key);
1205 *u16 = (uint16_t)result;
1209 /* Parse parameters used to create device */
1211 parse_turbo_sw_params(struct turbo_sw_params *params, const char *input_args)
1213 struct rte_kvargs *kvlist = NULL;
1219 kvlist = rte_kvargs_parse(input_args, turbo_sw_valid_params);
1223 ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[0],
1224 &parse_u16_arg, ¶ms->queues_num);
1228 ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[1],
1229 &parse_u16_arg, ¶ms->socket_id);
1233 if (params->socket_id >= RTE_MAX_NUMA_NODES) {
1234 rte_bbdev_log(ERR, "Invalid socket, must be < %u",
1235 RTE_MAX_NUMA_NODES);
1242 rte_kvargs_free(kvlist);
/* Create the bbdev for a vdev: allocates the bbdev under the vdev's name
 * and its private data on init_params->socket_id, wires up the control and
 * data path callbacks and records the maximum queue count. The bbdev is
 * released again if the private data cannot be allocated.
 * NOTE(review): the return statements are not visible in this listing.
 */
1248 turbo_sw_bbdev_create(struct rte_vdev_device *vdev,
1249 struct turbo_sw_params *init_params)
1251 struct rte_bbdev *bbdev;
1252 const char *name = rte_vdev_device_name(vdev);
1254 bbdev = rte_bbdev_allocate(name);
1258 bbdev->data->dev_private = rte_zmalloc_socket(name,
1259 sizeof(struct bbdev_private), RTE_CACHE_LINE_SIZE,
1260 init_params->socket_id);
1261 if (bbdev->data->dev_private == NULL) {
1262 rte_bbdev_release(bbdev);
1266 bbdev->dev_ops = &pmd_ops;
1267 bbdev->device = &vdev->device;
1268 bbdev->data->socket_id = init_params->socket_id;
/* Software device: no interrupt handle */
1269 bbdev->intr_handle = NULL;
1271 /* register rx/tx burst functions for data path */
1272 bbdev->dequeue_enc_ops = dequeue_enc_ops;
1273 bbdev->dequeue_dec_ops = dequeue_dec_ops;
1274 bbdev->enqueue_enc_ops = enqueue_enc_ops;
1275 bbdev->enqueue_dec_ops = enqueue_dec_ops;
1276 ((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues =
1277 init_params->queues_num;
1282 /* Initialise device */
1284 turbo_sw_bbdev_probe(struct rte_vdev_device *vdev)
1286 struct turbo_sw_params init_params = {
1288 RTE_BBDEV_DEFAULT_MAX_NB_QUEUES
1291 const char *input_args;
1296 name = rte_vdev_device_name(vdev);
1299 input_args = rte_vdev_device_args(vdev);
1300 parse_turbo_sw_params(&init_params, input_args);
1302 rte_bbdev_log_debug(
1303 "Initialising %s on NUMA node %d with max queues: %d\n",
1304 name, init_params.socket_id, init_params.queues_num);
1306 return turbo_sw_bbdev_create(vdev, &init_params);
1309 /* Uninitialise device: vdev remove callback. Looks the bbdev up by the
 * vdev's name, frees its private data and returns the result of
 * rte_bbdev_release().
 * NOTE(review): the NULL checks and their return values are not visible in
 * this listing.
 */
1311 turbo_sw_bbdev_remove(struct rte_vdev_device *vdev)
1313 struct rte_bbdev *bbdev;
1319 name = rte_vdev_device_name(vdev);
1323 bbdev = rte_bbdev_get_named_dev(name);
1327 rte_free(bbdev->data->dev_private);
1329 return rte_bbdev_release(bbdev);
/* Virtual device driver descriptor: binds the probe and remove callbacks */
1332 static struct rte_vdev_driver bbdev_turbo_sw_pmd_drv = {
1333 .probe = turbo_sw_bbdev_probe,
1334 .remove = turbo_sw_bbdev_remove
/* Register the driver under DRIVER_NAME, advertise its two accepted
 * arguments and add the alias "turbo_sw".
 */
1337 RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_turbo_sw_pmd_drv);
1338 RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME,
1339 TURBO_SW_MAX_NB_QUEUES_ARG"=<int> "
1340 TURBO_SW_SOCKET_ID_ARG"=<int>");
1341 RTE_PMD_REGISTER_ALIAS(DRIVER_NAME, turbo_sw);
/* Constructor: registers the "pmd.bb.turbo_sw" log type and, when the
 * registration succeeds, sets its level to NOTICE.
 */
1343 RTE_INIT(turbo_sw_bbdev_init_log)
1345 bbdev_turbo_sw_logtype = rte_log_register("pmd.bb.turbo_sw");
1346 if (bbdev_turbo_sw_logtype >= 0)
1347 rte_log_set_level(bbdev_turbo_sw_logtype, RTE_LOG_NOTICE);