1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2017 Intel Corporation
7 #include <rte_common.h>
8 #include <rte_bus_vdev.h>
9 #include <rte_malloc.h>
11 #include <rte_kvargs.h>
12 #include <rte_cycles.h>
14 #include <rte_bbdev.h>
15 #include <rte_bbdev_pmd.h>
17 #ifdef RTE_BBDEV_SDK_AVX2
18 #include <phy_turbo.h>
20 #include <phy_rate_match.h>
23 #define DRIVER_NAME baseband_turbo_sw
25 /* Turbo SW PMD logging ID */
26 static int bbdev_turbo_sw_logtype;
28 /* Helper macro for logging */
29 #define rte_bbdev_log(level, fmt, ...) \
30 rte_log(RTE_LOG_ ## level, bbdev_turbo_sw_logtype, fmt "\n", \
33 #define rte_bbdev_log_debug(fmt, ...) \
34 rte_bbdev_log(DEBUG, RTE_STR(__LINE__) ":%s() " fmt, __func__, \
37 #define DEINT_INPUT_BUF_SIZE (((RTE_BBDEV_MAX_CB_SIZE >> 3) + 1) * 48)
38 #define DEINT_OUTPUT_BUF_SIZE (DEINT_INPUT_BUF_SIZE * 6)
39 #define ADAPTER_OUTPUT_BUF_SIZE ((RTE_BBDEV_MAX_CB_SIZE + 4) * 48)
41 /* private data structure */
42 struct bbdev_private {
43 unsigned int max_nb_queues; /**< Max number of queues */
46 /* Initialisation params structure that can be used by Turbo SW driver */
47 struct turbo_sw_params {
48 int socket_id; /*< Turbo SW device socket */
49 uint16_t queues_num; /*< Turbo SW device queues number */
52 /* Acceptable params for Turbo SW devices */
53 #define TURBO_SW_MAX_NB_QUEUES_ARG "max_nb_queues"
54 #define TURBO_SW_SOCKET_ID_ARG "socket_id"
56 static const char * const turbo_sw_valid_params[] = {
57 TURBO_SW_MAX_NB_QUEUES_ARG,
58 TURBO_SW_SOCKET_ID_ARG
62 struct turbo_sw_queue {
63 /* Ring for processed (encoded/decoded) operations which are ready to
66 struct rte_ring *processed_pkts;
67 /* Stores input for turbo encoder (used when CRC attachment is
71 /* Stores output from turbo encoder */
73 /* Alpha gamma buf for bblib_turbo_decoder() function */
75 /* Temp buf for bblib_turbo_decoder() function */
77 /* Input buf for bblib_rate_dematching_lte() function */
79 /* Output buf for bblib_rate_dematching_lte() function */
80 uint8_t *deint_output;
81 /* Output buf for bblib_turbodec_adapter_lte() function */
82 uint8_t *adapter_output;
83 /* Operation type of this queue */
84 enum rte_bbdev_op_type type;
85 } __rte_cache_aligned;
87 #ifdef RTE_BBDEV_SDK_AVX2
89 mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
91 if (unlikely(len > rte_pktmbuf_tailroom(m)))
94 char *tail = (char *)m->buf_addr + m->data_off + m->data_len;
95 m->data_len = (uint16_t)(m->data_len + len);
96 m_head->pkt_len = (m_head->pkt_len + len);
100 /* Calculate index based on Table 5.1.3-3 from TS34.212 */
101 static inline int32_t
102 compute_idx(uint16_t k)
106 if (k < RTE_BBDEV_MIN_CB_SIZE || k > RTE_BBDEV_MAX_CB_SIZE)
110 if ((k - 2048) % 64 != 0)
113 result = 124 + (k - 2048) / 64;
114 } else if (k <= 512) {
115 if ((k - 40) % 8 != 0)
118 result = (k - 40) / 8 + 1;
119 } else if (k <= 1024) {
120 if ((k - 512) % 16 != 0)
123 result = 60 + (k - 512) / 16;
124 } else { /* 1024 < k <= 2048 */
125 if ((k - 1024) % 32 != 0)
128 result = 92 + (k - 1024) / 32;
/*
 * Read flag value 0/1 from bitmap.
 *
 * Returns true when at least one bit of bitmask is set in bitmap. The
 * explicit comparison keeps the result 0/1 even for masks whose set bits
 * would be lost by a narrowing conversion.
 */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
	return (bitmap & bitmask) != 0;
}
142 /* Get device info */
144 info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
146 struct bbdev_private *internals = dev->data->dev_private;
148 static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
149 #ifdef RTE_BBDEV_SDK_AVX2
151 .type = RTE_BBDEV_OP_TURBO_DEC,
154 RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
155 RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN |
156 RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
157 RTE_BBDEV_TURBO_CRC_TYPE_24B |
158 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
159 RTE_BBDEV_TURBO_EARLY_TERMINATION,
160 .max_llr_modulus = 16,
161 .num_buffers_src = RTE_BBDEV_MAX_CODE_BLOCKS,
162 .num_buffers_hard_out =
163 RTE_BBDEV_MAX_CODE_BLOCKS,
164 .num_buffers_soft_out = 0,
168 .type = RTE_BBDEV_OP_TURBO_ENC,
171 RTE_BBDEV_TURBO_CRC_24B_ATTACH |
172 RTE_BBDEV_TURBO_CRC_24A_ATTACH |
173 RTE_BBDEV_TURBO_RATE_MATCH |
174 RTE_BBDEV_TURBO_RV_INDEX_BYPASS,
175 .num_buffers_src = RTE_BBDEV_MAX_CODE_BLOCKS,
176 .num_buffers_dst = RTE_BBDEV_MAX_CODE_BLOCKS,
180 RTE_BBDEV_END_OF_CAPABILITIES_LIST()
183 static struct rte_bbdev_queue_conf default_queue_conf = {
184 .queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT,
187 #ifdef RTE_BBDEV_SDK_AVX2
188 static const enum rte_cpu_flag_t cpu_flag = RTE_CPUFLAG_SSE4_2;
189 dev_info->cpu_flag_reqs = &cpu_flag;
191 dev_info->cpu_flag_reqs = NULL;
194 default_queue_conf.socket = dev->data->socket_id;
196 dev_info->driver_name = RTE_STR(DRIVER_NAME);
197 dev_info->max_num_queues = internals->max_nb_queues;
198 dev_info->queue_size_lim = RTE_BBDEV_QUEUE_SIZE_LIMIT;
199 dev_info->hardware_accelerated = false;
200 dev_info->max_dl_queue_priority = 0;
201 dev_info->max_ul_queue_priority = 0;
202 dev_info->default_queue_conf = default_queue_conf;
203 dev_info->capabilities = bbdev_capabilities;
204 dev_info->min_alignment = 64;
206 rte_bbdev_log_debug("got device info from %u\n", dev->data->dev_id);
211 q_release(struct rte_bbdev *dev, uint16_t q_id)
213 struct turbo_sw_queue *q = dev->data->queues[q_id].queue_private;
216 rte_ring_free(q->processed_pkts);
217 rte_free(q->enc_out);
220 rte_free(q->code_block);
221 rte_free(q->deint_input);
222 rte_free(q->deint_output);
223 rte_free(q->adapter_output);
225 dev->data->queues[q_id].queue_private = NULL;
228 rte_bbdev_log_debug("released device queue %u:%u",
229 dev->data->dev_id, q_id);
235 q_setup(struct rte_bbdev *dev, uint16_t q_id,
236 const struct rte_bbdev_queue_conf *queue_conf)
239 struct turbo_sw_queue *q;
240 char name[RTE_RING_NAMESIZE];
242 /* Allocate the queue data structure. */
243 q = rte_zmalloc_socket(RTE_STR(DRIVER_NAME), sizeof(*q),
244 RTE_CACHE_LINE_SIZE, queue_conf->socket);
246 rte_bbdev_log(ERR, "Failed to allocate queue memory");
250 /* Allocate memory for encoder output. */
251 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_enc_o%u:%u",
252 dev->data->dev_id, q_id);
253 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
255 "Creating queue name for device %u queue %u failed",
256 dev->data->dev_id, q_id);
257 return -ENAMETOOLONG;
259 q->enc_out = rte_zmalloc_socket(name,
260 ((RTE_BBDEV_MAX_TB_SIZE >> 3) + 3) *
261 sizeof(*q->enc_out) * 3,
262 RTE_CACHE_LINE_SIZE, queue_conf->socket);
263 if (q->enc_out == NULL) {
265 "Failed to allocate queue memory for %s", name);
269 /* Allocate memory for rate matching output. */
270 ret = snprintf(name, RTE_RING_NAMESIZE,
271 RTE_STR(DRIVER_NAME)"_enc_i%u:%u", dev->data->dev_id,
273 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
275 "Creating queue name for device %u queue %u failed",
276 dev->data->dev_id, q_id);
277 return -ENAMETOOLONG;
279 q->enc_in = rte_zmalloc_socket(name,
280 (RTE_BBDEV_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in),
281 RTE_CACHE_LINE_SIZE, queue_conf->socket);
282 if (q->enc_in == NULL) {
284 "Failed to allocate queue memory for %s", name);
288 /* Allocate memory for Aplha Gamma temp buffer. */
289 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_ag%u:%u",
290 dev->data->dev_id, q_id);
291 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
293 "Creating queue name for device %u queue %u failed",
294 dev->data->dev_id, q_id);
295 return -ENAMETOOLONG;
297 q->ag = rte_zmalloc_socket(name,
298 RTE_BBDEV_MAX_CB_SIZE * 10 * sizeof(*q->ag),
299 RTE_CACHE_LINE_SIZE, queue_conf->socket);
302 "Failed to allocate queue memory for %s", name);
306 /* Allocate memory for code block temp buffer. */
307 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_cb%u:%u",
308 dev->data->dev_id, q_id);
309 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
311 "Creating queue name for device %u queue %u failed",
312 dev->data->dev_id, q_id);
313 return -ENAMETOOLONG;
315 q->code_block = rte_zmalloc_socket(name,
316 RTE_BBDEV_MAX_CB_SIZE * sizeof(*q->code_block),
317 RTE_CACHE_LINE_SIZE, queue_conf->socket);
318 if (q->code_block == NULL) {
320 "Failed to allocate queue memory for %s", name);
324 /* Allocate memory for Deinterleaver input. */
325 ret = snprintf(name, RTE_RING_NAMESIZE,
326 RTE_STR(DRIVER_NAME)"_de_i%u:%u",
327 dev->data->dev_id, q_id);
328 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
330 "Creating queue name for device %u queue %u failed",
331 dev->data->dev_id, q_id);
332 return -ENAMETOOLONG;
334 q->deint_input = rte_zmalloc_socket(name,
335 DEINT_INPUT_BUF_SIZE * sizeof(*q->deint_input),
336 RTE_CACHE_LINE_SIZE, queue_conf->socket);
337 if (q->deint_input == NULL) {
339 "Failed to allocate queue memory for %s", name);
343 /* Allocate memory for Deinterleaver output. */
344 ret = snprintf(name, RTE_RING_NAMESIZE,
345 RTE_STR(DRIVER_NAME)"_de_o%u:%u",
346 dev->data->dev_id, q_id);
347 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
349 "Creating queue name for device %u queue %u failed",
350 dev->data->dev_id, q_id);
351 return -ENAMETOOLONG;
353 q->deint_output = rte_zmalloc_socket(NULL,
354 DEINT_OUTPUT_BUF_SIZE * sizeof(*q->deint_output),
355 RTE_CACHE_LINE_SIZE, queue_conf->socket);
356 if (q->deint_output == NULL) {
358 "Failed to allocate queue memory for %s", name);
362 /* Allocate memory for Adapter output. */
363 ret = snprintf(name, RTE_RING_NAMESIZE,
364 RTE_STR(DRIVER_NAME)"_ada_o%u:%u",
365 dev->data->dev_id, q_id);
366 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
368 "Creating queue name for device %u queue %u failed",
369 dev->data->dev_id, q_id);
370 return -ENAMETOOLONG;
372 q->adapter_output = rte_zmalloc_socket(NULL,
373 ADAPTER_OUTPUT_BUF_SIZE * sizeof(*q->adapter_output),
374 RTE_CACHE_LINE_SIZE, queue_conf->socket);
375 if (q->adapter_output == NULL) {
377 "Failed to allocate queue memory for %s", name);
381 /* Create ring for packets awaiting to be dequeued. */
382 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"%u:%u",
383 dev->data->dev_id, q_id);
384 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
386 "Creating queue name for device %u queue %u failed",
387 dev->data->dev_id, q_id);
388 return -ENAMETOOLONG;
390 q->processed_pkts = rte_ring_create(name, queue_conf->queue_size,
391 queue_conf->socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
392 if (q->processed_pkts == NULL) {
393 rte_bbdev_log(ERR, "Failed to create ring for %s", name);
397 q->type = queue_conf->op_type;
399 dev->data->queues[q_id].queue_private = q;
400 rte_bbdev_log_debug("setup device queue %s", name);
404 rte_ring_free(q->processed_pkts);
405 rte_free(q->enc_out);
408 rte_free(q->code_block);
409 rte_free(q->deint_input);
410 rte_free(q->deint_output);
411 rte_free(q->adapter_output);
416 static const struct rte_bbdev_ops pmd_ops = {
417 .info_get = info_get,
418 .queue_setup = q_setup,
419 .queue_release = q_release
422 #ifdef RTE_BBDEV_SDK_AVX2
423 /* Checks if the encoder input buffer is correct.
424 * Returns 0 if it's valid, -1 otherwise.
427 is_enc_input_valid(const uint16_t k, const int32_t k_idx,
428 const uint16_t in_length)
431 rte_bbdev_log(ERR, "K Index is invalid");
435 if (in_length - (k >> 3) < 0) {
437 "Mismatch between input length (%u bytes) and K (%u bits)",
442 if (k > RTE_BBDEV_MAX_CB_SIZE) {
443 rte_bbdev_log(ERR, "CB size (%u) is too big, max: %d",
444 k, RTE_BBDEV_MAX_CB_SIZE);
451 /* Checks if the decoder input buffer is correct.
452 * Returns 0 if it's valid, -1 otherwise.
455 is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length)
458 rte_bbdev_log(ERR, "K index is invalid");
462 if (in_length < kw) {
464 "Mismatch between input length (%u) and kw (%u)",
469 if (kw > RTE_BBDEV_MAX_KW) {
470 rte_bbdev_log(ERR, "Input length (%u) is too big, max: %d",
471 kw, RTE_BBDEV_MAX_KW);
480 process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
481 uint8_t r, uint8_t c, uint16_t k, uint16_t ncb,
482 uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
483 struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
484 uint16_t in_length, struct rte_bbdev_stats *q_stats)
486 #ifdef RTE_BBDEV_SDK_AVX2
490 uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out;
491 uint64_t first_3_bytes = 0;
492 struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
493 struct bblib_crc_request crc_req;
494 struct bblib_crc_response crc_resp;
495 struct bblib_turbo_encoder_request turbo_req;
496 struct bblib_turbo_encoder_response turbo_resp;
497 struct bblib_rate_match_dl_request rm_req;
498 struct bblib_rate_match_dl_response rm_resp;
499 #ifdef RTE_BBDEV_OFFLOAD_COST
502 RTE_SET_USED(q_stats);
505 k_idx = compute_idx(k);
506 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
508 /* CRC24A (for TB) */
509 if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) &&
510 (enc->code_block_mode == 1)) {
511 ret = is_enc_input_valid(k - 24, k_idx, in_length);
513 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
517 crc_req.len = k - 24;
518 /* Check if there is a room for CRC bits if not use
519 * the temporary buffer.
521 if (mbuf_append(m_in, m_in, 3) == NULL) {
522 rte_memcpy(q->enc_in, in, (k - 24) >> 3);
525 /* Store 3 first bytes of next CB as they will be
526 * overwritten by CRC bytes. If it is the last CB then
527 * there is no point to store 3 next bytes and this
528 * if..else branch will be omitted.
530 first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
534 #ifdef RTE_BBDEV_OFFLOAD_COST
535 start_time = rte_rdtsc_precise();
537 /* CRC24A generation */
538 bblib_lte_crc24a_gen(&crc_req, &crc_resp);
539 #ifdef RTE_BBDEV_OFFLOAD_COST
540 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
542 } else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
544 ret = is_enc_input_valid(k - 24, k_idx, in_length);
546 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
550 crc_req.len = k - 24;
551 /* Check if there is a room for CRC bits if this is the last
552 * CB in TB. If not use temporary buffer.
554 if ((c - r == 1) && (mbuf_append(m_in, m_in, 3) == NULL)) {
555 rte_memcpy(q->enc_in, in, (k - 24) >> 3);
557 } else if (c - r > 1) {
558 /* Store 3 first bytes of next CB as they will be
559 * overwritten by CRC bytes. If it is the last CB then
560 * there is no point to store 3 next bytes and this
561 * if..else branch will be omitted.
563 first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
567 #ifdef RTE_BBDEV_OFFLOAD_COST
568 start_time = rte_rdtsc_precise();
570 /* CRC24B generation */
571 bblib_lte_crc24b_gen(&crc_req, &crc_resp);
572 #ifdef RTE_BBDEV_OFFLOAD_COST
573 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
576 ret = is_enc_input_valid(k, k_idx, in_length);
578 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
585 /* Each bit layer output from turbo encoder is (k+4) bits long, i.e.
586 * input length + 4 tail bits. That's (k/8) + 1 bytes after rounding up.
587 * So dst_data's length should be 3*(k/8) + 3 bytes.
588 * In Rate-matching bypass case outputs pointers passed to encoder
589 * (out0, out1 and out2) can directly point to addresses of output from
592 if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
594 out1 = RTE_PTR_ADD(out0, (k >> 3) + 1);
595 out2 = RTE_PTR_ADD(out1, (k >> 3) + 1);
597 out0 = (uint8_t *)mbuf_append(m_out_head, m_out,
600 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
602 "Too little space in output mbuf");
605 enc->output.length += (k >> 3) * 3 + 2;
606 /* rte_bbdev_op_data.offset can be different than the
607 * offset of the appended bytes
609 out0 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
610 out1 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
611 out_offset + (k >> 3) + 1);
612 out2 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
613 out_offset + 2 * ((k >> 3) + 1));
616 turbo_req.case_id = k_idx;
617 turbo_req.input_win = in;
618 turbo_req.length = k >> 3;
619 turbo_resp.output_win_0 = out0;
620 turbo_resp.output_win_1 = out1;
621 turbo_resp.output_win_2 = out2;
623 #ifdef RTE_BBDEV_OFFLOAD_COST
624 start_time = rte_rdtsc_precise();
627 if (bblib_turbo_encoder(&turbo_req, &turbo_resp) != 0) {
628 op->status |= 1 << RTE_BBDEV_DRV_ERROR;
629 rte_bbdev_log(ERR, "Turbo Encoder failed");
632 #ifdef RTE_BBDEV_OFFLOAD_COST
633 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
636 /* Restore 3 first bytes of next CB if they were overwritten by CRC*/
637 if (first_3_bytes != 0)
638 *((uint64_t *)&in[(k - 32) >> 3]) = first_3_bytes;
641 if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
643 /* Integer round up division by 8 */
644 uint16_t out_len = (e + 7) >> 3;
645 /* The mask array is indexed using E%8. E is an even number so
646 * there are only 4 possible values.
648 const uint8_t mask_out[] = {0xFF, 0xC0, 0xF0, 0xFC};
650 /* get output data starting address */
651 rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
652 if (rm_out == NULL) {
653 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
655 "Too little space in output mbuf");
658 /* rte_bbdev_op_data.offset can be different than the offset
659 * of the appended bytes
661 rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
663 /* index of current code block */
665 /* total number of code block */
667 /* For DL - 1, UL - 0 */
668 rm_req.direction = 1;
669 /* According to 3gpp 36.212 Spec 5.1.4.1.2 section Nsoft, KMIMO
670 * and MDL_HARQ are used for Ncb calculation. As Ncb is already
671 * known we can adjust those parameters
673 rm_req.Nsoft = ncb * rm_req.C;
676 /* According to 3gpp 36.212 Spec 5.1.4.1.2 section Nl, Qm and G
677 * are used for E calculation. As E is already known we can
678 * adjust those parameters
682 rm_req.G = rm_req.NL * rm_req.Qm * rm_req.C;
684 rm_req.rvidx = enc->rv_index;
685 rm_req.Kidx = k_idx - 1;
690 rm_resp.output = rm_out;
691 rm_resp.OutputLen = out_len;
692 if (enc->op_flags & RTE_BBDEV_TURBO_RV_INDEX_BYPASS)
693 rm_req.bypass_rvidx = 1;
695 rm_req.bypass_rvidx = 0;
697 #ifdef RTE_BBDEV_OFFLOAD_COST
698 start_time = rte_rdtsc_precise();
701 if (bblib_rate_match_dl(&rm_req, &rm_resp) != 0) {
702 op->status |= 1 << RTE_BBDEV_DRV_ERROR;
703 rte_bbdev_log(ERR, "Rate matching failed");
706 #ifdef RTE_BBDEV_OFFLOAD_COST
707 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
710 /* SW fills an entire last byte even if E%8 != 0. Clear the
711 * superfluous data bits for consistency with HW device.
713 mask_id = (e & 7) >> 1;
714 rm_out[out_len - 1] &= mask_out[mask_id];
715 enc->output.length += rm_resp.OutputLen;
717 /* Rate matching is bypassed */
719 /* Completing last byte of out0 (where 4 tail bits are stored)
720 * by moving first 4 bits from out1
722 tmp_out = (uint8_t *) --out1;
723 *tmp_out = *tmp_out | ((*(tmp_out + 1) & 0xF0) >> 4);
725 /* Shifting out1 data by 4 bits to the left */
726 for (m = 0; m < k >> 3; ++m) {
727 uint8_t *first = tmp_out;
728 uint8_t second = *(tmp_out + 1);
729 *first = (*first << 4) | ((second & 0xF0) >> 4);
732 /* Shifting out2 data by 8 bits to the left */
733 for (m = 0; m < (k >> 3) + 1; ++m) {
734 *tmp_out = *(tmp_out + 1);
748 RTE_SET_USED(m_out_head);
750 RTE_SET_USED(in_offset);
751 RTE_SET_USED(out_offset);
752 RTE_SET_USED(in_length);
753 RTE_SET_USED(q_stats);
758 enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
759 struct rte_bbdev_stats *queue_stats)
761 uint8_t c, r, crc24_bits = 0;
764 struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
765 uint16_t in_offset = enc->input.offset;
766 uint16_t out_offset = enc->output.offset;
767 struct rte_mbuf *m_in = enc->input.data;
768 struct rte_mbuf *m_out = enc->output.data;
769 struct rte_mbuf *m_out_head = enc->output.data;
770 uint32_t in_length, mbuf_total_left = enc->input.length;
771 uint16_t seg_total_left;
773 /* Clear op status */
776 if (mbuf_total_left > RTE_BBDEV_MAX_TB_SIZE >> 3) {
777 rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d",
778 mbuf_total_left, RTE_BBDEV_MAX_TB_SIZE);
779 op->status = 1 << RTE_BBDEV_DATA_ERROR;
783 if (m_in == NULL || m_out == NULL) {
784 rte_bbdev_log(ERR, "Invalid mbuf pointer");
785 op->status = 1 << RTE_BBDEV_DATA_ERROR;
789 if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) ||
790 (enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH))
793 if (enc->code_block_mode == 0) { /* For Transport Block mode */
794 c = enc->tb_params.c;
795 r = enc->tb_params.r;
796 } else {/* For Code Block mode */
801 while (mbuf_total_left > 0 && r < c) {
803 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
805 if (enc->code_block_mode == 0) {
806 k = (r < enc->tb_params.c_neg) ?
807 enc->tb_params.k_neg : enc->tb_params.k_pos;
808 ncb = (r < enc->tb_params.c_neg) ?
809 enc->tb_params.ncb_neg : enc->tb_params.ncb_pos;
810 e = (r < enc->tb_params.cab) ?
811 enc->tb_params.ea : enc->tb_params.eb;
813 k = enc->cb_params.k;
814 ncb = enc->cb_params.ncb;
815 e = enc->cb_params.e;
818 process_enc_cb(q, op, r, c, k, ncb, e, m_in, m_out_head,
819 m_out, in_offset, out_offset, seg_total_left,
821 /* Update total_left */
822 in_length = ((k - crc24_bits) >> 3);
823 mbuf_total_left -= in_length;
824 /* Update offsets for next CBs (if exist) */
825 in_offset += (k - crc24_bits) >> 3;
826 if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH)
827 out_offset += e >> 3;
829 out_offset += (k >> 3) * 3 + 2;
832 if (seg_total_left == in_length) {
833 /* Go to the next mbuf */
842 /* check if all input data was processed */
843 if (mbuf_total_left != 0) {
844 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
846 "Mismatch between mbuf length and included CBs sizes");
850 static inline uint16_t
851 enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops,
852 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
855 #ifdef RTE_BBDEV_OFFLOAD_COST
856 queue_stats->acc_offload_cycles = 0;
859 for (i = 0; i < nb_ops; ++i)
860 enqueue_enc_one_op(q, ops[i], queue_stats);
862 return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
866 #ifdef RTE_BBDEV_SDK_AVX2
/*
 * Copy the three streams of a circular buffer that carries no padding into
 * the layout used for the de-interleaver output: each stream is placed at
 * the end of its sub-block slot, leaving the padding bytes in front of it.
 *
 * in holds three consecutive streams of ncb / 3 bytes each; out receives
 * d bytes per stream.
 *
 * NOTE(review): the declaration of d is missing from the reviewed text.
 * "k + 4" is restored from the decoder, which sizes one stream as k + 4;
 * confirm against upstream.
 */
static inline void
move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k,
		uint16_t ncb)
{
	uint16_t d = k + 4;
	uint16_t kpi = ncb / 3;
	uint16_t nd = kpi - d;

	rte_memcpy(&out[nd], in, d);
	rte_memcpy(&out[nd + kpi + 64], &in[kpi], d);
	rte_memcpy(&out[(nd - 1) + 2 * (kpi + 64)], &in[2 * kpi], d);
}
882 process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
883 uint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in,
884 struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
885 uint16_t in_offset, uint16_t out_offset, bool check_crc_24b,
886 uint16_t crc24_overlap, uint16_t in_length,
887 struct rte_bbdev_stats *q_stats)
889 #ifdef RTE_BBDEV_SDK_AVX2
893 uint8_t *in, *out, *adapter_input;
894 int32_t ncb, ncb_without_null;
895 struct bblib_turbo_adapter_ul_response adapter_resp;
896 struct bblib_turbo_adapter_ul_request adapter_req;
897 struct bblib_turbo_decoder_request turbo_req;
898 struct bblib_turbo_decoder_response turbo_resp;
899 struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
900 #ifdef RTE_BBDEV_OFFLOAD_COST
903 RTE_SET_USED(q_stats);
906 k_idx = compute_idx(k);
908 ret = is_dec_input_valid(k_idx, kw, in_length);
910 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
914 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
916 ncb_without_null = (k + 4) * 3;
918 if (check_bit(dec->op_flags, RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE)) {
919 struct bblib_deinterleave_ul_request deint_req;
920 struct bblib_deinterleave_ul_response deint_resp;
922 deint_req.circ_buffer = BBLIB_FULL_CIRCULAR_BUFFER;
923 deint_req.pharqbuffer = in;
925 deint_resp.pinteleavebuffer = q->deint_output;
927 #ifdef RTE_BBDEV_OFFLOAD_COST
928 start_time = rte_rdtsc_precise();
930 bblib_deinterleave_ul(&deint_req, &deint_resp);
931 #ifdef RTE_BBDEV_OFFLOAD_COST
932 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
935 move_padding_bytes(in, q->deint_output, k, ncb);
937 adapter_input = q->deint_output;
939 if (dec->op_flags & RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN)
940 adapter_req.isinverted = 1;
941 else if (dec->op_flags & RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN)
942 adapter_req.isinverted = 0;
944 op->status |= 1 << RTE_BBDEV_DRV_ERROR;
945 rte_bbdev_log(ERR, "LLR format wasn't specified");
949 adapter_req.ncb = ncb_without_null;
950 adapter_req.pinteleavebuffer = adapter_input;
951 adapter_resp.pharqout = q->adapter_output;
953 #ifdef RTE_BBDEV_OFFLOAD_COST
954 start_time = rte_rdtsc_precise();
956 /* Turbo decode adaptation */
957 bblib_turbo_adapter_ul(&adapter_req, &adapter_resp);
958 #ifdef RTE_BBDEV_OFFLOAD_COST
959 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
962 out = (uint8_t *)mbuf_append(m_out_head, m_out,
963 ((k - crc24_overlap) >> 3));
965 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
966 rte_bbdev_log(ERR, "Too little space in output mbuf");
969 /* rte_bbdev_op_data.offset can be different than the offset of the
972 out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
977 turbo_req.input = (int8_t *)q->adapter_output;
979 turbo_req.k_idx = k_idx;
980 turbo_req.max_iter_num = dec->iter_max;
981 turbo_req.early_term_disable = !check_bit(dec->op_flags,
982 RTE_BBDEV_TURBO_EARLY_TERMINATION);
983 turbo_resp.ag_buf = q->ag;
984 turbo_resp.cb_buf = q->code_block;
985 turbo_resp.output = out;
987 #ifdef RTE_BBDEV_OFFLOAD_COST
988 start_time = rte_rdtsc_precise();
991 iter_cnt = bblib_turbo_decoder(&turbo_req, &turbo_resp);
992 #ifdef RTE_BBDEV_OFFLOAD_COST
993 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
995 dec->hard_output.length += (k >> 3);
998 /* Temporary solution for returned iter_count from SDK */
999 iter_cnt = (iter_cnt - 1) >> 1;
1000 dec->iter_count = RTE_MAX(iter_cnt, dec->iter_count);
1002 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
1003 rte_bbdev_log(ERR, "Turbo Decoder failed");
1013 RTE_SET_USED(m_out_head);
1014 RTE_SET_USED(m_out);
1015 RTE_SET_USED(in_offset);
1016 RTE_SET_USED(out_offset);
1017 RTE_SET_USED(check_crc_24b);
1018 RTE_SET_USED(crc24_overlap);
1019 RTE_SET_USED(in_length);
1020 RTE_SET_USED(q_stats);
1025 enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
1026 struct rte_bbdev_stats *queue_stats)
1030 uint16_t crc24_overlap = 0;
1031 struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
1032 struct rte_mbuf *m_in = dec->input.data;
1033 struct rte_mbuf *m_out = dec->hard_output.data;
1034 struct rte_mbuf *m_out_head = dec->hard_output.data;
1035 uint16_t in_offset = dec->input.offset;
1036 uint16_t out_offset = dec->hard_output.offset;
1037 uint32_t mbuf_total_left = dec->input.length;
1038 uint16_t seg_total_left;
1040 /* Clear op status */
1043 if (m_in == NULL || m_out == NULL) {
1044 rte_bbdev_log(ERR, "Invalid mbuf pointer");
1045 op->status = 1 << RTE_BBDEV_DATA_ERROR;
1049 if (dec->code_block_mode == 0) { /* For Transport Block mode */
1050 c = dec->tb_params.c;
1051 } else { /* For Code Block mode */
1052 k = dec->cb_params.k;
1056 if ((c > 1) && !check_bit(dec->op_flags,
1057 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
1060 while (mbuf_total_left > 0) {
1061 if (dec->code_block_mode == 0)
1062 k = (r < dec->tb_params.c_neg) ?
1063 dec->tb_params.k_neg : dec->tb_params.k_pos;
1065 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
1067 /* Calculates circular buffer size (Kw).
1068 * According to 3gpp 36.212 section 5.1.4.2
1072 * where nCol is 32 and nRow can be calculated from:
1074 * where D is the size of each output from turbo encoder block
1077 kw = RTE_ALIGN_CEIL(k + 4, RTE_BBDEV_C_SUBBLOCK) * 3;
1079 process_dec_cb(q, op, c, k, kw, m_in, m_out_head, m_out,
1080 in_offset, out_offset, check_bit(dec->op_flags,
1081 RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap,
1082 seg_total_left, queue_stats);
1083 /* To keep CRC24 attached to end of Code block, use
1084 * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it
1085 * removed by default once verified.
1088 mbuf_total_left -= kw;
1090 /* Update offsets */
1091 if (seg_total_left == kw) {
1092 /* Go to the next mbuf */
1094 m_out = m_out->next;
1098 /* Update offsets for next CBs (if exist) */
1100 out_offset += ((k - crc24_overlap) >> 3);
1104 if (mbuf_total_left != 0) {
1105 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
1107 "Mismatch between mbuf length and included Circular buffer sizes");
1111 static inline uint16_t
1112 enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops,
1113 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
1116 #ifdef RTE_BBDEV_OFFLOAD_COST
1117 queue_stats->acc_offload_cycles = 0;
1120 for (i = 0; i < nb_ops; ++i)
1121 enqueue_dec_one_op(q, ops[i], queue_stats);
1123 return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
1129 enqueue_enc_ops(struct rte_bbdev_queue_data *q_data,
1130 struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
1132 void *queue = q_data->queue_private;
1133 struct turbo_sw_queue *q = queue;
1134 uint16_t nb_enqueued = 0;
1136 nb_enqueued = enqueue_enc_all_ops(q, ops, nb_ops, &q_data->queue_stats);
1138 q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
1139 q_data->queue_stats.enqueued_count += nb_enqueued;
1146 enqueue_dec_ops(struct rte_bbdev_queue_data *q_data,
1147 struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
1149 void *queue = q_data->queue_private;
1150 struct turbo_sw_queue *q = queue;
1151 uint16_t nb_enqueued = 0;
1153 nb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops, &q_data->queue_stats);
1155 q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
1156 q_data->queue_stats.enqueued_count += nb_enqueued;
1161 /* Dequeue decode burst */
1163 dequeue_dec_ops(struct rte_bbdev_queue_data *q_data,
1164 struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
1166 struct turbo_sw_queue *q = q_data->queue_private;
1167 uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
1168 (void **)ops, nb_ops, NULL);
1169 q_data->queue_stats.dequeued_count += nb_dequeued;
1174 /* Dequeue encode burst */
1176 dequeue_enc_ops(struct rte_bbdev_queue_data *q_data,
1177 struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
1179 struct turbo_sw_queue *q = q_data->queue_private;
1180 uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
1181 (void **)ops, nb_ops, NULL);
1182 q_data->queue_stats.dequeued_count += nb_dequeued;
1187 /* Parse 16bit integer from string argument */
1189 parse_u16_arg(const char *key, const char *value, void *extra_args)
1191 uint16_t *u16 = extra_args;
1192 unsigned int long result;
1194 if ((value == NULL) || (extra_args == NULL))
1197 result = strtoul(value, NULL, 0);
1198 if ((result >= (1 << 16)) || (errno != 0)) {
1199 rte_bbdev_log(ERR, "Invalid value %lu for %s", result, key);
1202 *u16 = (uint16_t)result;
1206 /* Parse parameters used to create device */
1208 parse_turbo_sw_params(struct turbo_sw_params *params, const char *input_args)
1210 struct rte_kvargs *kvlist = NULL;
1216 kvlist = rte_kvargs_parse(input_args, turbo_sw_valid_params);
1220 ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[0],
1221 &parse_u16_arg, ¶ms->queues_num);
1225 ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[1],
1226 &parse_u16_arg, ¶ms->socket_id);
1230 if (params->socket_id >= RTE_MAX_NUMA_NODES) {
1231 rte_bbdev_log(ERR, "Invalid socket, must be < %u",
1232 RTE_MAX_NUMA_NODES);
1239 rte_kvargs_free(kvlist);
1245 turbo_sw_bbdev_create(struct rte_vdev_device *vdev,
1246 struct turbo_sw_params *init_params)
1248 struct rte_bbdev *bbdev;
1249 const char *name = rte_vdev_device_name(vdev);
1251 bbdev = rte_bbdev_allocate(name);
1255 bbdev->data->dev_private = rte_zmalloc_socket(name,
1256 sizeof(struct bbdev_private), RTE_CACHE_LINE_SIZE,
1257 init_params->socket_id);
1258 if (bbdev->data->dev_private == NULL) {
1259 rte_bbdev_release(bbdev);
1263 bbdev->dev_ops = &pmd_ops;
1264 bbdev->device = &vdev->device;
1265 bbdev->data->socket_id = init_params->socket_id;
1266 bbdev->intr_handle = NULL;
1268 /* register rx/tx burst functions for data path */
1269 bbdev->dequeue_enc_ops = dequeue_enc_ops;
1270 bbdev->dequeue_dec_ops = dequeue_dec_ops;
1271 bbdev->enqueue_enc_ops = enqueue_enc_ops;
1272 bbdev->enqueue_dec_ops = enqueue_dec_ops;
1273 ((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues =
1274 init_params->queues_num;
1279 /* Initialise device */
1281 turbo_sw_bbdev_probe(struct rte_vdev_device *vdev)
1283 struct turbo_sw_params init_params = {
1285 RTE_BBDEV_DEFAULT_MAX_NB_QUEUES
1288 const char *input_args;
1293 name = rte_vdev_device_name(vdev);
1296 input_args = rte_vdev_device_args(vdev);
1297 parse_turbo_sw_params(&init_params, input_args);
1299 rte_bbdev_log_debug(
1300 "Initialising %s on NUMA node %d with max queues: %d\n",
1301 name, init_params.socket_id, init_params.queues_num);
1303 return turbo_sw_bbdev_create(vdev, &init_params);
1306 /* Uninitialise device */
1308 turbo_sw_bbdev_remove(struct rte_vdev_device *vdev)
1310 struct rte_bbdev *bbdev;
1316 name = rte_vdev_device_name(vdev);
1320 bbdev = rte_bbdev_get_named_dev(name);
1324 rte_free(bbdev->data->dev_private);
1326 return rte_bbdev_release(bbdev);
1329 static struct rte_vdev_driver bbdev_turbo_sw_pmd_drv = {
1330 .probe = turbo_sw_bbdev_probe,
1331 .remove = turbo_sw_bbdev_remove
1334 RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_turbo_sw_pmd_drv);
1335 RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME,
1336 TURBO_SW_MAX_NB_QUEUES_ARG"=<int> "
1337 TURBO_SW_SOCKET_ID_ARG"=<int>");
1338 RTE_PMD_REGISTER_ALIAS(DRIVER_NAME, turbo_sw);
1340 RTE_INIT(turbo_sw_bbdev_init_log)
1342 bbdev_turbo_sw_logtype = rte_log_register("pmd.bb.turbo_sw");
1343 if (bbdev_turbo_sw_logtype >= 0)
1344 rte_log_set_level(bbdev_turbo_sw_logtype, RTE_LOG_NOTICE);