baseband/turbo_sw: support CRC16
[dpdk.git] / drivers / baseband / turbo_sw / bbdev_turbo_software.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4
5 #include <string.h>
6
7 #include <rte_common.h>
8 #include <rte_bus_vdev.h>
9 #include <rte_malloc.h>
10 #include <rte_ring.h>
11 #include <rte_kvargs.h>
12 #include <rte_cycles.h>
13 #include <rte_errno.h>
14
15 #include <rte_bbdev.h>
16 #include <rte_bbdev_pmd.h>
17
18 #include <rte_hexdump.h>
19 #include <rte_log.h>
20
21 #ifdef RTE_BBDEV_SDK_AVX2
22 #include <ipp.h>
23 #include <ipps.h>
24 #include <phy_turbo.h>
25 #include <phy_crc.h>
26 #include <phy_rate_match.h>
27 #endif
28 #ifdef RTE_BBDEV_SDK_AVX512
29 #include <bit_reverse.h>
30 #include <phy_ldpc_encoder_5gnr.h>
31 #include <phy_ldpc_decoder_5gnr.h>
32 #include <phy_LDPC_ratematch_5gnr.h>
33 #include <phy_rate_dematching_5gnr.h>
34 #endif
35
/* Driver name token; it is stringified with RTE_STR() where a string is
 * needed (log messages, allocation tags, ring names).
 */
#define DRIVER_NAME baseband_turbo_sw

/* Log type used by every message of this driver, registered with NOTICE */
RTE_LOG_REGISTER_DEFAULT(bbdev_turbo_sw_logtype, NOTICE);

/* Logging helper: emits the message at RTE_LOG_<level> on the driver's log
 * type. A newline is appended to fmt here.
 */
#define rte_bbdev_log(level, fmt, ...) \
	rte_log(RTE_LOG_ ## level, bbdev_turbo_sw_logtype, \
		fmt "\n", ##__VA_ARGS__)

/* Debug logging helper: prefixes the message with "<line>:<function>() " */
#define rte_bbdev_log_debug(fmt, ...) \
	rte_bbdev_log(DEBUG, RTE_STR(__LINE__) ":%s() " fmt, \
		__func__, ##__VA_ARGS__)
48
/* Element counts of the per-queue scratch buffers allocated in q_setup():
 * deint_input, deint_output and adapter_output respectively.
 */
#define DEINT_INPUT_BUF_SIZE \
	(((RTE_BBDEV_TURBO_MAX_CB_SIZE >> 3) + 1) * 48)
#define DEINT_OUTPUT_BUF_SIZE \
	(DEINT_INPUT_BUF_SIZE * 6)
#define ADAPTER_OUTPUT_BUF_SIZE \
	((RTE_BBDEV_TURBO_MAX_CB_SIZE + 4) * 48)
52
/* Driver-private device data (reached through dev->data->dev_private) */
struct bbdev_private {
	/** Max number of queues; reported by info_get() as max_num_queues */
	unsigned int max_nb_queues;
};
57
/* Initialisation parameters that can be used by the Turbo SW driver */
struct turbo_sw_params {
	/** Turbo SW device socket */
	int socket_id;
	/** Turbo SW device queues number */
	uint16_t queues_num;
};
63
/* Acceptable parameters for Turbo SW devices */
#define TURBO_SW_MAX_NB_QUEUES_ARG "max_nb_queues"
#define TURBO_SW_SOCKET_ID_ARG "socket_id"

/* Table of the parameter keys listed above.
 * NOTE(review): the table has no NULL terminator; if it is handed to an API
 * that expects a NULL-terminated key list (e.g. rte_kvargs_parse()), one
 * needs to be added - confirm against the code that consumes this table.
 */
static const char * const turbo_sw_valid_params[] = {
	TURBO_SW_MAX_NB_QUEUES_ARG,
	TURBO_SW_SOCKET_ID_ARG
};
72
73 /* queue */
74 struct turbo_sw_queue {
75         /* Ring for processed (encoded/decoded) operations which are ready to
76          * be dequeued.
77          */
78         struct rte_ring *processed_pkts;
79         /* Stores input for turbo encoder (used when CRC attachment is
80          * performed
81          */
82         uint8_t *enc_in;
83         /* Stores output from turbo encoder */
84         uint8_t *enc_out;
85         /* Alpha gamma buf for bblib_turbo_decoder() function */
86         int8_t *ag;
87         /* Temp buf for bblib_turbo_decoder() function */
88         uint16_t *code_block;
89         /* Input buf for bblib_rate_dematching_lte() function */
90         uint8_t *deint_input;
91         /* Output buf for bblib_rate_dematching_lte() function */
92         uint8_t *deint_output;
93         /* Output buf for bblib_turbodec_adapter_lte() function */
94         uint8_t *adapter_output;
95         /* Operation type of this queue */
96         enum rte_bbdev_op_type type;
97 } __rte_cache_aligned;
98
99
100 #ifdef RTE_BBDEV_SDK_AVX2
101 static inline char *
102 mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
103 {
104         if (unlikely(len > rte_pktmbuf_tailroom(m)))
105                 return NULL;
106
107         char *tail = (char *)m->buf_addr + m->data_off + m->data_len;
108         m->data_len = (uint16_t)(m->data_len + len);
109         m_head->pkt_len  = (m_head->pkt_len + len);
110         return tail;
111 }
112
113 /* Calculate index based on Table 5.1.3-3 from TS34.212 */
114 static inline int32_t
115 compute_idx(uint16_t k)
116 {
117         int32_t result = 0;
118
119         if (k < RTE_BBDEV_TURBO_MIN_CB_SIZE || k > RTE_BBDEV_TURBO_MAX_CB_SIZE)
120                 return -1;
121
122         if (k > 2048) {
123                 if ((k - 2048) % 64 != 0)
124                         result = -1;
125
126                 result = 124 + (k - 2048) / 64;
127         } else if (k <= 512) {
128                 if ((k - 40) % 8 != 0)
129                         result = -1;
130
131                 result = (k - 40) / 8 + 1;
132         } else if (k <= 1024) {
133                 if ((k - 512) % 16 != 0)
134                         result = -1;
135
136                 result = 60 + (k - 512) / 16;
137         } else { /* 1024 < k <= 2048 */
138                 if ((k - 1024) % 32 != 0)
139                         result = -1;
140
141                 result = 92 + (k - 1024) / 32;
142         }
143
144         return result;
145 }
146 #endif
147
/* Test a flag: returns true when at least one bit of bitmask is set in
 * bitmap, false otherwise.
 */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
	const uint32_t selected = bitmap & bitmask;

	return selected != 0;
}
154
155 /* Get device info */
156 static void
157 info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
158 {
159         struct bbdev_private *internals = dev->data->dev_private;
160
161         static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
162 #ifdef RTE_BBDEV_SDK_AVX2
163                 {
164                         .type = RTE_BBDEV_OP_TURBO_DEC,
165                         .cap.turbo_dec = {
166                                 .capability_flags =
167                                         RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
168                                         RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN |
169                                         RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
170                                         RTE_BBDEV_TURBO_CRC_TYPE_24B |
171                                         RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
172                                         RTE_BBDEV_TURBO_EARLY_TERMINATION,
173                                 .max_llr_modulus = 16,
174                                 .num_buffers_src =
175                                                 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
176                                 .num_buffers_hard_out =
177                                                 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
178                                 .num_buffers_soft_out = 0,
179                         }
180                 },
181                 {
182                         .type   = RTE_BBDEV_OP_TURBO_ENC,
183                         .cap.turbo_enc = {
184                                 .capability_flags =
185                                                 RTE_BBDEV_TURBO_CRC_24B_ATTACH |
186                                                 RTE_BBDEV_TURBO_CRC_24A_ATTACH |
187                                                 RTE_BBDEV_TURBO_RATE_MATCH |
188                                                 RTE_BBDEV_TURBO_RV_INDEX_BYPASS,
189                                 .num_buffers_src =
190                                                 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
191                                 .num_buffers_dst =
192                                                 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
193                         }
194                 },
195 #endif
196 #ifdef RTE_BBDEV_SDK_AVX512
197                 {
198                         .type   = RTE_BBDEV_OP_LDPC_ENC,
199                         .cap.ldpc_enc = {
200                                 .capability_flags =
201                                                 RTE_BBDEV_LDPC_RATE_MATCH |
202                                                 RTE_BBDEV_LDPC_CRC_16_ATTACH |
203                                                 RTE_BBDEV_LDPC_CRC_24A_ATTACH |
204                                                 RTE_BBDEV_LDPC_CRC_24B_ATTACH,
205                                 .num_buffers_src =
206                                                 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
207                                 .num_buffers_dst =
208                                                 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
209                         }
210                 },
211                 {
212                 .type   = RTE_BBDEV_OP_LDPC_DEC,
213                 .cap.ldpc_dec = {
214                         .capability_flags =
215                                         RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK |
216                                         RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
217                                         RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK |
218                                         RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
219                                         RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
220                                         RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
221                                         RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE,
222                         .llr_size = 8,
223                         .llr_decimals = 4,
224                         .num_buffers_src =
225                                         RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
226                         .num_buffers_hard_out =
227                                         RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
228                         .num_buffers_soft_out = 0,
229                 }
230                 },
231 #endif
232                 RTE_BBDEV_END_OF_CAPABILITIES_LIST()
233         };
234
235         static struct rte_bbdev_queue_conf default_queue_conf = {
236                 .queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT,
237         };
238 #ifdef RTE_BBDEV_SDK_AVX2
239         static const enum rte_cpu_flag_t cpu_flag = RTE_CPUFLAG_SSE4_2;
240         dev_info->cpu_flag_reqs = &cpu_flag;
241 #else
242         dev_info->cpu_flag_reqs = NULL;
243 #endif
244         default_queue_conf.socket = dev->data->socket_id;
245
246         dev_info->driver_name = RTE_STR(DRIVER_NAME);
247         dev_info->max_num_queues = internals->max_nb_queues;
248         dev_info->queue_size_lim = RTE_BBDEV_QUEUE_SIZE_LIMIT;
249         dev_info->hardware_accelerated = false;
250         dev_info->max_dl_queue_priority = 0;
251         dev_info->max_ul_queue_priority = 0;
252         dev_info->default_queue_conf = default_queue_conf;
253         dev_info->capabilities = bbdev_capabilities;
254         dev_info->min_alignment = 64;
255         dev_info->harq_buffer_size = 0;
256
257         rte_bbdev_log_debug("got device info from %u\n", dev->data->dev_id);
258 }
259
260 /* Release queue */
261 static int
262 q_release(struct rte_bbdev *dev, uint16_t q_id)
263 {
264         struct turbo_sw_queue *q = dev->data->queues[q_id].queue_private;
265
266         if (q != NULL) {
267                 rte_ring_free(q->processed_pkts);
268                 rte_free(q->enc_out);
269                 rte_free(q->enc_in);
270                 rte_free(q->ag);
271                 rte_free(q->code_block);
272                 rte_free(q->deint_input);
273                 rte_free(q->deint_output);
274                 rte_free(q->adapter_output);
275                 rte_free(q);
276                 dev->data->queues[q_id].queue_private = NULL;
277         }
278
279         rte_bbdev_log_debug("released device queue %u:%u",
280                         dev->data->dev_id, q_id);
281         return 0;
282 }
283
284 /* Setup a queue */
285 static int
286 q_setup(struct rte_bbdev *dev, uint16_t q_id,
287                 const struct rte_bbdev_queue_conf *queue_conf)
288 {
289         int ret;
290         struct turbo_sw_queue *q;
291         char name[RTE_RING_NAMESIZE];
292
293         /* Allocate the queue data structure. */
294         q = rte_zmalloc_socket(RTE_STR(DRIVER_NAME), sizeof(*q),
295                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
296         if (q == NULL) {
297                 rte_bbdev_log(ERR, "Failed to allocate queue memory");
298                 return -ENOMEM;
299         }
300
301         /* Allocate memory for encoder output. */
302         ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_enc_o%u:%u",
303                         dev->data->dev_id, q_id);
304         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
305                 rte_bbdev_log(ERR,
306                                 "Creating queue name for device %u queue %u failed",
307                                 dev->data->dev_id, q_id);
308                 ret = -ENAMETOOLONG;
309                 goto free_q;
310         }
311         q->enc_out = rte_zmalloc_socket(name,
312                         ((RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) + 3) *
313                         sizeof(*q->enc_out) * 3,
314                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
315         if (q->enc_out == NULL) {
316                 rte_bbdev_log(ERR,
317                         "Failed to allocate queue memory for %s", name);
318                 ret = -ENOMEM;
319                 goto free_q;
320         }
321
322         /* Allocate memory for rate matching output. */
323         ret = snprintf(name, RTE_RING_NAMESIZE,
324                         RTE_STR(DRIVER_NAME)"_enc_i%u:%u", dev->data->dev_id,
325                         q_id);
326         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
327                 rte_bbdev_log(ERR,
328                                 "Creating queue name for device %u queue %u failed",
329                                 dev->data->dev_id, q_id);
330                 ret = -ENAMETOOLONG;
331                 goto free_q;
332         }
333         q->enc_in = rte_zmalloc_socket(name,
334                         (RTE_BBDEV_LDPC_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in),
335                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
336         if (q->enc_in == NULL) {
337                 rte_bbdev_log(ERR,
338                         "Failed to allocate queue memory for %s", name);
339                 ret = -ENOMEM;
340                 goto free_q;
341         }
342
343         /* Allocate memory for Alpha Gamma temp buffer. */
344         ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_ag%u:%u",
345                         dev->data->dev_id, q_id);
346         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
347                 rte_bbdev_log(ERR,
348                                 "Creating queue name for device %u queue %u failed",
349                                 dev->data->dev_id, q_id);
350                 ret = -ENAMETOOLONG;
351                 goto free_q;
352         }
353         q->ag = rte_zmalloc_socket(name,
354                         RTE_BBDEV_TURBO_MAX_CB_SIZE * 10 * sizeof(*q->ag),
355                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
356         if (q->ag == NULL) {
357                 rte_bbdev_log(ERR,
358                         "Failed to allocate queue memory for %s", name);
359                 ret = -ENOMEM;
360                 goto free_q;
361         }
362
363         /* Allocate memory for code block temp buffer. */
364         ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_cb%u:%u",
365                         dev->data->dev_id, q_id);
366         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
367                 rte_bbdev_log(ERR,
368                                 "Creating queue name for device %u queue %u failed",
369                                 dev->data->dev_id, q_id);
370                 ret = -ENAMETOOLONG;
371                 goto free_q;
372         }
373         q->code_block = rte_zmalloc_socket(name,
374                         RTE_BBDEV_TURBO_MAX_CB_SIZE * sizeof(*q->code_block),
375                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
376         if (q->code_block == NULL) {
377                 rte_bbdev_log(ERR,
378                         "Failed to allocate queue memory for %s", name);
379                 ret = -ENOMEM;
380                 goto free_q;
381         }
382
383         /* Allocate memory for Deinterleaver input. */
384         ret = snprintf(name, RTE_RING_NAMESIZE,
385                         RTE_STR(DRIVER_NAME)"_de_i%u:%u",
386                         dev->data->dev_id, q_id);
387         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
388                 rte_bbdev_log(ERR,
389                                 "Creating queue name for device %u queue %u failed",
390                                 dev->data->dev_id, q_id);
391                 ret = -ENAMETOOLONG;
392                 goto free_q;
393         }
394         q->deint_input = rte_zmalloc_socket(name,
395                         DEINT_INPUT_BUF_SIZE * sizeof(*q->deint_input),
396                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
397         if (q->deint_input == NULL) {
398                 rte_bbdev_log(ERR,
399                         "Failed to allocate queue memory for %s", name);
400                 ret = -ENOMEM;
401                 goto free_q;
402         }
403
404         /* Allocate memory for Deinterleaver output. */
405         ret = snprintf(name, RTE_RING_NAMESIZE,
406                         RTE_STR(DRIVER_NAME)"_de_o%u:%u",
407                         dev->data->dev_id, q_id);
408         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
409                 rte_bbdev_log(ERR,
410                                 "Creating queue name for device %u queue %u failed",
411                                 dev->data->dev_id, q_id);
412                 ret = -ENAMETOOLONG;
413                 goto free_q;
414         }
415         q->deint_output = rte_zmalloc_socket(NULL,
416                         DEINT_OUTPUT_BUF_SIZE * sizeof(*q->deint_output),
417                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
418         if (q->deint_output == NULL) {
419                 rte_bbdev_log(ERR,
420                         "Failed to allocate queue memory for %s", name);
421                 ret = -ENOMEM;
422                 goto free_q;
423         }
424
425         /* Allocate memory for Adapter output. */
426         ret = snprintf(name, RTE_RING_NAMESIZE,
427                         RTE_STR(DRIVER_NAME)"_ada_o%u:%u",
428                         dev->data->dev_id, q_id);
429         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
430                 rte_bbdev_log(ERR,
431                                 "Creating queue name for device %u queue %u failed",
432                                 dev->data->dev_id, q_id);
433                 ret = -ENAMETOOLONG;
434                 goto free_q;
435         }
436         q->adapter_output = rte_zmalloc_socket(NULL,
437                         ADAPTER_OUTPUT_BUF_SIZE * sizeof(*q->adapter_output),
438                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
439         if (q->adapter_output == NULL) {
440                 rte_bbdev_log(ERR,
441                         "Failed to allocate queue memory for %s", name);
442                 ret = -ENOMEM;
443                 goto free_q;
444         }
445
446         /* Create ring for packets awaiting to be dequeued. */
447         ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"%u:%u",
448                         dev->data->dev_id, q_id);
449         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
450                 rte_bbdev_log(ERR,
451                                 "Creating queue name for device %u queue %u failed",
452                                 dev->data->dev_id, q_id);
453                 ret = -ENAMETOOLONG;
454                 goto free_q;
455         }
456         q->processed_pkts = rte_ring_create(name, queue_conf->queue_size,
457                         queue_conf->socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
458         if (q->processed_pkts == NULL) {
459                 rte_bbdev_log(ERR, "Failed to create ring for %s", name);
460                 ret = -rte_errno;
461                 goto free_q;
462         }
463
464         q->type = queue_conf->op_type;
465
466         dev->data->queues[q_id].queue_private = q;
467         rte_bbdev_log_debug("setup device queue %s", name);
468         return 0;
469
470 free_q:
471         rte_ring_free(q->processed_pkts);
472         rte_free(q->enc_out);
473         rte_free(q->enc_in);
474         rte_free(q->ag);
475         rte_free(q->code_block);
476         rte_free(q->deint_input);
477         rte_free(q->deint_output);
478         rte_free(q->adapter_output);
479         rte_free(q);
480         return ret;
481 }
482
483 static const struct rte_bbdev_ops pmd_ops = {
484         .info_get = info_get,
485         .queue_setup = q_setup,
486         .queue_release = q_release
487 };
488
489 #ifdef RTE_BBDEV_SDK_AVX2
490 #ifdef RTE_LIBRTE_BBDEV_DEBUG
491 /* Checks if the encoder input buffer is correct.
492  * Returns 0 if it's valid, -1 otherwise.
493  */
494 static inline int
495 is_enc_input_valid(const uint16_t k, const int32_t k_idx,
496                 const uint16_t in_length)
497 {
498         if (k_idx < 0) {
499                 rte_bbdev_log(ERR, "K Index is invalid");
500                 return -1;
501         }
502
503         if (in_length - (k >> 3) < 0) {
504                 rte_bbdev_log(ERR,
505                                 "Mismatch between input length (%u bytes) and K (%u bits)",
506                                 in_length, k);
507                 return -1;
508         }
509
510         if (k > RTE_BBDEV_TURBO_MAX_CB_SIZE) {
511                 rte_bbdev_log(ERR, "CB size (%u) is too big, max: %d",
512                                 k, RTE_BBDEV_TURBO_MAX_CB_SIZE);
513                 return -1;
514         }
515
516         return 0;
517 }
518
519 /* Checks if the decoder input buffer is correct.
520  * Returns 0 if it's valid, -1 otherwise.
521  */
522 static inline int
523 is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length)
524 {
525         if (k_idx < 0) {
526                 rte_bbdev_log(ERR, "K index is invalid");
527                 return -1;
528         }
529
530         if (in_length < kw) {
531                 rte_bbdev_log(ERR,
532                                 "Mismatch between input length (%u) and kw (%u)",
533                                 in_length, kw);
534                 return -1;
535         }
536
537         if (kw > RTE_BBDEV_TURBO_MAX_KW) {
538                 rte_bbdev_log(ERR, "Input length (%u) is too big, max: %d",
539                                 kw, RTE_BBDEV_TURBO_MAX_KW);
540                 return -1;
541         }
542
543         return 0;
544 }
545 #endif
546 #endif
547
548 static inline void
549 process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
550                 uint8_t r, uint8_t c, uint16_t k, uint16_t ncb,
551                 uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
552                 struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
553                 uint16_t in_length, struct rte_bbdev_stats *q_stats)
554 {
555 #ifdef RTE_BBDEV_SDK_AVX2
556 #ifdef RTE_LIBRTE_BBDEV_DEBUG
557         int ret;
558 #else
559         RTE_SET_USED(in_length);
560 #endif
561         int16_t k_idx;
562         uint16_t m;
563         uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out;
564         uint64_t first_3_bytes = 0;
565         struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
566         struct bblib_crc_request crc_req;
567         struct bblib_crc_response crc_resp;
568         struct bblib_turbo_encoder_request turbo_req;
569         struct bblib_turbo_encoder_response turbo_resp;
570         struct bblib_rate_match_dl_request rm_req;
571         struct bblib_rate_match_dl_response rm_resp;
572 #ifdef RTE_BBDEV_OFFLOAD_COST
573         uint64_t start_time;
574 #else
575         RTE_SET_USED(q_stats);
576 #endif
577
578         k_idx = compute_idx(k);
579         in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
580
581         /* CRC24A (for TB) */
582         if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) &&
583                 (enc->code_block_mode == RTE_BBDEV_CODE_BLOCK)) {
584 #ifdef RTE_LIBRTE_BBDEV_DEBUG
585                 ret = is_enc_input_valid(k - 24, k_idx, in_length);
586                 if (ret != 0) {
587                         op->status |= 1 << RTE_BBDEV_DATA_ERROR;
588                         return;
589                 }
590 #endif
591
592                 crc_req.data = in;
593                 crc_req.len = k - 24;
594                 /* Check if there is a room for CRC bits if not use
595                  * the temporary buffer.
596                  */
597                 if (mbuf_append(m_in, m_in, 3) == NULL) {
598                         rte_memcpy(q->enc_in, in, (k - 24) >> 3);
599                         in = q->enc_in;
600                 } else {
601                         /* Store 3 first bytes of next CB as they will be
602                          * overwritten by CRC bytes. If it is the last CB then
603                          * there is no point to store 3 next bytes and this
604                          * if..else branch will be omitted.
605                          */
606                         first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
607                 }
608
609                 crc_resp.data = in;
610 #ifdef RTE_BBDEV_OFFLOAD_COST
611                 start_time = rte_rdtsc_precise();
612 #endif
613                 /* CRC24A generation */
614                 bblib_lte_crc24a_gen(&crc_req, &crc_resp);
615 #ifdef RTE_BBDEV_OFFLOAD_COST
616                 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
617 #endif
618         } else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
619                 /* CRC24B */
620 #ifdef RTE_LIBRTE_BBDEV_DEBUG
621                 ret = is_enc_input_valid(k - 24, k_idx, in_length);
622                 if (ret != 0) {
623                         op->status |= 1 << RTE_BBDEV_DATA_ERROR;
624                         return;
625                 }
626 #endif
627
628                 crc_req.data = in;
629                 crc_req.len = k - 24;
630                 /* Check if there is a room for CRC bits if this is the last
631                  * CB in TB. If not use temporary buffer.
632                  */
633                 if ((c - r == 1) && (mbuf_append(m_in, m_in, 3) == NULL)) {
634                         rte_memcpy(q->enc_in, in, (k - 24) >> 3);
635                         in = q->enc_in;
636                 } else if (c - r > 1) {
637                         /* Store 3 first bytes of next CB as they will be
638                          * overwritten by CRC bytes. If it is the last CB then
639                          * there is no point to store 3 next bytes and this
640                          * if..else branch will be omitted.
641                          */
642                         first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
643                 }
644
645                 crc_resp.data = in;
646 #ifdef RTE_BBDEV_OFFLOAD_COST
647                 start_time = rte_rdtsc_precise();
648 #endif
649                 /* CRC24B generation */
650                 bblib_lte_crc24b_gen(&crc_req, &crc_resp);
651 #ifdef RTE_BBDEV_OFFLOAD_COST
652                 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
653 #endif
654         }
655 #ifdef RTE_LIBRTE_BBDEV_DEBUG
656         else {
657                 ret = is_enc_input_valid(k, k_idx, in_length);
658                 if (ret != 0) {
659                         op->status |= 1 << RTE_BBDEV_DATA_ERROR;
660                         return;
661                 }
662         }
663 #endif
664
665         /* Turbo encoder */
666
667         /* Each bit layer output from turbo encoder is (k+4) bits long, i.e.
668          * input length + 4 tail bits. That's (k/8) + 1 bytes after rounding up.
669          * So dst_data's length should be 3*(k/8) + 3 bytes.
670          * In Rate-matching bypass case outputs pointers passed to encoder
671          * (out0, out1 and out2) can directly point to addresses of output from
672          * turbo_enc entity.
673          */
674         if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
675                 out0 = q->enc_out;
676                 out1 = RTE_PTR_ADD(out0, (k >> 3) + 1);
677                 out2 = RTE_PTR_ADD(out1, (k >> 3) + 1);
678         } else {
679                 out0 = (uint8_t *)mbuf_append(m_out_head, m_out,
680                                 (k >> 3) * 3 + 2);
681                 if (out0 == NULL) {
682                         op->status |= 1 << RTE_BBDEV_DATA_ERROR;
683                         rte_bbdev_log(ERR,
684                                         "Too little space in output mbuf");
685                         return;
686                 }
687                 enc->output.length += (k >> 3) * 3 + 2;
688                 /* rte_bbdev_op_data.offset can be different than the
689                  * offset of the appended bytes
690                  */
691                 out0 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
692                 out1 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
693                                 out_offset + (k >> 3) + 1);
694                 out2 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
695                                 out_offset + 2 * ((k >> 3) + 1));
696         }
697
698         turbo_req.case_id = k_idx;
699         turbo_req.input_win = in;
700         turbo_req.length = k >> 3;
701         turbo_resp.output_win_0 = out0;
702         turbo_resp.output_win_1 = out1;
703         turbo_resp.output_win_2 = out2;
704
705 #ifdef RTE_BBDEV_OFFLOAD_COST
706         start_time = rte_rdtsc_precise();
707 #endif
708         /* Turbo encoding */
709         if (bblib_turbo_encoder(&turbo_req, &turbo_resp) != 0) {
710                 op->status |= 1 << RTE_BBDEV_DRV_ERROR;
711                 rte_bbdev_log(ERR, "Turbo Encoder failed");
712                 return;
713         }
714 #ifdef RTE_BBDEV_OFFLOAD_COST
715         q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
716 #endif
717
718         /* Restore 3 first bytes of next CB if they were overwritten by CRC*/
719         if (first_3_bytes != 0)
720                 *((uint64_t *)&in[(k - 32) >> 3]) = first_3_bytes;
721
722         /* Rate-matching */
723         if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
724                 uint8_t mask_id;
725                 /* Integer round up division by 8 */
726                 uint16_t out_len = (e + 7) >> 3;
727                 /* The mask array is indexed using E%8. E is an even number so
728                  * there are only 4 possible values.
729                  */
730                 const uint8_t mask_out[] = {0xFF, 0xC0, 0xF0, 0xFC};
731
732                 /* get output data starting address */
733                 rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
734                 if (rm_out == NULL) {
735                         op->status |= 1 << RTE_BBDEV_DATA_ERROR;
736                         rte_bbdev_log(ERR,
737                                         "Too little space in output mbuf");
738                         return;
739                 }
740                 /* rte_bbdev_op_data.offset can be different than the offset
741                  * of the appended bytes
742                  */
743                 rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
744
745                 /* index of current code block */
746                 rm_req.r = r;
747                 /* total number of code block */
748                 rm_req.C = c;
749                 /* For DL - 1, UL - 0 */
750                 rm_req.direction = 1;
751                 /* According to 3ggp 36.212 Spec 5.1.4.1.2 section Nsoft, KMIMO
752                  * and MDL_HARQ are used for Ncb calculation. As Ncb is already
753                  * known we can adjust those parameters
754                  */
755                 rm_req.Nsoft = ncb * rm_req.C;
756                 rm_req.KMIMO = 1;
757                 rm_req.MDL_HARQ = 1;
758                 /* According to 3ggp 36.212 Spec 5.1.4.1.2 section Nl, Qm and G
759                  * are used for E calculation. As E is already known we can
760                  * adjust those parameters
761                  */
762                 rm_req.NL = e;
763                 rm_req.Qm = 1;
764                 rm_req.G = rm_req.NL * rm_req.Qm * rm_req.C;
765
766                 rm_req.rvidx = enc->rv_index;
767                 rm_req.Kidx = k_idx - 1;
768                 rm_req.nLen = k + 4;
769                 rm_req.tin0 = out0;
770                 rm_req.tin1 = out1;
771                 rm_req.tin2 = out2;
772                 rm_resp.output = rm_out;
773                 rm_resp.OutputLen = out_len;
774                 if (enc->op_flags & RTE_BBDEV_TURBO_RV_INDEX_BYPASS)
775                         rm_req.bypass_rvidx = 1;
776                 else
777                         rm_req.bypass_rvidx = 0;
778
779 #ifdef RTE_BBDEV_OFFLOAD_COST
780                 start_time = rte_rdtsc_precise();
781 #endif
782                 /* Rate-Matching */
783                 if (bblib_rate_match_dl(&rm_req, &rm_resp) != 0) {
784                         op->status |= 1 << RTE_BBDEV_DRV_ERROR;
785                         rte_bbdev_log(ERR, "Rate matching failed");
786                         return;
787                 }
788 #ifdef RTE_BBDEV_OFFLOAD_COST
789                 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
790 #endif
791
792                 /* SW fills an entire last byte even if E%8 != 0. Clear the
793                  * superfluous data bits for consistency with HW device.
794                  */
795                 mask_id = (e & 7) >> 1;
796                 rm_out[out_len - 1] &= mask_out[mask_id];
797                 enc->output.length += rm_resp.OutputLen;
798         } else {
799                 /* Rate matching is bypassed */
800
801                 /* Completing last byte of out0 (where 4 tail bits are stored)
802                  * by moving first 4 bits from out1
803                  */
804                 tmp_out = (uint8_t *) --out1;
805                 *tmp_out = *tmp_out | ((*(tmp_out + 1) & 0xF0) >> 4);
806                 tmp_out++;
807                 /* Shifting out1 data by 4 bits to the left */
808                 for (m = 0; m < k >> 3; ++m) {
809                         uint8_t *first = tmp_out;
810                         uint8_t second = *(tmp_out + 1);
811                         *first = (*first << 4) | ((second & 0xF0) >> 4);
812                         tmp_out++;
813                 }
814                 /* Shifting out2 data by 8 bits to the left */
815                 for (m = 0; m < (k >> 3) + 1; ++m) {
816                         *tmp_out = *(tmp_out + 1);
817                         tmp_out++;
818                 }
819                 *tmp_out = 0;
820         }
821 #else
822         RTE_SET_USED(q);
823         RTE_SET_USED(op);
824         RTE_SET_USED(r);
825         RTE_SET_USED(c);
826         RTE_SET_USED(k);
827         RTE_SET_USED(ncb);
828         RTE_SET_USED(e);
829         RTE_SET_USED(m_in);
830         RTE_SET_USED(m_out_head);
831         RTE_SET_USED(m_out);
832         RTE_SET_USED(in_offset);
833         RTE_SET_USED(out_offset);
834         RTE_SET_USED(in_length);
835         RTE_SET_USED(q_stats);
836 #endif
837 }
838
839
/*
 * Encode one LDPC code block in software.
 *
 * Steps: copy the payload into the queue's encoder input buffer (optionally
 * generating a CRC behind it), run the LDPC encoder, rebuild the
 * systematic + parity stream, rate-match it and copy the result into the
 * output mbuf.
 *
 * q              - queue providing the scratch buffers used here (enc_in,
 *                  enc_out, adapter_output, deint_output)
 * op             - encode operation; op->status and the LDPC output length
 *                  are updated
 * e              - rate-matched output size in bits
 * m_in           - mbuf holding the code block payload at in_offset
 * m_out_head     - head mbuf handed to mbuf_append()
 * m_out          - mbuf the encoded data is written to, at out_offset
 * seg_total_left - unused
 * q_stats        - receives the elapsed cycles when RTE_BBDEV_OFFLOAD_COST
 *                  is defined, otherwise unused
 *
 * On failure the corresponding bit is set in op->status and the function
 * returns early. When RTE_BBDEV_SDK_AVX512 is not defined nothing is done.
 */
static inline void
process_ldpc_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
		struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
		uint16_t seg_total_left, struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX512
	RTE_SET_USED(seg_total_left);
	struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
	struct bblib_ldpc_encoder_5gnr_request ldpc_req;
	struct bblib_ldpc_encoder_5gnr_response ldpc_resp;
	struct bblib_LDPC_ratematch_5gnr_request rm_req;
	struct bblib_LDPC_ratematch_5gnr_response rm_resp;
	struct bblib_crc_request crc_req;
	struct bblib_crc_response crc_resp;
	uint8_t *src, *dst;
	uint16_t sys_bits, punctured_bits, parity_start, out_bytes;
	uint16_t crc_bits = 0;
	uint16_t K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
	uint16_t payload_bits = K - enc->n_filler;
	uint16_t payload_bytes = (payload_bits + 7) >> 3;

#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time = rte_rdtsc_precise();
#else
	RTE_SET_USED(q_stats);
#endif

	src = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);

	/* Clear the encoder input from three bytes before the end of the
	 * payload up to the end of the K-bit block, which explicitly masks
	 * the filler bits.
	 */
	memset(q->enc_in + (payload_bytes - 3), 0,
			((K + 7) >> 3) - (payload_bytes - 3));

	/* Select the CRC width; 24A takes precedence over 24B, then CRC16 */
	if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH)
		crc_bits = 24;
	else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH)
		crc_bits = 24;
	else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_16_ATTACH)
		crc_bits = 16;

	/* The input mbuf carries the payload without the CRC bytes */
	rte_memcpy(q->enc_in, src, payload_bytes - (crc_bits >> 3));

	/* CRC generation, response written to the encoder input buffer */
	if (crc_bits != 0) {
		crc_req.data = src;
		crc_req.len = payload_bits - crc_bits;
		crc_resp.data = q->enc_in;
		if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH)
			bblib_lte_crc24a_gen(&crc_req, &crc_resp);
		else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH)
			bblib_lte_crc24b_gen(&crc_req, &crc_resp);
		else
			bblib_lte_crc16_gen(&crc_req, &crc_resp);
	}

	/* LDPC encoding of a single code block, number of rows set to the
	 * maximum for the selected base graph
	 */
	ldpc_req.Zc = enc->z_c;
	ldpc_req.baseGraph = enc->basegraph;
	ldpc_req.nRows = ldpc_req.baseGraph == 1 ? 46 : 42;
	ldpc_req.numberCodeblocks = 1;
	ldpc_req.input[0] = (int8_t *) q->enc_in;
	ldpc_resp.output[0] = (int8_t *) q->enc_out;

	bblib_bit_reverse(ldpc_req.input[0], payload_bytes << 3);

	if (bblib_ldpc_encoder_5gnr(&ldpc_req, &ldpc_resp) != 0) {
		rte_bbdev_log(ERR, "LDPC Encoder failed");
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		return;
	}

	/*
	 * Systematic + Parity: recreate the stream with filler bits in
	 * adapter_output. The first 2 * Zc systematic bits are skipped and
	 * the parity bits are placed right after the remaining ones.
	 */
	sys_bits = (ldpc_req.baseGraph == 1 ? 22 : 10) * ldpc_req.Zc;
	punctured_bits = 2 * ldpc_req.Zc;
	parity_start = sys_bits - punctured_bits;
	ippsCopyBE_1u(((uint8_t *) ldpc_req.input[0]) + (punctured_bits / 8),
			punctured_bits % 8, q->adapter_output, 0,
			parity_start);
	ippsCopyBE_1u(q->enc_out, 0,
			q->adapter_output + (parity_start / 8),
			parity_start % 8, ldpc_req.nRows * ldpc_req.Zc);

	/* Reserve room for the rate-matched output, rounded up to bytes */
	out_bytes = (e + 7) >> 3;
	dst = (uint8_t *)mbuf_append(m_out_head, m_out, out_bytes);
	if (dst == NULL) {
		rte_bbdev_log(ERR,
				"Too little space in output mbuf");
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}
	/*
	 * rte_bbdev_op_data.offset can be different than the offset
	 * of the appended bytes, so the write address is derived from it
	 */
	dst = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);

	/* Rate matching from adapter_output into deint_output */
	rm_req.input = q->adapter_output;
	rm_req.baseGraph = enc->basegraph;
	rm_req.Zc = enc->z_c;
	rm_req.Ncb = enc->n_cb;
	rm_req.Qm = enc->q_m;
	rm_req.E = e;
	rm_req.rvidx = enc->rv_index;
	rm_req.nLen = enc->n_filler;
	rm_req.nullIndex = parity_start - enc->n_filler;
	rm_resp.output = q->deint_output;

	if (bblib_LDPC_ratematch_5gnr(&rm_req, &rm_resp) != 0) {
		rte_bbdev_log(ERR, "Rate matching failed");
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		return;
	}

	/* RM SDK may provide non zero bits on last byte: keep only the
	 * e % 8 valid bits when e is not a whole number of bytes
	 */
	const uint32_t tail_bits = e & 7;

	if (tail_bits != 0)
		q->deint_output[out_bytes - 1] &= (1 << tail_bits) - 1;

	bblib_bit_reverse((int8_t *) q->deint_output, out_bytes << 3);

	rte_memcpy(dst, q->deint_output, out_bytes);
	enc->output.length += out_bytes;

#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(e);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(seg_total_left);
	RTE_SET_USED(q_stats);
#endif
}
981
982 static inline void
983 enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
984                 struct rte_bbdev_stats *queue_stats)
985 {
986         uint8_t c, r, crc24_bits = 0;
987         uint16_t k, ncb;
988         uint32_t e;
989         struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
990         uint16_t in_offset = enc->input.offset;
991         uint16_t out_offset = enc->output.offset;
992         struct rte_mbuf *m_in = enc->input.data;
993         struct rte_mbuf *m_out = enc->output.data;
994         struct rte_mbuf *m_out_head = enc->output.data;
995         uint32_t in_length, mbuf_total_left = enc->input.length;
996         uint16_t seg_total_left;
997
998         /* Clear op status */
999         op->status = 0;
1000
1001         if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) {
1002                 rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d",
1003                                 mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE);
1004                 op->status = 1 << RTE_BBDEV_DATA_ERROR;
1005                 return;
1006         }
1007
1008         if (m_in == NULL || m_out == NULL) {
1009                 rte_bbdev_log(ERR, "Invalid mbuf pointer");
1010                 op->status = 1 << RTE_BBDEV_DATA_ERROR;
1011                 return;
1012         }
1013
1014         if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) ||
1015                 (enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH))
1016                 crc24_bits = 24;
1017
1018         if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1019                 c = enc->tb_params.c;
1020                 r = enc->tb_params.r;
1021         } else {/* For Code Block mode */
1022                 c = 1;
1023                 r = 0;
1024         }
1025
1026         while (mbuf_total_left > 0 && r < c) {
1027
1028                 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
1029
1030                 if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1031                         k = (r < enc->tb_params.c_neg) ?
1032                                 enc->tb_params.k_neg : enc->tb_params.k_pos;
1033                         ncb = (r < enc->tb_params.c_neg) ?
1034                                 enc->tb_params.ncb_neg : enc->tb_params.ncb_pos;
1035                         e = (r < enc->tb_params.cab) ?
1036                                 enc->tb_params.ea : enc->tb_params.eb;
1037                 } else {
1038                         k = enc->cb_params.k;
1039                         ncb = enc->cb_params.ncb;
1040                         e = enc->cb_params.e;
1041                 }
1042
1043                 process_enc_cb(q, op, r, c, k, ncb, e, m_in, m_out_head,
1044                                 m_out, in_offset, out_offset, seg_total_left,
1045                                 queue_stats);
1046                 /* Update total_left */
1047                 in_length = ((k - crc24_bits) >> 3);
1048                 mbuf_total_left -= in_length;
1049                 /* Update offsets for next CBs (if exist) */
1050                 in_offset += (k - crc24_bits) >> 3;
1051                 if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH)
1052                         out_offset += e >> 3;
1053                 else
1054                         out_offset += (k >> 3) * 3 + 2;
1055
1056                 /* Update offsets */
1057                 if (seg_total_left == in_length) {
1058                         /* Go to the next mbuf */
1059                         m_in = m_in->next;
1060                         m_out = m_out->next;
1061                         in_offset = 0;
1062                         out_offset = 0;
1063                 }
1064                 r++;
1065         }
1066
1067         /* check if all input data was processed */
1068         if (mbuf_total_left != 0) {
1069                 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
1070                 rte_bbdev_log(ERR,
1071                                 "Mismatch between mbuf length and included CBs sizes");
1072         }
1073 }
1074
1075
1076 static inline void
1077 enqueue_ldpc_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
1078                 struct rte_bbdev_stats *queue_stats)
1079 {
1080         uint8_t c, r, crc24_bits = 0;
1081         uint32_t e;
1082         struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
1083         uint16_t in_offset = enc->input.offset;
1084         uint16_t out_offset = enc->output.offset;
1085         struct rte_mbuf *m_in = enc->input.data;
1086         struct rte_mbuf *m_out = enc->output.data;
1087         struct rte_mbuf *m_out_head = enc->output.data;
1088         uint32_t in_length, mbuf_total_left = enc->input.length;
1089
1090         uint16_t seg_total_left;
1091
1092         /* Clear op status */
1093         op->status = 0;
1094
1095         if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) {
1096                 rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d",
1097                                 mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE);
1098                 op->status = 1 << RTE_BBDEV_DATA_ERROR;
1099                 return;
1100         }
1101
1102         if (m_in == NULL || m_out == NULL) {
1103                 rte_bbdev_log(ERR, "Invalid mbuf pointer");
1104                 op->status = 1 << RTE_BBDEV_DATA_ERROR;
1105                 return;
1106         }
1107
1108         if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) ||
1109                 (enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH))
1110                 crc24_bits = 24;
1111
1112         if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1113                 c = enc->tb_params.c;
1114                 r = enc->tb_params.r;
1115         } else { /* For Code Block mode */
1116                 c = 1;
1117                 r = 0;
1118         }
1119
1120         while (mbuf_total_left > 0 && r < c) {
1121
1122                 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
1123
1124                 if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1125                         e = (r < enc->tb_params.cab) ?
1126                                 enc->tb_params.ea : enc->tb_params.eb;
1127                 } else {
1128                         e = enc->cb_params.e;
1129                 }
1130
1131                 process_ldpc_enc_cb(q, op, e, m_in, m_out_head,
1132                                 m_out, in_offset, out_offset, seg_total_left,
1133                                 queue_stats);
1134                 /* Update total_left */
1135                 in_length = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
1136                 in_length = ((in_length - crc24_bits - enc->n_filler) >> 3);
1137                 mbuf_total_left -= in_length;
1138                 /* Update offsets for next CBs (if exist) */
1139                 in_offset += in_length;
1140                 out_offset += (e + 7) >> 3;
1141
1142                 /* Update offsets */
1143                 if (seg_total_left == in_length) {
1144                         /* Go to the next mbuf */
1145                         m_in = m_in->next;
1146                         m_out = m_out->next;
1147                         in_offset = 0;
1148                         out_offset = 0;
1149                 }
1150                 r++;
1151         }
1152
1153         /* check if all input data was processed */
1154         if (mbuf_total_left != 0) {
1155                 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
1156                 rte_bbdev_log(ERR,
1157                                 "Mismatch between mbuf length and included CBs sizes %d",
1158                                 mbuf_total_left);
1159         }
1160 }
1161
1162 static inline uint16_t
1163 enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops,
1164                 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
1165 {
1166         uint16_t i;
1167 #ifdef RTE_BBDEV_OFFLOAD_COST
1168         queue_stats->acc_offload_cycles = 0;
1169 #endif
1170
1171         for (i = 0; i < nb_ops; ++i)
1172                 enqueue_enc_one_op(q, ops[i], queue_stats);
1173
1174         return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
1175                         NULL);
1176 }
1177
1178 static inline uint16_t
1179 enqueue_ldpc_enc_all_ops(struct turbo_sw_queue *q,
1180                 struct rte_bbdev_enc_op **ops,
1181                 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
1182 {
1183         uint16_t i;
1184 #ifdef RTE_BBDEV_OFFLOAD_COST
1185         queue_stats->acc_offload_cycles = 0;
1186 #endif
1187
1188         for (i = 0; i < nb_ops; ++i)
1189                 enqueue_ldpc_enc_one_op(q, ops[i], queue_stats);
1190
1191         return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
1192                         NULL);
1193 }
1194
#ifdef RTE_BBDEV_SDK_AVX2
/*
 * Copy three streams of (k + 4) bytes from in to out, shifting each one.
 *
 * The streams are read from in at offsets 0, ncb / 3 and 2 * (ncb / 3).
 * They are written to out with a spacing of (ncb / 3 + 64) bytes, each
 * preceded by (ncb / 3) - (k + 4) bytes of padding; the third stream is
 * placed one byte earlier than that pattern would give.
 */
static inline void
move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k,
		uint16_t ncb)
{
	const uint16_t stream_len = k + 4;
	const uint16_t kpi = ncb / 3;
	const uint16_t lead_pad = kpi - stream_len;

	/* First stream */
	rte_memcpy(out + lead_pad, in, stream_len);
	/* Second stream */
	rte_memcpy(out + (lead_pad + kpi + 64), in + kpi, stream_len);
	/* Third stream, one byte earlier than the regular spacing */
	rte_memcpy(out + ((lead_pad - 1) + 2 * (kpi + 64)), in + (2 * kpi),
			stream_len);
}
#endif
1209
/*
 * Decode one Turbo code block in software.
 *
 * Steps: sub-block de-interleaving (or move_padding_bytes() when the
 * RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE flag is not set), adaptation of the
 * LLRs for the decoder, then Turbo decoding straight into the output mbuf.
 *
 * q             - queue providing the scratch buffers used here
 *                 (deint_output, adapter_output, ag, code_block)
 * op            - decode operation; op->status, the hard output length and
 *                 iter_count are updated
 * c             - value forwarded to the decoder request as "c", incremented
 *                 by one when check_crc_24b is set
 * k             - code block size, used to look up k_idx and to size buffers
 * kw            - passed as ncb to the de-interleaver / move_padding_bytes()
 * m_in          - mbuf holding the input at in_offset
 * m_out_head    - head mbuf handed to mbuf_append()
 * m_out         - mbuf the hard output is written to, at out_offset
 * check_crc_24b - see c
 * crc24_overlap - number of bits subtracted from k when reserving output room
 * in_length     - only used for input validation in debug builds
 * q_stats       - accumulates SDK call cycles when RTE_BBDEV_OFFLOAD_COST is
 *                 defined, otherwise unused
 *
 * On failure the corresponding bit is set in op->status and the function
 * returns early. When RTE_BBDEV_SDK_AVX2 is not defined nothing is done.
 */
static inline void
process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		uint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in,
		struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
		uint16_t in_offset, uint16_t out_offset, bool check_crc_24b,
		uint16_t crc24_overlap, uint16_t in_length,
		struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	int ret;
#else
	RTE_SET_USED(in_length);
#endif
	struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
	struct bblib_turbo_adapter_ul_request adapter_req;
	struct bblib_turbo_adapter_ul_response adapter_resp;
	struct bblib_turbo_decoder_request turbo_req;
	struct bblib_turbo_decoder_response turbo_resp;
	uint8_t *llr_in, *hard_out;
	int32_t k_idx;
	int32_t iterations;
	int32_t ncb, ncb_without_null;
#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time;
#else
	RTE_SET_USED(q_stats);
#endif

	k_idx = compute_idx(k);

#ifdef RTE_LIBRTE_BBDEV_DEBUG
	ret = is_dec_input_valid(k_idx, kw, in_length);
	if (ret != 0) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}
#endif

	llr_in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
	ncb = kw;
	ncb_without_null = (k + 4) * 3;

	/* Fill q->deint_output, either through the SDK de-interleaver or by
	 * simply relocating the three input streams
	 */
	if (!check_bit(dec->op_flags, RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE)) {
		move_padding_bytes(llr_in, q->deint_output, k, ncb);
	} else {
		struct bblib_deinterleave_ul_request deint_req;
		struct bblib_deinterleave_ul_response deint_resp;

		deint_req.circ_buffer = BBLIB_FULL_CIRCULAR_BUFFER;
		deint_req.pharqbuffer = llr_in;
		deint_req.ncb = ncb;
		deint_resp.pinteleavebuffer = q->deint_output;

#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* Sub-block De-Interleaving */
		bblib_deinterleave_ul(&deint_req, &deint_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	}

	/* One of the two LLR input formats must have been requested */
	if (!(dec->op_flags & (RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN |
			RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN))) {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "LLR format wasn't specified");
		return;
	}
	/* The positive format takes precedence when both flags are set */
	adapter_req.isinverted =
		(dec->op_flags & RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN) ? 1 : 0;

	adapter_req.ncb = ncb_without_null;
	adapter_req.pinteleavebuffer = q->deint_output;
	adapter_resp.pharqout = q->adapter_output;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo decode adaptation */
	bblib_turbo_adapter_ul(&adapter_req, &adapter_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

	/* Reserve room for the hard output in the output mbuf */
	hard_out = (uint8_t *)mbuf_append(m_out_head, m_out,
			((k - crc24_overlap) >> 3));
	if (hard_out == NULL) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR, "Too little space in output mbuf");
		return;
	}
	/* rte_bbdev_op_data.offset can be different than the offset of the
	 * appended bytes, so the write address is derived from it
	 */
	hard_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);

	turbo_req.c = check_crc_24b ? c + 1 : c;
	turbo_req.input = (int8_t *)q->adapter_output;
	turbo_req.k = k;
	turbo_req.k_idx = k_idx;
	turbo_req.max_iter_num = dec->iter_max;
	turbo_req.early_term_disable = !check_bit(dec->op_flags,
			RTE_BBDEV_TURBO_EARLY_TERMINATION);
	turbo_resp.ag_buf = q->ag;
	turbo_resp.cb_buf = q->code_block;
	turbo_resp.output = hard_out;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo decode */
	iterations = bblib_turbo_decoder(&turbo_req, &turbo_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	/* The output length is updated whether or not decoding succeeded */
	dec->hard_output.length += (k >> 3);

	if (iterations <= 0) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR, "Turbo Decoder failed");
		return;
	}

	/* Temporary solution for returned iter_count from SDK */
	iterations = (iterations - 1) >> 1;
	dec->iter_count = RTE_MAX(iterations, dec->iter_count);
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(c);
	RTE_SET_USED(k);
	RTE_SET_USED(kw);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(check_crc_24b);
	RTE_SET_USED(crc24_overlap);
	RTE_SET_USED(in_length);
	RTE_SET_USED(q_stats);
#endif
}
1359
1360 static inline void
1361 process_ldpc_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
1362                 uint8_t c, uint16_t out_length, uint32_t e,
1363                 struct rte_mbuf *m_in,
1364                 struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
1365                 struct rte_mbuf *m_harq_in,
1366                 struct rte_mbuf *m_harq_out_head, struct rte_mbuf *m_harq_out,
1367                 uint16_t in_offset, uint16_t out_offset,
1368                 uint16_t harq_in_offset, uint16_t harq_out_offset,
1369                 bool check_crc_24b,
1370                 uint16_t crc24_overlap, uint16_t in_length,
1371                 struct rte_bbdev_stats *q_stats)
1372 {
1373 #ifdef RTE_BBDEV_SDK_AVX512
1374         RTE_SET_USED(in_length);
1375         RTE_SET_USED(c);
1376         uint8_t *in, *out, *harq_in, *harq_out, *adapter_input;
1377         struct bblib_rate_dematching_5gnr_request derm_req;
1378         struct bblib_rate_dematching_5gnr_response derm_resp;
1379         struct bblib_ldpc_decoder_5gnr_request dec_req;
1380         struct bblib_ldpc_decoder_5gnr_response dec_resp;
1381         struct bblib_crc_request crc_req;
1382         struct bblib_crc_response crc_resp;
1383         struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
1384         uint16_t K, parity_offset, sys_cols, outLenWithCrc;
1385         int16_t deRmOutSize, numRows;
1386
1387         /* Compute some LDPC BG lengths */
1388         outLenWithCrc = out_length + (crc24_overlap >> 3);
1389         sys_cols = (dec->basegraph == 1) ? 22 : 10;
1390         K = sys_cols * dec->z_c;
1391         parity_offset = K - 2 * dec->z_c;
1392
1393 #ifdef RTE_BBDEV_OFFLOAD_COST
1394         uint64_t start_time = rte_rdtsc_precise();
1395 #else
1396         RTE_SET_USED(q_stats);
1397 #endif
1398
1399         in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
1400
1401         if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
1402                 /**
1403                  *  Single contiguous block from the first LLR of the
1404                  *  circular buffer.
1405                  */
1406                 harq_in = NULL;
1407                 if (m_harq_in != NULL)
1408                         harq_in = rte_pktmbuf_mtod_offset(m_harq_in,
1409                                 uint8_t *, harq_in_offset);
1410                 if (harq_in == NULL) {
1411                         op->status |= 1 << RTE_BBDEV_DATA_ERROR;
1412                         rte_bbdev_log(ERR, "No space in harq input mbuf");
1413                         return;
1414                 }
1415                 uint16_t harq_in_length = RTE_MIN(
1416                                 dec->harq_combined_input.length,
1417                                 (uint32_t) dec->n_cb);
1418                 memset(q->ag + harq_in_length, 0,
1419                                 dec->n_cb - harq_in_length);
1420                 rte_memcpy(q->ag, harq_in, harq_in_length);
1421         }
1422
1423         derm_req.p_in = (int8_t *) in;
1424         derm_req.p_harq = q->ag; /* This doesn't include the filler bits */
1425         derm_req.base_graph = dec->basegraph;
1426         derm_req.zc = dec->z_c;
1427         derm_req.ncb = dec->n_cb;
1428         derm_req.e = e;
1429         derm_req.k0 = 0; /* Actual output from SDK */
1430         derm_req.isretx = check_bit(dec->op_flags,
1431                         RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
1432         derm_req.rvid = dec->rv_index;
1433         derm_req.modulation_order = dec->q_m;
1434         derm_req.start_null_index = parity_offset - dec->n_filler;
1435         derm_req.num_of_null = dec->n_filler;
1436
1437         bblib_rate_dematching_5gnr(&derm_req, &derm_resp);
1438
1439         /* Compute RM out size and number of rows */
1440         deRmOutSize = RTE_MIN(
1441                         derm_req.k0 + derm_req.e -
1442                         ((derm_req.k0 < derm_req.start_null_index) ?
1443                                         0 : dec->n_filler),
1444                         dec->n_cb - dec->n_filler);
1445         if (m_harq_in != NULL)
1446                 deRmOutSize = RTE_MAX(deRmOutSize,
1447                                 RTE_MIN(dec->n_cb - dec->n_filler,
1448                                                 m_harq_in->data_len));
1449         numRows = ((deRmOutSize + dec->n_filler + dec->z_c - 1) / dec->z_c)
1450                         - sys_cols + 2;
1451         numRows = RTE_MAX(4, numRows);
1452
1453         /* get output data starting address */
1454         out = (uint8_t *)mbuf_append(m_out_head, m_out, out_length);
1455         if (out == NULL) {
1456                 op->status |= 1 << RTE_BBDEV_DATA_ERROR;
1457                 rte_bbdev_log(ERR,
1458                                 "Too little space in LDPC decoder output mbuf");
1459                 return;
1460         }
1461
1462         /* rte_bbdev_op_data.offset can be different than the offset
1463          * of the appended bytes
1464          */
1465         out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
1466         adapter_input = q->enc_out;
1467
1468         dec_req.Zc = dec->z_c;
1469         dec_req.baseGraph = dec->basegraph;
1470         dec_req.nRows = numRows;
1471         dec_req.numChannelLlrs = deRmOutSize;
1472         dec_req.varNodes = derm_req.p_harq;
1473         dec_req.numFillerBits = dec->n_filler;
1474         dec_req.maxIterations = dec->iter_max;
1475         dec_req.enableEarlyTermination = check_bit(dec->op_flags,
1476                         RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
1477         dec_resp.varNodes = (int16_t *) q->adapter_output;
1478         dec_resp.compactedMessageBytes = q->enc_out;
1479
1480         bblib_ldpc_decoder_5gnr(&dec_req, &dec_resp);
1481
1482         dec->iter_count = RTE_MAX(dec_resp.iterationAtTermination,
1483                         dec->iter_count);
1484         if (!dec_resp.parityPassedAtTermination)
1485                 op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR;
1486
1487         bblib_bit_reverse((int8_t *) q->enc_out, outLenWithCrc << 3);
1488
1489         if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK) ||
1490                         check_bit(dec->op_flags,
1491                                         RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK)) {
1492                 crc_req.data = adapter_input;
1493                 crc_req.len  = K - dec->n_filler - 24;
1494                 crc_resp.check_passed = false;
1495                 crc_resp.data = adapter_input;
1496                 if (check_crc_24b)
1497                         bblib_lte_crc24b_check(&crc_req, &crc_resp);
1498                 else
1499                         bblib_lte_crc24a_check(&crc_req, &crc_resp);
1500                 if (!crc_resp.check_passed)
1501                         op->status |= 1 << RTE_BBDEV_CRC_ERROR;
1502         } else if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK)) {
1503                 crc_req.data = adapter_input;
1504                 crc_req.len  = K - dec->n_filler - 16;
1505                 crc_resp.check_passed = false;
1506                 crc_resp.data = adapter_input;
1507                 bblib_lte_crc16_check(&crc_req, &crc_resp);
1508                 if (!crc_resp.check_passed)
1509                         op->status |= 1 << RTE_BBDEV_CRC_ERROR;
1510         }
1511
1512 #ifdef RTE_BBDEV_OFFLOAD_COST
1513         q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
1514 #endif
1515         if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
1516                 harq_out = NULL;
1517                 if (m_harq_out != NULL) {
1518                         /* Initialize HARQ data length since we overwrite */
1519                         m_harq_out->data_len = 0;
1520                         /* Check there is enough space
1521                          * in the HARQ outbound buffer
1522                          */
1523                         harq_out = (uint8_t *)mbuf_append(m_harq_out_head,
1524                                         m_harq_out, deRmOutSize);
1525                 }
1526                 if (harq_out == NULL) {
1527                         op->status |= 1 << RTE_BBDEV_DATA_ERROR;
1528                         rte_bbdev_log(ERR, "No space in HARQ output mbuf");
1529                         return;
1530                 }
1531                 /* get output data starting address and overwrite the data */
1532                 harq_out = rte_pktmbuf_mtod_offset(m_harq_out, uint8_t *,
1533                                 harq_out_offset);
1534                 rte_memcpy(harq_out, derm_req.p_harq, deRmOutSize);
1535                 dec->harq_combined_output.length += deRmOutSize;
1536         }
1537
1538         rte_memcpy(out, adapter_input, out_length);
1539         dec->hard_output.length += out_length;
1540 #else
1541         RTE_SET_USED(q);
1542         RTE_SET_USED(op);
1543         RTE_SET_USED(c);
1544         RTE_SET_USED(out_length);
1545         RTE_SET_USED(e);
1546         RTE_SET_USED(m_in);
1547         RTE_SET_USED(m_out_head);
1548         RTE_SET_USED(m_out);
1549         RTE_SET_USED(m_harq_in);
1550         RTE_SET_USED(m_harq_out_head);
1551         RTE_SET_USED(m_harq_out);
1552         RTE_SET_USED(harq_in_offset);
1553         RTE_SET_USED(harq_out_offset);
1554         RTE_SET_USED(in_offset);
1555         RTE_SET_USED(out_offset);
1556         RTE_SET_USED(check_crc_24b);
1557         RTE_SET_USED(crc24_overlap);
1558         RTE_SET_USED(in_length);
1559         RTE_SET_USED(q_stats);
1560 #endif
1561 }
1562
1563
1564 static inline void
1565 enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
1566                 struct rte_bbdev_stats *queue_stats)
1567 {
1568         uint8_t c, r = 0;
1569         uint16_t kw, k = 0;
1570         uint16_t crc24_overlap = 0;
1571         struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
1572         struct rte_mbuf *m_in = dec->input.data;
1573         struct rte_mbuf *m_out = dec->hard_output.data;
1574         struct rte_mbuf *m_out_head = dec->hard_output.data;
1575         uint16_t in_offset = dec->input.offset;
1576         uint16_t out_offset = dec->hard_output.offset;
1577         uint32_t mbuf_total_left = dec->input.length;
1578         uint16_t seg_total_left;
1579
1580         /* Clear op status */
1581         op->status = 0;
1582
1583         if (m_in == NULL || m_out == NULL) {
1584                 rte_bbdev_log(ERR, "Invalid mbuf pointer");
1585                 op->status = 1 << RTE_BBDEV_DATA_ERROR;
1586                 return;
1587         }
1588
1589         if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1590                 c = dec->tb_params.c;
1591         } else { /* For Code Block mode */
1592                 k = dec->cb_params.k;
1593                 c = 1;
1594         }
1595
1596         if ((c > 1) && !check_bit(dec->op_flags,
1597                 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
1598                 crc24_overlap = 24;
1599
1600         while (mbuf_total_left > 0) {
1601                 if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
1602                         k = (r < dec->tb_params.c_neg) ?
1603                                 dec->tb_params.k_neg : dec->tb_params.k_pos;
1604
1605                 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
1606
1607                 /* Calculates circular buffer size (Kw).
1608                  * According to 3gpp 36.212 section 5.1.4.2
1609                  *   Kw = 3 * Kpi,
1610                  * where:
1611                  *   Kpi = nCol * nRow
1612                  * where nCol is 32 and nRow can be calculated from:
1613                  *   D =< nCol * nRow
1614                  * where D is the size of each output from turbo encoder block
1615                  * (k + 4).
1616                  */
1617                 kw = RTE_ALIGN_CEIL(k + 4, RTE_BBDEV_TURBO_C_SUBBLOCK) * 3;
1618
1619                 process_dec_cb(q, op, c, k, kw, m_in, m_out_head, m_out,
1620                                 in_offset, out_offset, check_bit(dec->op_flags,
1621                                 RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap,
1622                                 seg_total_left, queue_stats);
1623
1624                 /* To keep CRC24 attached to end of Code block, use
1625                  * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it
1626                  * removed by default once verified.
1627                  */
1628
1629                 mbuf_total_left -= kw;
1630
1631                 /* Update offsets */
1632                 if (seg_total_left == kw) {
1633                         /* Go to the next mbuf */
1634                         m_in = m_in->next;
1635                         m_out = m_out->next;
1636                         in_offset = 0;
1637                         out_offset = 0;
1638                 } else {
1639                         /* Update offsets for next CBs (if exist) */
1640                         in_offset += kw;
1641                         out_offset += ((k - crc24_overlap) >> 3);
1642                 }
1643                 r++;
1644         }
1645 }
1646
1647 static inline void
1648 enqueue_ldpc_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
1649                 struct rte_bbdev_stats *queue_stats)
1650 {
1651         uint8_t c, r = 0;
1652         uint32_t e;
1653         uint16_t out_length, crc24_overlap = 0;
1654         struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
1655         struct rte_mbuf *m_in = dec->input.data;
1656         struct rte_mbuf *m_harq_in = dec->harq_combined_input.data;
1657         struct rte_mbuf *m_harq_out = dec->harq_combined_output.data;
1658         struct rte_mbuf *m_harq_out_head = dec->harq_combined_output.data;
1659         struct rte_mbuf *m_out = dec->hard_output.data;
1660         struct rte_mbuf *m_out_head = dec->hard_output.data;
1661         uint16_t in_offset = dec->input.offset;
1662         uint16_t harq_in_offset = dec->harq_combined_input.offset;
1663         uint16_t harq_out_offset = dec->harq_combined_output.offset;
1664         uint16_t out_offset = dec->hard_output.offset;
1665         uint32_t mbuf_total_left = dec->input.length;
1666         uint16_t seg_total_left;
1667
1668         /* Clear op status */
1669         op->status = 0;
1670
1671         if (m_in == NULL || m_out == NULL) {
1672                 rte_bbdev_log(ERR, "Invalid mbuf pointer");
1673                 op->status = 1 << RTE_BBDEV_DATA_ERROR;
1674                 return;
1675         }
1676
1677         if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1678                 c = dec->tb_params.c;
1679                 e = dec->tb_params.ea;
1680         } else { /* For Code Block mode */
1681                 c = 1;
1682                 e = dec->cb_params.e;
1683         }
1684
1685         if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
1686                 crc24_overlap = 24;
1687
1688         out_length = (dec->basegraph == 1 ? 22 : 10) * dec->z_c; /* K */
1689         out_length = ((out_length - crc24_overlap - dec->n_filler) >> 3);
1690
1691         while (mbuf_total_left > 0) {
1692                 if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
1693                         e = (r < dec->tb_params.cab) ?
1694                                 dec->tb_params.ea : dec->tb_params.eb;
1695                 /* Special case handling when overusing mbuf */
1696                 if (e < RTE_BBDEV_LDPC_E_MAX_MBUF)
1697                         seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
1698                 else
1699                         seg_total_left = e;
1700
1701                 process_ldpc_dec_cb(q, op, c, out_length, e,
1702                                 m_in, m_out_head, m_out,
1703                                 m_harq_in, m_harq_out_head, m_harq_out,
1704                                 in_offset, out_offset, harq_in_offset,
1705                                 harq_out_offset,
1706                                 check_bit(dec->op_flags,
1707                                 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK),
1708                                 crc24_overlap,
1709                                 seg_total_left, queue_stats);
1710
1711                 /* To keep CRC24 attached to end of Code block, use
1712                  * RTE_BBDEV_LDPC_DEC_TB_CRC_24B_KEEP flag as it
1713                  * removed by default once verified.
1714                  */
1715
1716                 mbuf_total_left -= e;
1717
1718                 /* Update offsets */
1719                 if (seg_total_left == e) {
1720                         /* Go to the next mbuf */
1721                         m_in = m_in->next;
1722                         m_out = m_out->next;
1723                         if (m_harq_in != NULL)
1724                                 m_harq_in = m_harq_in->next;
1725                         if (m_harq_out != NULL)
1726                                 m_harq_out = m_harq_out->next;
1727                         in_offset = 0;
1728                         out_offset = 0;
1729                         harq_in_offset = 0;
1730                         harq_out_offset = 0;
1731                 } else {
1732                         /* Update offsets for next CBs (if exist) */
1733                         in_offset += e;
1734                         out_offset += out_length;
1735                 }
1736                 r++;
1737         }
1738 }
1739
1740 static inline uint16_t
1741 enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops,
1742                 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
1743 {
1744         uint16_t i;
1745 #ifdef RTE_BBDEV_OFFLOAD_COST
1746         queue_stats->acc_offload_cycles = 0;
1747 #endif
1748
1749         for (i = 0; i < nb_ops; ++i)
1750                 enqueue_dec_one_op(q, ops[i], queue_stats);
1751
1752         return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
1753                         NULL);
1754 }
1755
1756 static inline uint16_t
1757 enqueue_ldpc_dec_all_ops(struct turbo_sw_queue *q,
1758                 struct rte_bbdev_dec_op **ops,
1759                 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
1760 {
1761         uint16_t i;
1762 #ifdef RTE_BBDEV_OFFLOAD_COST
1763         queue_stats->acc_offload_cycles = 0;
1764 #endif
1765
1766         for (i = 0; i < nb_ops; ++i)
1767                 enqueue_ldpc_dec_one_op(q, ops[i], queue_stats);
1768
1769         return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
1770                         NULL);
1771 }
1772
1773 /* Enqueue burst */
1774 static uint16_t
1775 enqueue_enc_ops(struct rte_bbdev_queue_data *q_data,
1776                 struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
1777 {
1778         void *queue = q_data->queue_private;
1779         struct turbo_sw_queue *q = queue;
1780         uint16_t nb_enqueued = 0;
1781
1782         nb_enqueued = enqueue_enc_all_ops(q, ops, nb_ops, &q_data->queue_stats);
1783
1784         q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
1785         q_data->queue_stats.enqueued_count += nb_enqueued;
1786
1787         return nb_enqueued;
1788 }
1789
1790 /* Enqueue burst */
1791 static uint16_t
1792 enqueue_ldpc_enc_ops(struct rte_bbdev_queue_data *q_data,
1793                 struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
1794 {
1795         void *queue = q_data->queue_private;
1796         struct turbo_sw_queue *q = queue;
1797         uint16_t nb_enqueued = 0;
1798
1799         nb_enqueued = enqueue_ldpc_enc_all_ops(
1800                         q, ops, nb_ops, &q_data->queue_stats);
1801
1802         q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
1803         q_data->queue_stats.enqueued_count += nb_enqueued;
1804
1805         return nb_enqueued;
1806 }
1807
1808 /* Enqueue burst */
1809 static uint16_t
1810 enqueue_dec_ops(struct rte_bbdev_queue_data *q_data,
1811                  struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
1812 {
1813         void *queue = q_data->queue_private;
1814         struct turbo_sw_queue *q = queue;
1815         uint16_t nb_enqueued = 0;
1816
1817         nb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops, &q_data->queue_stats);
1818
1819         q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
1820         q_data->queue_stats.enqueued_count += nb_enqueued;
1821
1822         return nb_enqueued;
1823 }
1824
1825 /* Enqueue burst */
1826 static uint16_t
1827 enqueue_ldpc_dec_ops(struct rte_bbdev_queue_data *q_data,
1828                  struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
1829 {
1830         void *queue = q_data->queue_private;
1831         struct turbo_sw_queue *q = queue;
1832         uint16_t nb_enqueued = 0;
1833
1834         nb_enqueued = enqueue_ldpc_dec_all_ops(q, ops, nb_ops,
1835                         &q_data->queue_stats);
1836
1837         q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
1838         q_data->queue_stats.enqueued_count += nb_enqueued;
1839
1840         return nb_enqueued;
1841 }
1842
1843 /* Dequeue decode burst */
1844 static uint16_t
1845 dequeue_dec_ops(struct rte_bbdev_queue_data *q_data,
1846                 struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
1847 {
1848         struct turbo_sw_queue *q = q_data->queue_private;
1849         uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
1850                         (void **)ops, nb_ops, NULL);
1851         q_data->queue_stats.dequeued_count += nb_dequeued;
1852
1853         return nb_dequeued;
1854 }
1855
1856 /* Dequeue encode burst */
1857 static uint16_t
1858 dequeue_enc_ops(struct rte_bbdev_queue_data *q_data,
1859                 struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
1860 {
1861         struct turbo_sw_queue *q = q_data->queue_private;
1862         uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
1863                         (void **)ops, nb_ops, NULL);
1864         q_data->queue_stats.dequeued_count += nb_dequeued;
1865
1866         return nb_dequeued;
1867 }
1868
1869 /* Parse 16bit integer from string argument */
1870 static inline int
1871 parse_u16_arg(const char *key, const char *value, void *extra_args)
1872 {
1873         uint16_t *u16 = extra_args;
1874         unsigned int long result;
1875
1876         if ((value == NULL) || (extra_args == NULL))
1877                 return -EINVAL;
1878         errno = 0;
1879         result = strtoul(value, NULL, 0);
1880         if ((result >= (1 << 16)) || (errno != 0)) {
1881                 rte_bbdev_log(ERR, "Invalid value %lu for %s", result, key);
1882                 return -ERANGE;
1883         }
1884         *u16 = (uint16_t)result;
1885         return 0;
1886 }
1887
1888 /* Parse parameters used to create device */
1889 static int
1890 parse_turbo_sw_params(struct turbo_sw_params *params, const char *input_args)
1891 {
1892         struct rte_kvargs *kvlist = NULL;
1893         int ret = 0;
1894
1895         if (params == NULL)
1896                 return -EINVAL;
1897         if (input_args) {
1898                 kvlist = rte_kvargs_parse(input_args, turbo_sw_valid_params);
1899                 if (kvlist == NULL)
1900                         return -EFAULT;
1901
1902                 ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[0],
1903                                         &parse_u16_arg, &params->queues_num);
1904                 if (ret < 0)
1905                         goto exit;
1906
1907                 ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[1],
1908                                         &parse_u16_arg, &params->socket_id);
1909                 if (ret < 0)
1910                         goto exit;
1911
1912                 if (params->socket_id >= RTE_MAX_NUMA_NODES) {
1913                         rte_bbdev_log(ERR, "Invalid socket, must be < %u",
1914                                         RTE_MAX_NUMA_NODES);
1915                         goto exit;
1916                 }
1917         }
1918
1919 exit:
1920         if (kvlist)
1921                 rte_kvargs_free(kvlist);
1922         return ret;
1923 }
1924
1925 /* Create device */
1926 static int
1927 turbo_sw_bbdev_create(struct rte_vdev_device *vdev,
1928                 struct turbo_sw_params *init_params)
1929 {
1930         struct rte_bbdev *bbdev;
1931         const char *name = rte_vdev_device_name(vdev);
1932
1933         bbdev = rte_bbdev_allocate(name);
1934         if (bbdev == NULL)
1935                 return -ENODEV;
1936
1937         bbdev->data->dev_private = rte_zmalloc_socket(name,
1938                         sizeof(struct bbdev_private), RTE_CACHE_LINE_SIZE,
1939                         init_params->socket_id);
1940         if (bbdev->data->dev_private == NULL) {
1941                 rte_bbdev_release(bbdev);
1942                 return -ENOMEM;
1943         }
1944
1945         bbdev->dev_ops = &pmd_ops;
1946         bbdev->device = &vdev->device;
1947         bbdev->data->socket_id = init_params->socket_id;
1948         bbdev->intr_handle = NULL;
1949
1950         /* register rx/tx burst functions for data path */
1951         bbdev->dequeue_enc_ops = dequeue_enc_ops;
1952         bbdev->dequeue_dec_ops = dequeue_dec_ops;
1953         bbdev->enqueue_enc_ops = enqueue_enc_ops;
1954         bbdev->enqueue_dec_ops = enqueue_dec_ops;
1955         bbdev->dequeue_ldpc_enc_ops = dequeue_enc_ops;
1956         bbdev->dequeue_ldpc_dec_ops = dequeue_dec_ops;
1957         bbdev->enqueue_ldpc_enc_ops = enqueue_ldpc_enc_ops;
1958         bbdev->enqueue_ldpc_dec_ops = enqueue_ldpc_dec_ops;
1959         ((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues =
1960                         init_params->queues_num;
1961
1962         return 0;
1963 }
1964
1965 /* Initialise device */
1966 static int
1967 turbo_sw_bbdev_probe(struct rte_vdev_device *vdev)
1968 {
1969         struct turbo_sw_params init_params = {
1970                 rte_socket_id(),
1971                 RTE_BBDEV_DEFAULT_MAX_NB_QUEUES
1972         };
1973         const char *name;
1974         const char *input_args;
1975
1976         if (vdev == NULL)
1977                 return -EINVAL;
1978
1979         name = rte_vdev_device_name(vdev);
1980         if (name == NULL)
1981                 return -EINVAL;
1982         input_args = rte_vdev_device_args(vdev);
1983         parse_turbo_sw_params(&init_params, input_args);
1984
1985         rte_bbdev_log_debug(
1986                         "Initialising %s on NUMA node %d with max queues: %d\n",
1987                         name, init_params.socket_id, init_params.queues_num);
1988
1989         return turbo_sw_bbdev_create(vdev, &init_params);
1990 }
1991
1992 /* Uninitialise device */
1993 static int
1994 turbo_sw_bbdev_remove(struct rte_vdev_device *vdev)
1995 {
1996         struct rte_bbdev *bbdev;
1997         const char *name;
1998
1999         if (vdev == NULL)
2000                 return -EINVAL;
2001
2002         name = rte_vdev_device_name(vdev);
2003         if (name == NULL)
2004                 return -EINVAL;
2005
2006         bbdev = rte_bbdev_get_named_dev(name);
2007         if (bbdev == NULL)
2008                 return -EINVAL;
2009
2010         rte_free(bbdev->data->dev_private);
2011
2012         return rte_bbdev_release(bbdev);
2013 }
2014
2015 static struct rte_vdev_driver bbdev_turbo_sw_pmd_drv = {
2016         .probe = turbo_sw_bbdev_probe,
2017         .remove = turbo_sw_bbdev_remove
2018 };
2019
2020 RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_turbo_sw_pmd_drv);
2021 RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME,
2022         TURBO_SW_MAX_NB_QUEUES_ARG"=<int> "
2023         TURBO_SW_SOCKET_ID_ARG"=<int>");
2024 RTE_PMD_REGISTER_ALIAS(DRIVER_NAME, turbo_sw);