5551f8484d81ab4a4e1a75d309bc8f4c4d800d3d
[dpdk.git] / drivers / baseband / turbo_sw / bbdev_turbo_software.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4
5 #include <string.h>
6
7 #include <rte_common.h>
8 #include <rte_bus_vdev.h>
9 #include <rte_malloc.h>
10 #include <rte_ring.h>
11 #include <rte_kvargs.h>
12 #include <rte_cycles.h>
13
14 #include <rte_bbdev.h>
15 #include <rte_bbdev_pmd.h>
16
17 #ifdef RTE_BBDEV_SDK_AVX2
18 #include <phy_turbo.h>
19 #include <phy_crc.h>
20 #include <phy_rate_match.h>
21 #endif
22
/* vdev driver name used for registration and log messages */
#define DRIVER_NAME baseband_turbo_sw

/* Turbo SW PMD logging ID */
static int bbdev_turbo_sw_logtype;

/* Helper macro for logging; note it appends a newline to every message */
#define rte_bbdev_log(level, fmt, ...) \
	rte_log(RTE_LOG_ ## level, bbdev_turbo_sw_logtype, fmt "\n", \
		##__VA_ARGS__)

/* Debug logging helper: prefixes the source line number and function name */
#define rte_bbdev_log_debug(fmt, ...) \
	rte_bbdev_log(DEBUG, RTE_STR(__LINE__) ":%s() " fmt, __func__, \
		##__VA_ARGS__)

/* Sizes of the per-queue scratch buffers used by the decode path,
 * derived from the maximum turbo code block size.
 */
#define DEINT_INPUT_BUF_SIZE (((RTE_BBDEV_TURBO_MAX_CB_SIZE >> 3) + 1) * 48)
#define DEINT_OUTPUT_BUF_SIZE (DEINT_INPUT_BUF_SIZE * 6)
#define ADAPTER_OUTPUT_BUF_SIZE ((RTE_BBDEV_TURBO_MAX_CB_SIZE + 4) * 48)
40
/* Per-device private data */
struct bbdev_private {
	unsigned int max_nb_queues;  /**< Max number of queues */
};
45
/* Initialisation params structure that can be used by Turbo SW driver */
struct turbo_sw_params {
	int socket_id;  /**< Turbo SW device socket */
	uint16_t queues_num;  /**< Turbo SW device queues number */
};
51
/* Acceptable params for Turbo SW devices */
#define TURBO_SW_MAX_NB_QUEUES_ARG  "max_nb_queues"
#define TURBO_SW_SOCKET_ID_ARG      "socket_id"

/* Device argument names recognised by this driver */
static const char * const turbo_sw_valid_params[] = {
	TURBO_SW_MAX_NB_QUEUES_ARG,
	TURBO_SW_SOCKET_ID_ARG
};
60
/* Per-queue private data (cache-line aligned) */
struct turbo_sw_queue {
	/* Ring for processed (encoded/decoded) operations which are ready to
	 * be dequeued.
	 */
	struct rte_ring *processed_pkts;
	/* Stores input for turbo encoder (used when CRC attachment is
	 * performed).
	 */
	uint8_t *enc_in;
	/* Stores output from turbo encoder */
	uint8_t *enc_out;
	/* Alpha gamma buf for bblib_turbo_decoder() function */
	int8_t *ag;
	/* Temp buf for bblib_turbo_decoder() function */
	uint16_t *code_block;
	/* Input buf for bblib_rate_dematching_lte() function */
	uint8_t *deint_input;
	/* Output buf for bblib_rate_dematching_lte() function */
	uint8_t *deint_output;
	/* Output buf for bblib_turbodec_adapter_lte() function */
	uint8_t *adapter_output;
	/* Operation type of this queue */
	enum rte_bbdev_op_type type;
} __rte_cache_aligned;
86
87 #ifdef RTE_BBDEV_SDK_AVX2
88 static inline char *
89 mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
90 {
91         if (unlikely(len > rte_pktmbuf_tailroom(m)))
92                 return NULL;
93
94         char *tail = (char *)m->buf_addr + m->data_off + m->data_len;
95         m->data_len = (uint16_t)(m->data_len + len);
96         m_head->pkt_len  = (m_head->pkt_len + len);
97         return tail;
98 }
99
100 /* Calculate index based on Table 5.1.3-3 from TS34.212 */
101 static inline int32_t
102 compute_idx(uint16_t k)
103 {
104         int32_t result = 0;
105
106         if (k < RTE_BBDEV_TURBO_MIN_CB_SIZE || k > RTE_BBDEV_TURBO_MAX_CB_SIZE)
107                 return -1;
108
109         if (k > 2048) {
110                 if ((k - 2048) % 64 != 0)
111                         result = -1;
112
113                 result = 124 + (k - 2048) / 64;
114         } else if (k <= 512) {
115                 if ((k - 40) % 8 != 0)
116                         result = -1;
117
118                 result = (k - 40) / 8 + 1;
119         } else if (k <= 1024) {
120                 if ((k - 512) % 16 != 0)
121                         result = -1;
122
123                 result = 60 + (k - 512) / 16;
124         } else { /* 1024 < k <= 2048 */
125                 if ((k - 1024) % 32 != 0)
126                         result = -1;
127
128                 result = 92 + (k - 1024) / 32;
129         }
130
131         return result;
132 }
133 #endif
134
/* Report whether any bit selected by @bitmask is set in @bitmap. */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
	return (bitmap & bitmask) != 0;
}
141
142 /* Get device info */
143 static void
144 info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
145 {
146         struct bbdev_private *internals = dev->data->dev_private;
147
148         static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
149 #ifdef RTE_BBDEV_SDK_AVX2
150                 {
151                         .type = RTE_BBDEV_OP_TURBO_DEC,
152                         .cap.turbo_dec = {
153                                 .capability_flags =
154                                         RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
155                                         RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN |
156                                         RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
157                                         RTE_BBDEV_TURBO_CRC_TYPE_24B |
158                                         RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
159                                         RTE_BBDEV_TURBO_EARLY_TERMINATION,
160                                 .max_llr_modulus = 16,
161                                 .num_buffers_src =
162                                                 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
163                                 .num_buffers_hard_out =
164                                                 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
165                                 .num_buffers_soft_out = 0,
166                         }
167                 },
168                 {
169                         .type   = RTE_BBDEV_OP_TURBO_ENC,
170                         .cap.turbo_enc = {
171                                 .capability_flags =
172                                                 RTE_BBDEV_TURBO_CRC_24B_ATTACH |
173                                                 RTE_BBDEV_TURBO_CRC_24A_ATTACH |
174                                                 RTE_BBDEV_TURBO_RATE_MATCH |
175                                                 RTE_BBDEV_TURBO_RV_INDEX_BYPASS,
176                                 .num_buffers_src =
177                                                 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
178                                 .num_buffers_dst =
179                                                 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
180                         }
181                 },
182 #endif
183                 RTE_BBDEV_END_OF_CAPABILITIES_LIST()
184         };
185
186         static struct rte_bbdev_queue_conf default_queue_conf = {
187                 .queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT,
188         };
189
190 #ifdef RTE_BBDEV_SDK_AVX2
191         static const enum rte_cpu_flag_t cpu_flag = RTE_CPUFLAG_SSE4_2;
192         dev_info->cpu_flag_reqs = &cpu_flag;
193 #else
194         dev_info->cpu_flag_reqs = NULL;
195 #endif
196
197         default_queue_conf.socket = dev->data->socket_id;
198
199         dev_info->driver_name = RTE_STR(DRIVER_NAME);
200         dev_info->max_num_queues = internals->max_nb_queues;
201         dev_info->queue_size_lim = RTE_BBDEV_QUEUE_SIZE_LIMIT;
202         dev_info->hardware_accelerated = false;
203         dev_info->max_dl_queue_priority = 0;
204         dev_info->max_ul_queue_priority = 0;
205         dev_info->default_queue_conf = default_queue_conf;
206         dev_info->capabilities = bbdev_capabilities;
207         dev_info->min_alignment = 64;
208
209         rte_bbdev_log_debug("got device info from %u\n", dev->data->dev_id);
210 }
211
212 /* Release queue */
213 static int
214 q_release(struct rte_bbdev *dev, uint16_t q_id)
215 {
216         struct turbo_sw_queue *q = dev->data->queues[q_id].queue_private;
217
218         if (q != NULL) {
219                 rte_ring_free(q->processed_pkts);
220                 rte_free(q->enc_out);
221                 rte_free(q->enc_in);
222                 rte_free(q->ag);
223                 rte_free(q->code_block);
224                 rte_free(q->deint_input);
225                 rte_free(q->deint_output);
226                 rte_free(q->adapter_output);
227                 rte_free(q);
228                 dev->data->queues[q_id].queue_private = NULL;
229         }
230
231         rte_bbdev_log_debug("released device queue %u:%u",
232                         dev->data->dev_id, q_id);
233         return 0;
234 }
235
236 /* Setup a queue */
237 static int
238 q_setup(struct rte_bbdev *dev, uint16_t q_id,
239                 const struct rte_bbdev_queue_conf *queue_conf)
240 {
241         int ret;
242         struct turbo_sw_queue *q;
243         char name[RTE_RING_NAMESIZE];
244
245         /* Allocate the queue data structure. */
246         q = rte_zmalloc_socket(RTE_STR(DRIVER_NAME), sizeof(*q),
247                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
248         if (q == NULL) {
249                 rte_bbdev_log(ERR, "Failed to allocate queue memory");
250                 return -ENOMEM;
251         }
252
253         /* Allocate memory for encoder output. */
254         ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_enc_o%u:%u",
255                         dev->data->dev_id, q_id);
256         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
257                 rte_bbdev_log(ERR,
258                                 "Creating queue name for device %u queue %u failed",
259                                 dev->data->dev_id, q_id);
260                 return -ENAMETOOLONG;
261         }
262         q->enc_out = rte_zmalloc_socket(name,
263                         ((RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) + 3) *
264                         sizeof(*q->enc_out) * 3,
265                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
266         if (q->enc_out == NULL) {
267                 rte_bbdev_log(ERR,
268                         "Failed to allocate queue memory for %s", name);
269                 goto free_q;
270         }
271
272         /* Allocate memory for rate matching output. */
273         ret = snprintf(name, RTE_RING_NAMESIZE,
274                         RTE_STR(DRIVER_NAME)"_enc_i%u:%u", dev->data->dev_id,
275                         q_id);
276         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
277                 rte_bbdev_log(ERR,
278                                 "Creating queue name for device %u queue %u failed",
279                                 dev->data->dev_id, q_id);
280                 return -ENAMETOOLONG;
281         }
282         q->enc_in = rte_zmalloc_socket(name,
283                         (RTE_BBDEV_TURBO_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in),
284                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
285         if (q->enc_in == NULL) {
286                 rte_bbdev_log(ERR,
287                         "Failed to allocate queue memory for %s", name);
288                 goto free_q;
289         }
290
291         /* Allocate memory for Aplha Gamma temp buffer. */
292         ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_ag%u:%u",
293                         dev->data->dev_id, q_id);
294         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
295                 rte_bbdev_log(ERR,
296                                 "Creating queue name for device %u queue %u failed",
297                                 dev->data->dev_id, q_id);
298                 return -ENAMETOOLONG;
299         }
300         q->ag = rte_zmalloc_socket(name,
301                         RTE_BBDEV_TURBO_MAX_CB_SIZE * 10 * sizeof(*q->ag),
302                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
303         if (q->ag == NULL) {
304                 rte_bbdev_log(ERR,
305                         "Failed to allocate queue memory for %s", name);
306                 goto free_q;
307         }
308
309         /* Allocate memory for code block temp buffer. */
310         ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_cb%u:%u",
311                         dev->data->dev_id, q_id);
312         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
313                 rte_bbdev_log(ERR,
314                                 "Creating queue name for device %u queue %u failed",
315                                 dev->data->dev_id, q_id);
316                 return -ENAMETOOLONG;
317         }
318         q->code_block = rte_zmalloc_socket(name,
319                         RTE_BBDEV_TURBO_MAX_CB_SIZE * sizeof(*q->code_block),
320                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
321         if (q->code_block == NULL) {
322                 rte_bbdev_log(ERR,
323                         "Failed to allocate queue memory for %s", name);
324                 goto free_q;
325         }
326
327         /* Allocate memory for Deinterleaver input. */
328         ret = snprintf(name, RTE_RING_NAMESIZE,
329                         RTE_STR(DRIVER_NAME)"_de_i%u:%u",
330                         dev->data->dev_id, q_id);
331         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
332                 rte_bbdev_log(ERR,
333                                 "Creating queue name for device %u queue %u failed",
334                                 dev->data->dev_id, q_id);
335                 return -ENAMETOOLONG;
336         }
337         q->deint_input = rte_zmalloc_socket(name,
338                         DEINT_INPUT_BUF_SIZE * sizeof(*q->deint_input),
339                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
340         if (q->deint_input == NULL) {
341                 rte_bbdev_log(ERR,
342                         "Failed to allocate queue memory for %s", name);
343                 goto free_q;
344         }
345
346         /* Allocate memory for Deinterleaver output. */
347         ret = snprintf(name, RTE_RING_NAMESIZE,
348                         RTE_STR(DRIVER_NAME)"_de_o%u:%u",
349                         dev->data->dev_id, q_id);
350         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
351                 rte_bbdev_log(ERR,
352                                 "Creating queue name for device %u queue %u failed",
353                                 dev->data->dev_id, q_id);
354                 return -ENAMETOOLONG;
355         }
356         q->deint_output = rte_zmalloc_socket(NULL,
357                         DEINT_OUTPUT_BUF_SIZE * sizeof(*q->deint_output),
358                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
359         if (q->deint_output == NULL) {
360                 rte_bbdev_log(ERR,
361                         "Failed to allocate queue memory for %s", name);
362                 goto free_q;
363         }
364
365         /* Allocate memory for Adapter output. */
366         ret = snprintf(name, RTE_RING_NAMESIZE,
367                         RTE_STR(DRIVER_NAME)"_ada_o%u:%u",
368                         dev->data->dev_id, q_id);
369         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
370                 rte_bbdev_log(ERR,
371                                 "Creating queue name for device %u queue %u failed",
372                                 dev->data->dev_id, q_id);
373                 return -ENAMETOOLONG;
374         }
375         q->adapter_output = rte_zmalloc_socket(NULL,
376                         ADAPTER_OUTPUT_BUF_SIZE * sizeof(*q->adapter_output),
377                         RTE_CACHE_LINE_SIZE, queue_conf->socket);
378         if (q->adapter_output == NULL) {
379                 rte_bbdev_log(ERR,
380                         "Failed to allocate queue memory for %s", name);
381                 goto free_q;
382         }
383
384         /* Create ring for packets awaiting to be dequeued. */
385         ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"%u:%u",
386                         dev->data->dev_id, q_id);
387         if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
388                 rte_bbdev_log(ERR,
389                                 "Creating queue name for device %u queue %u failed",
390                                 dev->data->dev_id, q_id);
391                 return -ENAMETOOLONG;
392         }
393         q->processed_pkts = rte_ring_create(name, queue_conf->queue_size,
394                         queue_conf->socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
395         if (q->processed_pkts == NULL) {
396                 rte_bbdev_log(ERR, "Failed to create ring for %s", name);
397                 goto free_q;
398         }
399
400         q->type = queue_conf->op_type;
401
402         dev->data->queues[q_id].queue_private = q;
403         rte_bbdev_log_debug("setup device queue %s", name);
404         return 0;
405
406 free_q:
407         rte_ring_free(q->processed_pkts);
408         rte_free(q->enc_out);
409         rte_free(q->enc_in);
410         rte_free(q->ag);
411         rte_free(q->code_block);
412         rte_free(q->deint_input);
413         rte_free(q->deint_output);
414         rte_free(q->adapter_output);
415         rte_free(q);
416         return -EFAULT;
417 }
418
/* Mapping of the generic bbdev driver ops to this PMD's implementations */
static const struct rte_bbdev_ops pmd_ops = {
	.info_get = info_get,
	.queue_setup = q_setup,
	.queue_release = q_release
};
424
425 #ifdef RTE_BBDEV_SDK_AVX2
426 /* Checks if the encoder input buffer is correct.
427  * Returns 0 if it's valid, -1 otherwise.
428  */
429 static inline int
430 is_enc_input_valid(const uint16_t k, const int32_t k_idx,
431                 const uint16_t in_length)
432 {
433         if (k_idx < 0) {
434                 rte_bbdev_log(ERR, "K Index is invalid");
435                 return -1;
436         }
437
438         if (in_length - (k >> 3) < 0) {
439                 rte_bbdev_log(ERR,
440                                 "Mismatch between input length (%u bytes) and K (%u bits)",
441                                 in_length, k);
442                 return -1;
443         }
444
445         if (k > RTE_BBDEV_TURBO_MAX_CB_SIZE) {
446                 rte_bbdev_log(ERR, "CB size (%u) is too big, max: %d",
447                                 k, RTE_BBDEV_TURBO_MAX_CB_SIZE);
448                 return -1;
449         }
450
451         return 0;
452 }
453
454 /* Checks if the decoder input buffer is correct.
455  * Returns 0 if it's valid, -1 otherwise.
456  */
457 static inline int
458 is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length)
459 {
460         if (k_idx < 0) {
461                 rte_bbdev_log(ERR, "K index is invalid");
462                 return -1;
463         }
464
465         if (in_length < kw) {
466                 rte_bbdev_log(ERR,
467                                 "Mismatch between input length (%u) and kw (%u)",
468                                 in_length, kw);
469                 return -1;
470         }
471
472         if (kw > RTE_BBDEV_TURBO_MAX_KW) {
473                 rte_bbdev_log(ERR, "Input length (%u) is too big, max: %d",
474                                 kw, RTE_BBDEV_TURBO_MAX_KW);
475                 return -1;
476         }
477
478         return 0;
479 }
480 #endif
481
/* Process one encode code block: optionally attach CRC24A or CRC24B, run
 * the SDK turbo encoder, then either rate-match the three output streams
 * into the output mbuf or (when RTE_BBDEV_TURBO_RATE_MATCH is not set)
 * pack them contiguously. Errors are reported by OR-ing bits into
 * op->status; the function returns early on any failure.
 *
 * r/c: index of this CB / total CBs in the transport block.
 * k:   CB size in bits; ncb: circular buffer length; e: rate-matched bits.
 * Without RTE_BBDEV_SDK_AVX2 this is compiled to a no-op.
 */
static inline void
process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		uint8_t r, uint8_t c, uint16_t k, uint16_t ncb,
		uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
		struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
		uint16_t in_length, struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX2
	int ret;
	int16_t k_idx;
	uint16_t m;
	uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out;
	/* Saved bytes of the next CB that the CRC write clobbers (0 = none) */
	uint64_t first_3_bytes = 0;
	struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
	struct bblib_crc_request crc_req;
	struct bblib_crc_response crc_resp;
	struct bblib_turbo_encoder_request turbo_req;
	struct bblib_turbo_encoder_response turbo_resp;
	struct bblib_rate_match_dl_request rm_req;
	struct bblib_rate_match_dl_response rm_resp;
#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time;
#else
	RTE_SET_USED(q_stats);
#endif

	k_idx = compute_idx(k);
	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);

	/* CRC24A (for TB) */
	if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) &&
		(enc->code_block_mode == 1)) {
		ret = is_enc_input_valid(k - 24, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
		crc_req.data = in;
		crc_req.len = k - 24;
		/* Check if there is a room for CRC bits if not use
		 * the temporary buffer.
		 */
		if (mbuf_append(m_in, m_in, 3) == NULL) {
			rte_memcpy(q->enc_in, in, (k - 24) >> 3);
			in = q->enc_in;
		} else {
			/* Store 3 first bytes of next CB as they will be
			 * overwritten by CRC bytes. If it is the last CB then
			 * there is no point to store 3 next bytes and this
			 * if..else branch will be omitted.
			 * NOTE(review): this reads/writes 8 bytes through a
			 * uint64_t cast, not 3 - it assumes sufficient room
			 * past the CB and tolerated unaligned access; confirm.
			 */
			first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
		}

		crc_resp.data = in;
#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* CRC24A generation */
		bblib_lte_crc24a_gen(&crc_req, &crc_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	} else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
		/* CRC24B */
		ret = is_enc_input_valid(k - 24, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
		crc_req.data = in;
		crc_req.len = k - 24;
		/* Check if there is a room for CRC bits if this is the last
		 * CB in TB. If not use temporary buffer.
		 */
		if ((c - r == 1) && (mbuf_append(m_in, m_in, 3) == NULL)) {
			rte_memcpy(q->enc_in, in, (k - 24) >> 3);
			in = q->enc_in;
		} else if (c - r > 1) {
			/* Store 3 first bytes of next CB as they will be
			 * overwritten by CRC bytes. If it is the last CB then
			 * there is no point to store 3 next bytes and this
			 * if..else branch will be omitted.
			 */
			first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
		}

		crc_resp.data = in;
#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* CRC24B generation */
		bblib_lte_crc24b_gen(&crc_req, &crc_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	} else {
		/* No CRC attachment requested: only validate the input. */
		ret = is_enc_input_valid(k, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
	}

	/* Turbo encoder */

	/* Each bit layer output from turbo encoder is (k+4) bits long, i.e.
	 * input length + 4 tail bits. That's (k/8) + 1 bytes after rounding up.
	 * So dst_data's length should be 3*(k/8) + 3 bytes.
	 * In Rate-matching bypass case outputs pointers passed to encoder
	 * (out0, out1 and out2) can directly point to addresses of output from
	 * turbo_enc entity.
	 */
	if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
		out0 = q->enc_out;
		out1 = RTE_PTR_ADD(out0, (k >> 3) + 1);
		out2 = RTE_PTR_ADD(out1, (k >> 3) + 1);
	} else {
		out0 = (uint8_t *)mbuf_append(m_out_head, m_out,
				(k >> 3) * 3 + 2);
		if (out0 == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR,
					"Too little space in output mbuf");
			return;
		}
		enc->output.length += (k >> 3) * 3 + 2;
		/* rte_bbdev_op_data.offset can be different than the
		 * offset of the appended bytes
		 */
		out0 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
		out1 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
				out_offset + (k >> 3) + 1);
		out2 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
				out_offset + 2 * ((k >> 3) + 1));
	}

	turbo_req.case_id = k_idx;
	turbo_req.input_win = in;
	turbo_req.length = k >> 3;
	turbo_resp.output_win_0 = out0;
	turbo_resp.output_win_1 = out1;
	turbo_resp.output_win_2 = out2;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo encoding */
	if (bblib_turbo_encoder(&turbo_req, &turbo_resp) != 0) {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "Turbo Encoder failed");
		return;
	}
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

	/* Restore 3 first bytes of next CB if they were overwritten by CRC*/
	if (first_3_bytes != 0)
		*((uint64_t *)&in[(k - 32) >> 3]) = first_3_bytes;

	/* Rate-matching */
	if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
		uint8_t mask_id;
		/* Integer round up division by 8 */
		uint16_t out_len = (e + 7) >> 3;
		/* The mask array is indexed using E%8. E is an even number so
		 * there are only 4 possible values.
		 */
		const uint8_t mask_out[] = {0xFF, 0xC0, 0xF0, 0xFC};

		/* get output data starting address */
		rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
		if (rm_out == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR,
					"Too little space in output mbuf");
			return;
		}
		/* rte_bbdev_op_data.offset can be different than the offset
		 * of the appended bytes
		 */
		rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);

		/* index of current code block */
		rm_req.r = r;
		/* total number of code block */
		rm_req.C = c;
		/* For DL - 1, UL - 0 */
		rm_req.direction = 1;
		/* According to 3ggp 36.212 Spec 5.1.4.1.2 section Nsoft, KMIMO
		 * and MDL_HARQ are used for Ncb calculation. As Ncb is already
		 * known we can adjust those parameters
		 */
		rm_req.Nsoft = ncb * rm_req.C;
		rm_req.KMIMO = 1;
		rm_req.MDL_HARQ = 1;
		/* According to 3ggp 36.212 Spec 5.1.4.1.2 section Nl, Qm and G
		 * are used for E calculation. As E is already known we can
		 * adjust those parameters
		 */
		rm_req.NL = e;
		rm_req.Qm = 1;
		rm_req.G = rm_req.NL * rm_req.Qm * rm_req.C;

		rm_req.rvidx = enc->rv_index;
		rm_req.Kidx = k_idx - 1;
		rm_req.nLen = k + 4;
		rm_req.tin0 = out0;
		rm_req.tin1 = out1;
		rm_req.tin2 = out2;
		rm_resp.output = rm_out;
		rm_resp.OutputLen = out_len;
		if (enc->op_flags & RTE_BBDEV_TURBO_RV_INDEX_BYPASS)
			rm_req.bypass_rvidx = 1;
		else
			rm_req.bypass_rvidx = 0;

#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* Rate-Matching */
		if (bblib_rate_match_dl(&rm_req, &rm_resp) != 0) {
			op->status |= 1 << RTE_BBDEV_DRV_ERROR;
			rte_bbdev_log(ERR, "Rate matching failed");
			return;
		}
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

		/* SW fills an entire last byte even if E%8 != 0. Clear the
		 * superfluous data bits for consistency with HW device.
		 */
		mask_id = (e & 7) >> 1;
		rm_out[out_len - 1] &= mask_out[mask_id];
		enc->output.length += rm_resp.OutputLen;
	} else {
		/* Rate matching is bypassed */

		/* Completing last byte of out0 (where 4 tail bits are stored)
		 * by moving first 4 bits from out1
		 */
		tmp_out = (uint8_t *) --out1;
		*tmp_out = *tmp_out | ((*(tmp_out + 1) & 0xF0) >> 4);
		tmp_out++;
		/* Shifting out1 data by 4 bits to the left */
		for (m = 0; m < k >> 3; ++m) {
			uint8_t *first = tmp_out;
			uint8_t second = *(tmp_out + 1);
			*first = (*first << 4) | ((second & 0xF0) >> 4);
			tmp_out++;
		}
		/* Shifting out2 data by 8 bits to the left */
		for (m = 0; m < (k >> 3) + 1; ++m) {
			*tmp_out = *(tmp_out + 1);
			tmp_out++;
		}
		*tmp_out = 0;
	}
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(r);
	RTE_SET_USED(c);
	RTE_SET_USED(k);
	RTE_SET_USED(ncb);
	RTE_SET_USED(e);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(in_length);
	RTE_SET_USED(q_stats);
#endif
}
759
/* Process a single encode operation.
 *
 * Walks every code block (CB) of the op, runs each through
 * process_enc_cb() and advances the input/output mbuf chain and
 * offsets between CBs. Errors are reported via op->status (a bitmask
 * of RTE_BBDEV_* error bits); nothing is returned.
 */
static inline void
enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r, crc24_bits = 0;
	uint16_t k, ncb;
	uint32_t e;
	struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
	uint16_t in_offset = enc->input.offset;
	uint16_t out_offset = enc->output.offset;
	struct rte_mbuf *m_in = enc->input.data;
	struct rte_mbuf *m_out = enc->output.data;
	struct rte_mbuf *m_out_head = enc->output.data;
	uint32_t in_length, mbuf_total_left = enc->input.length;
	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	/* input.length is in bytes; the TB size limit is in bits, hence >> 3 */
	if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) {
		rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d",
				mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE);
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	/* When a CRC24A/B is attached by the PMD, those 24 bits of K are
	 * generated, not consumed from the input payload.
	 */
	if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) ||
		(enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH))
		crc24_bits = 24;

	if (enc->code_block_mode == 0) { /* For Transport Block mode */
		c = enc->tb_params.c;
		r = enc->tb_params.r;
	} else {/* For Code Block mode */
		c = 1;
		r = 0;
	}

	while (mbuf_total_left > 0 && r < c) {

		seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;

		/* In TB mode K/Ncb/E differ between the "negative" and
		 * "positive" CB groups (3GPP TS 36.212 segmentation).
		 */
		if (enc->code_block_mode == 0) {
			k = (r < enc->tb_params.c_neg) ?
				enc->tb_params.k_neg : enc->tb_params.k_pos;
			ncb = (r < enc->tb_params.c_neg) ?
				enc->tb_params.ncb_neg : enc->tb_params.ncb_pos;
			e = (r < enc->tb_params.cab) ?
				enc->tb_params.ea : enc->tb_params.eb;
		} else {
			k = enc->cb_params.k;
			ncb = enc->cb_params.ncb;
			e = enc->cb_params.e;
		}

		process_enc_cb(q, op, r, c, k, ncb, e, m_in, m_out_head,
				m_out, in_offset, out_offset, seg_total_left,
				queue_stats);
		/* Update total_left */
		in_length = ((k - crc24_bits) >> 3);
		mbuf_total_left -= in_length;
		/* Update offsets for next CBs (if exist) */
		in_offset += (k - crc24_bits) >> 3;
		if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH)
			out_offset += e >> 3;
		else
			/* Unmatched output: three K-bit streams; the extra 2
			 * bytes hold the tail bits — NOTE(review): confirm
			 * against process_enc_cb's bypass layout.
			 */
			out_offset += (k >> 3) * 3 + 2;

		/* Update offsets */
		if (seg_total_left == in_length) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			in_offset = 0;
			out_offset = 0;
		}
		r++;
	}

	/* check if all input data was processed */
	if (mbuf_total_left != 0) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Mismatch between mbuf length and included CBs sizes");
	}
}
852
853 static inline uint16_t
854 enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops,
855                 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
856 {
857         uint16_t i;
858 #ifdef RTE_BBDEV_OFFLOAD_COST
859         queue_stats->acc_offload_cycles = 0;
860 #endif
861
862         for (i = 0; i < nb_ops; ++i)
863                 enqueue_enc_one_op(q, ops[i], queue_stats);
864
865         return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
866                         NULL);
867 }
868
#ifdef RTE_BBDEV_SDK_AVX2
/* Copy the three encoder output streams of a code block into the
 * deinterleaver output layout, leaving room for dummy (padding) bytes.
 */
static inline void
move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k,
		uint16_t ncb)
{
	uint16_t d = k + 4;		/* stream length: K plus tail */
	uint16_t kpi = ncb / 3;		/* per-stream circular buffer size */
	uint16_t nd = kpi - d;		/* dummy positions per stream */
	uint16_t stride = kpi + 64;	/* destination stream stride */

	rte_memcpy(out + nd, in, d);
	rte_memcpy(out + nd + stride, in + kpi, d);
	/* NOTE(review): the "- 1" on the third stream offset mirrors the
	 * SDK adapter layout — confirm against bblib_turbo_adapter_ul.
	 */
	rte_memcpy(out + (nd - 1) + 2 * stride, in + 2 * kpi, d);
}
#endif
883
/* Decode one code block (CB).
 *
 * Pipeline (FlexRAN SDK): optional sub-block de-interleave ->
 * UL adapter (LLR re-ordering) -> turbo decoder, writing the hard
 * output into m_out at out_offset. Each SDK call is individually
 * timed into q_stats->acc_offload_cycles when RTE_BBDEV_OFFLOAD_COST
 * is enabled. Errors are reported through op->status. Without
 * RTE_BBDEV_SDK_AVX2 the function is compiled to a no-op.
 */
static inline void
process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		uint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in,
		struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
		uint16_t in_offset, uint16_t out_offset, bool check_crc_24b,
		uint16_t crc24_overlap, uint16_t in_length,
		struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX2
	int ret;
	int32_t k_idx;
	int32_t iter_cnt;
	uint8_t *in, *out, *adapter_input;
	int32_t ncb, ncb_without_null;
	struct bblib_turbo_adapter_ul_response adapter_resp;
	struct bblib_turbo_adapter_ul_request adapter_req;
	struct bblib_turbo_decoder_request turbo_req;
	struct bblib_turbo_decoder_response turbo_resp;
	struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time;
#else
	RTE_SET_USED(q_stats);
#endif

	/* SDK table index for this K value */
	k_idx = compute_idx(k);

	ret = is_dec_input_valid(k_idx, kw, in_length);
	if (ret != 0) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
	ncb = kw;
	/* Circular buffer size once NULL (dummy) bytes are stripped */
	ncb_without_null = (k + 4) * 3;

	if (check_bit(dec->op_flags, RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE)) {
		struct bblib_deinterleave_ul_request deint_req;
		struct bblib_deinterleave_ul_response deint_resp;

		deint_req.circ_buffer = BBLIB_FULL_CIRCULAR_BUFFER;
		deint_req.pharqbuffer = in;
		deint_req.ncb = ncb;
		deint_resp.pinteleavebuffer = q->deint_output;

#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		bblib_deinterleave_ul(&deint_req, &deint_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	} else
		/* Input already de-interleaved: only re-pack with padding */
		move_padding_bytes(in, q->deint_output, k, ncb);

	adapter_input = q->deint_output;

	/* Exactly one LLR sign convention must be requested by the caller */
	if (dec->op_flags & RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN)
		adapter_req.isinverted = 1;
	else if (dec->op_flags & RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN)
		adapter_req.isinverted = 0;
	else {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "LLR format wasn't specified");
		return;
	}

	adapter_req.ncb = ncb_without_null;
	adapter_req.pinteleavebuffer = adapter_input;
	adapter_resp.pharqout = q->adapter_output;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo decode adaptation */
	bblib_turbo_adapter_ul(&adapter_req, &adapter_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

	/* Reserve room in the output mbuf chain; CRC bytes shared with the
	 * next CB (crc24_overlap) are not appended.
	 */
	out = (uint8_t *)mbuf_append(m_out_head, m_out,
			((k - crc24_overlap) >> 3));
	if (out == NULL) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR, "Too little space in output mbuf");
		return;
	}
	/* rte_bbdev_op_data.offset can be different than the offset of the
	 * appended bytes
	 */
	out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
	/* With CRC24B checking the SDK expects c+1 — NOTE(review): presumably
	 * to enable the per-CB CRC pass; confirm against SDK docs.
	 */
	if (check_crc_24b)
		turbo_req.c = c + 1;
	else
		turbo_req.c = c;
	turbo_req.input = (int8_t *)q->adapter_output;
	turbo_req.k = k;
	turbo_req.k_idx = k_idx;
	turbo_req.max_iter_num = dec->iter_max;
	turbo_req.early_term_disable = !check_bit(dec->op_flags,
			RTE_BBDEV_TURBO_EARLY_TERMINATION);
	turbo_resp.ag_buf = q->ag;
	turbo_resp.cb_buf = q->code_block;
	turbo_resp.output = out;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo decode */
	iter_cnt = bblib_turbo_decoder(&turbo_req, &turbo_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	dec->hard_output.length += (k >> 3);

	if (iter_cnt > 0) {
		/* Temporary solution for returned iter_count from SDK */
		iter_cnt = (iter_cnt - 1) >> 1;
		/* Report the worst (largest) iteration count across CBs */
		dec->iter_count = RTE_MAX(iter_cnt, dec->iter_count);
	} else {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR, "Turbo Decoder failed");
		return;
	}
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(c);
	RTE_SET_USED(k);
	RTE_SET_USED(kw);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(check_crc_24b);
	RTE_SET_USED(crc24_overlap);
	RTE_SET_USED(in_length);
	RTE_SET_USED(q_stats);
#endif
}
1026
/* Process a single decode operation.
 *
 * Walks every code block (CB) of the op, decoding each via
 * process_dec_cb() and advancing the mbuf chain/offsets by the
 * circular buffer size Kw per CB. Errors are flagged in op->status.
 */
static inline void
enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r = 0;
	uint16_t kw, k = 0;
	uint16_t crc24_overlap = 0;
	struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
	struct rte_mbuf *m_in = dec->input.data;
	struct rte_mbuf *m_out = dec->hard_output.data;
	struct rte_mbuf *m_out_head = dec->hard_output.data;
	uint16_t in_offset = dec->input.offset;
	uint16_t out_offset = dec->hard_output.offset;
	uint32_t mbuf_total_left = dec->input.length;
	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (dec->code_block_mode == 0) { /* For Transport Block mode */
		c = dec->tb_params.c;
	} else { /* For Code Block mode */
		k = dec->cb_params.k;
		c = 1;
	}

	/* Per-CB CRC24B bytes overlap the next CB unless the caller asked
	 * to keep them in the output.
	 */
	if ((c > 1) && !check_bit(dec->op_flags,
		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
		crc24_overlap = 24;

	while (mbuf_total_left > 0) {
		if (dec->code_block_mode == 0)
			k = (r < dec->tb_params.c_neg) ?
				dec->tb_params.k_neg : dec->tb_params.k_pos;

		seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;

		/* Calculates circular buffer size (Kw).
		 * According to 3gpp 36.212 section 5.1.4.2
		 *   Kw = 3 * Kpi,
		 * where:
		 *   Kpi = nCol * nRow
		 * where nCol is 32 and nRow can be calculated from:
		 *   D =< nCol * nRow
		 * where D is the size of each output from turbo encoder block
		 * (k + 4).
		 */
		kw = RTE_ALIGN_CEIL(k + 4, RTE_BBDEV_TURBO_C_SUBBLOCK) * 3;

		process_dec_cb(q, op, c, k, kw, m_in, m_out_head, m_out,
				in_offset, out_offset, check_bit(dec->op_flags,
				RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap,
				seg_total_left, queue_stats);
		/* To keep CRC24 attached to end of Code block, use
		 * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it
		 * removed by default once verified.
		 */

		mbuf_total_left -= kw;

		/* Update offsets */
		if (seg_total_left == kw) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			in_offset = 0;
			out_offset = 0;
		} else {
			/* Update offsets for next CBs (if exist) */
			in_offset += kw;
			out_offset += ((k - crc24_overlap) >> 3);
		}
		r++;
	}
	/* check if all input data was consumed exactly */
	if (mbuf_total_left != 0) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Mismatch between mbuf length and included Circular buffer sizes");
	}
}
1113
1114 static inline uint16_t
1115 enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops,
1116                 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
1117 {
1118         uint16_t i;
1119 #ifdef RTE_BBDEV_OFFLOAD_COST
1120         queue_stats->acc_offload_cycles = 0;
1121 #endif
1122
1123         for (i = 0; i < nb_ops; ++i)
1124                 enqueue_dec_one_op(q, ops[i], queue_stats);
1125
1126         return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
1127                         NULL);
1128 }
1129
1130 /* Enqueue burst */
1131 static uint16_t
1132 enqueue_enc_ops(struct rte_bbdev_queue_data *q_data,
1133                 struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
1134 {
1135         void *queue = q_data->queue_private;
1136         struct turbo_sw_queue *q = queue;
1137         uint16_t nb_enqueued = 0;
1138
1139         nb_enqueued = enqueue_enc_all_ops(q, ops, nb_ops, &q_data->queue_stats);
1140
1141         q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
1142         q_data->queue_stats.enqueued_count += nb_enqueued;
1143
1144         return nb_enqueued;
1145 }
1146
1147 /* Enqueue burst */
1148 static uint16_t
1149 enqueue_dec_ops(struct rte_bbdev_queue_data *q_data,
1150                  struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
1151 {
1152         void *queue = q_data->queue_private;
1153         struct turbo_sw_queue *q = queue;
1154         uint16_t nb_enqueued = 0;
1155
1156         nb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops, &q_data->queue_stats);
1157
1158         q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
1159         q_data->queue_stats.enqueued_count += nb_enqueued;
1160
1161         return nb_enqueued;
1162 }
1163
1164 /* Dequeue decode burst */
1165 static uint16_t
1166 dequeue_dec_ops(struct rte_bbdev_queue_data *q_data,
1167                 struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
1168 {
1169         struct turbo_sw_queue *q = q_data->queue_private;
1170         uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
1171                         (void **)ops, nb_ops, NULL);
1172         q_data->queue_stats.dequeued_count += nb_dequeued;
1173
1174         return nb_dequeued;
1175 }
1176
1177 /* Dequeue encode burst */
1178 static uint16_t
1179 dequeue_enc_ops(struct rte_bbdev_queue_data *q_data,
1180                 struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
1181 {
1182         struct turbo_sw_queue *q = q_data->queue_private;
1183         uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
1184                         (void **)ops, nb_ops, NULL);
1185         q_data->queue_stats.dequeued_count += nb_dequeued;
1186
1187         return nb_dequeued;
1188 }
1189
1190 /* Parse 16bit integer from string argument */
1191 static inline int
1192 parse_u16_arg(const char *key, const char *value, void *extra_args)
1193 {
1194         uint16_t *u16 = extra_args;
1195         unsigned int long result;
1196
1197         if ((value == NULL) || (extra_args == NULL))
1198                 return -EINVAL;
1199         errno = 0;
1200         result = strtoul(value, NULL, 0);
1201         if ((result >= (1 << 16)) || (errno != 0)) {
1202                 rte_bbdev_log(ERR, "Invalid value %lu for %s", result, key);
1203                 return -ERANGE;
1204         }
1205         *u16 = (uint16_t)result;
1206         return 0;
1207 }
1208
1209 /* Parse parameters used to create device */
1210 static int
1211 parse_turbo_sw_params(struct turbo_sw_params *params, const char *input_args)
1212 {
1213         struct rte_kvargs *kvlist = NULL;
1214         int ret = 0;
1215
1216         if (params == NULL)
1217                 return -EINVAL;
1218         if (input_args) {
1219                 kvlist = rte_kvargs_parse(input_args, turbo_sw_valid_params);
1220                 if (kvlist == NULL)
1221                         return -EFAULT;
1222
1223                 ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[0],
1224                                         &parse_u16_arg, &params->queues_num);
1225                 if (ret < 0)
1226                         goto exit;
1227
1228                 ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[1],
1229                                         &parse_u16_arg, &params->socket_id);
1230                 if (ret < 0)
1231                         goto exit;
1232
1233                 if (params->socket_id >= RTE_MAX_NUMA_NODES) {
1234                         rte_bbdev_log(ERR, "Invalid socket, must be < %u",
1235                                         RTE_MAX_NUMA_NODES);
1236                         goto exit;
1237                 }
1238         }
1239
1240 exit:
1241         if (kvlist)
1242                 rte_kvargs_free(kvlist);
1243         return ret;
1244 }
1245
1246 /* Create device */
1247 static int
1248 turbo_sw_bbdev_create(struct rte_vdev_device *vdev,
1249                 struct turbo_sw_params *init_params)
1250 {
1251         struct rte_bbdev *bbdev;
1252         const char *name = rte_vdev_device_name(vdev);
1253
1254         bbdev = rte_bbdev_allocate(name);
1255         if (bbdev == NULL)
1256                 return -ENODEV;
1257
1258         bbdev->data->dev_private = rte_zmalloc_socket(name,
1259                         sizeof(struct bbdev_private), RTE_CACHE_LINE_SIZE,
1260                         init_params->socket_id);
1261         if (bbdev->data->dev_private == NULL) {
1262                 rte_bbdev_release(bbdev);
1263                 return -ENOMEM;
1264         }
1265
1266         bbdev->dev_ops = &pmd_ops;
1267         bbdev->device = &vdev->device;
1268         bbdev->data->socket_id = init_params->socket_id;
1269         bbdev->intr_handle = NULL;
1270
1271         /* register rx/tx burst functions for data path */
1272         bbdev->dequeue_enc_ops = dequeue_enc_ops;
1273         bbdev->dequeue_dec_ops = dequeue_dec_ops;
1274         bbdev->enqueue_enc_ops = enqueue_enc_ops;
1275         bbdev->enqueue_dec_ops = enqueue_dec_ops;
1276         ((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues =
1277                         init_params->queues_num;
1278
1279         return 0;
1280 }
1281
1282 /* Initialise device */
1283 static int
1284 turbo_sw_bbdev_probe(struct rte_vdev_device *vdev)
1285 {
1286         struct turbo_sw_params init_params = {
1287                 rte_socket_id(),
1288                 RTE_BBDEV_DEFAULT_MAX_NB_QUEUES
1289         };
1290         const char *name;
1291         const char *input_args;
1292
1293         if (vdev == NULL)
1294                 return -EINVAL;
1295
1296         name = rte_vdev_device_name(vdev);
1297         if (name == NULL)
1298                 return -EINVAL;
1299         input_args = rte_vdev_device_args(vdev);
1300         parse_turbo_sw_params(&init_params, input_args);
1301
1302         rte_bbdev_log_debug(
1303                         "Initialising %s on NUMA node %d with max queues: %d\n",
1304                         name, init_params.socket_id, init_params.queues_num);
1305
1306         return turbo_sw_bbdev_create(vdev, &init_params);
1307 }
1308
1309 /* Uninitialise device */
1310 static int
1311 turbo_sw_bbdev_remove(struct rte_vdev_device *vdev)
1312 {
1313         struct rte_bbdev *bbdev;
1314         const char *name;
1315
1316         if (vdev == NULL)
1317                 return -EINVAL;
1318
1319         name = rte_vdev_device_name(vdev);
1320         if (name == NULL)
1321                 return -EINVAL;
1322
1323         bbdev = rte_bbdev_get_named_dev(name);
1324         if (bbdev == NULL)
1325                 return -EINVAL;
1326
1327         rte_free(bbdev->data->dev_private);
1328
1329         return rte_bbdev_release(bbdev);
1330 }
1331
/* Virtual device driver: probe on vdev creation, remove on teardown */
static struct rte_vdev_driver bbdev_turbo_sw_pmd_drv = {
	.probe = turbo_sw_bbdev_probe,
	.remove = turbo_sw_bbdev_remove
};

/* Register the driver, its accepted devargs and the "turbo_sw" alias */
RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_turbo_sw_pmd_drv);
RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME,
	TURBO_SW_MAX_NB_QUEUES_ARG"=<int> "
	TURBO_SW_SOCKET_ID_ARG"=<int>");
RTE_PMD_REGISTER_ALIAS(DRIVER_NAME, turbo_sw);
1342
/* Constructor: register the PMD log type, defaulting to NOTICE level */
RTE_INIT(turbo_sw_bbdev_init_log)
{
	bbdev_turbo_sw_logtype = rte_log_register("pmd.bb.turbo_sw");
	if (bbdev_turbo_sw_logtype >= 0)
		rte_log_set_level(bbdev_turbo_sw_logtype, RTE_LOG_NOTICE);
}