drivers/baseband/acc100/rte_acc100_pmd.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2020 Intel Corporation
3  */
4
5 #include <unistd.h>
6
7 #include <rte_common.h>
8 #include <rte_log.h>
9 #include <rte_dev.h>
10 #include <rte_malloc.h>
11 #include <rte_mempool.h>
12 #include <rte_byteorder.h>
13 #include <rte_errno.h>
14 #include <rte_branch_prediction.h>
15 #include <rte_hexdump.h>
16 #include <rte_pci.h>
17 #include <rte_bus_pci.h>
18 #ifdef RTE_BBDEV_OFFLOAD_COST
19 #include <rte_cycles.h>
20 #endif
21
22 #include <rte_bbdev.h>
23 #include <rte_bbdev_pmd.h>
24 #include "rte_acc100_pmd.h"
25
26 #ifdef RTE_LIBRTE_BBDEV_DEBUG
27 RTE_LOG_REGISTER(acc100_logtype, pmd.bb.acc100, DEBUG);
28 #else
29 RTE_LOG_REGISTER(acc100_logtype, pmd.bb.acc100, NOTICE);
30 #endif
31
32 /* Write to MMIO register address */
33 static inline void
34 mmio_write(void *addr, uint32_t value)
35 {
36         *((volatile uint32_t *)(addr)) = rte_cpu_to_le_32(value);
37 }
38
39 /* Write a register of an ACC100 device */
40 static inline void
41 acc100_reg_write(struct acc100_device *d, uint32_t offset, uint32_t payload)
42 {
43         void *reg_addr = RTE_PTR_ADD(d->mmio_base, offset);
44         mmio_write(reg_addr, payload);
45         usleep(ACC100_LONG_WAIT);
46 }
47
48 /* Read a register of an ACC100 device */
49 static inline uint32_t
50 acc100_reg_read(struct acc100_device *d, uint32_t offset)
51 {
53         void *reg_addr = RTE_PTR_ADD(d->mmio_base, offset);
54         uint32_t ret = *((volatile uint32_t *)(reg_addr));
55         return rte_le_to_cpu_32(ret);
56 }
57
58 /* Basic implementation of log2, valid only for exact powers of two (2^N) */
59 static inline uint32_t
60 log2_basic(uint32_t value)
61 {
62         return (value == 0) ? 0 : rte_bsf32(value);
63 }
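/*
 * Illustrative note (not from the original source): log2_basic() is only
 * meaningful for exact powers of two, e.g. when programming ring sizes.
 * Assuming rte_bsf32() returns the index of the least significant set bit:
 *   log2_basic(1)  == 0
 *   log2_basic(64) == 6
 *   log2_basic(0)  == 0  (guarded, since rte_bsf32(0) is undefined)
 */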
64
65 /* Calculate memory alignment offset assuming alignment is 2^N */
66 static inline uint32_t
67 calc_mem_alignment_offset(void *unaligned_virt_mem, uint32_t alignment)
68 {
69         rte_iova_t unaligned_phy_mem = rte_malloc_virt2iova(unaligned_virt_mem);
70         return (uint32_t)(alignment -
71                         (unaligned_phy_mem & (alignment-1)));
72 }
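/*
 * Worked example (illustrative): for a 64MB alignment (0x4000000) and an
 * IOVA of 0x101200000, the masked remainder is 0x1200000, so the offset is
 * 0x4000000 - 0x1200000 = 0x2E00000 and base + offset lands on the next
 * 64MB boundary. Note an already-aligned address yields `alignment`, not 0;
 * callers over-allocate, so skipping one full alignment unit is harmless.
 */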
73
74 /* Calculate the offset of the enqueue register */
75 static inline uint32_t
76 queue_offset(bool pf_device, uint8_t vf_id, uint8_t qgrp_id, uint16_t aq_id)
77 {
78         if (pf_device)
79                 return ((vf_id << 12) + (qgrp_id << 7) + (aq_id << 3) +
80                                 HWPfQmgrIngressAq);
81         else
82                 return ((qgrp_id << 7) + (aq_id << 3) +
83                                 HWVfQmgrIngressAq);
84 }
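/*
 * Illustrative field layout implied by the shifts above (an assumption,
 * not documented in this file):
 *   bits [12..]  VF id (PF view only)
 *   bits [7..11] queue group id
 *   bits [3..6]  atomic queue id within the group
 * e.g. PF access to VF 1, qgrp 2, aq 3 maps to
 *   HWPfQmgrIngressAq + (1 << 12) + (2 << 7) + (3 << 3).
 */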
85
86 enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC};
87
88 /* Return the queue topology for a given accelerator enum (UL/DL, 4G/5G) */
89 static inline void
90 qtopFromAcc(struct rte_acc100_queue_topology **qtop, int acc_enum,
91                 struct rte_acc100_conf *acc100_conf)
92 {
93         struct rte_acc100_queue_topology *p_qtop;
94         p_qtop = NULL;
95         switch (acc_enum) {
96         case UL_4G:
97                 p_qtop = &(acc100_conf->q_ul_4g);
98                 break;
99         case UL_5G:
100                 p_qtop = &(acc100_conf->q_ul_5g);
101                 break;
102         case DL_4G:
103                 p_qtop = &(acc100_conf->q_dl_4g);
104                 break;
105         case DL_5G:
106                 p_qtop = &(acc100_conf->q_dl_5g);
107                 break;
108         default:
109                 /* NOTREACHED */
110                 rte_bbdev_log(ERR, "Unexpected error evaluating qtopFromAcc");
111                 break;
112         }
113         *qtop = p_qtop;
114 }
115
116 static void
117 initQTop(struct rte_acc100_conf *acc100_conf)
118 {
119         acc100_conf->q_ul_4g.num_aqs_per_groups = 0;
120         acc100_conf->q_ul_4g.num_qgroups = 0;
121         acc100_conf->q_ul_4g.first_qgroup_index = -1;
122         acc100_conf->q_ul_5g.num_aqs_per_groups = 0;
123         acc100_conf->q_ul_5g.num_qgroups = 0;
124         acc100_conf->q_ul_5g.first_qgroup_index = -1;
125         acc100_conf->q_dl_4g.num_aqs_per_groups = 0;
126         acc100_conf->q_dl_4g.num_qgroups = 0;
127         acc100_conf->q_dl_4g.first_qgroup_index = -1;
128         acc100_conf->q_dl_5g.num_aqs_per_groups = 0;
129         acc100_conf->q_dl_5g.num_qgroups = 0;
130         acc100_conf->q_dl_5g.first_qgroup_index = -1;
131 }
132
133 static inline void
134 updateQtop(uint8_t acc, uint8_t qg, struct rte_acc100_conf *acc100_conf,
135                 struct acc100_device *d) {
136         uint32_t reg;
137         struct rte_acc100_queue_topology *q_top = NULL;
138         qtopFromAcc(&q_top, acc, acc100_conf);
139         if (unlikely(q_top == NULL))
140                 return;
141         uint16_t aq;
142         q_top->num_qgroups++;
143         if (q_top->first_qgroup_index == -1) {
144                 q_top->first_qgroup_index = qg;
145                 /* Can be optimized to assume all are enabled by default */
146                 reg = acc100_reg_read(d, queue_offset(d->pf_device,
147                                 0, qg, ACC100_NUM_AQS - 1));
148                 if (reg & ACC100_QUEUE_ENABLE) {
149                         q_top->num_aqs_per_groups = ACC100_NUM_AQS;
150                         return;
151                 }
152                 q_top->num_aqs_per_groups = 0;
153                 for (aq = 0; aq < ACC100_NUM_AQS; aq++) {
154                         reg = acc100_reg_read(d, queue_offset(d->pf_device,
155                                         0, qg, aq));
156                         if (reg & ACC100_QUEUE_ENABLE)
157                                 q_top->num_aqs_per_groups++;
158                 }
159         }
160 }
161
162 /* Fetch configuration enabled for the PF/VF using MMIO Read (slow) */
163 static inline void
164 fetch_acc100_config(struct rte_bbdev *dev)
165 {
166         struct acc100_device *d = dev->data->dev_private;
167         struct rte_acc100_conf *acc100_conf = &d->acc100_conf;
168         const struct acc100_registry_addr *reg_addr;
169         uint8_t acc, qg;
170         uint32_t reg, reg_aq, reg_len0, reg_len1;
171         uint32_t reg_mode;
172
173         /* Nothing to do if the configuration has already been fetched */
174         if (d->configured)
175                 return;
176
177         /* Choose correct registry addresses for the device type */
178         if (d->pf_device)
179                 reg_addr = &pf_reg_addr;
180         else
181                 reg_addr = &vf_reg_addr;
182
183         d->ddr_size = (1 + acc100_reg_read(d, reg_addr->ddr_range)) << 10;
184
185         /* A single VF bundle is used per VF */
186         acc100_conf->num_vf_bundles = 1;
187         initQTop(acc100_conf);
188
189         struct rte_acc100_queue_topology *q_top = NULL;
190         int qman_func_id[ACC100_NUM_ACCS] = {ACC100_ACCMAP_0, ACC100_ACCMAP_1,
191                         ACC100_ACCMAP_2, ACC100_ACCMAP_3, ACC100_ACCMAP_4};
192         reg = acc100_reg_read(d, reg_addr->qman_group_func);
193         for (qg = 0; qg < ACC100_NUM_QGRPS_PER_WORD; qg++) {
194                 reg_aq = acc100_reg_read(d,
195                                 queue_offset(d->pf_device, 0, qg, 0));
196                 if (reg_aq & ACC100_QUEUE_ENABLE) {
197                         uint32_t idx = (reg >> (qg * 4)) & 0x7;
198                         if (idx < ACC100_NUM_ACCS) {
199                                 acc = qman_func_id[idx];
200                                 updateQtop(acc, qg, acc100_conf, d);
201                         }
202                 }
203         }
204
205         /* Check the depth of the AQs */
206         reg_len0 = acc100_reg_read(d, reg_addr->depth_log0_offset);
207         reg_len1 = acc100_reg_read(d, reg_addr->depth_log1_offset);
208         for (acc = 0; acc < NUM_ACC; acc++) {
209                 qtopFromAcc(&q_top, acc, acc100_conf);
                if (unlikely(q_top == NULL || q_top->first_qgroup_index < 0))
                        continue;
210                 if (q_top->first_qgroup_index < ACC100_NUM_QGRPS_PER_WORD)
211                         q_top->aq_depth_log2 = (reg_len0 >>
212                                         (q_top->first_qgroup_index * 4))
213                                         & 0xF;
214                 else
215                         q_top->aq_depth_log2 = (reg_len1 >>
216                                         ((q_top->first_qgroup_index -
217                                         ACC100_NUM_QGRPS_PER_WORD) * 4))
218                                         & 0xF;
219         }
220
221         /* Read PF mode */
222         if (d->pf_device) {
223                 reg_mode = acc100_reg_read(d, HWPfHiPfMode);
224                 acc100_conf->pf_mode_en = (reg_mode == ACC100_PF_VAL) ? 1 : 0;
225         }
226
227         rte_bbdev_log_debug(
228                         "%s Config LLR SIGN IN/OUT %s %s QG %u %u %u %u AQ %u %u %u %u Len %u %u %u %u",
229                         (d->pf_device) ? "PF" : "VF",
230                         (acc100_conf->input_pos_llr_1_bit) ? "POS" : "NEG",
231                         (acc100_conf->output_pos_llr_1_bit) ? "POS" : "NEG",
232                         acc100_conf->q_ul_4g.num_qgroups,
233                         acc100_conf->q_dl_4g.num_qgroups,
234                         acc100_conf->q_ul_5g.num_qgroups,
235                         acc100_conf->q_dl_5g.num_qgroups,
236                         acc100_conf->q_ul_4g.num_aqs_per_groups,
237                         acc100_conf->q_dl_4g.num_aqs_per_groups,
238                         acc100_conf->q_ul_5g.num_aqs_per_groups,
239                         acc100_conf->q_dl_5g.num_aqs_per_groups,
240                         acc100_conf->q_ul_4g.aq_depth_log2,
241                         acc100_conf->q_dl_4g.aq_depth_log2,
242                         acc100_conf->q_ul_5g.aq_depth_log2,
243                         acc100_conf->q_dl_5g.aq_depth_log2);
244 }
245
246 static void
247 free_base_addresses(void **base_addrs, int size)
248 {
249         int i;
250         for (i = 0; i < size; i++)
251                 rte_free(base_addrs[i]);
252 }
253
254 static inline uint32_t
255 get_desc_len(void)
256 {
257         return sizeof(union acc100_dma_desc);
258 }
259
260 /* Allocate the 2 * 64MB block for the sw rings */
261 static int
262 alloc_2x64mb_sw_rings_mem(struct rte_bbdev *dev, struct acc100_device *d,
263                 int socket)
264 {
265         uint32_t sw_ring_size = ACC100_SIZE_64MBYTE;
266         d->sw_rings_base = rte_zmalloc_socket(dev->device->driver->name,
267                         2 * sw_ring_size, RTE_CACHE_LINE_SIZE, socket);
268         if (d->sw_rings_base == NULL) {
269                 rte_bbdev_log(ERR, "Failed to allocate memory for %s:%u",
270                                 dev->device->driver->name,
271                                 dev->data->dev_id);
272                 return -ENOMEM;
273         }
274         uint32_t next_64mb_align_offset = calc_mem_alignment_offset(
275                         d->sw_rings_base, ACC100_SIZE_64MBYTE);
276         d->sw_rings = RTE_PTR_ADD(d->sw_rings_base, next_64mb_align_offset);
277         d->sw_rings_iova = rte_malloc_virt2iova(d->sw_rings_base) +
278                         next_64mb_align_offset;
279         d->sw_ring_size = ACC100_MAX_QUEUE_DEPTH * get_desc_len();
280         d->sw_ring_max_depth = ACC100_MAX_QUEUE_DEPTH;
281
282         return 0;
283 }
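/*
 * Design note (illustrative): over-allocating 2 * 64MB guarantees that a
 * full 64MB window starting on a 64MB boundary fits somewhere inside the
 * block, wherever the allocator placed it. E.g. a base IOVA of 0x101200000
 * yields an aligned window at 0x104000000, still ending before base + 128MB.
 */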
284
285 /* Attempt to allocate minimised memory space for sw rings */
286 static void
287 alloc_sw_rings_min_mem(struct rte_bbdev *dev, struct acc100_device *d,
288                 uint16_t num_queues, int socket)
289 {
290         rte_iova_t sw_rings_base_iova, next_64mb_align_addr_iova;
291         uint32_t next_64mb_align_offset;
292         rte_iova_t sw_ring_iova_end_addr;
293         void *base_addrs[ACC100_SW_RING_MEM_ALLOC_ATTEMPTS];
294         void *sw_rings_base;
295         int i = 0;
296         uint32_t q_sw_ring_size = ACC100_MAX_QUEUE_DEPTH * get_desc_len();
297         uint32_t dev_sw_ring_size = q_sw_ring_size * num_queues;
298
299         /* Find an aligned block of memory to store sw rings */
300         while (i < ACC100_SW_RING_MEM_ALLOC_ATTEMPTS) {
301                 /*
302                  * sw_ring allocated memory is guaranteed to be aligned to
303                  * q_sw_ring_size, provided that the requested size is
304                  * less than the page size
305                  */
306                 sw_rings_base = rte_zmalloc_socket(
307                                 dev->device->driver->name,
308                                 dev_sw_ring_size, q_sw_ring_size, socket);
309
310                 if (sw_rings_base == NULL) {
311                         rte_bbdev_log(ERR,
312                                         "Failed to allocate memory for %s:%u",
313                                         dev->device->driver->name,
314                                         dev->data->dev_id);
315                         break;
316                 }
317
318                 sw_rings_base_iova = rte_malloc_virt2iova(sw_rings_base);
319                 next_64mb_align_offset = calc_mem_alignment_offset(
320                                 sw_rings_base, ACC100_SIZE_64MBYTE);
321                 next_64mb_align_addr_iova = sw_rings_base_iova +
322                                 next_64mb_align_offset;
323                 sw_ring_iova_end_addr = sw_rings_base_iova + dev_sw_ring_size;
324
325                 /* Check if the end of the sw ring memory block is before the
326                  * start of next 64MB aligned mem address
327                  */
328                 if (sw_ring_iova_end_addr < next_64mb_align_addr_iova) {
329                         d->sw_rings_iova = sw_rings_base_iova;
330                         d->sw_rings = sw_rings_base;
331                         d->sw_rings_base = sw_rings_base;
332                         d->sw_ring_size = q_sw_ring_size;
333                         d->sw_ring_max_depth = ACC100_MAX_QUEUE_DEPTH;
334                         break;
335                 }
336                 /* Store the address of the unaligned mem block */
337                 base_addrs[i] = sw_rings_base;
338                 i++;
339         }
340
341         /* Free all unaligned blocks of mem allocated in the loop */
342         free_base_addresses(base_addrs, i);
343 }
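/*
 * Illustrative walk-through (an assumption, not from the original source):
 * each attempt requests num_queues * q_sw_ring_size bytes aligned to
 * q_sw_ring_size. A block is kept only if it ends before the next 64MB
 * boundary, since acc100_setup_queues() programs the ring base masked to
 * 64MB granularity; rejected blocks are stashed in base_addrs[] and freed
 * in one pass after the loop.
 */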
344
345 /* Allocate 64MB memory used for all software rings */
346 static int
347 acc100_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
348 {
349         uint32_t phys_low, phys_high, payload;
350         struct acc100_device *d = dev->data->dev_private;
351         const struct acc100_registry_addr *reg_addr;
352
353         if (d->pf_device && !d->acc100_conf.pf_mode_en) {
354                 rte_bbdev_log(NOTICE,
355                                 "%s has PF mode disabled. This PF can't be used.",
356                                 dev->data->name);
357                 return -ENODEV;
358         }
359
360         alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
361
362         /* If minimal memory space approach failed, then allocate
363          * the 2 * 64MB block for the sw rings
364          */
365         if (d->sw_rings == NULL)
366                 alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
367
368         if (d->sw_rings == NULL) {
369                 rte_bbdev_log(NOTICE,
370                                 "Failure allocating sw_rings memory");
371                 return -ENODEV;
372         }
373
374         /* Configure ACC100 with the base address for DMA descriptor rings
375          * Same descriptor rings used for UL and DL DMA Engines
376          * Note : Assuming only VF0 bundle is used for PF mode
377          */
378         phys_high = (uint32_t)(d->sw_rings_iova >> 32);
379         phys_low  = (uint32_t)(d->sw_rings_iova & ~(ACC100_SIZE_64MBYTE-1));
380
381         /* Choose correct registry addresses for the device type */
382         if (d->pf_device)
383                 reg_addr = &pf_reg_addr;
384         else
385                 reg_addr = &vf_reg_addr;
386
387         /* Read the populated cfg from ACC100 registers */
388         fetch_acc100_config(dev);
389
390         /* Release AXI from PF */
391         if (d->pf_device)
392                 acc100_reg_write(d, HWPfDmaAxiControl, 1);
393
394         acc100_reg_write(d, reg_addr->dma_ring_ul5g_hi, phys_high);
395         acc100_reg_write(d, reg_addr->dma_ring_ul5g_lo, phys_low);
396         acc100_reg_write(d, reg_addr->dma_ring_dl5g_hi, phys_high);
397         acc100_reg_write(d, reg_addr->dma_ring_dl5g_lo, phys_low);
398         acc100_reg_write(d, reg_addr->dma_ring_ul4g_hi, phys_high);
399         acc100_reg_write(d, reg_addr->dma_ring_ul4g_lo, phys_low);
400         acc100_reg_write(d, reg_addr->dma_ring_dl4g_hi, phys_high);
401         acc100_reg_write(d, reg_addr->dma_ring_dl4g_lo, phys_low);
402
403         /*
404          * Configure Ring Size to the max queue ring size
405          * (used for wrapping purpose)
406          */
407         payload = log2_basic(d->sw_ring_size / 64);
408         acc100_reg_write(d, reg_addr->ring_size, payload);
409
410         /* Configure tail pointer for use when SDONE enabled */
411         d->tail_ptrs = rte_zmalloc_socket(
412                         dev->device->driver->name,
413                         ACC100_NUM_QGRPS * ACC100_NUM_AQS * sizeof(uint32_t),
414                         RTE_CACHE_LINE_SIZE, socket_id);
415         if (d->tail_ptrs == NULL) {
416                 rte_bbdev_log(ERR, "Failed to allocate tail ptr for %s:%u",
417                                 dev->device->driver->name,
418                                 dev->data->dev_id);
419                 rte_free(d->sw_rings_base);
420                 return -ENOMEM;
421         }
422         d->tail_ptr_iova = rte_malloc_virt2iova(d->tail_ptrs);
423
424         phys_high = (uint32_t)(d->tail_ptr_iova >> 32);
425         phys_low  = (uint32_t)(d->tail_ptr_iova);
426         acc100_reg_write(d, reg_addr->tail_ptrs_ul5g_hi, phys_high);
427         acc100_reg_write(d, reg_addr->tail_ptrs_ul5g_lo, phys_low);
428         acc100_reg_write(d, reg_addr->tail_ptrs_dl5g_hi, phys_high);
429         acc100_reg_write(d, reg_addr->tail_ptrs_dl5g_lo, phys_low);
430         acc100_reg_write(d, reg_addr->tail_ptrs_ul4g_hi, phys_high);
431         acc100_reg_write(d, reg_addr->tail_ptrs_ul4g_lo, phys_low);
432         acc100_reg_write(d, reg_addr->tail_ptrs_dl4g_hi, phys_high);
433         acc100_reg_write(d, reg_addr->tail_ptrs_dl4g_lo, phys_low);
434
435         d->harq_layout = rte_zmalloc_socket("HARQ Layout",
436                         ACC100_HARQ_LAYOUT * sizeof(*d->harq_layout),
437                         RTE_CACHE_LINE_SIZE, dev->data->socket_id);
438         if (d->harq_layout == NULL) {
439                 rte_bbdev_log(ERR, "Failed to allocate harq_layout for %s:%u",
440                                 dev->device->driver->name,
441                                 dev->data->dev_id);
442                 rte_free(d->sw_rings_base);
443                 return -ENOMEM;
444         }
445
446         /* Mark as configured properly */
447         d->configured = true;
448
449         rte_bbdev_log_debug(
450                         "ACC100 (%s) configured sw_rings = %p, sw_rings_iova = %#"
451                         PRIx64, dev->data->name, d->sw_rings, d->sw_rings_iova);
452
453         return 0;
454 }
455
456 /* Free memory used for software rings */
457 static int
458 acc100_dev_close(struct rte_bbdev *dev)
459 {
460         struct acc100_device *d = dev->data->dev_private;
461         if (d->sw_rings_base != NULL) {
462                 rte_free(d->tail_ptrs);
463                 rte_free(d->sw_rings_base);
464                 d->sw_rings_base = NULL;
465         }
466         /* Ensure all in flight HW transactions are completed */
467         usleep(ACC100_LONG_WAIT);
468         return 0;
469 }
470
471 /**
472  * Report an ACC100 queue index which is free
473  * Return 0 to 16k for a valid queue_idx or -1 when no queue is available
474  * Note : Only supporting VF0 Bundle for PF mode
475  */
476 static int
477 acc100_find_free_queue_idx(struct rte_bbdev *dev,
478                 const struct rte_bbdev_queue_conf *conf)
479 {
480         struct acc100_device *d = dev->data->dev_private;
481         int op_2_acc[5] = {0, UL_4G, DL_4G, UL_5G, DL_5G};
482         int acc = op_2_acc[conf->op_type];
483         struct rte_acc100_queue_topology *qtop = NULL;
484
485         qtopFromAcc(&qtop, acc, &(d->acc100_conf));
486         if (qtop == NULL)
487                 return -1;
488         /* Identify matching QGroup Index which are sorted in priority order */
489         uint16_t group_idx = qtop->first_qgroup_index;
490         group_idx += conf->priority;
491         if (group_idx >= ACC100_NUM_QGRPS ||
492                         conf->priority >= qtop->num_qgroups) {
493                 rte_bbdev_log(INFO, "Invalid Priority on %s, priority %u",
494                                 dev->data->name, conf->priority);
495                 return -1;
496         }
497         /* Find a free AQ_idx  */
498         uint16_t aq_idx;
499         for (aq_idx = 0; aq_idx < qtop->num_aqs_per_groups; aq_idx++) {
500                 if (((d->q_assigned_bit_map[group_idx] >> aq_idx) & 0x1) == 0) {
501                         /* Mark the Queue as assigned */
502                         d->q_assigned_bit_map[group_idx] |= (1 << aq_idx);
503                         /* Report the AQ Index */
504                         return (group_idx << ACC100_GRP_ID_SHIFT) + aq_idx;
505                 }
506         }
507         rte_bbdev_log(INFO, "Failed to find free queue on %s, priority %u",
508                         dev->data->name, conf->priority);
509         return -1;
510 }
511
512 /* Setup ACC100 queue */
513 static int
514 acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
515                 const struct rte_bbdev_queue_conf *conf)
516 {
517         struct acc100_device *d = dev->data->dev_private;
518         struct acc100_queue *q;
519         int16_t q_idx;
520
521         if (d == NULL) {
522                 rte_bbdev_log(ERR, "Undefined device");
523                 return -ENODEV;
524         }
525         /* Allocate the queue data structure. */
526         q = rte_zmalloc_socket(dev->device->driver->name, sizeof(*q),
527                         RTE_CACHE_LINE_SIZE, conf->socket);
528         if (q == NULL) {
529                 rte_bbdev_log(ERR, "Failed to allocate queue memory");
530                 return -ENOMEM;
531         }
532
533         q->d = d;
534         q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id));
535         q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id);
536
537         /* Prepare the Ring with default descriptor format */
538         union acc100_dma_desc *desc = NULL;
539         unsigned int desc_idx, b_idx;
540         int fcw_len = (conf->op_type == RTE_BBDEV_OP_LDPC_ENC ?
541                 ACC100_FCW_LE_BLEN : (conf->op_type == RTE_BBDEV_OP_TURBO_DEC ?
542                 ACC100_FCW_TD_BLEN : ACC100_FCW_LD_BLEN));
543
544         for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) {
545                 desc = q->ring_addr + desc_idx;
546                 desc->req.word0 = ACC100_DMA_DESC_TYPE;
547                 desc->req.word1 = 0; /**< Timestamp */
548                 desc->req.word2 = 0;
549                 desc->req.word3 = 0;
550                 uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
551                 desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
552                 desc->req.data_ptrs[0].blen = fcw_len;
553                 desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
554                 desc->req.data_ptrs[0].last = 0;
555                 desc->req.data_ptrs[0].dma_ext = 0;
556                 for (b_idx = 1; b_idx < ACC100_DMA_MAX_NUM_POINTERS - 1;
557                                 b_idx++) {
558                         desc->req.data_ptrs[b_idx].blkid = ACC100_DMA_BLKID_IN;
559                         desc->req.data_ptrs[b_idx].last = 1;
560                         desc->req.data_ptrs[b_idx].dma_ext = 0;
561                         b_idx++;
562                         desc->req.data_ptrs[b_idx].blkid =
563                                         ACC100_DMA_BLKID_OUT_ENC;
564                         desc->req.data_ptrs[b_idx].last = 1;
565                         desc->req.data_ptrs[b_idx].dma_ext = 0;
566                 }
567                 /* Preset some fields of LDPC FCW */
568                 desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
569                 desc->req.fcw_ld.gain_i = 1;
570                 desc->req.fcw_ld.gain_h = 1;
571         }
572
573         q->lb_in = rte_zmalloc_socket(dev->device->driver->name,
574                         RTE_CACHE_LINE_SIZE,
575                         RTE_CACHE_LINE_SIZE, conf->socket);
576         if (q->lb_in == NULL) {
577                 rte_bbdev_log(ERR, "Failed to allocate lb_in memory");
578                 rte_free(q);
579                 return -ENOMEM;
580         }
581         q->lb_in_addr_iova = rte_malloc_virt2iova(q->lb_in);
582         q->lb_out = rte_zmalloc_socket(dev->device->driver->name,
583                         RTE_CACHE_LINE_SIZE,
584                         RTE_CACHE_LINE_SIZE, conf->socket);
585         if (q->lb_out == NULL) {
586                 rte_bbdev_log(ERR, "Failed to allocate lb_out memory");
587                 rte_free(q->lb_in);
588                 rte_free(q);
589                 return -ENOMEM;
590         }
591         q->lb_out_addr_iova = rte_malloc_virt2iova(q->lb_out);
592
593         /*
594          * Software queue ring wraps synchronously with the HW when it reaches
595          * the boundary of the maximum allocated queue size, no matter what the
596          * sw queue size is. This wrapping is guarded by setting the wrap_mask
597          * to represent the maximum queue size as allocated at the time when
598          * the device has been setup (in configure()).
599          *
600          * The queue depth is set to the queue size value (conf->queue_size).
601          * This limits the occupancy of the queue at any point of time, so that
602          * the queue does not get swamped with enqueue requests.
603          */
604         q->sw_ring_depth = conf->queue_size;
605         q->sw_ring_wrap_mask = d->sw_ring_max_depth - 1;
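        /*
         * Illustrative example (assuming sw_ring_max_depth = 1024): the
         * wrap mask is then 0x3FF, so ring indices wrap on the full
         * 1024-entry allocation, while a conf->queue_size of e.g. 128
         * caps how many operations may be in flight at once.
         */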
606
607         q->op_type = conf->op_type;
608
609         q_idx = acc100_find_free_queue_idx(dev, conf);
610         if (q_idx == -1) {
611                 rte_free(q->lb_in);
612                 rte_free(q->lb_out);
613                 rte_free(q);
614                 return -1;
615         }
616
617         q->qgrp_id = (q_idx >> ACC100_GRP_ID_SHIFT) & 0xF;
618         q->vf_id = (q_idx >> ACC100_VF_ID_SHIFT) & 0x3F;
619         q->aq_id = q_idx & 0xF;
620         q->aq_depth = (conf->op_type == RTE_BBDEV_OP_TURBO_DEC) ?
621                         (1 << d->acc100_conf.q_ul_4g.aq_depth_log2) :
622                         (1 << d->acc100_conf.q_dl_4g.aq_depth_log2);
623
624         q->mmio_reg_enqueue = RTE_PTR_ADD(d->mmio_base,
625                         queue_offset(d->pf_device,
626                                         q->vf_id, q->qgrp_id, q->aq_id));
627
628         rte_bbdev_log_debug(
629                         "Setup dev%u q%u: qgrp_id=%u, vf_id=%u, aq_id=%u, aq_depth=%u, mmio_reg_enqueue=%p",
630                         dev->data->dev_id, queue_id, q->qgrp_id, q->vf_id,
631                         q->aq_id, q->aq_depth, q->mmio_reg_enqueue);
632
633         dev->data->queues[queue_id].queue_private = q;
634         return 0;
635 }
636
637 /* Release ACC100 queue */
638 static int
639 acc100_queue_release(struct rte_bbdev *dev, uint16_t q_id)
640 {
641         struct acc100_device *d = dev->data->dev_private;
642         struct acc100_queue *q = dev->data->queues[q_id].queue_private;
643
644         if (q != NULL) {
645                 /* Mark the Queue as un-assigned */
646                 d->q_assigned_bit_map[q->qgrp_id] &=
647                                 ~(1 << q->aq_id);
648                 rte_free(q->lb_in);
649                 rte_free(q->lb_out);
650                 rte_free(q);
651                 dev->data->queues[q_id].queue_private = NULL;
652         }
653
654         return 0;
655 }
656
657 /* Get ACC100 device info */
658 static void
659 acc100_dev_info_get(struct rte_bbdev *dev,
660                 struct rte_bbdev_driver_info *dev_info)
661 {
662         struct acc100_device *d = dev->data->dev_private;
663
664         static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
665                 {
666                         .type = RTE_BBDEV_OP_TURBO_DEC,
667                         .cap.turbo_dec = {
668                                 .capability_flags =
669                                         RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
670                                         RTE_BBDEV_TURBO_CRC_TYPE_24B |
671                                         RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
672                                         RTE_BBDEV_TURBO_EARLY_TERMINATION |
673                                         RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
674                                         RTE_BBDEV_TURBO_MAP_DEC |
675                                         RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
676                                         RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
677                                 .max_llr_modulus = INT8_MAX,
678                                 .num_buffers_src =
679                                                 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
680                                 .num_buffers_hard_out =
681                                                 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
682                                 .num_buffers_soft_out =
683                                                 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
684                         }
685                 },
686                 {
687                         .type = RTE_BBDEV_OP_TURBO_ENC,
688                         .cap.turbo_enc = {
689                                 .capability_flags =
690                                         RTE_BBDEV_TURBO_CRC_24B_ATTACH |
691                                         RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
692                                         RTE_BBDEV_TURBO_RATE_MATCH |
693                                         RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
694                                 .num_buffers_src =
695                                                 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
696                                 .num_buffers_dst =
697                                                 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
698                         }
699                 },
700                 {
701                         .type   = RTE_BBDEV_OP_LDPC_ENC,
702                         .cap.ldpc_enc = {
703                                 .capability_flags =
704                                         RTE_BBDEV_LDPC_RATE_MATCH |
705                                         RTE_BBDEV_LDPC_CRC_24B_ATTACH |
706                                         RTE_BBDEV_LDPC_INTERLEAVER_BYPASS,
707                                 .num_buffers_src =
708                                                 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
709                                 .num_buffers_dst =
710                                                 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
711                         }
712                 },
713                 {
714                         .type   = RTE_BBDEV_OP_LDPC_DEC,
715                         .cap.ldpc_dec = {
716                         .capability_flags =
717                                 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
718                                 RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
719                                 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
720                                 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
721 #ifdef ACC100_EXT_MEM
722                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK |
723                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE |
724                                 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE |
725 #endif
726                                 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE |
727                                 RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS |
728                                 RTE_BBDEV_LDPC_DECODE_BYPASS |
729                                 RTE_BBDEV_LDPC_DEC_SCATTER_GATHER |
730                                 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION |
731                                 RTE_BBDEV_LDPC_LLR_COMPRESSION,
732                         .llr_size = 8,
733                         .llr_decimals = 1,
734                         .num_buffers_src =
735                                         RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
736                         .num_buffers_hard_out =
737                                         RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
738                         .num_buffers_soft_out = 0,
739                         }
740                 },
741                 RTE_BBDEV_END_OF_CAPABILITIES_LIST()
742         };
743
744         static struct rte_bbdev_queue_conf default_queue_conf;
745         default_queue_conf.socket = dev->data->socket_id;
746         default_queue_conf.queue_size = ACC100_MAX_QUEUE_DEPTH;
747
748         dev_info->driver_name = dev->device->driver->name;
749
750         /* Read and save the populated config from ACC100 registers */
751         fetch_acc100_config(dev);
752
753         /* This isn't ideal because it reports the maximum number of queues but
754          * does not provide info on how many can be uplink/downlink or at
755          * different priorities.
756          */
757         dev_info->max_num_queues =
758                         d->acc100_conf.q_dl_5g.num_aqs_per_groups *
759                         d->acc100_conf.q_dl_5g.num_qgroups +
760                         d->acc100_conf.q_ul_5g.num_aqs_per_groups *
761                         d->acc100_conf.q_ul_5g.num_qgroups +
762                         d->acc100_conf.q_dl_4g.num_aqs_per_groups *
763                         d->acc100_conf.q_dl_4g.num_qgroups +
764                         d->acc100_conf.q_ul_4g.num_aqs_per_groups *
765                         d->acc100_conf.q_ul_4g.num_qgroups;
766         dev_info->queue_size_lim = ACC100_MAX_QUEUE_DEPTH;
767         dev_info->hardware_accelerated = true;
768         dev_info->max_dl_queue_priority =
769                         d->acc100_conf.q_dl_4g.num_qgroups - 1;
770         dev_info->max_ul_queue_priority =
771                         d->acc100_conf.q_ul_4g.num_qgroups - 1;
772         dev_info->default_queue_conf = default_queue_conf;
773         dev_info->cpu_flag_reqs = NULL;
774         dev_info->min_alignment = 64;
775         dev_info->capabilities = bbdev_capabilities;
776 #ifdef ACC100_EXT_MEM
777         dev_info->harq_buffer_size = d->ddr_size;
778 #else
779         dev_info->harq_buffer_size = 0;
780 #endif
781 }
782
783 static const struct rte_bbdev_ops acc100_bbdev_ops = {
784         .setup_queues = acc100_setup_queues,
785         .close = acc100_dev_close,
786         .info_get = acc100_dev_info_get,
787         .queue_setup = acc100_queue_setup,
788         .queue_release = acc100_queue_release,
789 };
790
791 /* ACC100 PCI PF address map */
792 static struct rte_pci_id pci_id_acc100_pf_map[] = {
793         {
794                 RTE_PCI_DEVICE(RTE_ACC100_VENDOR_ID, RTE_ACC100_PF_DEVICE_ID)
795         },
796         {.device_id = 0},
797 };
798
799 /* ACC100 PCI VF address map */
800 static struct rte_pci_id pci_id_acc100_vf_map[] = {
801         {
802                 RTE_PCI_DEVICE(RTE_ACC100_VENDOR_ID, RTE_ACC100_VF_DEVICE_ID)
803         },
804         {.device_id = 0},
805 };
806
807 /* Read flag value 0/1 from bitmap */
808 static inline bool
809 check_bit(uint32_t bitmap, uint32_t bitmask)
810 {
811         return bitmap & bitmask;
812 }
813
814 static inline char *
815 mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
816 {
817         if (unlikely(len > rte_pktmbuf_tailroom(m)))
818                 return NULL;
819
820         char *tail = (char *)m->buf_addr + m->data_off + m->data_len;
821         m->data_len = (uint16_t)(m->data_len + len);
822         m_head->pkt_len  = (m_head->pkt_len + len);
823         return tail;
824 }
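/*
 * Note (illustrative): unlike rte_pktmbuf_append(), which always writes to
 * the last segment of a chain, this helper appends to the given segment m
 * while accounting the extra bytes on the head m_head, so per-code-block
 * outputs can be placed into specific segments of a chained mbuf.
 */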
825
826 /* Fill in a frame control word for turbo encoding. */
827 static inline void
828 acc100_fcw_te_fill(const struct rte_bbdev_enc_op *op, struct acc100_fcw_te *fcw)
829 {
830         fcw->code_block_mode = op->turbo_enc.code_block_mode;
831         if (fcw->code_block_mode == 0) { /* For TB mode */
832                 fcw->k_neg = op->turbo_enc.tb_params.k_neg;
833                 fcw->k_pos = op->turbo_enc.tb_params.k_pos;
834                 fcw->c_neg = op->turbo_enc.tb_params.c_neg;
835                 fcw->c = op->turbo_enc.tb_params.c;
836                 fcw->ncb_neg = op->turbo_enc.tb_params.ncb_neg;
837                 fcw->ncb_pos = op->turbo_enc.tb_params.ncb_pos;
838
839                 if (check_bit(op->turbo_enc.op_flags,
840                                 RTE_BBDEV_TURBO_RATE_MATCH)) {
841                         fcw->bypass_rm = 0;
842                         fcw->cab = op->turbo_enc.tb_params.cab;
843                         fcw->ea = op->turbo_enc.tb_params.ea;
844                         fcw->eb = op->turbo_enc.tb_params.eb;
845                 } else {
846                         /* When RM is bypassed, E is the raw rate-1/3 encoder
847                          * output size: 3 * K bits plus 12 tail bits.
848                          */
849                         fcw->bypass_rm = 1;
850                         fcw->cab = fcw->c_neg;
851                         fcw->ea = 3 * fcw->k_neg + 12;
852                         fcw->eb = 3 * fcw->k_pos + 12;
853                 }
854         } else { /* For CB mode */
855                 fcw->k_pos = op->turbo_enc.cb_params.k;
856                 fcw->ncb_pos = op->turbo_enc.cb_params.ncb;
857
858                 if (check_bit(op->turbo_enc.op_flags,
859                                 RTE_BBDEV_TURBO_RATE_MATCH)) {
860                         fcw->bypass_rm = 0;
861                         fcw->eb = op->turbo_enc.cb_params.e;
862                 } else {
863                         /* When RM is bypassed, E is the raw rate-1/3 encoder
864                          * output size: 3 * K bits plus 12 tail bits.
865                          */
866                         fcw->bypass_rm = 1;
867                         fcw->eb = 3 * fcw->k_pos + 12;
868                 }
869         }
870
871         fcw->bypass_rv_idx1 = check_bit(op->turbo_enc.op_flags,
872                         RTE_BBDEV_TURBO_RV_INDEX_BYPASS);
873         fcw->code_block_crc = check_bit(op->turbo_enc.op_flags,
874                         RTE_BBDEV_TURBO_CRC_24B_ATTACH);
875         fcw->rv_idx1 = op->turbo_enc.rv_index;
876 }
877
878 /* Compute value of k0.
879  * Based on 3GPP 38.212 Table 5.4.2.1-2
880  * Starting position of different redundancy versions, k0
881  */
882 static inline uint16_t
883 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
884 {
885         if (rv_index == 0)
886                 return 0;
887         uint16_t n = (bg == 1 ? ACC100_N_ZC_1 : ACC100_N_ZC_2) * z_c;
888         if (n_cb == n) {
889                 if (rv_index == 1)
890                         return (bg == 1 ? ACC100_K0_1_1 : ACC100_K0_1_2) * z_c;
891                 else if (rv_index == 2)
892                         return (bg == 1 ? ACC100_K0_2_1 : ACC100_K0_2_2) * z_c;
893                 else
894                         return (bg == 1 ? ACC100_K0_3_1 : ACC100_K0_3_2) * z_c;
895         }
896         /* LBRM case - includes a division by N */
897         if (rv_index == 1)
898                 return (((bg == 1 ? ACC100_K0_1_1 : ACC100_K0_1_2) * n_cb)
899                                 / n) * z_c;
900         else if (rv_index == 2)
901                 return (((bg == 1 ? ACC100_K0_2_1 : ACC100_K0_2_2) * n_cb)
902                                 / n) * z_c;
903         else
904                 return (((bg == 1 ? ACC100_K0_3_1 : ACC100_K0_3_2) * n_cb)
905                                 / n) * z_c;
906 }
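/*
 * Worked example (illustrative, constants per 3GPP 38.212 Table 5.4.2.1-2):
 * BG1, Zc = 128, full circular buffer n_cb = 66 * 128 = 8448.
 *   rv 0 -> k0 = 0
 *   rv 1 -> k0 = ACC100_K0_1_1 * Zc = 17 * 128 = 2176
 * In the limited buffer (LBRM) case n_cb < n, the integer division by n
 * above implements the floor() of the specification before scaling by Zc.
 */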
907
908 /* Fill in a frame control word for LDPC encoding. */
909 static inline void
910 acc100_fcw_le_fill(const struct rte_bbdev_enc_op *op,
911                 struct acc100_fcw_le *fcw, int num_cb)
912 {
913         fcw->qm = op->ldpc_enc.q_m;
914         fcw->nfiller = op->ldpc_enc.n_filler;
915         fcw->BG = (op->ldpc_enc.basegraph - 1);
916         fcw->Zc = op->ldpc_enc.z_c;
917         fcw->ncb = op->ldpc_enc.n_cb;
918         fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_enc.basegraph,
919                         op->ldpc_enc.rv_index);
920         fcw->rm_e = op->ldpc_enc.cb_params.e;
921         fcw->crc_select = check_bit(op->ldpc_enc.op_flags,
922                         RTE_BBDEV_LDPC_CRC_24B_ATTACH);
923         fcw->bypass_intlv = check_bit(op->ldpc_enc.op_flags,
924                         RTE_BBDEV_LDPC_INTERLEAVER_BYPASS);
925         fcw->mcb_count = num_cb;
926 }
927
928 /* Fill in a frame control word for turbo decoding. */
929 static inline void
930 acc100_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct acc100_fcw_td *fcw)
931 {
932         /* Note : Early termination is always enabled for 4GUL */
933         fcw->fcw_ver = 1;
934         if (op->turbo_dec.code_block_mode == 0)
935                 fcw->k_pos = op->turbo_dec.tb_params.k_pos;
936         else
937                 fcw->k_pos = op->turbo_dec.cb_params.k;
938         fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
939                         RTE_BBDEV_TURBO_CRC_TYPE_24B);
940         fcw->bypass_sb_deint = 0;
941         fcw->raw_decoder_input_on = 0;
942         fcw->max_iter = op->turbo_dec.iter_max;
943         fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
944                         RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
945 }
946
947 /* Fill in a frame control word for LDPC decoding. */
948 static inline void
949 acc100_fcw_ld_fill(const struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
950                 union acc100_harq_layout_data *harq_layout)
951 {
952         uint16_t harq_out_length, harq_in_length, ncb_p, k0_p, parity_offset;
953         uint16_t harq_index;
954         uint32_t l;
955         bool harq_prun = false;
956
957         fcw->qm = op->ldpc_dec.q_m;
958         fcw->nfiller = op->ldpc_dec.n_filler;
959         fcw->BG = (op->ldpc_dec.basegraph - 1);
960         fcw->Zc = op->ldpc_dec.z_c;
961         fcw->ncb = op->ldpc_dec.n_cb;
962         fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_dec.basegraph,
963                         op->ldpc_dec.rv_index);
964         if (op->ldpc_dec.code_block_mode == 1)
965                 fcw->rm_e = op->ldpc_dec.cb_params.e;
966         else
967                 fcw->rm_e = (op->ldpc_dec.tb_params.r <
968                                 op->ldpc_dec.tb_params.cab) ?
969                                                 op->ldpc_dec.tb_params.ea :
970                                                 op->ldpc_dec.tb_params.eb;
971
972         fcw->hcin_en = check_bit(op->ldpc_dec.op_flags,
973                         RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
974         fcw->hcout_en = check_bit(op->ldpc_dec.op_flags,
975                         RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
976         fcw->crc_select = check_bit(op->ldpc_dec.op_flags,
977                         RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK);
978         fcw->bypass_dec = check_bit(op->ldpc_dec.op_flags,
979                         RTE_BBDEV_LDPC_DECODE_BYPASS);
980         fcw->bypass_intlv = check_bit(op->ldpc_dec.op_flags,
981                         RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS);
982         if (op->ldpc_dec.q_m == 1) {
983                 fcw->bypass_intlv = 1;
984                 fcw->qm = 2;
985         }
986         fcw->hcin_decomp_mode = check_bit(op->ldpc_dec.op_flags,
987                         RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION);
988         fcw->hcout_comp_mode = check_bit(op->ldpc_dec.op_flags,
989                         RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION);
990         fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags,
991                         RTE_BBDEV_LDPC_LLR_COMPRESSION);
992         harq_index = op->ldpc_dec.harq_combined_output.offset /
993                         ACC100_HARQ_OFFSET;
994 #ifdef ACC100_EXT_MEM
995         /* Limit cases when HARQ pruning is valid */
996         harq_prun = ((op->ldpc_dec.harq_combined_output.offset %
997                         ACC100_HARQ_OFFSET) == 0) &&
998                         (op->ldpc_dec.harq_combined_output.offset <= UINT16_MAX
999                         * ACC100_HARQ_OFFSET);
1000 #endif
1001         if (fcw->hcin_en > 0) {
1002                 harq_in_length = op->ldpc_dec.harq_combined_input.length;
1003                 if (fcw->hcin_decomp_mode > 0)
1004                         harq_in_length = harq_in_length * 8 / 6;
1005                 harq_in_length = RTE_ALIGN(harq_in_length, 64);
1006                 if ((harq_layout[harq_index].offset > 0) && harq_prun) {
1007                         rte_bbdev_log_debug("HARQ IN offset unexpected for now");
1008                         fcw->hcin_size0 = harq_layout[harq_index].size0;
1009                         fcw->hcin_offset = harq_layout[harq_index].offset;
1010                         fcw->hcin_size1 = harq_in_length -
1011                                         harq_layout[harq_index].offset;
1012                 } else {
1013                         fcw->hcin_size0 = harq_in_length;
1014                         fcw->hcin_offset = 0;
1015                         fcw->hcin_size1 = 0;
1016                 }
1017         } else {
1018                 fcw->hcin_size0 = 0;
1019                 fcw->hcin_offset = 0;
1020                 fcw->hcin_size1 = 0;
1021         }
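        /*
         * Sizing example (illustrative): a 6-bit compressed HARQ input of
         * 768 bytes expands to 768 * 8 / 6 = 1024 bytes of 8-bit LLRs,
         * already a multiple of the 64B alignment applied above.
         */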
1022
1023         fcw->itmax = op->ldpc_dec.iter_max;
1024         fcw->itstop = check_bit(op->ldpc_dec.op_flags,
1025                         RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
1026         fcw->synd_precoder = fcw->itstop;
1027         /*
1028          * These are all implicitly set
1029          * fcw->synd_post = 0;
1030          * fcw->so_en = 0;
1031          * fcw->so_bypass_rm = 0;
1032          * fcw->so_bypass_intlv = 0;
1033          * fcw->dec_convllr = 0;
1034          * fcw->hcout_convllr = 0;
1035          * fcw->hcout_size1 = 0;
1036          * fcw->so_it = 0;
1037          * fcw->hcout_offset = 0;
1038          * fcw->negstop_th = 0;
1039          * fcw->negstop_it = 0;
1040          * fcw->negstop_en = 0;
1041          * fcw->gain_i = 1;
1042          * fcw->gain_h = 1;
1043          */
1044         if (fcw->hcout_en > 0) {
1045                 parity_offset = (op->ldpc_dec.basegraph == 1 ? 20 : 8)
1046                         * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
1047                 k0_p = (fcw->k0 > parity_offset) ?
1048                                 fcw->k0 - op->ldpc_dec.n_filler : fcw->k0;
1049                 ncb_p = fcw->ncb - op->ldpc_dec.n_filler;
1050                 l = k0_p + fcw->rm_e;
1051                 harq_out_length = (uint16_t) fcw->hcin_size0;
1052                 harq_out_length = RTE_MIN(RTE_MAX(harq_out_length, l), ncb_p);
1053                 harq_out_length = (harq_out_length + 0x3F) & 0xFFC0;
1054                 if ((k0_p > fcw->hcin_size0 + ACC100_HARQ_OFFSET_THRESHOLD) &&
1055                                 harq_prun) {
1056                         fcw->hcout_size0 = (uint16_t) fcw->hcin_size0;
1057                         fcw->hcout_offset = k0_p & 0xFFC0;
1058                         fcw->hcout_size1 = harq_out_length - fcw->hcout_offset;
1059                 } else {
1060                         fcw->hcout_size0 = harq_out_length;
1061                         fcw->hcout_size1 = 0;
1062                         fcw->hcout_offset = 0;
1063                 }
1064                 harq_layout[harq_index].offset = fcw->hcout_offset;
1065                 harq_layout[harq_index].size0 = fcw->hcout_size0;
1066         } else {
1067                 fcw->hcout_size0 = 0;
1068                 fcw->hcout_size1 = 0;
1069                 fcw->hcout_offset = 0;
1070         }
1071 }
1072
1073 /**
1074  * Fills descriptor with data pointers of one block type.
1075  *
1076  * @param desc
1077  *   Pointer to DMA descriptor.
1078  * @param input
1079  *   Pointer to pointer to input data which will be encoded. It may be
1080  *   updated to point to the next segment in the scatter-gather case.
1081  * @param offset
1082  *   Input offset in rte_mbuf structure. It is used for calculating the point
1083  *   where data is starting.
1084  * @param cb_len
1085  *   Length of currently processed Code Block
1086  * @param seg_total_left
1087  *   It indicates how many bytes are still left in the segment (mbuf)
1088  *   for further processing.
1089  * @param op_flags
1090  *   Store information about device capabilities
1091  * @param next_triplet
1092  *   Index for ACC100 DMA Descriptor triplet
1093  *
1094  * @return
1095  *   Returns index of next triplet on success, a negative value if lengths
1096  *   of pkt and processed cb do not match.
1097  *
1098  */
1099 static inline int
1100 acc100_dma_fill_blk_type_in(struct acc100_dma_req_desc *desc,
1101                 struct rte_mbuf **input, uint32_t *offset, uint32_t cb_len,
1102                 uint32_t *seg_total_left, int next_triplet)
1103 {
1104         uint32_t part_len;
1105         struct rte_mbuf *m = *input;
1106
1107         part_len = (*seg_total_left < cb_len) ? *seg_total_left : cb_len;
1108         cb_len -= part_len;
1109         *seg_total_left -= part_len;
1110
1111         desc->data_ptrs[next_triplet].address =
1112                         rte_pktmbuf_iova_offset(m, *offset);
1113         desc->data_ptrs[next_triplet].blen = part_len;
1114         desc->data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_IN;
1115         desc->data_ptrs[next_triplet].last = 0;
1116         desc->data_ptrs[next_triplet].dma_ext = 0;
1117         *offset += part_len;
1118         next_triplet++;
1119
1120         while (cb_len > 0) {
1121                 if (next_triplet < ACC100_DMA_MAX_NUM_POINTERS &&
1122                                 m->next != NULL) {
1123
1124                         m = m->next;
1125                         *seg_total_left = rte_pktmbuf_data_len(m);
1126                         part_len = (*seg_total_left < cb_len) ?
1127                                         *seg_total_left :
1128                                         cb_len;
1129                         desc->data_ptrs[next_triplet].address =
1130                                         rte_pktmbuf_iova_offset(m, 0);
1131                         desc->data_ptrs[next_triplet].blen = part_len;
1132                         desc->data_ptrs[next_triplet].blkid =
1133                                         ACC100_DMA_BLKID_IN;
1134                         desc->data_ptrs[next_triplet].last = 0;
1135                         desc->data_ptrs[next_triplet].dma_ext = 0;
1136                         cb_len -= part_len;
1137                         *seg_total_left -= part_len;
1138                         /* Initializing offset for next segment (mbuf) */
1139                         *offset = part_len;
1140                         next_triplet++;
1141                 } else {
1142                         rte_bbdev_log(ERR,
1143                                 "Some data still left for processing: "
1144                                 "data_left: %u, next_triplet: %u, next_mbuf: %p",
1145                                 cb_len, next_triplet, m->next);
1146                         return -EINVAL;
1147                 }
1148         }
1149         /* Store the new mbuf as it may have changed in the scatter-gather case */
1150         *input = m;
1151
1152         return next_triplet;
1153 }
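/*
 * Scatter-gather example (illustrative, not from the original source): a
 * 6144B code block spread over two mbuf segments of 4096B and 2048B yields
 * two input triplets, (iova(seg0) + *offset, 4096) and (iova(seg1), 2048);
 * on return *input points at seg1 and *offset is 2048, ready for the next
 * code block of the same transport block.
 */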
1154
1155 /* Fills descriptor with data pointers of one block type.
1156  * Returns index of next triplet on success, other value if lengths of
1157  * output data and processed mbuf do not match.
1158  */
1159 static inline int
1160 acc100_dma_fill_blk_type_out(struct acc100_dma_req_desc *desc,
1161                 struct rte_mbuf *output, uint32_t out_offset,
1162                 uint32_t output_len, int next_triplet, int blk_id)
1163 {
1164         desc->data_ptrs[next_triplet].address =
1165                         rte_pktmbuf_iova_offset(output, out_offset);
1166         desc->data_ptrs[next_triplet].blen = output_len;
1167         desc->data_ptrs[next_triplet].blkid = blk_id;
1168         desc->data_ptrs[next_triplet].last = 0;
1169         desc->data_ptrs[next_triplet].dma_ext = 0;
1170         next_triplet++;
1171
1172         return next_triplet;
1173 }
1174
1175 static inline void
1176 acc100_header_init(struct acc100_dma_req_desc *desc)
1177 {
1178         desc->word0 = ACC100_DMA_DESC_TYPE;
1179         desc->word1 = 0; /**< Timestamp could be disabled */
1180         desc->word2 = 0;
1181         desc->word3 = 0;
1182         desc->numCBs = 1;
1183 }
1184
1185 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1186 /* Check if any input data is unexpectedly left for processing */
1187 static inline int
1188 check_mbuf_total_left(uint32_t mbuf_total_left)
1189 {
1190         if (mbuf_total_left == 0)
1191                 return 0;
1192         rte_bbdev_log(ERR,
1193                 "Some data still left for processing: mbuf_total_left = %u",
1194                 mbuf_total_left);
1195         return -EINVAL;
1196 }
1197 #endif
1198
1199 static inline int
1200 acc100_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
1201                 struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
1202                 struct rte_mbuf *output, uint32_t *in_offset,
1203                 uint32_t *out_offset, uint32_t *out_length,
1204                 uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t r)
1205 {
1206         int next_triplet = 1; /* FCW already done */
1207         uint32_t e, ea, eb, length;
1208         uint16_t k, k_neg, k_pos;
1209         uint8_t cab, c_neg;
1210
1211         desc->word0 = ACC100_DMA_DESC_TYPE;
1212         desc->word1 = 0; /**< Timestamp could be disabled */
1213         desc->word2 = 0;
1214         desc->word3 = 0;
1215         desc->numCBs = 1;
1216
1217         if (op->turbo_enc.code_block_mode == 0) {
1218                 ea = op->turbo_enc.tb_params.ea;
1219                 eb = op->turbo_enc.tb_params.eb;
1220                 cab = op->turbo_enc.tb_params.cab;
1221                 k_neg = op->turbo_enc.tb_params.k_neg;
1222                 k_pos = op->turbo_enc.tb_params.k_pos;
1223                 c_neg = op->turbo_enc.tb_params.c_neg;
1224                 e = (r < cab) ? ea : eb;
1225                 k = (r < c_neg) ? k_neg : k_pos;
1226         } else {
1227                 e = op->turbo_enc.cb_params.e;
1228                 k = op->turbo_enc.cb_params.k;
1229         }
1230
1231         if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
1232                 length = (k - 24) >> 3;
1233         else
1234                 length = k >> 3;
1235
1236         if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < length))) {
1237                 rte_bbdev_log(ERR,
1238                                 "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
1239                                 *mbuf_total_left, length);
1240                 return -1;
1241         }
1242
1243         next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
1244                         length, seg_total_left, next_triplet);
1245         if (unlikely(next_triplet < 0)) {
1246                 rte_bbdev_log(ERR,
1247                                 "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1248                                 op);
1249                 return -1;
1250         }
1251         desc->data_ptrs[next_triplet - 1].last = 1;
1252         desc->m2dlen = next_triplet;
1253         *mbuf_total_left -= length;
1254
1255         /* Set output length */
1256         if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_RATE_MATCH))
1257                 /* Integer round up division by 8 */
1258                 *out_length = (e + 7) >> 3;
1259         else
1260                 *out_length = (k >> 3) * 3 + 2;
1261
1262         next_triplet = acc100_dma_fill_blk_type_out(desc, output, *out_offset,
1263                         *out_length, next_triplet, ACC100_DMA_BLKID_OUT_ENC);
1264         if (unlikely(next_triplet < 0)) {
1265                 rte_bbdev_log(ERR,
1266                                 "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1267                                 op);
1268                 return -1;
1269         }
1270         op->turbo_enc.output.length += *out_length;
1271         *out_offset += *out_length;
1272         desc->data_ptrs[next_triplet - 1].last = 1;
1273         desc->d2mlen = next_triplet - desc->m2dlen;
1274
1275         desc->op_addr = op;
1276
1277         return 0;
1278 }
1279
1280 static inline int
1281 acc100_dma_desc_le_fill(struct rte_bbdev_enc_op *op,
1282                 struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
1283                 struct rte_mbuf *output, uint32_t *in_offset,
1284                 uint32_t *out_offset, uint32_t *out_length,
1285                 uint32_t *mbuf_total_left, uint32_t *seg_total_left)
1286 {
1287         int next_triplet = 1; /* FCW already done */
1288         uint16_t K, in_length_in_bits, in_length_in_bytes;
1289         struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
1290
1291         acc100_header_init(desc);
1292
1293         K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
1294         in_length_in_bits = K - enc->n_filler;
1295         if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) ||
1296                         (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH))
1297                 in_length_in_bits -= 24;
1298         in_length_in_bytes = in_length_in_bits >> 3;
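             /*
              * Worked example (assumed values): BG1 with Zc = 384 gives
              * K = 22 * 384 = 8448 bits; with n_filler = 0 and CRC24A
              * attached, in_length_in_bytes = (8448 - 24) >> 3 = 1053.
              */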
1299
1300         if (unlikely((*mbuf_total_left == 0) ||
1301                         (*mbuf_total_left < in_length_in_bytes))) {
1302                 rte_bbdev_log(ERR,
1303                                 "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
1304                                 *mbuf_total_left, in_length_in_bytes);
1305                 return -1;
1306         }
1307
1308         next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
1309                         in_length_in_bytes,
1310                         seg_total_left, next_triplet);
1311         if (unlikely(next_triplet < 0)) {
1312                 rte_bbdev_log(ERR,
1313                                 "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1314                                 op);
1315                 return -1;
1316         }
1317         desc->data_ptrs[next_triplet - 1].last = 1;
1318         desc->m2dlen = next_triplet;
1319         *mbuf_total_left -= in_length_in_bytes;
1320
1321         /* Set output length */
1322         /* Integer round up division by 8 */
1323         *out_length = (enc->cb_params.e + 7) >> 3;
1324
1325         next_triplet = acc100_dma_fill_blk_type_out(desc, output, *out_offset,
1326                         *out_length, next_triplet, ACC100_DMA_BLKID_OUT_ENC);
1327         op->ldpc_enc.output.length += *out_length;
1328         *out_offset += *out_length;
1329         desc->data_ptrs[next_triplet - 1].last = 1;
1330         desc->data_ptrs[next_triplet - 1].dma_ext = 0;
1331         desc->d2mlen = next_triplet - desc->m2dlen;
1332
1333         desc->op_addr = op;
1334
1335         return 0;
1336 }
1337
1338 static inline int
1339 acc100_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
1340                 struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
1341                 struct rte_mbuf *h_output, struct rte_mbuf *s_output,
1342                 uint32_t *in_offset, uint32_t *h_out_offset,
1343                 uint32_t *s_out_offset, uint32_t *h_out_length,
1344                 uint32_t *s_out_length, uint32_t *mbuf_total_left,
1345                 uint32_t *seg_total_left, uint8_t r)
1346 {
1347         int next_triplet = 1; /* FCW already done */
1348         uint16_t k;
1349         uint16_t crc24_overlap = 0;
1350         uint32_t e, kw;
1351
1352         desc->word0 = ACC100_DMA_DESC_TYPE;
1353         desc->word1 = 0; /* Timestamp could be disabled */
1354         desc->word2 = 0;
1355         desc->word3 = 0;
1356         desc->numCBs = 1;
1357
1358         if (op->turbo_dec.code_block_mode == 0) {
1359                 k = (r < op->turbo_dec.tb_params.c_neg)
1360                         ? op->turbo_dec.tb_params.k_neg
1361                         : op->turbo_dec.tb_params.k_pos;
1362                 e = (r < op->turbo_dec.tb_params.cab)
1363                         ? op->turbo_dec.tb_params.ea
1364                         : op->turbo_dec.tb_params.eb;
1365         } else {
1366                 k = op->turbo_dec.cb_params.k;
1367                 e = op->turbo_dec.cb_params.e;
1368         }
1369
1370         if ((op->turbo_dec.code_block_mode == 0)
1371                 && !check_bit(op->turbo_dec.op_flags,
1372                 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
1373                 crc24_overlap = 24;
1374
1375         /* Calculates circular buffer size.
1376          * According to 3gpp 36.212 section 5.1.4.2
1377          *   Kw = 3 * Kpi,
1378          * where:
1379          *   Kpi = nCol * nRow
1380          * where nCol is 32 and nRow can be calculated from:
1381          *   D <= nCol * nRow
1382          * where D is the size of each output from turbo encoder block (k + 4).
1383          */
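             /*
              * Worked example (assumed values): for k = 6144,
              * D = 6148 and RTE_ALIGN_CEIL(6148, 32) = 6176 = 32 * 193,
              * so kw = 6176 * 3 = 18528 soft values.
              */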
1384         kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
1385
1386         if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
1387                 rte_bbdev_log(ERR,
1388                                 "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
1389                                 *mbuf_total_left, kw);
1390                 return -1;
1391         }
1392
1393         next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset, kw,
1394                         seg_total_left, next_triplet);
1395         if (unlikely(next_triplet < 0)) {
1396                 rte_bbdev_log(ERR,
1397                                 "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1398                                 op);
1399                 return -1;
1400         }
1401         desc->data_ptrs[next_triplet - 1].last = 1;
1402         desc->m2dlen = next_triplet;
1403         *mbuf_total_left -= kw;
1404
1405         next_triplet = acc100_dma_fill_blk_type_out(
1406                         desc, h_output, *h_out_offset,
1407                         k >> 3, next_triplet, ACC100_DMA_BLKID_OUT_HARD);
1408         if (unlikely(next_triplet < 0)) {
1409                 rte_bbdev_log(ERR,
1410                                 "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1411                                 op);
1412                 return -1;
1413         }
1414
1415         *h_out_length = ((k - crc24_overlap) >> 3);
1416         op->turbo_dec.hard_output.length += *h_out_length;
1417         *h_out_offset += *h_out_length;
1418
1419         /* Soft output */
1420         if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
1421                 if (check_bit(op->turbo_dec.op_flags,
1422                                 RTE_BBDEV_TURBO_EQUALIZER))
1423                         *s_out_length = e;
1424                 else
1425                         *s_out_length = (k * 3) + 12;
1426
1427                 next_triplet = acc100_dma_fill_blk_type_out(desc, s_output,
1428                                 *s_out_offset, *s_out_length, next_triplet,
1429                                 ACC100_DMA_BLKID_OUT_SOFT);
1430                 if (unlikely(next_triplet < 0)) {
1431                         rte_bbdev_log(ERR,
1432                                         "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1433                                         op);
1434                         return -1;
1435                 }
1436
1437                 op->turbo_dec.soft_output.length += *s_out_length;
1438                 *s_out_offset += *s_out_length;
1439         }
1440
1441         desc->data_ptrs[next_triplet - 1].last = 1;
1442         desc->d2mlen = next_triplet - desc->m2dlen;
1443
1444         desc->op_addr = op;
1445
1446         return 0;
1447 }
1448
1449 static inline int
1450 acc100_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
1451                 struct acc100_dma_req_desc *desc,
1452                 struct rte_mbuf **input, struct rte_mbuf *h_output,
1453                 uint32_t *in_offset, uint32_t *h_out_offset,
1454                 uint32_t *h_out_length, uint32_t *mbuf_total_left,
1455                 uint32_t *seg_total_left,
1456                 struct acc100_fcw_ld *fcw)
1457 {
1458         struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
1459         int next_triplet = 1; /* FCW already done */
1460         uint32_t input_length;
1461         uint16_t output_length, crc24_overlap = 0;
1462         uint16_t sys_cols, K, h_p_size, h_np_size;
1463         bool h_comp = check_bit(dec->op_flags,
1464                         RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION);
1465
1466         acc100_header_init(desc);
1467
1468         if (check_bit(op->ldpc_dec.op_flags,
1469                         RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
1470                 crc24_overlap = 24;
1471
1472         /* Compute some LDPC BG lengths */
1473         input_length = dec->cb_params.e;
1474         if (check_bit(op->ldpc_dec.op_flags,
1475                         RTE_BBDEV_LDPC_LLR_COMPRESSION))
1476                 input_length = (input_length * 3 + 3) / 4;
1477         sys_cols = (dec->basegraph == 1) ? 22 : 10;
1478         K = sys_cols * dec->z_c;
1479         output_length = K - dec->n_filler - crc24_overlap;
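             /*
              * Illustrative numbers (assumed): e = 1000 with LLR
              * compression shrinks the input to (1000 * 3 + 3) / 4 = 750
              * bytes; BG1 with Zc = 384 gives K = 8448 and, with
              * n_filler = 0 and CRC24B dropped, an output of
              * 8448 - 24 = 8424 bits (1053 bytes).
              */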
1480
1481         if (unlikely((*mbuf_total_left == 0) ||
1482                         (*mbuf_total_left < input_length))) {
1483                 rte_bbdev_log(ERR,
1484                                 "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
1485                                 *mbuf_total_left, input_length);
1486                 return -1;
1487         }
1488
1489         next_triplet = acc100_dma_fill_blk_type_in(desc, input,
1490                         in_offset, input_length,
1491                         seg_total_left, next_triplet);
1492
1493         if (unlikely(next_triplet < 0)) {
1494                 rte_bbdev_log(ERR,
1495                                 "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1496                                 op);
1497                 return -1;
1498         }
1499
1500         if (check_bit(op->ldpc_dec.op_flags,
1501                                 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
1502                 h_p_size = fcw->hcin_size0 + fcw->hcin_size1;
1503                 if (h_comp)
1504                         h_p_size = (h_p_size * 3 + 3) / 4;
1505                 desc->data_ptrs[next_triplet].address =
1506                                 dec->harq_combined_input.offset;
1507                 desc->data_ptrs[next_triplet].blen = h_p_size;
1508                 desc->data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_IN_HARQ;
1509                 desc->data_ptrs[next_triplet].dma_ext = 1;
1510 #ifndef ACC100_EXT_MEM
1511                 acc100_dma_fill_blk_type_out(
1512                                 desc,
1513                                 op->ldpc_dec.harq_combined_input.data,
1514                                 op->ldpc_dec.harq_combined_input.offset,
1515                                 h_p_size,
1516                                 next_triplet,
1517                                 ACC100_DMA_BLKID_IN_HARQ);
1518 #endif
1519                 next_triplet++;
1520         }
1521
1522         desc->data_ptrs[next_triplet - 1].last = 1;
1523         desc->m2dlen = next_triplet;
1524         *mbuf_total_left -= input_length;
1525
1526         next_triplet = acc100_dma_fill_blk_type_out(desc, h_output,
1527                         *h_out_offset, output_length >> 3, next_triplet,
1528                         ACC100_DMA_BLKID_OUT_HARD);
1529
1530         if (check_bit(op->ldpc_dec.op_flags,
1531                                 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
1532                 /* Pruned size of the HARQ buffer */
1533                 h_p_size = fcw->hcout_size0 + fcw->hcout_size1;
1534                 /* Non-pruned size of the HARQ buffer */
1535                 h_np_size = fcw->hcout_offset > 0 ?
1536                                 fcw->hcout_offset + fcw->hcout_size1 :
1537                                 h_p_size;
1538                 if (h_comp) {
1539                         h_np_size = (h_np_size * 3 + 3) / 4;
1540                         h_p_size = (h_p_size * 3 + 3) / 4;
1541                 }
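                     /*
                      * Worked example (assumed values): with 6-bit
                      * compression a pruned HARQ size of 1024 LLRs takes
                      * (1024 * 3 + 3) / 4 = 768 bytes on the DMA side.
                      */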
1542                 dec->harq_combined_output.length = h_np_size;
1543                 desc->data_ptrs[next_triplet].address =
1544                                 dec->harq_combined_output.offset;
1545                 desc->data_ptrs[next_triplet].blen = h_p_size;
1546                 desc->data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_OUT_HARQ;
1547                 desc->data_ptrs[next_triplet].dma_ext = 1;
1548 #ifndef ACC100_EXT_MEM
1549                 acc100_dma_fill_blk_type_out(
1550                                 desc,
1551                                 dec->harq_combined_output.data,
1552                                 dec->harq_combined_output.offset,
1553                                 h_p_size,
1554                                 next_triplet,
1555                                 ACC100_DMA_BLKID_OUT_HARQ);
1556 #endif
1557                 next_triplet++;
1558         }
1559
1560         *h_out_length = output_length >> 3;
1561         dec->hard_output.length += *h_out_length;
1562         *h_out_offset += *h_out_length;
1563         desc->data_ptrs[next_triplet - 1].last = 1;
1564         desc->d2mlen = next_triplet - desc->m2dlen;
1565
1566         desc->op_addr = op;
1567
1568         return 0;
1569 }
1570
1571 static inline void
1572 acc100_dma_desc_ld_update(struct rte_bbdev_dec_op *op,
1573                 struct acc100_dma_req_desc *desc,
1574                 struct rte_mbuf *input, struct rte_mbuf *h_output,
1575                 uint32_t *in_offset, uint32_t *h_out_offset,
1576                 uint32_t *h_out_length,
1577                 union acc100_harq_layout_data *harq_layout)
1578 {
1579         int next_triplet = 1; /* FCW already done */
1580         desc->data_ptrs[next_triplet].address =
1581                         rte_pktmbuf_iova_offset(input, *in_offset);
1582         next_triplet++;
1583
1584         if (check_bit(op->ldpc_dec.op_flags,
1585                                 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
1586                 struct rte_bbdev_op_data hi = op->ldpc_dec.harq_combined_input;
1587                 desc->data_ptrs[next_triplet].address = hi.offset;
1588 #ifndef ACC100_EXT_MEM
1589                 desc->data_ptrs[next_triplet].address =
1590                                 rte_pktmbuf_iova_offset(hi.data, hi.offset);
1591 #endif
1592                 next_triplet++;
1593         }
1594
1595         desc->data_ptrs[next_triplet].address =
1596                         rte_pktmbuf_iova_offset(h_output, *h_out_offset);
1597         *h_out_length = desc->data_ptrs[next_triplet].blen;
1598         next_triplet++;
1599
1600         if (check_bit(op->ldpc_dec.op_flags,
1601                                 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
1602                 desc->data_ptrs[next_triplet].address =
1603                                 op->ldpc_dec.harq_combined_output.offset;
1604                 /* Adjust based on previous operation */
1605                 struct rte_bbdev_dec_op *prev_op = desc->op_addr;
1606                 op->ldpc_dec.harq_combined_output.length =
1607                                 prev_op->ldpc_dec.harq_combined_output.length;
1608                 int16_t hq_idx = op->ldpc_dec.harq_combined_output.offset /
1609                                 ACC100_HARQ_OFFSET;
1610                 int16_t prev_hq_idx =
1611                                 prev_op->ldpc_dec.harq_combined_output.offset
1612                                 / ACC100_HARQ_OFFSET;
1613                 harq_layout[hq_idx].val = harq_layout[prev_hq_idx].val;
1614 #ifndef ACC100_EXT_MEM
1615                 struct rte_bbdev_op_data ho =
1616                                 op->ldpc_dec.harq_combined_output;
1617                 desc->data_ptrs[next_triplet].address =
1618                                 rte_pktmbuf_iova_offset(ho.data, ho.offset);
1619 #endif
1620                 next_triplet++;
1621         }
1622
1623         op->ldpc_dec.hard_output.length += *h_out_length;
1624         desc->op_addr = op;
1625 }
1626
1627
1628 /* Enqueue a number of operations to HW and update software rings */
1629 static inline void
1630 acc100_dma_enqueue(struct acc100_queue *q, uint16_t n,
1631                 struct rte_bbdev_stats *queue_stats)
1632 {
1633         union acc100_enqueue_reg_fmt enq_req;
1634 #ifdef RTE_BBDEV_OFFLOAD_COST
1635         uint64_t start_time = 0;
1636         queue_stats->acc_offload_cycles = 0;
1637 #else
1638         RTE_SET_USED(queue_stats);
1639 #endif
1640
1641         enq_req.val = 0;
1642         /* Setting offset to 100b: a 256-byte DMA descriptor spans four 64-byte words */
1643         enq_req.addr_offset = ACC100_DESC_OFFSET;
1644
1645         /* Split ops into batches */
1646         do {
1647                 union acc100_dma_desc *desc;
1648                 uint16_t enq_batch_size;
1649                 uint64_t offset;
1650                 rte_iova_t req_elem_addr;
1651
1652                 enq_batch_size = RTE_MIN(n, MAX_ENQ_BATCH_SIZE);
1653
1654                 /* Set flag on last descriptor in a batch */
1655                 desc = q->ring_addr + ((q->sw_ring_head + enq_batch_size - 1) &
1656                                 q->sw_ring_wrap_mask);
1657                 desc->req.last_desc_in_batch = 1;
1658
1659                 /* Calculate the 1st descriptor's address */
1660                 offset = ((q->sw_ring_head & q->sw_ring_wrap_mask) *
1661                                 sizeof(union acc100_dma_desc));
1662                 req_elem_addr = q->ring_addr_iova + offset;
1663
1664                 /* Fill enqueue struct */
1665                 enq_req.num_elem = enq_batch_size;
1666                 /* low 6 bits are not needed */
1667                 enq_req.req_elem_addr = (uint32_t)(req_elem_addr >> 6);
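                     /*
                      * Illustrative note: the ring IOVA is 64-byte
                      * aligned, so e.g. 0x12345680 is programmed as
                      * 0x12345680 >> 6 = 0x48D15A.
                      */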
1668
1669 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1670                 rte_memdump(stderr, "Req sdone", desc, sizeof(*desc));
1671 #endif
1672                 rte_bbdev_log_debug(
1673                                 "Enqueue %u reqs (phys %#"PRIx64") to reg %p",
1674                                 enq_batch_size,
1675                                 req_elem_addr,
1676                                 (void *)q->mmio_reg_enqueue);
1677
1678                 rte_wmb();
1679
1680 #ifdef RTE_BBDEV_OFFLOAD_COST
1681                 /* Start time measurement for enqueue function offload. */
1682                 start_time = rte_rdtsc_precise();
1683 #endif
1684                 rte_bbdev_log(DEBUG, "MMIO Enqueue");
1685                 mmio_write(q->mmio_reg_enqueue, enq_req.val);
1686
1687 #ifdef RTE_BBDEV_OFFLOAD_COST
1688                 queue_stats->acc_offload_cycles +=
1689                                 rte_rdtsc_precise() - start_time;
1690 #endif
1691
1692                 q->aq_enqueued++;
1693                 q->sw_ring_head += enq_batch_size;
1694                 n -= enq_batch_size;
1695
1696         } while (n);
1697
1698
1699 }
1700
1701 /* Enqueue one encode operation for ACC100 device in CB mode */
1702 static inline int
1703 enqueue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
1704                 uint16_t total_enqueued_cbs)
1705 {
1706         union acc100_dma_desc *desc = NULL;
1707         int ret;
1708         uint32_t in_offset, out_offset, out_length, mbuf_total_left,
1709                 seg_total_left;
1710         struct rte_mbuf *input, *output_head, *output;
1711
1712         uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
1713                         & q->sw_ring_wrap_mask);
1714         desc = q->ring_addr + desc_idx;
1715         acc100_fcw_te_fill(op, &desc->req.fcw_te);
1716
1717         input = op->turbo_enc.input.data;
1718         output_head = output = op->turbo_enc.output.data;
1719         in_offset = op->turbo_enc.input.offset;
1720         out_offset = op->turbo_enc.output.offset;
1721         out_length = 0;
1722         mbuf_total_left = op->turbo_enc.input.length;
1723         seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
1724                         - in_offset;
1725
1726         ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
1727                         &in_offset, &out_offset, &out_length, &mbuf_total_left,
1728                         &seg_total_left, 0);
1729
1730         if (unlikely(ret < 0))
1731                 return ret;
1732
1733         mbuf_append(output_head, output, out_length);
1734
1735 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1736         rte_memdump(stderr, "FCW", &desc->req.fcw_te,
1737                         sizeof(desc->req.fcw_te) - 8);
1738         rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
1739         if (check_mbuf_total_left(mbuf_total_left) != 0)
1740                 return -EINVAL;
1741 #endif
1742         /* One CB (one op) was successfully prepared to enqueue */
1743         return 1;
1744 }
1745
1746 /* Enqueue multiple LDPC encode operations for ACC100 device in CB mode */
1747 static inline int
1748 enqueue_ldpc_enc_n_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ops,
1749                 uint16_t total_enqueued_cbs, int16_t num)
1750 {
1751         union acc100_dma_desc *desc = NULL;
1752         uint32_t out_length;
1753         struct rte_mbuf *output_head, *output;
1754         int i, next_triplet;
1755         uint16_t in_length_in_bytes;
1756         struct rte_bbdev_op_ldpc_enc *enc = &ops[0]->ldpc_enc;
1757
1758         uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
1759                         & q->sw_ring_wrap_mask);
1760         desc = q->ring_addr + desc_idx;
1761         acc100_fcw_le_fill(ops[0], &desc->req.fcw_le, num);
1762
1763         /* This could be done at polling time */
1764         acc100_header_init(&desc->req);
1765         desc->req.numCBs = num;
1766
1767         in_length_in_bytes = ops[0]->ldpc_enc.input.data->data_len;
1768         out_length = (enc->cb_params.e + 7) >> 3;
1769         desc->req.m2dlen = 1 + num;
1770         desc->req.d2mlen = num;
1771         next_triplet = 1;
1772
1773         for (i = 0; i < num; i++) {
1774                 desc->req.data_ptrs[next_triplet].address =
1775                         rte_pktmbuf_iova_offset(ops[i]->ldpc_enc.input.data, 0);
1776                 desc->req.data_ptrs[next_triplet].blen = in_length_in_bytes;
1777                 next_triplet++;
1778                 desc->req.data_ptrs[next_triplet].address =
1779                                 rte_pktmbuf_iova_offset(
1780                                 ops[i]->ldpc_enc.output.data, 0);
1781                 desc->req.data_ptrs[next_triplet].blen = out_length;
1782                 next_triplet++;
1783                 ops[i]->ldpc_enc.output.length = out_length;
1784                 output_head = output = ops[i]->ldpc_enc.output.data;
1785                 mbuf_append(output_head, output, out_length);
1786                 output->data_len = out_length;
1787         }
1788
1789         desc->req.op_addr = ops[0];
1790
1791 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1792         rte_memdump(stderr, "FCW", &desc->req.fcw_le,
1793                         sizeof(desc->req.fcw_le) - 8);
1794         rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
1795 #endif
1796
1797         /* All CBs (num ops) were successfully prepared to enqueue */
1798         return num;
1799 }
1800
1801 /* Enqueue one LDPC encode operation for ACC100 device in CB mode */
1802 static inline int
1803 enqueue_ldpc_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
1804                 uint16_t total_enqueued_cbs)
1805 {
1806         union acc100_dma_desc *desc = NULL;
1807         int ret;
1808         uint32_t in_offset, out_offset, out_length, mbuf_total_left,
1809                 seg_total_left;
1810         struct rte_mbuf *input, *output_head, *output;
1811
1812         uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
1813                         & q->sw_ring_wrap_mask);
1814         desc = q->ring_addr + desc_idx;
1815         acc100_fcw_le_fill(op, &desc->req.fcw_le, 1);
1816
1817         input = op->ldpc_enc.input.data;
1818         output_head = output = op->ldpc_enc.output.data;
1819         in_offset = op->ldpc_enc.input.offset;
1820         out_offset = op->ldpc_enc.output.offset;
1821         out_length = 0;
1822         mbuf_total_left = op->ldpc_enc.input.length;
1823         seg_total_left = rte_pktmbuf_data_len(op->ldpc_enc.input.data)
1824                         - in_offset;
1825
1826         ret = acc100_dma_desc_le_fill(op, &desc->req, &input, output,
1827                         &in_offset, &out_offset, &out_length, &mbuf_total_left,
1828                         &seg_total_left);
1829
1830         if (unlikely(ret < 0))
1831                 return ret;
1832
1833         mbuf_append(output_head, output, out_length);
1834
1835 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1836         rte_memdump(stderr, "FCW", &desc->req.fcw_le,
1837                         sizeof(desc->req.fcw_le) - 8);
1838         rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
1839
1840         if (check_mbuf_total_left(mbuf_total_left) != 0)
1841                 return -EINVAL;
1842 #endif
1843         /* One CB (one op) was successfully prepared to enqueue */
1844         return 1;
1845 }
1846
1847
1848 /* Enqueue one encode operation for ACC100 device in TB mode. */
1849 static inline int
1850 enqueue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
1851                 uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
1852 {
1853         union acc100_dma_desc *desc = NULL;
1854         int ret;
1855         uint8_t r, c;
1856         uint32_t in_offset, out_offset, out_length, mbuf_total_left,
1857                 seg_total_left;
1858         struct rte_mbuf *input, *output_head, *output;
1859         uint16_t current_enqueued_cbs = 0;
1860
1861         uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
1862                         & q->sw_ring_wrap_mask);
1863         desc = q->ring_addr + desc_idx;
1864         uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
1865         acc100_fcw_te_fill(op, &desc->req.fcw_te);
1866
1867         input = op->turbo_enc.input.data;
1868         output_head = output = op->turbo_enc.output.data;
1869         in_offset = op->turbo_enc.input.offset;
1870         out_offset = op->turbo_enc.output.offset;
1871         out_length = 0;
1872         mbuf_total_left = op->turbo_enc.input.length;
1873
1874         c = op->turbo_enc.tb_params.c;
1875         r = op->turbo_enc.tb_params.r;
1876
1877         while (mbuf_total_left > 0 && r < c) {
1878                 seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
1879                 /* Set up DMA descriptor */
1880                 desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
1881                                 & q->sw_ring_wrap_mask);
1882                 desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
1883                 desc->req.data_ptrs[0].blen = ACC100_FCW_TE_BLEN;
1884
1885                 ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
1886                                 &in_offset, &out_offset, &out_length,
1887                                 &mbuf_total_left, &seg_total_left, r);
1888                 if (unlikely(ret < 0))
1889                         return ret;
1890                 mbuf_append(output_head, output, out_length);
1891
1892                 /* Set total number of CBs in TB */
1893                 desc->req.cbs_in_tb = cbs_in_tb;
1894 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1895                 rte_memdump(stderr, "FCW", &desc->req.fcw_te,
1896                                 sizeof(desc->req.fcw_te) - 8);
1897                 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
1898 #endif
1899
1900                 if (seg_total_left == 0) {
1901                         /* Go to the next mbuf */
1902                         input = input->next;
1903                         in_offset = 0;
1904                         output = output->next;
1905                         out_offset = 0;
1906                 }
1907
1908                 total_enqueued_cbs++;
1909                 current_enqueued_cbs++;
1910                 r++;
1911         }
1912
1913         if (unlikely(desc == NULL))
1914                 return current_enqueued_cbs;
1915
1916 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1917         if (check_mbuf_total_left(mbuf_total_left) != 0)
1918                 return -EINVAL;
1919 #endif
1920
1921         /* Set SDone on last CB descriptor for TB mode. */
1922         desc->req.sdone_enable = 1;
1923         desc->req.irq_enable = q->irq_enable;
1924
1925         return current_enqueued_cbs;
1926 }
1927
1928 /* Enqueue one decode operation for ACC100 device in CB mode */
1929 static inline int
1930 enqueue_dec_one_op_cb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
1931                 uint16_t total_enqueued_cbs)
1932 {
1933         union acc100_dma_desc *desc = NULL;
1934         int ret;
1935         uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
1936                 h_out_length, mbuf_total_left, seg_total_left;
1937         struct rte_mbuf *input, *h_output_head, *h_output,
1938                 *s_output_head, *s_output;
1939
1940         uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
1941                         & q->sw_ring_wrap_mask);
1942         desc = q->ring_addr + desc_idx;
1943         acc100_fcw_td_fill(op, &desc->req.fcw_td);
1944
1945         input = op->turbo_dec.input.data;
1946         h_output_head = h_output = op->turbo_dec.hard_output.data;
1947         s_output_head = s_output = op->turbo_dec.soft_output.data;
1948         in_offset = op->turbo_dec.input.offset;
1949         h_out_offset = op->turbo_dec.hard_output.offset;
1950         s_out_offset = op->turbo_dec.soft_output.offset;
1951         h_out_length = s_out_length = 0;
1952         mbuf_total_left = op->turbo_dec.input.length;
1953         seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
1954
1955 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1956         if (unlikely(input == NULL)) {
1957                 rte_bbdev_log(ERR, "Invalid mbuf pointer");
1958                 return -EFAULT;
1959         }
1960 #endif
1961
1962         /* Set up DMA descriptor */
1963         desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
1964                         & q->sw_ring_wrap_mask);
1965
1966         ret = acc100_dma_desc_td_fill(op, &desc->req, &input, h_output,
1967                         s_output, &in_offset, &h_out_offset, &s_out_offset,
1968                         &h_out_length, &s_out_length, &mbuf_total_left,
1969                         &seg_total_left, 0);
1970
1971         if (unlikely(ret < 0))
1972                 return ret;
1973
1974         /* Hard output */
1975         mbuf_append(h_output_head, h_output, h_out_length);
1976
1977         /* Soft output */
1978         if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT))
1979                 mbuf_append(s_output_head, s_output, s_out_length);
1980
1981 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1982         rte_memdump(stderr, "FCW", &desc->req.fcw_td,
1983                         sizeof(desc->req.fcw_td) - 8);
1984         rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
1985         if (check_mbuf_total_left(mbuf_total_left) != 0)
1986                 return -EINVAL;
1987 #endif
1988
1989         /* One CB (one op) was successfully prepared to enqueue */
1990         return 1;
1991 }
1992
1993 static inline int
1994 harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
1995                 uint16_t total_enqueued_cbs)
{
1996         struct acc100_fcw_ld *fcw;
1997         union acc100_dma_desc *desc;
1998         int next_triplet = 1;
1999         struct rte_mbuf *hq_output_head, *hq_output;
2000         uint16_t harq_dma_length_in, harq_dma_length_out;
2001         uint16_t harq_in_length = op->ldpc_dec.harq_combined_input.length;
2002         if (harq_in_length == 0) {
2003                 rte_bbdev_log(ERR, "Loopback of invalid null size");
2004                 return -EINVAL;
2005         }
2006
2007         int h_comp = check_bit(op->ldpc_dec.op_flags,
2008                         RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
2009                         ) ? 1 : 0;
2010         if (h_comp == 1) {
2011                 harq_in_length = harq_in_length * 8 / 6;
2012                 harq_in_length = RTE_ALIGN(harq_in_length, 64);
2013                 harq_dma_length_in = harq_in_length * 6 / 8;
2014         } else {
2015                 harq_in_length = RTE_ALIGN(harq_in_length, 64);
2016                 harq_dma_length_in = harq_in_length;
2017         }
2018         harq_dma_length_out = harq_dma_length_in;
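             /*
              * Worked example (assumed values): harq_in_length = 3000
              * with 6-bit compression expands to 3000 * 8 / 6 = 4000,
              * aligned up to 4032 LLRs, of which 4032 * 6 / 8 = 3024
              * bytes actually move over DMA.
              */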
2019
2020         bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
2021                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
2022         union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
2023         uint16_t harq_index = (ddr_mem_in ?
2024                         op->ldpc_dec.harq_combined_input.offset :
2025                         op->ldpc_dec.harq_combined_output.offset)
2026                         / ACC100_HARQ_OFFSET;
2027
2028         uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
2029                         & q->sw_ring_wrap_mask);
2030         desc = q->ring_addr + desc_idx;
2031         fcw = &desc->req.fcw_ld;
2032         /* Set up the FCW for HARQ loopback through DDR */
2033         memset(fcw, 0, sizeof(struct acc100_fcw_ld));
2034         fcw->FCWversion = ACC100_FCW_VER;
2035         fcw->qm = 2;
2036         fcw->Zc = 384;
2037         if (harq_in_length < 16 * ACC100_N_ZC_1)
2038                 fcw->Zc = 16;
2039         fcw->ncb = fcw->Zc * ACC100_N_ZC_1;
2040         fcw->rm_e = 2;
2041         fcw->hcin_en = 1;
2042         rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length %d %d",
2043
2044         rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length %d %d\n",
2045                         ddr_mem_in, harq_index,
2046                         harq_layout[harq_index].offset, harq_in_length,
2047                         harq_dma_length_in);
2048
2049         if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
2050                 fcw->hcin_size0 = harq_layout[harq_index].size0;
2051                 fcw->hcin_offset = harq_layout[harq_index].offset;
2052                 fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
2053                 harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
2054                 if (h_comp == 1)
2055                         harq_dma_length_in = harq_dma_length_in * 6 / 8;
2056         } else {
2057                 fcw->hcin_size0 = harq_in_length;
2058         }
2059         harq_layout[harq_index].val = 0;
2060         rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d",
2061                         fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
2062         fcw->hcout_size0 = harq_in_length;
2063         fcw->hcin_decomp_mode = h_comp;
2064         fcw->hcout_comp_mode = h_comp;
2065         fcw->gain_i = 1;
2066         fcw->gain_h = 1;
2067
2068         /* Set the descriptor header. This could be done at polling time */
2069         acc100_header_init(&desc->req);
2070
2071         /* Null LLR input for Decoder */
2072         desc->req.data_ptrs[next_triplet].address =
2073                         q->lb_in_addr_iova;
2074         desc->req.data_ptrs[next_triplet].blen = 2;
2075         desc->req.data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_IN;
2076         desc->req.data_ptrs[next_triplet].last = 0;
2077         desc->req.data_ptrs[next_triplet].dma_ext = 0;
2078         next_triplet++;
2079
2080         /* HARQ combine input from either memory interface */
2081         if (!ddr_mem_in) {
2082                 next_triplet = acc100_dma_fill_blk_type_out(&desc->req,
2083                                 op->ldpc_dec.harq_combined_input.data,
2084                                 op->ldpc_dec.harq_combined_input.offset,
2085                                 harq_dma_length_in,
2086                                 next_triplet,
2087                                 ACC100_DMA_BLKID_IN_HARQ);
2088         } else {
2089                 desc->req.data_ptrs[next_triplet].address =
2090                                 op->ldpc_dec.harq_combined_input.offset;
2091                 desc->req.data_ptrs[next_triplet].blen =
2092                                 harq_dma_length_in;
2093                 desc->req.data_ptrs[next_triplet].blkid =
2094                                 ACC100_DMA_BLKID_IN_HARQ;
2095                 desc->req.data_ptrs[next_triplet].dma_ext = 1;
2096                 next_triplet++;
2097         }
2098         desc->req.data_ptrs[next_triplet - 1].last = 1;
2099         desc->req.m2dlen = next_triplet;
2100
2101         /* Dropped decoder hard output */
2102         desc->req.data_ptrs[next_triplet].address =
2103                         q->lb_out_addr_iova;
2104         desc->req.data_ptrs[next_triplet].blen = ACC100_BYTES_IN_WORD;
2105         desc->req.data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_OUT_HARD;
2106         desc->req.data_ptrs[next_triplet].last = 0;
2107         desc->req.data_ptrs[next_triplet].dma_ext = 0;
2108         next_triplet++;
2109
2110         /* HARQ combine output to either memory interface */
2111         if (check_bit(op->ldpc_dec.op_flags,
2112                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE
2113                         )) {
2114                 desc->req.data_ptrs[next_triplet].address =
2115                                 op->ldpc_dec.harq_combined_output.offset;
2116                 desc->req.data_ptrs[next_triplet].blen =
2117                                 harq_dma_length_out;
2118                 desc->req.data_ptrs[next_triplet].blkid =
2119                                 ACC100_DMA_BLKID_OUT_HARQ;
2120                 desc->req.data_ptrs[next_triplet].dma_ext = 1;
2121                 next_triplet++;
2122         } else {
2123                 hq_output_head = op->ldpc_dec.harq_combined_output.data;
2124                 hq_output = op->ldpc_dec.harq_combined_output.data;
2125                 next_triplet = acc100_dma_fill_blk_type_out(
2126                                 &desc->req,
2127                                 op->ldpc_dec.harq_combined_output.data,
2128                                 op->ldpc_dec.harq_combined_output.offset,
2129                                 harq_dma_length_out,
2130                                 next_triplet,
2131                                 ACC100_DMA_BLKID_OUT_HARQ);
2132                 /* HARQ output */
2133                 mbuf_append(hq_output_head, hq_output, harq_dma_length_out);
2134                 op->ldpc_dec.harq_combined_output.length =
2135                                 harq_dma_length_out;
2136         }
2137         desc->req.data_ptrs[next_triplet - 1].last = 1;
2138         desc->req.d2mlen = next_triplet - desc->req.m2dlen;
2139         desc->req.op_addr = op;
2140
2141         /* One CB (one op) was successfully prepared to enqueue */
2142         return 1;
2143 }
2144
2145 /* Enqueue one LDPC decode operation for ACC100 device in CB mode */
2146 static inline int
2147 enqueue_ldpc_dec_one_op_cb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
2148                 uint16_t total_enqueued_cbs, bool same_op)
2149 {
2150         int ret;
2151         if (unlikely(check_bit(op->ldpc_dec.op_flags,
2152                         RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK))) {
2153                 ret = harq_loopback(q, op, total_enqueued_cbs);
2154                 return ret;
2155         }
2156
2157         union acc100_dma_desc *desc;
2158         uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
2159                         & q->sw_ring_wrap_mask);
2160         desc = q->ring_addr + desc_idx;
2161         struct rte_mbuf *input, *h_output_head, *h_output;
2162         uint32_t in_offset, h_out_offset, mbuf_total_left, h_out_length = 0;
2163         input = op->ldpc_dec.input.data;
2164         h_output_head = h_output = op->ldpc_dec.hard_output.data;
2165         in_offset = op->ldpc_dec.input.offset;
2166         h_out_offset = op->ldpc_dec.hard_output.offset;
2167         mbuf_total_left = op->ldpc_dec.input.length;
2168 #ifdef RTE_LIBRTE_BBDEV_DEBUG
2169         if (unlikely(input == NULL)) {
2170                 rte_bbdev_log(ERR, "Invalid mbuf pointer");
2171                 return -EFAULT;
2172         }
2173 #endif
2174         union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
2175
2176         if (same_op) {
2177                 union acc100_dma_desc *prev_desc;
2178                 desc_idx = ((q->sw_ring_head + total_enqueued_cbs - 1)
2179                                 & q->sw_ring_wrap_mask);
2180                 prev_desc = q->ring_addr + desc_idx;
2181                 uint8_t *prev_ptr = (uint8_t *) prev_desc;
2182                 uint8_t *new_ptr = (uint8_t *) desc;
2183                 /* Copy first 4 words and BDESCs */
2184                 rte_memcpy(new_ptr, prev_ptr, ACC100_5GUL_SIZE_0);
2185                 rte_memcpy(new_ptr + ACC100_5GUL_OFFSET_0,
2186                                 prev_ptr + ACC100_5GUL_OFFSET_0,
2187                                 ACC100_5GUL_SIZE_1);
2188                 desc->req.op_addr = prev_desc->req.op_addr;
2189                 /* Copy FCW */
2190                 rte_memcpy(new_ptr + ACC100_DESC_FCW_OFFSET,
2191                                 prev_ptr + ACC100_DESC_FCW_OFFSET,
2192                                 ACC100_FCW_LD_BLEN);
2193                 acc100_dma_desc_ld_update(op, &desc->req, input, h_output,
2194                                 &in_offset, &h_out_offset,
2195                                 &h_out_length, harq_layout);
2196         } else {
2197                 struct acc100_fcw_ld *fcw;
2198                 uint32_t seg_total_left;
2199                 fcw = &desc->req.fcw_ld;
2200                 acc100_fcw_ld_fill(op, fcw, harq_layout);
2201
2202                 /* Special handling when e is too large for a single mbuf */
2203                 if (fcw->rm_e < ACC100_MAX_E_MBUF)
2204                         seg_total_left = rte_pktmbuf_data_len(input)
2205                                         - in_offset;
2206                 else
2207                         seg_total_left = fcw->rm_e;
2208
2209                 ret = acc100_dma_desc_ld_fill(op, &desc->req, &input, h_output,
2210                                 &in_offset, &h_out_offset,
2211                                 &h_out_length, &mbuf_total_left,
2212                                 &seg_total_left, fcw);
2213                 if (unlikely(ret < 0))
2214                         return ret;
2215         }
2216
2217         /* Hard output */
2218         mbuf_append(h_output_head, h_output, h_out_length);
2219 #ifndef ACC100_EXT_MEM
2220         if (op->ldpc_dec.harq_combined_output.length > 0) {
2221                 /* Push the HARQ output into host memory */
2222                 struct rte_mbuf *hq_output_head, *hq_output;
2223                 hq_output_head = op->ldpc_dec.harq_combined_output.data;
2224                 hq_output = op->ldpc_dec.harq_combined_output.data;
2225                 mbuf_append(hq_output_head, hq_output,
2226                                 op->ldpc_dec.harq_combined_output.length);
2227         }
2228 #endif
2229
2230 #ifdef RTE_LIBRTE_BBDEV_DEBUG
2231         rte_memdump(stderr, "FCW", &desc->req.fcw_ld,
2232                         sizeof(desc->req.fcw_ld) - 8);
2233         rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
2234 #endif
2235
2236         /* One CB (one op) was successfully prepared to enqueue */
2237         return 1;
2238 }
2239
2240
2241 /* Enqueue one LDPC decode operation for ACC100 device in TB mode */
2242 static inline int
2243 enqueue_ldpc_dec_one_op_tb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
2244                 uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
2245 {
2246         union acc100_dma_desc *desc = NULL;
2247         int ret;
2248         uint8_t r, c;
2249         uint32_t in_offset, h_out_offset,
2250                 h_out_length, mbuf_total_left, seg_total_left;
2251         struct rte_mbuf *input, *h_output_head, *h_output;
2252         uint16_t current_enqueued_cbs = 0;
2253
2254         uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
2255                         & q->sw_ring_wrap_mask);
2256         desc = q->ring_addr + desc_idx;
2257         uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
2258         union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
2259         acc100_fcw_ld_fill(op, &desc->req.fcw_ld, harq_layout);
2260
2261         input = op->ldpc_dec.input.data;
2262         h_output_head = h_output = op->ldpc_dec.hard_output.data;
2263         in_offset = op->ldpc_dec.input.offset;
2264         h_out_offset = op->ldpc_dec.hard_output.offset;
2265         h_out_length = 0;
2266         mbuf_total_left = op->ldpc_dec.input.length;
2267         c = op->ldpc_dec.tb_params.c;
2268         r = op->ldpc_dec.tb_params.r;
2269
2270         while (mbuf_total_left > 0 && r < c) {
2271
2272                 seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
2273
2274                 /* Set up DMA descriptor */
2275                 desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
2276                                 & q->sw_ring_wrap_mask);
2277                 desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
2278                 desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN;
2279                 ret = acc100_dma_desc_ld_fill(op, &desc->req, &input,
2280                                 h_output, &in_offset, &h_out_offset,
2281                                 &h_out_length,
2282                                 &mbuf_total_left, &seg_total_left,
2283                                 &desc->req.fcw_ld);
2284
2285                 if (unlikely(ret < 0))
2286                         return ret;
2287
2288                 /* Hard output */
2289                 mbuf_append(h_output_head, h_output, h_out_length);
2290
2291                 /* Set total number of CBs in TB */
2292                 desc->req.cbs_in_tb = cbs_in_tb;
2293 #ifdef RTE_LIBRTE_BBDEV_DEBUG
2294                 rte_memdump(stderr, "FCW", &desc->req.fcw_ld,
2295                                 sizeof(desc->req.fcw_ld) - 8);
2296                 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
2297 #endif
2298
2299                 if (seg_total_left == 0) {
2300                         /* Go to the next mbuf */
2301                         input = input->next;
2302                         in_offset = 0;
2303                         h_output = h_output->next;
2304                         h_out_offset = 0;
2305                 }
2306                 total_enqueued_cbs++;
2307                 current_enqueued_cbs++;
2308                 r++;
2309         }
2310
2311         if (unlikely(desc == NULL))
2312                 return current_enqueued_cbs;
2313
2314 #ifdef RTE_LIBRTE_BBDEV_DEBUG
2315         if (check_mbuf_total_left(mbuf_total_left) != 0)
2316                 return -EINVAL;
2317 #endif
2318         /* Set SDone on last CB descriptor for TB mode */
2319         desc->req.sdone_enable = 1;
2320         desc->req.irq_enable = q->irq_enable;
2321
2322         return current_enqueued_cbs;
2323 }
2324
2325 /* Enqueue one decode operation for ACC100 device in TB mode */
2326 static inline int
2327 enqueue_dec_one_op_tb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
2328                 uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
2329 {
2330         union acc100_dma_desc *desc = NULL;
2331         int ret;
2332         uint8_t r, c;
2333         uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
2334                 h_out_length, mbuf_total_left, seg_total_left;
2335         struct rte_mbuf *input, *h_output_head, *h_output,
2336                 *s_output_head, *s_output;
2337         uint16_t current_enqueued_cbs = 0;
2338
2339         uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
2340                         & q->sw_ring_wrap_mask);
2341         desc = q->ring_addr + desc_idx;
2342         uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
2343         acc100_fcw_td_fill(op, &desc->req.fcw_td);
2344
2345         input = op->turbo_dec.input.data;
2346         h_output_head = h_output = op->turbo_dec.hard_output.data;
2347         s_output_head = s_output = op->turbo_dec.soft_output.data;
2348         in_offset = op->turbo_dec.input.offset;
2349         h_out_offset = op->turbo_dec.hard_output.offset;
2350         s_out_offset = op->turbo_dec.soft_output.offset;
2351         h_out_length = s_out_length = 0;
2352         mbuf_total_left = op->turbo_dec.input.length;
2353         c = op->turbo_dec.tb_params.c;
2354         r = op->turbo_dec.tb_params.r;
2355
2356         while (mbuf_total_left > 0 && r < c) {
2357
2358                 seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
2359
2360                 /* Set up DMA descriptor */
2361                 desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
2362                                 & q->sw_ring_wrap_mask);
2363                 desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
2364                 desc->req.data_ptrs[0].blen = ACC100_FCW_TD_BLEN;
2365                 ret = acc100_dma_desc_td_fill(op, &desc->req, &input,
2366                                 h_output, s_output, &in_offset, &h_out_offset,
2367                                 &s_out_offset, &h_out_length, &s_out_length,
2368                                 &mbuf_total_left, &seg_total_left, r);
2369
2370                 if (unlikely(ret < 0))
2371                         return ret;
2372
2373                 /* Hard output */
2374                 mbuf_append(h_output_head, h_output, h_out_length);
2375
2376                 /* Soft output */
2377                 if (check_bit(op->turbo_dec.op_flags,
2378                                 RTE_BBDEV_TURBO_SOFT_OUTPUT))
2379                         mbuf_append(s_output_head, s_output, s_out_length);
2380
2381                 /* Set total number of CBs in TB */
2382                 desc->req.cbs_in_tb = cbs_in_tb;
2383 #ifdef RTE_LIBRTE_BBDEV_DEBUG
2384                 rte_memdump(stderr, "FCW", &desc->req.fcw_td,
2385                                 sizeof(desc->req.fcw_td) - 8);
2386                 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
2387 #endif
2388
2389                 if (seg_total_left == 0) {
2390                         /* Go to the next mbuf */
2391                         input = input->next;
2392                         in_offset = 0;
2393                         h_output = h_output->next;
2394                         h_out_offset = 0;
2395
2396                         if (check_bit(op->turbo_dec.op_flags,
2397                                         RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
2398                                 s_output = s_output->next;
2399                                 s_out_offset = 0;
2400                         }
2401                 }
2402
2403                 total_enqueued_cbs++;
2404                 current_enqueued_cbs++;
2405                 r++;
2406         }
2407
2408         if (unlikely(desc == NULL))
2409                 return current_enqueued_cbs;
2410
2411 #ifdef RTE_LIBRTE_BBDEV_DEBUG
2412         if (check_mbuf_total_left(mbuf_total_left) != 0)
2413                 return -EINVAL;
2414 #endif
2415         /* Set SDone on last CB descriptor for TB mode */
2416         desc->req.sdone_enable = 1;
2417         desc->req.irq_enable = q->irq_enable;
2418
2419         return current_enqueued_cbs;
2420 }
2421
2422 /* Calculates number of CBs in processed encoder TB based on 'r' and input
2423  * length.
2424  */
2425 static inline uint8_t
2426 get_num_cbs_in_tb_enc(struct rte_bbdev_op_turbo_enc *turbo_enc)
2427 {
2428         uint8_t c, c_neg, r, crc24_bits = 0;
2429         uint16_t k, k_neg, k_pos;
2430         uint8_t cbs_in_tb = 0;
2431         int32_t length;
2432
2433         length = turbo_enc->input.length;
2434         r = turbo_enc->tb_params.r;
2435         c = turbo_enc->tb_params.c;
2436         c_neg = turbo_enc->tb_params.c_neg;
2437         k_neg = turbo_enc->tb_params.k_neg;
2438         k_pos = turbo_enc->tb_params.k_pos;
2439         crc24_bits = 0;
2440         if (check_bit(turbo_enc->op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
2441                 crc24_bits = 24;
2442         while (length > 0 && r < c) {
2443                 k = (r < c_neg) ? k_neg : k_pos;
2444                 length -= (k - crc24_bits) >> 3;
2445                 r++;
2446                 cbs_in_tb++;
2447         }
2448
2449         return cbs_in_tb;
2450 }
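     /*
      * Worked example (assumed values): a TB with c = 3, c_neg = 1,
      * k_neg = 5056, k_pos = 5120 and CRC24B attached consumes
      * (5056 - 24) / 8 + 2 * (5120 - 24) / 8 = 629 + 1274 = 1903 bytes,
      * so input.length = 1903 starting at r = 0 yields cbs_in_tb = 3.
      */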
2451
2452 /* Calculates number of CBs in processed decoder TB based on 'r' and input
2453  * length.
2454  */
2455 static inline uint16_t
2456 get_num_cbs_in_tb_dec(struct rte_bbdev_op_turbo_dec *turbo_dec)
2457 {
2458         uint8_t c, c_neg, r = 0;
2459         uint16_t kw, k, k_neg, k_pos, cbs_in_tb = 0;
2460         int32_t length;
2461
2462         length = turbo_dec->input.length;
2463         r = turbo_dec->tb_params.r;
2464         c = turbo_dec->tb_params.c;
2465         c_neg = turbo_dec->tb_params.c_neg;
2466         k_neg = turbo_dec->tb_params.k_neg;
2467         k_pos = turbo_dec->tb_params.k_pos;
2468         while (length > 0 && r < c) {
2469                 k = (r < c_neg) ? k_neg : k_pos;
2470                 kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
2471                 length -= kw;
2472                 r++;
2473                 cbs_in_tb++;
2474         }
2475
2476         return cbs_in_tb;
2477 }
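     /*
      * Illustrative numbers (assumed): for k_pos = 5120 each CB
      * consumes kw = RTE_ALIGN_CEIL(5124, 32) * 3 = 5152 * 3 = 15456
      * bytes of circular-buffer LLRs, so three such CBs need an
      * input.length of 46368.
      */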
2478
2479 /* Calculates number of CBs in processed LDPC decoder TB based on 'r' and input
2480  * length.
2481  */
2482 static inline uint16_t
2483 get_num_cbs_in_tb_ldpc_dec(struct rte_bbdev_op_ldpc_dec *ldpc_dec)
2484 {
2485         uint16_t r, cbs_in_tb = 0;
2486         int32_t length = ldpc_dec->input.length;
2487         r = ldpc_dec->tb_params.r;
2488         while (length > 0 && r < ldpc_dec->tb_params.c) {
2489                 length -=  (r < ldpc_dec->tb_params.cab) ?
2490                                 ldpc_dec->tb_params.ea :
2491                                 ldpc_dec->tb_params.eb;
2492                 r++;
2493                 cbs_in_tb++;
2494         }
2495         return cbs_in_tb;
2496 }
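     /*
      * Worked example (assumed values): with ea = 1000, eb = 800,
      * cab = 2 and c = 4, an input.length of 1000 + 1000 + 800 + 800 =
      * 3600 starting at r = 0 yields cbs_in_tb = 4.
      */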
2497
2498 /* Enqueue encode operations for ACC100 device in CB mode. */
2499 static uint16_t
2500 acc100_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data,
2501                 struct rte_bbdev_enc_op **ops, uint16_t num)
2502 {
2503         struct acc100_queue *q = q_data->queue_private;
2504         int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
2505         uint16_t i;
2506         union acc100_dma_desc *desc;
2507         int ret;
2508
2509         for (i = 0; i < num; ++i) {
2510                 /* Check if there is available space for further processing */
2511                 if (unlikely(avail - 1 < 0))
2512                         break;
2513                 avail -= 1;
2514
2515                 ret = enqueue_enc_one_op_cb(q, ops[i], i);
2516                 if (ret < 0)
2517                         break;
2518         }
2519
2520         if (unlikely(i == 0))
2521                 return 0; /* Nothing to enqueue */
2522
2523         /* Set SDone in last CB in enqueued ops for CB mode */
2524         desc = q->ring_addr + ((q->sw_ring_head + i - 1)
2525                         & q->sw_ring_wrap_mask);
2526         desc->req.sdone_enable = 1;
2527         desc->req.irq_enable = q->irq_enable;
2528
2529         acc100_dma_enqueue(q, i, &q_data->queue_stats);
2530
2531         /* Update stats */
2532         q_data->queue_stats.enqueued_count += i;
2533         q_data->queue_stats.enqueue_err_count += num - i;
2534         return i;
2535 }
2536
2537 /* Check we can mux encode operations with common FCW */
2538 static inline bool
2539 check_mux(struct rte_bbdev_enc_op **ops, uint16_t num)
     {
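             /*
              * Ops may share one descriptor and FCW only if all FCW-relevant
              * fields match: compare the ldpc_enc structs byte-for-byte from
              * ACC100_ENC_OFFSET over ACC100_CMP_ENC_SIZE bytes.
              */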
2540         uint16_t i;
2541         if (num <= 1)
2542                 return false;
2543         for (i = 1; i < num; ++i) {
2544                 /* Only mux compatible code blocks */
2545                 if (memcmp((uint8_t *)(&ops[i]->ldpc_enc) + ACC100_ENC_OFFSET,
2546                                 (uint8_t *)(&ops[0]->ldpc_enc) +
2547                                 ACC100_ENC_OFFSET,
2548                                 ACC100_CMP_ENC_SIZE) != 0)
2549                         return false;
2550         }
2551         return true;
2552 }
2553
2554 /* Enqueue LDPC encode operations for ACC100 device in CB mode. */
2555 static inline uint16_t
2556 acc100_enqueue_ldpc_enc_cb(struct rte_bbdev_queue_data *q_data,
2557                 struct rte_bbdev_enc_op **ops, uint16_t num)
2558 {
2559         struct acc100_queue *q = q_data->queue_private;
2560         int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
2561         uint16_t i = 0;
2562         union acc100_dma_desc *desc;
2563         int ret, desc_idx = 0;
2564         int16_t enq, left = num;
2565
2566         while (left > 0) {
2567                 if (unlikely(avail < 1))
2568                         break;
2569                 avail--;
2570                 enq = RTE_MIN(left, ACC100_MUX_5GDL_DESC);
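                     /* Mux up to ACC100_MUX_5GDL_DESC compatible ops into one descriptor */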
2571                 if (check_mux(&ops[i], enq)) {
2572                         ret = enqueue_ldpc_enc_n_op_cb(q, &ops[i],
2573                                         desc_idx, enq);
2574                         if (ret < 0)
2575                                 break;
2576                         i += enq;
2577                 } else {
2578                         ret = enqueue_ldpc_enc_one_op_cb(q, ops[i], desc_idx);
2579                         if (ret < 0)
2580                                 break;
2581                         i++;
2582                 }
2583                 desc_idx++;
2584                 left = num - i;
2585         }
2586
2587         if (unlikely(i == 0))
2588                 return 0; /* Nothing to enqueue */
2589
2590         /* Set SDone in last CB in enqueued ops for CB mode */
2591         desc = q->ring_addr + ((q->sw_ring_head + desc_idx - 1)
2592                         & q->sw_ring_wrap_mask);
2593         desc->req.sdone_enable = 1;
2594         desc->req.irq_enable = q->irq_enable;
2595
2596         acc100_dma_enqueue(q, desc_idx, &q_data->queue_stats);
2597
2598         /* Update stats */
2599         q_data->queue_stats.enqueued_count += i;
2600         q_data->queue_stats.enqueue_err_count += num - i;
2601
2602         return i;
2603 }
2604
2605 /* Enqueue encode operations for ACC100 device in TB mode. */
2606 static uint16_t
2607 acc100_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data,
2608                 struct rte_bbdev_enc_op **ops, uint16_t num)
2609 {
2610         struct acc100_queue *q = q_data->queue_private;
2611         int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
2612         uint16_t i, enqueued_cbs = 0;
2613         uint8_t cbs_in_tb;
2614         int ret;
2615
2616         for (i = 0; i < num; ++i) {
2617                 cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
2618                 /* Check if there is space available for further processing */
2619                 if (unlikely(avail - cbs_in_tb < 0))
2620                         break;
2621                 avail -= cbs_in_tb;
2622
2623                 ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
2624                 if (ret < 0)
2625                         break;
2626                 enqueued_cbs += ret;
2627         }
2628         if (unlikely(enqueued_cbs == 0))
2629                 return 0; /* Nothing to enqueue */
2630
2631         acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
2632
2633         /* Update stats */
2634         q_data->queue_stats.enqueued_count += i;
2635         q_data->queue_stats.enqueue_err_count += num - i;
2636
2637         return i;
2638 }
2639
2640 /* Enqueue encode operations for ACC100 device. */
2641 static uint16_t
2642 acc100_enqueue_enc(struct rte_bbdev_queue_data *q_data,
2643                 struct rte_bbdev_enc_op **ops, uint16_t num)
2644 {
2645         if (unlikely(num == 0))
2646                 return 0;
2647         if (ops[0]->turbo_enc.code_block_mode == 0)
2648                 return acc100_enqueue_enc_tb(q_data, ops, num);
2649         else
2650                 return acc100_enqueue_enc_cb(q_data, ops, num);
2651 }
2652
2653 /* Enqueue LDPC encode operations for ACC100 device. */
2654 static uint16_t
2655 acc100_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
2656                 struct rte_bbdev_enc_op **ops, uint16_t num)
2657 {
2658         if (unlikely(num == 0))
2659                 return 0;
2660         if (ops[0]->ldpc_enc.code_block_mode == 0)
2661                 return acc100_enqueue_enc_tb(q_data, ops, num);
2662         else
2663                 return acc100_enqueue_ldpc_enc_cb(q_data, ops, num);
2664 }
2665
2666
2667 /* Enqueue decode operations for ACC100 device in CB mode */
2668 static uint16_t
2669 acc100_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data,
2670                 struct rte_bbdev_dec_op **ops, uint16_t num)
2671 {
2672         struct acc100_queue *q = q_data->queue_private;
2673         int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
2674         uint16_t i;
2675         union acc100_dma_desc *desc;
2676         int ret;
2677
2678         for (i = 0; i < num; ++i) {
2679                 /* Check if there is space available for further processing */
2680                 if (unlikely(avail - 1 < 0))
2681                         break;
2682                 avail -= 1;
2683
2684                 ret = enqueue_dec_one_op_cb(q, ops[i], i);
2685                 if (ret < 0)
2686                         break;
2687         }
2688
2689         if (unlikely(i == 0))
2690                 return 0; /* Nothing to enqueue */
2691
2692         /* Set SDone in last CB in enqueued ops for CB mode */
2693         desc = q->ring_addr + ((q->sw_ring_head + i - 1)
2694                         & q->sw_ring_wrap_mask);
2695         desc->req.sdone_enable = 1;
2696         desc->req.irq_enable = q->irq_enable;
2697
2698         acc100_dma_enqueue(q, i, &q_data->queue_stats);
2699
2700         /* Update stats */
2701         q_data->queue_stats.enqueued_count += i;
2702         q_data->queue_stats.enqueue_err_count += num - i;
2703
2704         return i;
2705 }
2706
2707 /* Check we can mux decode operations with common FCW */
2708 static inline bool
2709 cmp_ldpc_dec_op(struct rte_bbdev_dec_op **ops)
2710 {
2711         /* Only mux compatible code blocks */
2712         if (memcmp((uint8_t *)(&ops[0]->ldpc_dec) + ACC100_DEC_OFFSET,
2713                         (uint8_t *)(&ops[1]->ldpc_dec) +
2714                         ACC100_DEC_OFFSET, ACC100_CMP_DEC_SIZE) != 0)
2715                 return false;
2716         return true;
2717 }
2718
2719
2720 /* Enqueue decode operations for ACC100 device in TB mode */
2721 static uint16_t
2722 acc100_enqueue_ldpc_dec_tb(struct rte_bbdev_queue_data *q_data,
2723                 struct rte_bbdev_dec_op **ops, uint16_t num)
2724 {
2725         struct acc100_queue *q = q_data->queue_private;
2726         int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
2727         uint16_t i, enqueued_cbs = 0;
2728         uint8_t cbs_in_tb;
2729         int ret;
2730
2731         for (i = 0; i < num; ++i) {
2732                 cbs_in_tb = get_num_cbs_in_tb_ldpc_dec(&ops[i]->ldpc_dec);
2733                 /* Check if there is space available for further processing */
2734                 if (unlikely(avail - cbs_in_tb < 0))
2735                         break;
2736                 avail -= cbs_in_tb;
2737
2738                 ret = enqueue_ldpc_dec_one_op_tb(q, ops[i],
2739                                 enqueued_cbs, cbs_in_tb);
2740                 if (ret < 0)
2741                         break;
2742                 enqueued_cbs += ret;
2743         }
2744
             if (unlikely(enqueued_cbs == 0))
                     return 0; /* Nothing to enqueue */

2745         acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
2746
2747         /* Update stats */
2748         q_data->queue_stats.enqueued_count += i;
2749         q_data->queue_stats.enqueue_err_count += num - i;
2750         return i;
2751 }
2752
2753 /* Enqueue decode operations for ACC100 device in CB mode */
2754 static uint16_t
2755 acc100_enqueue_ldpc_dec_cb(struct rte_bbdev_queue_data *q_data,
2756                 struct rte_bbdev_dec_op **ops, uint16_t num)
2757 {
2758         struct acc100_queue *q = q_data->queue_private;
2759         int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
2760         uint16_t i;
2761         union acc100_dma_desc *desc;
2762         int ret;
2763         bool same_op = false;
2764         for (i = 0; i < num; ++i) {
2765                 /* Check if there is space available for further processing */
2766                 if (unlikely(avail < 1))
2767                         break;
2768                 avail -= 1;
2769
2770                 if (i > 0)
2771                         same_op = cmp_ldpc_dec_op(&ops[i-1]);
2772                 rte_bbdev_log_debug("Op %d %d %d %d %d %d %d %d %d %d %d %d",
2773                         i, ops[i]->ldpc_dec.op_flags, ops[i]->ldpc_dec.rv_index,
2774                         ops[i]->ldpc_dec.iter_max, ops[i]->ldpc_dec.iter_count,
2775                         ops[i]->ldpc_dec.basegraph, ops[i]->ldpc_dec.z_c,
2776                         ops[i]->ldpc_dec.n_cb, ops[i]->ldpc_dec.q_m,
2777                         ops[i]->ldpc_dec.n_filler, ops[i]->ldpc_dec.cb_params.e,
2778                         same_op);
2779                 ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op);
2780                 if (ret < 0)
2781                         break;
2782         }
2783
2784         if (unlikely(i == 0))
2785                 return 0; /* Nothing to enqueue */
2786
2787         /* Set SDone in last CB in enqueued ops for CB mode */
2788         desc = q->ring_addr + ((q->sw_ring_head + i - 1)
2789                         & q->sw_ring_wrap_mask);
2790
2791         desc->req.sdone_enable = 1;
2792         desc->req.irq_enable = q->irq_enable;
2793
2794         acc100_dma_enqueue(q, i, &q_data->queue_stats);
2795
2796         /* Update stats */
2797         q_data->queue_stats.enqueued_count += i;
2798         q_data->queue_stats.enqueue_err_count += num - i;
2799         return i;
2800 }
2801
2802
2803 /* Enqueue decode operations for ACC100 device in TB mode */
2804 static uint16_t
2805 acc100_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data,
2806                 struct rte_bbdev_dec_op **ops, uint16_t num)
2807 {
2808         struct acc100_queue *q = q_data->queue_private;
2809         int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
2810         uint16_t i, enqueued_cbs = 0;
2811         uint8_t cbs_in_tb;
2812         int ret;
2813
2814         for (i = 0; i < num; ++i) {
2815                 cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
2816                 /* Check if there is space available for further processing */
2817                 if (unlikely(avail - cbs_in_tb < 0))
2818                         break;
2819                 avail -= cbs_in_tb;
2820
2821                 ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
2822                 if (ret < 0)
2823                         break;
2824                 enqueued_cbs += ret;
2825         }
2826
             if (unlikely(enqueued_cbs == 0))
                     return 0; /* Nothing to enqueue */

2827         acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
2828
2829         /* Update stats */
2830         q_data->queue_stats.enqueued_count += i;
2831         q_data->queue_stats.enqueue_err_count += num - i;
2832
2833         return i;
2834 }
2835
2836 /* Enqueue decode operations for ACC100 device. */
2837 static uint16_t
2838 acc100_enqueue_dec(struct rte_bbdev_queue_data *q_data,
2839                 struct rte_bbdev_dec_op **ops, uint16_t num)
2840 {
2841         if (unlikely(num == 0))
2842                 return 0;
2843         if (ops[0]->turbo_dec.code_block_mode == 0)
2844                 return acc100_enqueue_dec_tb(q_data, ops, num);
2845         else
2846                 return acc100_enqueue_dec_cb(q_data, ops, num);
2847 }
2848
2849 /* Enqueue LDPC decode operations for ACC100 device. */
2850 static uint16_t
2851 acc100_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
2852                 struct rte_bbdev_dec_op **ops, uint16_t num)
2853 {
2854         struct acc100_queue *q = q_data->queue_private;
2855         int32_t aq_avail = q->aq_depth +
2856                         (q->aq_dequeued - q->aq_enqueued) / 128;
2857
2858         if (unlikely((aq_avail == 0) || (num == 0)))
2859                 return 0;
2860
2861         if (ops[0]->ldpc_dec.code_block_mode == 0)
2862                 return acc100_enqueue_ldpc_dec_tb(q_data, ops, num);
2863         else
2864                 return acc100_enqueue_ldpc_dec_cb(q_data, ops, num);
2865 }
2866
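     /*
      * Usage sketch (illustrative, not part of the driver): an application
      * reaches the enqueue/dequeue hooks above through the public bbdev API.
      * The dev_id, queue_id and burst values below are assumed for the example.
      *
      *	struct rte_bbdev_dec_op *ops[32];
      *	uint16_t enq, deq = 0;
      *	enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, ops, burst);
      *	while (deq < enq)
      *		deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
      *				&ops[deq], enq - deq);
      */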
2867
2868 /* Dequeue one encode operation from ACC100 device in CB mode */
2869 static inline int
2870 dequeue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
2871                 uint16_t total_dequeued_cbs, uint32_t *aq_dequeued)
2872 {
2873         union acc100_dma_desc *desc, atom_desc;
2874         union acc100_dma_rsp_desc rsp;
2875         struct rte_bbdev_enc_op *op;
2876         int i;
2877
2878         desc = q->ring_addr + ((q->sw_ring_tail + total_dequeued_cbs)
2879                         & q->sw_ring_wrap_mask);
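             /* Snapshot the HW-written response header with one atomic 64-bit load */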
2880         atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
2881                         __ATOMIC_RELAXED);
2882
2883         /* Check fdone bit */
2884         if (!(atom_desc.rsp.val & ACC100_FDONE))
2885                 return -1;
2886
2887         rsp.val = atom_desc.rsp.val;
2888         rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val);
2889
2890         /* Dequeue */
2891         op = desc->req.op_addr;
2892
2893         /* Clearing status, it will be set based on response */
2894         op->status = 0;
2895
2896         op->status |= ((rsp.input_err)
2897                         ? (1 << RTE_BBDEV_DATA_ERROR) : 0);
2898         op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
2899         op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
2900
2901         if (desc->req.last_desc_in_batch) {
2902                 (*aq_dequeued)++;
2903                 desc->req.last_desc_in_batch = 0;
2904         }
2905         desc->rsp.val = ACC100_DMA_DESC_TYPE;
2906         desc->rsp.add_info_0 = 0; /* Reserved bits */
2907         desc->rsp.add_info_1 = 0; /* Reserved bits */
2908
2909         /* Flag that muxing causes loss of opaque data */
2910         op->opaque_data = (void *)-1;
2911         for (i = 0; i < desc->req.numCBs; i++)
2912                 ref_op[i] = op;
2913
2914         /* One op was dequeued, covering all its muxed CBs */
2915         return desc->req.numCBs;
2916 }
2917
2918 /* Dequeue one encode operation from ACC100 device in TB mode */
2919 static inline int
2920 dequeue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
2921                 uint16_t total_dequeued_cbs, uint32_t *aq_dequeued)
2922 {
2923         union acc100_dma_desc *desc, *last_desc, atom_desc;
2924         union acc100_dma_rsp_desc rsp;
2925         struct rte_bbdev_enc_op *op;
2926         uint8_t i = 0;
2927         uint16_t current_dequeued_cbs = 0, cbs_in_tb;
2928
2929         desc = q->ring_addr + ((q->sw_ring_tail + total_dequeued_cbs)
2930                         & q->sw_ring_wrap_mask);
2931         atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
2932                         __ATOMIC_RELAXED);
2933
2934         /* Check fdone bit */
2935         if (!(atom_desc.rsp.val & ACC100_FDONE))
2936                 return -1;
2937
2938         /* Get number of CBs in dequeued TB */
2939         cbs_in_tb = desc->req.cbs_in_tb;
2940         /* Get last CB */
2941         last_desc = q->ring_addr + ((q->sw_ring_tail
2942                         + total_dequeued_cbs + cbs_in_tb - 1)
2943                         & q->sw_ring_wrap_mask);
2944         /* Check if last CB in TB is ready to dequeue (and thus
2945          * the whole TB) - checking sdone bit. If not return.
2946          */
2947         atom_desc.atom_hdr = __atomic_load_n((uint64_t *)last_desc,
2948                         __ATOMIC_RELAXED);
2949         if (!(atom_desc.rsp.val & ACC100_SDONE))
2950                 return -1;
2951
2952         /* Dequeue */
2953         op = desc->req.op_addr;
2954
2955         /* Clearing status, it will be set based on response */
2956         op->status = 0;
2957
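             /* Walk every CB descriptor of the TB and aggregate the status bits */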
2958         while (i < cbs_in_tb) {
2959                 desc = q->ring_addr + ((q->sw_ring_tail
2960                                 + total_dequeued_cbs)
2961                                 & q->sw_ring_wrap_mask);
2962                 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
2963                                 __ATOMIC_RELAXED);
2964                 rsp.val = atom_desc.rsp.val;
2965                 rte_bbdev_log_debug("Resp. desc %p: %x", desc,
2966                                 rsp.val);
2967
2968                 op->status |= ((rsp.input_err)
2969                                 ? (1 << RTE_BBDEV_DATA_ERROR) : 0);
2970                 op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
2971                 op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
2972
2973                 if (desc->req.last_desc_in_batch) {
2974                         (*aq_dequeued)++;
2975                         desc->req.last_desc_in_batch = 0;
2976                 }
2977                 desc->rsp.val = ACC100_DMA_DESC_TYPE;
2978                 desc->rsp.add_info_0 = 0;
2979                 desc->rsp.add_info_1 = 0;
2980                 total_dequeued_cbs++;
2981                 current_dequeued_cbs++;
2982                 i++;
2983         }
2984
2985         *ref_op = op;
2986
2987         return current_dequeued_cbs;
2988 }
2989
2990 /* Dequeue one decode operation from ACC100 device in CB mode */
2991 static inline int
2992 dequeue_dec_one_op_cb(struct rte_bbdev_queue_data *q_data,
2993                 struct acc100_queue *q, struct rte_bbdev_dec_op **ref_op,
2994                 uint16_t dequeued_cbs, uint32_t *aq_dequeued)
2995 {
2996         union acc100_dma_desc *desc, atom_desc;
2997         union acc100_dma_rsp_desc rsp;
2998         struct rte_bbdev_dec_op *op;
2999
3000         desc = q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
3001                         & q->sw_ring_wrap_mask);
3002         atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
3003                         __ATOMIC_RELAXED);
3004
3005         /* Check fdone bit */
3006         if (!(atom_desc.rsp.val & ACC100_FDONE))
3007                 return -1;
3008
3009         rsp.val = atom_desc.rsp.val;
3010         rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val);
3011
3012         /* Dequeue */
3013         op = desc->req.op_addr;
3014
3015         /* Clearing status, it will be set based on response */
3016         op->status = 0;
3017         op->status |= ((rsp.input_err)
3018                         ? (1 << RTE_BBDEV_DATA_ERROR) : 0);
3019         op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
3020         op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
3021         if (op->status != 0)
3022                 q_data->queue_stats.dequeue_err_count++;
3023
3024         /* Report CRC status only when no other error is set */
3025         if (!op->status)
3026                 op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR;
3027         op->turbo_dec.iter_count = (uint8_t) rsp.iter_cnt / 2;
3028         /* Check if this is the last desc in batch (Atomic Queue) */
3029         if (desc->req.last_desc_in_batch) {
3030                 (*aq_dequeued)++;
3031                 desc->req.last_desc_in_batch = 0;
3032         }
3033         desc->rsp.val = ACC100_DMA_DESC_TYPE;
3034         desc->rsp.add_info_0 = 0;
3035         desc->rsp.add_info_1 = 0;
3036         *ref_op = op;
3037
3038         /* One CB (op) was successfully dequeued */
3039         return 1;
3040 }
3041
3042 /* Dequeue one LDPC decode operation from ACC100 device in CB mode */
3043 static inline int
3044 dequeue_ldpc_dec_one_op_cb(struct rte_bbdev_queue_data *q_data,
3045                 struct acc100_queue *q, struct rte_bbdev_dec_op **ref_op,
3046                 uint16_t dequeued_cbs, uint32_t *aq_dequeued)
3047 {
3048         union acc100_dma_desc *desc, atom_desc;
3049         union acc100_dma_rsp_desc rsp;
3050         struct rte_bbdev_dec_op *op;
3051
3052         desc = q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
3053                         & q->sw_ring_wrap_mask);
3054         atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
3055                         __ATOMIC_RELAXED);
3056
3057         /* Check fdone bit */
3058         if (!(atom_desc.rsp.val & ACC100_FDONE))
3059                 return -1;
3060
3061         rsp.val = atom_desc.rsp.val;
3062
3063         /* Dequeue */
3064         op = desc->req.op_addr;
3065
3066         /* Clearing status, it will be set based on response */
3067         op->status = 0;
3068         op->status |= rsp.input_err << RTE_BBDEV_DATA_ERROR;
3069         op->status |= rsp.dma_err << RTE_BBDEV_DRV_ERROR;
3070         op->status |= rsp.fcw_err << RTE_BBDEV_DRV_ERROR;
3071         if (op->status != 0)
3072                 q_data->queue_stats.dequeue_err_count++;
3073
3074         op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR;
3075         if (op->ldpc_dec.hard_output.length > 0 && !rsp.synd_ok)
3076                 op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR;
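             /* LDPC reports whole iterations, unlike turbo's half-iteration count */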
3077         op->ldpc_dec.iter_count = (uint8_t) rsp.iter_cnt;
3078
3079         /* Check if this is the last desc in batch (Atomic Queue) */
3080         if (desc->req.last_desc_in_batch) {
3081                 (*aq_dequeued)++;
3082                 desc->req.last_desc_in_batch = 0;
3083         }
3084
3085         desc->rsp.val = ACC100_DMA_DESC_TYPE;
3086         desc->rsp.add_info_0 = 0;
3087         desc->rsp.add_info_1 = 0;
3088
3089         *ref_op = op;
3090
3091         /* One CB (op) was successfully dequeued */
3092         return 1;
3093 }
3094
3095 /* Dequeue one decode operation from ACC100 device in TB mode. */
3096 static inline int
3097 dequeue_dec_one_op_tb(struct acc100_queue *q, struct rte_bbdev_dec_op **ref_op,
3098                 uint16_t dequeued_cbs, uint32_t *aq_dequeued)
3099 {
3100         union acc100_dma_desc *desc, *last_desc, atom_desc;
3101         union acc100_dma_rsp_desc rsp;
3102         struct rte_bbdev_dec_op *op;
3103         uint8_t cbs_in_tb = 1, cb_idx = 0;
3104
3105         desc = q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
3106                         & q->sw_ring_wrap_mask);
3107         atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
3108                         __ATOMIC_RELAXED);
3109
3110         /* Check fdone bit */
3111         if (!(atom_desc.rsp.val & ACC100_FDONE))
3112                 return -1;
3113
3114         /* Dequeue */
3115         op = desc->req.op_addr;
3116
3117         /* Get number of CBs in dequeued TB */
3118         cbs_in_tb = desc->req.cbs_in_tb;
3119         /* Get last CB */
3120         last_desc = q->ring_addr + ((q->sw_ring_tail
3121                         + dequeued_cbs + cbs_in_tb - 1)
3122                         & q->sw_ring_wrap_mask);
3123         /* Check if last CB in TB is ready to dequeue (and thus
3124          * the whole TB) - checking sdone bit. If not return.
3125          */
3126         atom_desc.atom_hdr = __atomic_load_n((uint64_t *)last_desc,
3127                         __ATOMIC_RELAXED);
3128         if (!(atom_desc.rsp.val & ACC100_SDONE))
3129                 return -1;
3130
3131         /* Clearing status, it will be set based on response */
3132         op->status = 0;
3133
3134         /* Read remaining CBs if any exist */
3135         while (cb_idx < cbs_in_tb) {
3136                 desc = q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
3137                                 & q->sw_ring_wrap_mask);
3138                 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
3139                                 __ATOMIC_RELAXED);
3140                 rsp.val = atom_desc.rsp.val;
3141                 rte_bbdev_log_debug("Resp. desc %p: %x", desc,
3142                                 rsp.val);
3143
3144                 op->status |= ((rsp.input_err)
3145                                 ? (1 << RTE_BBDEV_DATA_ERROR) : 0);
3146                 op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
3147                 op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
3148
3149                 /* Report CRC status only when no other error is set */
3150                 if (!op->status)
3151                         op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR;
3152                 op->turbo_dec.iter_count = RTE_MAX((uint8_t) rsp.iter_cnt,
3153                                 op->turbo_dec.iter_count);
3154
3155                 /* Check if this is the last desc in batch (Atomic Queue) */
3156                 if (desc->req.last_desc_in_batch) {
3157                         (*aq_dequeued)++;
3158                         desc->req.last_desc_in_batch = 0;
3159                 }
3160                 desc->rsp.val = ACC100_DMA_DESC_TYPE;
3161                 desc->rsp.add_info_0 = 0;
3162                 desc->rsp.add_info_1 = 0;
3163                 dequeued_cbs++;
3164                 cb_idx++;
3165         }
3166
3167         *ref_op = op;
3168
3169         return cb_idx;
3170 }
3171
3172 /* Dequeue encode operations from ACC100 device. */
3173 static uint16_t
3174 acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
3175                 struct rte_bbdev_enc_op **ops, uint16_t num)
3176 {
3177         struct acc100_queue *q = q_data->queue_private;
3178         uint16_t dequeue_num;
3179         uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
3180         uint32_t aq_dequeued = 0;
3181         uint16_t i, dequeued_cbs = 0;
3182         struct rte_bbdev_enc_op *op;
3183         int ret;
3184
3185 #ifdef RTE_LIBRTE_BBDEV_DEBUG
3186         if (unlikely(ops == NULL || q == NULL)) {
3187                 rte_bbdev_log_debug("Unexpected undefined pointer");
3188                 return 0;
3189         }
3190 #endif
3191
3192         dequeue_num = (avail < num) ? avail : num;
3193
3194         for (i = 0; i < dequeue_num; ++i) {
3195                 op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
3196                         & q->sw_ring_wrap_mask))->req.op_addr;
3197                 if (op->turbo_enc.code_block_mode == 0)
3198                         ret = dequeue_enc_one_op_tb(q, &ops[i], dequeued_cbs,
3199                                         &aq_dequeued);
3200                 else
3201                         ret = dequeue_enc_one_op_cb(q, &ops[i], dequeued_cbs,
3202                                         &aq_dequeued);
3203
3204                 if (ret < 0)
3205                         break;
3206                 dequeued_cbs += ret;
3207         }
3208
3209         q->aq_dequeued += aq_dequeued;
3210         q->sw_ring_tail += dequeued_cbs;
3211
3212         /* Update dequeue stats */
3213         q_data->queue_stats.dequeued_count += i;
3214
3215         return i;
3216 }
3217
3218 /* Dequeue LDPC encode operations from ACC100 device. */
3219 static uint16_t
3220 acc100_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
3221                 struct rte_bbdev_enc_op **ops, uint16_t num)
3222 {
3223         struct acc100_queue *q = q_data->queue_private;
3224         uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
3225         uint32_t aq_dequeued = 0;
3226         uint16_t dequeue_num, i, dequeued_cbs = 0, dequeued_descs = 0;
3227         int ret;
3228
3229 #ifdef RTE_LIBRTE_BBDEV_DEBUG
3230         if (unlikely(ops == NULL || q == NULL))
3231                 return 0;
3232 #endif
3233
3234         dequeue_num = RTE_MIN(avail, num);
3235
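             /*
              * A muxed descriptor can return several CBs (ops), so descriptors
              * and CBs are counted separately and the ring tail advances per
              * descriptor, not per CB.
              */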
3236         for (i = 0; i < dequeue_num; i++) {
3237                 ret = dequeue_enc_one_op_cb(q, &ops[dequeued_cbs],
3238                                 dequeued_descs, &aq_dequeued);
3239                 if (ret < 0)
3240                         break;
3241                 dequeued_cbs += ret;
3242                 dequeued_descs++;
3243                 if (dequeued_cbs >= num)
3244                         break;
3245         }
3246
3247         q->aq_dequeued += aq_dequeued;
3248         q->sw_ring_tail += dequeued_descs;
3249
3250         /* Update dequeue stats */
3251         q_data->queue_stats.dequeued_count += dequeued_cbs;
3252
3253         return dequeued_cbs;
3254 }
3255
3256
3257 /* Dequeue decode operations from ACC100 device. */
3258 static uint16_t
3259 acc100_dequeue_dec(struct rte_bbdev_queue_data *q_data,
3260                 struct rte_bbdev_dec_op **ops, uint16_t num)
3261 {
3262         struct acc100_queue *q = q_data->queue_private;
3263         uint16_t dequeue_num;
3264         uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
3265         uint32_t aq_dequeued = 0;
3266         uint16_t i;
3267         uint16_t dequeued_cbs = 0;
3268         struct rte_bbdev_dec_op *op;
3269         int ret;
3270
3271 #ifdef RTE_LIBRTE_BBDEV_DEBUG
3272         if (unlikely(ops == NULL || q == NULL))
3273                 return 0;
3274 #endif
3275
3276         dequeue_num = (avail < num) ? avail : num;
3277
3278         for (i = 0; i < dequeue_num; ++i) {
3279                 op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
3280                         & q->sw_ring_wrap_mask))->req.op_addr;
3281                 if (op->turbo_dec.code_block_mode == 0)
3282                         ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs,
3283                                         &aq_dequeued);
3284                 else
3285                         ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
3286                                         dequeued_cbs, &aq_dequeued);
3287
3288                 if (ret < 0)
3289                         break;
3290                 dequeued_cbs += ret;
3291         }
3292
3293         q->aq_dequeued += aq_dequeued;
3294         q->sw_ring_tail += dequeued_cbs;
3295
3296         /* Update dequeue stats */
3297         q_data->queue_stats.dequeued_count += i;
3298
3299         return i;
3300 }
3301
3302 /* Dequeue decode operations from ACC100 device. */
3303 static uint16_t
3304 acc100_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
3305                 struct rte_bbdev_dec_op **ops, uint16_t num)
3306 {
3307         struct acc100_queue *q = q_data->queue_private;
3308         uint16_t dequeue_num;
3309         uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
3310         uint32_t aq_dequeued = 0;
3311         uint16_t i;
3312         uint16_t dequeued_cbs = 0;
3313         struct rte_bbdev_dec_op *op;
3314         int ret;
3315
3316 #ifdef RTE_LIBRTE_BBDEV_DEBUG
3317         if (unlikely(ops == NULL || q == NULL))
3318                 return 0;
3319 #endif
3320
3321         dequeue_num = RTE_MIN(avail, num);
3322
3323         for (i = 0; i < dequeue_num; ++i) {
3324                 op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
3325                         & q->sw_ring_wrap_mask))->req.op_addr;
3326                 if (op->ldpc_dec.code_block_mode == 0)
3327                         ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs,
3328                                         &aq_dequeued);
3329                 else
3330                         ret = dequeue_ldpc_dec_one_op_cb(
3331                                         q_data, q, &ops[i], dequeued_cbs,
3332                                         &aq_dequeued);
3333
3334                 if (ret < 0)
3335                         break;
3336                 dequeued_cbs += ret;
3337         }
3338
3339         q->aq_dequeued += aq_dequeued;
3340         q->sw_ring_tail += dequeued_cbs;
3341
3342         /* Update dequeue stats */
3343         q_data->queue_stats.dequeued_count += i;
3344
3345         return i;
3346 }
3347
3348 /* Initialization Function */
3349 static void
3350 acc100_bbdev_init(struct rte_bbdev *dev, struct rte_pci_driver *drv)
3351 {
3352         struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
3353
3354         dev->dev_ops = &acc100_bbdev_ops;
3355         dev->enqueue_enc_ops = acc100_enqueue_enc;
3356         dev->enqueue_dec_ops = acc100_enqueue_dec;
3357         dev->dequeue_enc_ops = acc100_dequeue_enc;
3358         dev->dequeue_dec_ops = acc100_dequeue_dec;
3359         dev->enqueue_ldpc_enc_ops = acc100_enqueue_ldpc_enc;
3360         dev->enqueue_ldpc_dec_ops = acc100_enqueue_ldpc_dec;
3361         dev->dequeue_ldpc_enc_ops = acc100_dequeue_ldpc_enc;
3362         dev->dequeue_ldpc_dec_ops = acc100_dequeue_ldpc_dec;
3363
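             /* PF and VF share this init path; tell them apart by driver name */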
3364         ((struct acc100_device *) dev->data->dev_private)->pf_device =
3365                         !strcmp(drv->driver.name,
3366                                         RTE_STR(ACC100PF_DRIVER_NAME));
3367         ((struct acc100_device *) dev->data->dev_private)->mmio_base =
3368                         pci_dev->mem_resource[0].addr;
3369
3370         rte_bbdev_log_debug("Init device %s [%s] @ vaddr %p paddr %#"PRIx64"",
3371                         drv->driver.name, dev->data->name,
3372                         (void *)pci_dev->mem_resource[0].addr,
3373                         pci_dev->mem_resource[0].phys_addr);
3374 }
3375
3376 static int acc100_pci_probe(struct rte_pci_driver *pci_drv,
3377         struct rte_pci_device *pci_dev)
3378 {
3379         struct rte_bbdev *bbdev = NULL;
3380         char dev_name[RTE_BBDEV_NAME_MAX_LEN];
3381
3382         if (pci_dev == NULL) {
3383                 rte_bbdev_log(ERR, "NULL PCI device");
3384                 return -EINVAL;
3385         }
3386
3387         rte_pci_device_name(&pci_dev->addr, dev_name, sizeof(dev_name));
3388
3389         /* Allocate memory to be used privately by drivers */
3390         bbdev = rte_bbdev_allocate(pci_dev->device.name);
3391         if (bbdev == NULL)
3392                 return -ENODEV;
3393
3394         /* allocate device private memory */
3395         bbdev->data->dev_private = rte_zmalloc_socket(dev_name,
3396                         sizeof(struct acc100_device), RTE_CACHE_LINE_SIZE,
3397                         pci_dev->device.numa_node);
3398
3399         if (bbdev->data->dev_private == NULL) {
3400                 rte_bbdev_log(CRIT,
3401                                 "Allocation of %zu bytes for device \"%s\" failed",
3402                                 sizeof(struct acc100_device), dev_name);
3403                 rte_bbdev_release(bbdev);
3404                 return -ENOMEM;
3405         }
3406
3407         /* Fill HW specific part of device structure */
3408         bbdev->device = &pci_dev->device;
3409         bbdev->intr_handle = &pci_dev->intr_handle;
3410         bbdev->data->socket_id = pci_dev->device.numa_node;
3411
3412         /* Invoke ACC100 device initialization function */
3413         acc100_bbdev_init(bbdev, pci_drv);
3414
3415         rte_bbdev_log_debug("Initialised bbdev %s (id = %u)",
3416                         dev_name, bbdev->data->dev_id);
3417         return 0;
3418 }
3419
3420 static int acc100_pci_remove(struct rte_pci_device *pci_dev)
3421 {
3422         struct rte_bbdev *bbdev;
3423         int ret;
3424         uint8_t dev_id;
3425
3426         if (pci_dev == NULL)
3427                 return -EINVAL;
3428
3429         /* Find device */
3430         bbdev = rte_bbdev_get_named_dev(pci_dev->device.name);
3431         if (bbdev == NULL) {
3432                 rte_bbdev_log(CRIT,
3433                                 "Couldn't find HW dev \"%s\" to uninitialise it",
3434                                 pci_dev->device.name);
3435                 return -ENODEV;
3436         }
3437         dev_id = bbdev->data->dev_id;
3438
3439         /* free device private memory before close */
3440         rte_free(bbdev->data->dev_private);
3441
3442         /* Close device */
3443         ret = rte_bbdev_close(dev_id);
3444         if (ret < 0)
3445                 rte_bbdev_log(ERR,
3446                                 "Device %i failed to close during uninit: %i",
3447                                 dev_id, ret);
3448
3449         /* release bbdev from library */
3450         rte_bbdev_release(bbdev);
3451
3452         rte_bbdev_log_debug("Destroyed bbdev = %u", dev_id);
3453
3454         return 0;
3455 }
3456
3457 static struct rte_pci_driver acc100_pci_pf_driver = {
3458                 .probe = acc100_pci_probe,
3459                 .remove = acc100_pci_remove,
3460                 .id_table = pci_id_acc100_pf_map,
3461                 .drv_flags = RTE_PCI_DRV_NEED_MAPPING
3462 };
3463
3464 static struct rte_pci_driver acc100_pci_vf_driver = {
3465                 .probe = acc100_pci_probe,
3466                 .remove = acc100_pci_remove,
3467                 .id_table = pci_id_acc100_vf_map,
3468                 .drv_flags = RTE_PCI_DRV_NEED_MAPPING
3469 };
3470
3471 RTE_PMD_REGISTER_PCI(ACC100PF_DRIVER_NAME, acc100_pci_pf_driver);
3472 RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME, pci_id_acc100_pf_map);
3473 RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME, acc100_pci_vf_driver);
3474 RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME, pci_id_acc100_vf_map);