1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020 Intel Corporation
7 #include <rte_common.h>
10 #include <rte_malloc.h>
11 #include <rte_mempool.h>
12 #include <rte_byteorder.h>
13 #include <rte_errno.h>
14 #include <rte_branch_prediction.h>
15 #include <rte_hexdump.h>
17 #include <rte_bus_pci.h>
18 #ifdef RTE_BBDEV_OFFLOAD_COST
19 #include <rte_cycles.h>
22 #include <rte_bbdev.h>
23 #include <rte_bbdev_pmd.h>
24 #include "rte_acc100_pmd.h"
26 #ifdef RTE_LIBRTE_BBDEV_DEBUG
27 RTE_LOG_REGISTER(acc100_logtype, pmd.bb.acc100, DEBUG);
28 #else
29 RTE_LOG_REGISTER(acc100_logtype, pmd.bb.acc100, NOTICE);
30 #endif
32 /* Write to MMIO register address */
34 mmio_write(void *addr, uint32_t value)
36 *((volatile uint32_t *)(addr)) = rte_cpu_to_le_32(value);
39 /* Write a register of an ACC100 device */
41 acc100_reg_write(struct acc100_device *d, uint32_t offset, uint32_t payload)
43 void *reg_addr = RTE_PTR_ADD(d->mmio_base, offset);
44 mmio_write(reg_addr, payload);
45 usleep(ACC100_LONG_WAIT);
48 /* Read a register of an ACC100 device */
49 static inline uint32_t
50 acc100_reg_read(struct acc100_device *d, uint32_t offset)
53 void *reg_addr = RTE_PTR_ADD(d->mmio_base, offset);
54 uint32_t ret = *((volatile uint32_t *)(reg_addr));
55 return rte_le_to_cpu_32(ret);
58 /* Basic implementation of log2, valid only for an exact power of two (2^N) */
59 static inline uint32_t
60 log2_basic(uint32_t value)
62 return (value == 0) ? 0 : rte_bsf32(value);
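/* Example: rte_bsf32() returns the index of the lowest set bit, so
 * log2_basic(256) == 8 and log2_basic(1) == 0. For a non-power-of-two
 * input such as 12 it would return 2, hence the "exact 2^N" restriction.
 */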
65 /* Calculate memory alignment offset assuming alignment is 2^N */
66 static inline uint32_t
67 calc_mem_alignment_offset(void *unaligned_virt_mem, uint32_t alignment)
69 rte_iova_t unaligned_phy_mem = rte_malloc_virt2iova(unaligned_virt_mem);
70 return (uint32_t)(alignment -
71 (unaligned_phy_mem & (alignment-1)));
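/* Worked example: with alignment = 64MB (0x4000000) and an IOVA of
 * 0x12345678, the residue is 0x2345678 and the function returns
 * 0x4000000 - 0x2345678 = 0x1CBA988. For an IOVA that is already
 * aligned the residue is 0 and a full stride (alignment, not 0) is
 * returned.
 */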
74 /* Calculate the offset of the enqueue register */
75 static inline uint32_t
76 queue_offset(bool pf_device, uint8_t vf_id, uint8_t qgrp_id, uint16_t aq_id)
79 return ((vf_id << 12) + (qgrp_id << 7) + (aq_id << 3) +
82 return ((qgrp_id << 7) + (aq_id << 3) +
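/* Note on the layout implied by the shifts above (inferred from this
 * code, not from a datasheet): the enqueue register offset packs aq_id
 * at bits [6:3], qgrp_id at bits [11:7] and, on the PF, vf_id at bits
 * [16:12]. E.g. qgrp 2, aq 3 on a VF maps to (2 << 7) + (3 << 3) =
 * 0x118 past the queue register base.
 */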
86 enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC};
88 /* Return the queue topology for a Queue Group Index */
90 qtopFromAcc(struct rte_acc100_queue_topology **qtop, int acc_enum,
91 struct rte_acc100_conf *acc100_conf)
93 struct rte_acc100_queue_topology *p_qtop;
97 p_qtop = &(acc100_conf->q_ul_4g);
100 p_qtop = &(acc100_conf->q_ul_5g);
103 p_qtop = &(acc100_conf->q_dl_4g);
106 p_qtop = &(acc100_conf->q_dl_5g);
110 rte_bbdev_log(ERR, "Unexpected error evaluating qtopFromAcc");
117 initQTop(struct rte_acc100_conf *acc100_conf)
119 acc100_conf->q_ul_4g.num_aqs_per_groups = 0;
120 acc100_conf->q_ul_4g.num_qgroups = 0;
121 acc100_conf->q_ul_4g.first_qgroup_index = -1;
122 acc100_conf->q_ul_5g.num_aqs_per_groups = 0;
123 acc100_conf->q_ul_5g.num_qgroups = 0;
124 acc100_conf->q_ul_5g.first_qgroup_index = -1;
125 acc100_conf->q_dl_4g.num_aqs_per_groups = 0;
126 acc100_conf->q_dl_4g.num_qgroups = 0;
127 acc100_conf->q_dl_4g.first_qgroup_index = -1;
128 acc100_conf->q_dl_5g.num_aqs_per_groups = 0;
129 acc100_conf->q_dl_5g.num_qgroups = 0;
130 acc100_conf->q_dl_5g.first_qgroup_index = -1;
134 updateQtop(uint8_t acc, uint8_t qg, struct rte_acc100_conf *acc100_conf,
135 struct acc100_device *d) {
137 struct rte_acc100_queue_topology *q_top = NULL;
138 qtopFromAcc(&q_top, acc, acc100_conf);
139 if (unlikely(q_top == NULL))
142 q_top->num_qgroups++;
143 if (q_top->first_qgroup_index == -1) {
144 q_top->first_qgroup_index = qg;
145 /* Can be optimized to assume all are enabled by default */
146 reg = acc100_reg_read(d, queue_offset(d->pf_device,
147 0, qg, ACC100_NUM_AQS - 1));
148 if (reg & ACC100_QUEUE_ENABLE) {
149 q_top->num_aqs_per_groups = ACC100_NUM_AQS;
152 q_top->num_aqs_per_groups = 0;
153 for (aq = 0; aq < ACC100_NUM_AQS; aq++) {
154 reg = acc100_reg_read(d, queue_offset(d->pf_device,
156 if (reg & ACC100_QUEUE_ENABLE)
157 q_top->num_aqs_per_groups++;
162 /* Fetch configuration enabled for the PF/VF using MMIO Read (slow) */
164 fetch_acc100_config(struct rte_bbdev *dev)
166 struct acc100_device *d = dev->data->dev_private;
167 struct rte_acc100_conf *acc100_conf = &d->acc100_conf;
168 const struct acc100_registry_addr *reg_addr;
170 uint32_t reg, reg_aq, reg_len0, reg_len1;
173 /* No need to retrieve the configuration if it is already done */
177 /* Choose correct registry addresses for the device type */
179 reg_addr = &pf_reg_addr;
181 reg_addr = &vf_reg_addr;
183 d->ddr_size = (1 + acc100_reg_read(d, reg_addr->ddr_range)) << 10;
185 /* Single VF Bundle by VF */
186 acc100_conf->num_vf_bundles = 1;
187 initQTop(acc100_conf);
189 struct rte_acc100_queue_topology *q_top = NULL;
190 int qman_func_id[ACC100_NUM_ACCS] = {ACC100_ACCMAP_0, ACC100_ACCMAP_1,
191 ACC100_ACCMAP_2, ACC100_ACCMAP_3, ACC100_ACCMAP_4};
192 reg = acc100_reg_read(d, reg_addr->qman_group_func);
193 for (qg = 0; qg < ACC100_NUM_QGRPS_PER_WORD; qg++) {
194 reg_aq = acc100_reg_read(d,
195 queue_offset(d->pf_device, 0, qg, 0));
196 if (reg_aq & ACC100_QUEUE_ENABLE) {
197 uint32_t idx = (reg >> (qg * 4)) & 0x7;
198 if (idx < ACC100_NUM_ACCS) {
199 acc = qman_func_id[idx];
200 updateQtop(acc, qg, acc100_conf, d);
205 /* Check the depth of the AQs */
206 reg_len0 = acc100_reg_read(d, reg_addr->depth_log0_offset);
207 reg_len1 = acc100_reg_read(d, reg_addr->depth_log1_offset);
208 for (acc = 0; acc < NUM_ACC; acc++) {
209 qtopFromAcc(&q_top, acc, acc100_conf);
210 if (q_top->first_qgroup_index < ACC100_NUM_QGRPS_PER_WORD)
211 q_top->aq_depth_log2 = (reg_len0 >>
212 (q_top->first_qgroup_index * 4))
215 q_top->aq_depth_log2 = (reg_len1 >>
216 ((q_top->first_qgroup_index -
217 ACC100_NUM_QGRPS_PER_WORD) * 4))
223 reg_mode = acc100_reg_read(d, HWPfHiPfMode);
224 acc100_conf->pf_mode_en = (reg_mode == ACC100_PF_VAL) ? 1 : 0;
228 "%s Config LLR SIGN IN/OUT %s %s QG %u %u %u %u AQ %u %u %u %u Len %u %u %u %u\n",
229 (d->pf_device) ? "PF" : "VF",
230 (acc100_conf->input_pos_llr_1_bit) ? "POS" : "NEG",
231 (acc100_conf->output_pos_llr_1_bit) ? "POS" : "NEG",
232 acc100_conf->q_ul_4g.num_qgroups,
233 acc100_conf->q_dl_4g.num_qgroups,
234 acc100_conf->q_ul_5g.num_qgroups,
235 acc100_conf->q_dl_5g.num_qgroups,
236 acc100_conf->q_ul_4g.num_aqs_per_groups,
237 acc100_conf->q_dl_4g.num_aqs_per_groups,
238 acc100_conf->q_ul_5g.num_aqs_per_groups,
239 acc100_conf->q_dl_5g.num_aqs_per_groups,
240 acc100_conf->q_ul_4g.aq_depth_log2,
241 acc100_conf->q_dl_4g.aq_depth_log2,
242 acc100_conf->q_ul_5g.aq_depth_log2,
243 acc100_conf->q_dl_5g.aq_depth_log2);
247 free_base_addresses(void **base_addrs, int size)
250 for (i = 0; i < size; i++)
251 rte_free(base_addrs[i]);
254 static inline uint32_t
257 return sizeof(union acc100_dma_desc);
260 /* Allocate the 2 * 64MB block for the sw rings */
262 alloc_2x64mb_sw_rings_mem(struct rte_bbdev *dev, struct acc100_device *d,
265 uint32_t sw_ring_size = ACC100_SIZE_64MBYTE;
266 d->sw_rings_base = rte_zmalloc_socket(dev->device->driver->name,
267 2 * sw_ring_size, RTE_CACHE_LINE_SIZE, socket);
268 if (d->sw_rings_base == NULL) {
269 rte_bbdev_log(ERR, "Failed to allocate memory for %s:%u",
270 dev->device->driver->name,
274 uint32_t next_64mb_align_offset = calc_mem_alignment_offset(
275 d->sw_rings_base, ACC100_SIZE_64MBYTE);
276 d->sw_rings = RTE_PTR_ADD(d->sw_rings_base, next_64mb_align_offset);
277 d->sw_rings_iova = rte_malloc_virt2iova(d->sw_rings_base) +
278 next_64mb_align_offset;
279 d->sw_ring_size = ACC100_MAX_QUEUE_DEPTH * get_desc_len();
280 d->sw_ring_max_depth = ACC100_MAX_QUEUE_DEPTH;
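/* Rationale: allocating 2 * 64MB guarantees that a fully 64MB-aligned
 * 64MB window exists inside the block whatever base address
 * rte_zmalloc_socket() returns; calc_mem_alignment_offset() then finds
 * the start of that window. This is the fallback used when the
 * minimised allocation strategy below fails.
 */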
285 /* Attempt to allocate minimised memory space for sw rings */
287 alloc_sw_rings_min_mem(struct rte_bbdev *dev, struct acc100_device *d,
288 uint16_t num_queues, int socket)
290 rte_iova_t sw_rings_base_iova, next_64mb_align_addr_iova;
291 uint32_t next_64mb_align_offset;
292 rte_iova_t sw_ring_iova_end_addr;
293 void *base_addrs[ACC100_SW_RING_MEM_ALLOC_ATTEMPTS];
296 uint32_t q_sw_ring_size = ACC100_MAX_QUEUE_DEPTH * get_desc_len();
297 uint32_t dev_sw_ring_size = q_sw_ring_size * num_queues;
299 /* Find an aligned block of memory to store sw rings */
300 while (i < ACC100_SW_RING_MEM_ALLOC_ATTEMPTS) {
302 * sw_ring allocated memory is guaranteed to be aligned to
303 * q_sw_ring_size at the condition that the requested size is
304 * less than the page size
306 sw_rings_base = rte_zmalloc_socket(
307 dev->device->driver->name,
308 dev_sw_ring_size, q_sw_ring_size, socket);
310 if (sw_rings_base == NULL) {
312 "Failed to allocate memory for %s:%u",
313 dev->device->driver->name,
318 sw_rings_base_iova = rte_malloc_virt2iova(sw_rings_base);
319 next_64mb_align_offset = calc_mem_alignment_offset(
320 sw_rings_base, ACC100_SIZE_64MBYTE);
321 next_64mb_align_addr_iova = sw_rings_base_iova +
322 next_64mb_align_offset;
323 sw_ring_iova_end_addr = sw_rings_base_iova + dev_sw_ring_size;
325 /* Check if the end of the sw ring memory block is before the
326 * start of next 64MB aligned mem address
328 if (sw_ring_iova_end_addr < next_64mb_align_addr_iova) {
329 d->sw_rings_iova = sw_rings_base_iova;
330 d->sw_rings = sw_rings_base;
331 d->sw_rings_base = sw_rings_base;
332 d->sw_ring_size = q_sw_ring_size;
333 d->sw_ring_max_depth = ACC100_MAX_QUEUE_DEPTH;
336 /* Store the address of the unaligned mem block */
337 base_addrs[i] = sw_rings_base;
341 /* Free all unaligned blocks of mem allocated in the loop */
342 free_base_addresses(base_addrs, i);
345 /* Allocate 64MB memory used for all software rings */
347 acc100_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
349 uint32_t phys_low, phys_high, payload;
350 struct acc100_device *d = dev->data->dev_private;
351 const struct acc100_registry_addr *reg_addr;
353 if (d->pf_device && !d->acc100_conf.pf_mode_en) {
354 rte_bbdev_log(NOTICE,
355 "%s has PF mode disabled. This PF can't be used.",
360 alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
362 /* If minimal memory space approach failed, then allocate
363 * the 2 * 64MB block for the sw rings
365 if (d->sw_rings == NULL)
366 alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
368 if (d->sw_rings == NULL) {
369 rte_bbdev_log(NOTICE,
370 "Failure allocating sw_rings memory");
374 /* Configure ACC100 with the base address for DMA descriptor rings
375 * Same descriptor rings used for UL and DL DMA Engines
376 * Note : Assuming only VF0 bundle is used for PF mode
378 phys_high = (uint32_t)(d->sw_rings_iova >> 32);
379 phys_low = (uint32_t)(d->sw_rings_iova & ~(ACC100_SIZE_64MBYTE-1));
381 /* Choose correct registry addresses for the device type */
383 reg_addr = &pf_reg_addr;
385 reg_addr = &vf_reg_addr;
387 /* Read the populated cfg from ACC100 registers */
388 fetch_acc100_config(dev);
390 /* Release AXI from PF */
392 acc100_reg_write(d, HWPfDmaAxiControl, 1);
394 acc100_reg_write(d, reg_addr->dma_ring_ul5g_hi, phys_high);
395 acc100_reg_write(d, reg_addr->dma_ring_ul5g_lo, phys_low);
396 acc100_reg_write(d, reg_addr->dma_ring_dl5g_hi, phys_high);
397 acc100_reg_write(d, reg_addr->dma_ring_dl5g_lo, phys_low);
398 acc100_reg_write(d, reg_addr->dma_ring_ul4g_hi, phys_high);
399 acc100_reg_write(d, reg_addr->dma_ring_ul4g_lo, phys_low);
400 acc100_reg_write(d, reg_addr->dma_ring_dl4g_hi, phys_high);
401 acc100_reg_write(d, reg_addr->dma_ring_dl4g_lo, phys_low);
404 * Configure Ring Size to the max queue ring size
405 * (used for wrapping purpose)
407 payload = log2_basic(d->sw_ring_size / 64);
408 acc100_reg_write(d, reg_addr->ring_size, payload);
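/* Worked example (assuming ACC100_MAX_QUEUE_DEPTH == 1024 from the PMD
 * header and a 256-byte union acc100_dma_desc, as implied by the
 * (desc_idx << 8) arithmetic used elsewhere in this file):
 * sw_ring_size = 1024 * 256 = 256KB, so the register is written with
 * log2_basic(262144 / 64) = 12.
 */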
410 /* Configure tail pointer for use when SDONE enabled */
411 d->tail_ptrs = rte_zmalloc_socket(
412 dev->device->driver->name,
413 ACC100_NUM_QGRPS * ACC100_NUM_AQS * sizeof(uint32_t),
414 RTE_CACHE_LINE_SIZE, socket_id);
415 if (d->tail_ptrs == NULL) {
416 rte_bbdev_log(ERR, "Failed to allocate tail ptr for %s:%u",
417 dev->device->driver->name,
419 rte_free(d->sw_rings);
422 d->tail_ptr_iova = rte_malloc_virt2iova(d->tail_ptrs);
424 phys_high = (uint32_t)(d->tail_ptr_iova >> 32);
425 phys_low = (uint32_t)(d->tail_ptr_iova);
426 acc100_reg_write(d, reg_addr->tail_ptrs_ul5g_hi, phys_high);
427 acc100_reg_write(d, reg_addr->tail_ptrs_ul5g_lo, phys_low);
428 acc100_reg_write(d, reg_addr->tail_ptrs_dl5g_hi, phys_high);
429 acc100_reg_write(d, reg_addr->tail_ptrs_dl5g_lo, phys_low);
430 acc100_reg_write(d, reg_addr->tail_ptrs_ul4g_hi, phys_high);
431 acc100_reg_write(d, reg_addr->tail_ptrs_ul4g_lo, phys_low);
432 acc100_reg_write(d, reg_addr->tail_ptrs_dl4g_hi, phys_high);
433 acc100_reg_write(d, reg_addr->tail_ptrs_dl4g_lo, phys_low);
435 d->harq_layout = rte_zmalloc_socket("HARQ Layout",
436 ACC100_HARQ_LAYOUT * sizeof(*d->harq_layout),
437 RTE_CACHE_LINE_SIZE, dev->data->socket_id);
438 if (d->harq_layout == NULL) {
439 rte_bbdev_log(ERR, "Failed to allocate harq_layout for %s:%u",
440 dev->device->driver->name,
442 rte_free(d->sw_rings);
446 /* Mark as configured properly */
447 d->configured = true;
450 "ACC100 (%s) configured sw_rings = %p, sw_rings_iova = %#"
451 PRIx64, dev->data->name, d->sw_rings, d->sw_rings_iova);
456 /* Free memory used for software rings */
458 acc100_dev_close(struct rte_bbdev *dev)
460 struct acc100_device *d = dev->data->dev_private;
461 if (d->sw_rings_base != NULL) {
462 rte_free(d->tail_ptrs);
463 rte_free(d->sw_rings_base);
464 d->sw_rings_base = NULL;
466 /* Ensure all in flight HW transactions are completed */
467 usleep(ACC100_LONG_WAIT);
472 * Report an ACC100 queue index which is free
473 * Return 0 to 16k for a valid queue_idx or -1 when no queue is available
474 * Note : Only supporting VF0 Bundle for PF mode
477 acc100_find_free_queue_idx(struct rte_bbdev *dev,
478 const struct rte_bbdev_queue_conf *conf)
480 struct acc100_device *d = dev->data->dev_private;
481 int op_2_acc[5] = {0, UL_4G, DL_4G, UL_5G, DL_5G};
482 int acc = op_2_acc[conf->op_type];
483 struct rte_acc100_queue_topology *qtop = NULL;
485 qtopFromAcc(&qtop, acc, &(d->acc100_conf));
488 /* Identify the matching QGroup index; groups are sorted in priority order */
489 uint16_t group_idx = qtop->first_qgroup_index;
490 group_idx += conf->priority;
491 if (group_idx >= ACC100_NUM_QGRPS ||
492 conf->priority >= qtop->num_qgroups) {
493 rte_bbdev_log(INFO, "Invalid Priority on %s, priority %u",
494 dev->data->name, conf->priority);
497 /* Find a free AQ_idx */
499 for (aq_idx = 0; aq_idx < qtop->num_aqs_per_groups; aq_idx++) {
500 if (((d->q_assigned_bit_map[group_idx] >> aq_idx) & 0x1) == 0) {
501 /* Mark the Queue as assigned */
502 d->q_assigned_bit_map[group_idx] |= (1 << aq_idx);
503 /* Report the AQ Index */
504 return (group_idx << ACC100_GRP_ID_SHIFT) + aq_idx;
507 rte_bbdev_log(INFO, "Failed to find free queue on %s, priority %u",
508 dev->data->name, conf->priority);
512 /* Setup ACC100 queue */
514 acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
515 const struct rte_bbdev_queue_conf *conf)
517 struct acc100_device *d = dev->data->dev_private;
518 struct acc100_queue *q;
521 /* Allocate the queue data structure. */
522 q = rte_zmalloc_socket(dev->device->driver->name, sizeof(*q),
523 RTE_CACHE_LINE_SIZE, conf->socket);
525 rte_bbdev_log(ERR, "Failed to allocate queue memory");
529 rte_bbdev_log(ERR, "Undefined device");
534 q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id));
535 q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id);
537 /* Prepare the Ring with default descriptor format */
538 union acc100_dma_desc *desc = NULL;
539 unsigned int desc_idx, b_idx;
540 int fcw_len = (conf->op_type == RTE_BBDEV_OP_LDPC_ENC ?
541 ACC100_FCW_LE_BLEN : (conf->op_type == RTE_BBDEV_OP_TURBO_DEC ?
542 ACC100_FCW_TD_BLEN : ACC100_FCW_LD_BLEN));
544 for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) {
545 desc = q->ring_addr + desc_idx;
546 desc->req.word0 = ACC100_DMA_DESC_TYPE;
547 desc->req.word1 = 0; /**< Timestamp */
550 uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
551 desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
552 desc->req.data_ptrs[0].blen = fcw_len;
553 desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
554 desc->req.data_ptrs[0].last = 0;
555 desc->req.data_ptrs[0].dma_ext = 0;
556 for (b_idx = 1; b_idx < ACC100_DMA_MAX_NUM_POINTERS - 1;
558 desc->req.data_ptrs[b_idx].blkid = ACC100_DMA_BLKID_IN;
559 desc->req.data_ptrs[b_idx].last = 1;
560 desc->req.data_ptrs[b_idx].dma_ext = 0;
562 desc->req.data_ptrs[b_idx].blkid =
563 ACC100_DMA_BLKID_OUT_ENC;
564 desc->req.data_ptrs[b_idx].last = 1;
565 desc->req.data_ptrs[b_idx].dma_ext = 0;
567 /* Preset some fields of LDPC FCW */
568 desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
569 desc->req.fcw_ld.gain_i = 1;
570 desc->req.fcw_ld.gain_h = 1;
573 q->lb_in = rte_zmalloc_socket(dev->device->driver->name,
575 RTE_CACHE_LINE_SIZE, conf->socket);
576 if (q->lb_in == NULL) {
577 rte_bbdev_log(ERR, "Failed to allocate lb_in memory");
581 q->lb_in_addr_iova = rte_malloc_virt2iova(q->lb_in);
582 q->lb_out = rte_zmalloc_socket(dev->device->driver->name,
584 RTE_CACHE_LINE_SIZE, conf->socket);
585 if (q->lb_out == NULL) {
586 rte_bbdev_log(ERR, "Failed to allocate lb_out memory");
591 q->lb_out_addr_iova = rte_malloc_virt2iova(q->lb_out);
594 * Software queue ring wraps synchronously with the HW when it reaches
595 * the boundary of the maximum allocated queue size, no matter what the
596 * sw queue size is. This wrapping is guarded by setting the wrap_mask
597 * to represent the maximum queue size as allocated at the time when
598 * the device has been setup (in configure()).
600 * The queue depth is set to the queue size value (conf->queue_size).
601 * This limits the occupancy of the queue at any point of time, so that
602 * the queue does not get swamped with enqueue requests.
604 q->sw_ring_depth = conf->queue_size;
605 q->sw_ring_wrap_mask = d->sw_ring_max_depth - 1;
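/* Example of the scheme described above: with conf->queue_size = 128
 * and sw_ring_max_depth = 1024, sw_ring_depth limits occupancy to 128
 * outstanding ops while sw_ring_wrap_mask = 0x3FF lets the ring indices
 * wrap at the 1024 descriptors actually allocated.
 */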
607 q->op_type = conf->op_type;
609 q_idx = acc100_find_free_queue_idx(dev, conf);
617 q->qgrp_id = (q_idx >> ACC100_GRP_ID_SHIFT) & 0xF;
618 q->vf_id = (q_idx >> ACC100_VF_ID_SHIFT) & 0x3F;
619 q->aq_id = q_idx & 0xF;
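/* The field widths above mirror the encoding in
 * acc100_find_free_queue_idx(), which packs aq_id in the low bits and
 * qgrp_id at ACC100_GRP_ID_SHIFT; vf_id (ACC100_VF_ID_SHIFT) stays 0
 * there since only the VF0 bundle is supported for PF mode. The shift
 * constants themselves are defined in the PMD header.
 */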
620 q->aq_depth = (conf->op_type == RTE_BBDEV_OP_TURBO_DEC) ?
621 (1 << d->acc100_conf.q_ul_4g.aq_depth_log2) :
622 (1 << d->acc100_conf.q_dl_4g.aq_depth_log2);
624 q->mmio_reg_enqueue = RTE_PTR_ADD(d->mmio_base,
625 queue_offset(d->pf_device,
626 q->vf_id, q->qgrp_id, q->aq_id));
629 "Setup dev%u q%u: qgrp_id=%u, vf_id=%u, aq_id=%u, aq_depth=%u, mmio_reg_enqueue=%p",
630 dev->data->dev_id, queue_id, q->qgrp_id, q->vf_id,
631 q->aq_id, q->aq_depth, q->mmio_reg_enqueue);
633 dev->data->queues[queue_id].queue_private = q;
637 /* Release ACC100 queue */
639 acc100_queue_release(struct rte_bbdev *dev, uint16_t q_id)
641 struct acc100_device *d = dev->data->dev_private;
642 struct acc100_queue *q = dev->data->queues[q_id].queue_private;
645 /* Mark the Queue as un-assigned */
646 d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFF -
651 dev->data->queues[q_id].queue_private = NULL;
657 /* Get ACC100 device info */
659 acc100_dev_info_get(struct rte_bbdev *dev,
660 struct rte_bbdev_driver_info *dev_info)
662 struct acc100_device *d = dev->data->dev_private;
664 static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
666 .type = RTE_BBDEV_OP_TURBO_DEC,
669 RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
670 RTE_BBDEV_TURBO_CRC_TYPE_24B |
671 RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
672 RTE_BBDEV_TURBO_EARLY_TERMINATION |
673 RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
674 RTE_BBDEV_TURBO_MAP_DEC |
675 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
676 RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
677 .max_llr_modulus = INT8_MAX,
679 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
680 .num_buffers_hard_out =
681 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
682 .num_buffers_soft_out =
683 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
687 .type = RTE_BBDEV_OP_TURBO_ENC,
690 RTE_BBDEV_TURBO_CRC_24B_ATTACH |
691 RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
692 RTE_BBDEV_TURBO_RATE_MATCH |
693 RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
695 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
697 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
701 .type = RTE_BBDEV_OP_LDPC_ENC,
704 RTE_BBDEV_LDPC_RATE_MATCH |
705 RTE_BBDEV_LDPC_CRC_24B_ATTACH |
706 RTE_BBDEV_LDPC_INTERLEAVER_BYPASS,
708 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
710 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
714 .type = RTE_BBDEV_OP_LDPC_DEC,
717 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
718 RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
719 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
720 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
721 #ifdef ACC100_EXT_MEM
722 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK |
723 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE |
724 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE |
726 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE |
727 RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS |
728 RTE_BBDEV_LDPC_DECODE_BYPASS |
729 RTE_BBDEV_LDPC_DEC_SCATTER_GATHER |
730 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION |
731 RTE_BBDEV_LDPC_LLR_COMPRESSION,
735 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
736 .num_buffers_hard_out =
737 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
738 .num_buffers_soft_out = 0,
741 RTE_BBDEV_END_OF_CAPABILITIES_LIST()
744 static struct rte_bbdev_queue_conf default_queue_conf;
745 default_queue_conf.socket = dev->data->socket_id;
746 default_queue_conf.queue_size = ACC100_MAX_QUEUE_DEPTH;
748 dev_info->driver_name = dev->device->driver->name;
750 /* Read and save the populated config from ACC100 registers */
751 fetch_acc100_config(dev);
753 /* This isn't ideal because it reports the maximum number of queues but
754 * does not provide info on how many can be uplink/downlink or different
755 * priorities.
756 */
757 dev_info->max_num_queues =
758 d->acc100_conf.q_dl_5g.num_aqs_per_groups *
759 d->acc100_conf.q_dl_5g.num_qgroups +
760 d->acc100_conf.q_ul_5g.num_aqs_per_groups *
761 d->acc100_conf.q_ul_5g.num_qgroups +
762 d->acc100_conf.q_dl_4g.num_aqs_per_groups *
763 d->acc100_conf.q_dl_4g.num_qgroups +
764 d->acc100_conf.q_ul_4g.num_aqs_per_groups *
765 d->acc100_conf.q_ul_4g.num_qgroups;
766 dev_info->queue_size_lim = ACC100_MAX_QUEUE_DEPTH;
767 dev_info->hardware_accelerated = true;
768 dev_info->max_dl_queue_priority =
769 d->acc100_conf.q_dl_4g.num_qgroups - 1;
770 dev_info->max_ul_queue_priority =
771 d->acc100_conf.q_ul_4g.num_qgroups - 1;
772 dev_info->default_queue_conf = default_queue_conf;
773 dev_info->cpu_flag_reqs = NULL;
774 dev_info->min_alignment = 64;
775 dev_info->capabilities = bbdev_capabilities;
776 #ifdef ACC100_EXT_MEM
777 dev_info->harq_buffer_size = d->ddr_size;
779 dev_info->harq_buffer_size = 0;
783 static const struct rte_bbdev_ops acc100_bbdev_ops = {
784 .setup_queues = acc100_setup_queues,
785 .close = acc100_dev_close,
786 .info_get = acc100_dev_info_get,
787 .queue_setup = acc100_queue_setup,
788 .queue_release = acc100_queue_release,
791 /* ACC100 PCI PF address map */
792 static struct rte_pci_id pci_id_acc100_pf_map[] = {
794 RTE_PCI_DEVICE(RTE_ACC100_VENDOR_ID, RTE_ACC100_PF_DEVICE_ID)
799 /* ACC100 PCI VF address map */
800 static struct rte_pci_id pci_id_acc100_vf_map[] = {
802 RTE_PCI_DEVICE(RTE_ACC100_VENDOR_ID, RTE_ACC100_VF_DEVICE_ID)
807 /* Read flag value 0/1 from bitmap */
809 check_bit(uint32_t bitmap, uint32_t bitmask)
811 return bitmap & bitmask;
815 mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
817 if (unlikely(len > rte_pktmbuf_tailroom(m)))
820 char *tail = (char *)m->buf_addr + m->data_off + m->data_len;
821 m->data_len = (uint16_t)(m->data_len + len);
822 m_head->pkt_len = (m_head->pkt_len + len);
826 /* Fill in a frame control word for turbo encoding. */
828 acc100_fcw_te_fill(const struct rte_bbdev_enc_op *op, struct acc100_fcw_te *fcw)
830 fcw->code_block_mode = op->turbo_enc.code_block_mode;
831 if (fcw->code_block_mode == 0) { /* For TB mode */
832 fcw->k_neg = op->turbo_enc.tb_params.k_neg;
833 fcw->k_pos = op->turbo_enc.tb_params.k_pos;
834 fcw->c_neg = op->turbo_enc.tb_params.c_neg;
835 fcw->c = op->turbo_enc.tb_params.c;
836 fcw->ncb_neg = op->turbo_enc.tb_params.ncb_neg;
837 fcw->ncb_pos = op->turbo_enc.tb_params.ncb_pos;
839 if (check_bit(op->turbo_enc.op_flags,
840 RTE_BBDEV_TURBO_RATE_MATCH)) {
842 fcw->cab = op->turbo_enc.tb_params.cab;
843 fcw->ea = op->turbo_enc.tb_params.ea;
844 fcw->eb = op->turbo_enc.tb_params.eb;
846 /* E is set to the encoding output size when RM is bypassed */
850 fcw->cab = fcw->c_neg;
851 fcw->ea = 3 * fcw->k_neg + 12;
852 fcw->eb = 3 * fcw->k_pos + 12;
854 } else { /* For CB mode */
855 fcw->k_pos = op->turbo_enc.cb_params.k;
856 fcw->ncb_pos = op->turbo_enc.cb_params.ncb;
858 if (check_bit(op->turbo_enc.op_flags,
859 RTE_BBDEV_TURBO_RATE_MATCH)) {
861 fcw->eb = op->turbo_enc.cb_params.e;
863 /* E is set to the encoding output size when RM is bypassed */
867 fcw->eb = 3 * fcw->k_pos + 12;
871 fcw->bypass_rv_idx1 = check_bit(op->turbo_enc.op_flags,
872 RTE_BBDEV_TURBO_RV_INDEX_BYPASS);
873 fcw->code_block_crc = check_bit(op->turbo_enc.op_flags,
874 RTE_BBDEV_TURBO_CRC_24B_ATTACH);
875 fcw->rv_idx1 = op->turbo_enc.rv_index;
878 /* Compute value of k0.
879 * Based on 3GPP 38.212 Table 5.4.2.1-2
880 * Starting position of different redundancy versions, k0
882 static inline uint16_t
883 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
887 uint16_t n = (bg == 1 ? ACC100_N_ZC_1 : ACC100_N_ZC_2) * z_c;
890 return (bg == 1 ? ACC100_K0_1_1 : ACC100_K0_1_2) * z_c;
891 else if (rv_index == 2)
892 return (bg == 1 ? ACC100_K0_2_1 : ACC100_K0_2_2) * z_c;
894 return (bg == 1 ? ACC100_K0_3_1 : ACC100_K0_3_2) * z_c;
896 /* LBRM case - includes a division by N */
898 return (((bg == 1 ? ACC100_K0_1_1 : ACC100_K0_1_2) * n_cb)
900 else if (rv_index == 2)
901 return (((bg == 1 ? ACC100_K0_2_1 : ACC100_K0_2_2) * n_cb)
904 return (((bg == 1 ? ACC100_K0_3_1 : ACC100_K0_3_2) * n_cb)
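/* Worked example (assuming the ACC100_K0_x_y constants carry the
 * numerators from Table 5.4.2.1-2): for BG1 with Zc = 224 and a full
 * circular buffer (n_cb == N = 66 * 224 = 14784), rv_index 1 returns
 * k0 = ACC100_K0_1_1 * Zc directly; only the limited-buffer case
 * (n_cb < n) needs the division by n above.
 */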
908 /* Fill in a frame control word for LDPC encoding. */
910 acc100_fcw_le_fill(const struct rte_bbdev_enc_op *op,
911 struct acc100_fcw_le *fcw, int num_cb)
913 fcw->qm = op->ldpc_enc.q_m;
914 fcw->nfiller = op->ldpc_enc.n_filler;
915 fcw->BG = (op->ldpc_enc.basegraph - 1);
916 fcw->Zc = op->ldpc_enc.z_c;
917 fcw->ncb = op->ldpc_enc.n_cb;
918 fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_enc.basegraph,
919 op->ldpc_enc.rv_index);
920 fcw->rm_e = op->ldpc_enc.cb_params.e;
921 fcw->crc_select = check_bit(op->ldpc_enc.op_flags,
922 RTE_BBDEV_LDPC_CRC_24B_ATTACH);
923 fcw->bypass_intlv = check_bit(op->ldpc_enc.op_flags,
924 RTE_BBDEV_LDPC_INTERLEAVER_BYPASS);
925 fcw->mcb_count = num_cb;
928 /* Fill in a frame control word for turbo decoding. */
930 acc100_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct acc100_fcw_td *fcw)
932 /* Note : Early termination is always enabled for 4GUL */
934 if (op->turbo_dec.code_block_mode == 0)
935 fcw->k_pos = op->turbo_dec.tb_params.k_pos;
937 fcw->k_pos = op->turbo_dec.cb_params.k;
938 fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
939 RTE_BBDEV_TURBO_CRC_TYPE_24B);
940 fcw->bypass_sb_deint = 0;
941 fcw->raw_decoder_input_on = 0;
942 fcw->max_iter = op->turbo_dec.iter_max;
943 fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
944 RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
947 /* Fill in a frame control word for LDPC decoding. */
949 acc100_fcw_ld_fill(const struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
950 union acc100_harq_layout_data *harq_layout)
952 uint16_t harq_out_length, harq_in_length, ncb_p, k0_p, parity_offset;
955 bool harq_prun = false;
957 fcw->qm = op->ldpc_dec.q_m;
958 fcw->nfiller = op->ldpc_dec.n_filler;
959 fcw->BG = (op->ldpc_dec.basegraph - 1);
960 fcw->Zc = op->ldpc_dec.z_c;
961 fcw->ncb = op->ldpc_dec.n_cb;
962 fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_dec.basegraph,
963 op->ldpc_dec.rv_index);
964 if (op->ldpc_dec.code_block_mode == 1)
965 fcw->rm_e = op->ldpc_dec.cb_params.e;
967 fcw->rm_e = (op->ldpc_dec.tb_params.r <
968 op->ldpc_dec.tb_params.cab) ?
969 op->ldpc_dec.tb_params.ea :
970 op->ldpc_dec.tb_params.eb;
972 fcw->hcin_en = check_bit(op->ldpc_dec.op_flags,
973 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
974 fcw->hcout_en = check_bit(op->ldpc_dec.op_flags,
975 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
976 fcw->crc_select = check_bit(op->ldpc_dec.op_flags,
977 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK);
978 fcw->bypass_dec = check_bit(op->ldpc_dec.op_flags,
979 RTE_BBDEV_LDPC_DECODE_BYPASS);
980 fcw->bypass_intlv = check_bit(op->ldpc_dec.op_flags,
981 RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS);
982 if (op->ldpc_dec.q_m == 1) {
983 fcw->bypass_intlv = 1;
986 fcw->hcin_decomp_mode = check_bit(op->ldpc_dec.op_flags,
987 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION);
988 fcw->hcout_comp_mode = check_bit(op->ldpc_dec.op_flags,
989 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION);
990 fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags,
991 RTE_BBDEV_LDPC_LLR_COMPRESSION);
992 harq_index = op->ldpc_dec.harq_combined_output.offset /
994 #ifdef ACC100_EXT_MEM
995 /* Limit cases when HARQ pruning is valid */
996 harq_prun = ((op->ldpc_dec.harq_combined_output.offset %
997 ACC100_HARQ_OFFSET) == 0) &&
998 (op->ldpc_dec.harq_combined_output.offset <= UINT16_MAX
999 * ACC100_HARQ_OFFSET);
1001 if (fcw->hcin_en > 0) {
1002 harq_in_length = op->ldpc_dec.harq_combined_input.length;
1003 if (fcw->hcin_decomp_mode > 0)
1004 harq_in_length = harq_in_length * 8 / 6;
1005 harq_in_length = RTE_ALIGN(harq_in_length, 64);
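/* With 6-bit HARQ compression each LLR occupies 6 bits rather than a
 * byte, so the byte count is scaled by 8/6 to recover the LLR count
 * before rounding up to a 64-LLR granularity (assumption based on the
 * matching 8/6 and 6/8 scalings used throughout this file).
 */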
1006 if ((harq_layout[harq_index].offset > 0) && harq_prun) {
1007 rte_bbdev_log_debug("HARQ IN offset unexpected for now");
1008 fcw->hcin_size0 = harq_layout[harq_index].size0;
1009 fcw->hcin_offset = harq_layout[harq_index].offset;
1010 fcw->hcin_size1 = harq_in_length -
1011 harq_layout[harq_index].offset;
1013 fcw->hcin_size0 = harq_in_length;
1014 fcw->hcin_offset = 0;
1015 fcw->hcin_size1 = 0;
1018 fcw->hcin_size0 = 0;
1019 fcw->hcin_offset = 0;
1020 fcw->hcin_size1 = 0;
1023 fcw->itmax = op->ldpc_dec.iter_max;
1024 fcw->itstop = check_bit(op->ldpc_dec.op_flags,
1025 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
1026 fcw->synd_precoder = fcw->itstop;
1028 * These are all implicitly set
1029 * fcw->synd_post = 0;
1031 * fcw->so_bypass_rm = 0;
1032 * fcw->so_bypass_intlv = 0;
1033 * fcw->dec_convllr = 0;
1034 * fcw->hcout_convllr = 0;
1035 * fcw->hcout_size1 = 0;
1037 * fcw->hcout_offset = 0;
1038 * fcw->negstop_th = 0;
1039 * fcw->negstop_it = 0;
1040 * fcw->negstop_en = 0;
1044 if (fcw->hcout_en > 0) {
1045 parity_offset = (op->ldpc_dec.basegraph == 1 ? 20 : 8)
1046 * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
1047 k0_p = (fcw->k0 > parity_offset) ?
1048 fcw->k0 - op->ldpc_dec.n_filler : fcw->k0;
1049 ncb_p = fcw->ncb - op->ldpc_dec.n_filler;
1050 l = k0_p + fcw->rm_e;
1051 harq_out_length = (uint16_t) fcw->hcin_size0;
1052 harq_out_length = RTE_MIN(RTE_MAX(harq_out_length, l), ncb_p);
1053 harq_out_length = (harq_out_length + 0x3F) & 0xFFC0;
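/* Worked example: the pair of masks above rounds up to a multiple of
 * 64; e.g. harq_out_length = 1000 becomes (1000 + 0x3F) & 0xFFC0 =
 * 1024, since 0xFFC0 clears the six low-order bits of a 16-bit value.
 */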
1054 if ((k0_p > fcw->hcin_size0 + ACC100_HARQ_OFFSET_THRESHOLD) &&
1056 fcw->hcout_size0 = (uint16_t) fcw->hcin_size0;
1057 fcw->hcout_offset = k0_p & 0xFFC0;
1058 fcw->hcout_size1 = harq_out_length - fcw->hcout_offset;
1060 fcw->hcout_size0 = harq_out_length;
1061 fcw->hcout_size1 = 0;
1062 fcw->hcout_offset = 0;
1064 harq_layout[harq_index].offset = fcw->hcout_offset;
1065 harq_layout[harq_index].size0 = fcw->hcout_size0;
1067 fcw->hcout_size0 = 0;
1068 fcw->hcout_size1 = 0;
1069 fcw->hcout_offset = 0;
1074 * Fills descriptor with data pointers of one block type.
1077 * Pointer to DMA descriptor.
1079 * Pointer to pointer to input data which will be encoded. It can be changed
1080 * and points to next segment in scatter-gather case.
1082 * Input offset in rte_mbuf structure. It is used for calculating the point
1083 * where data is starting.
1085 * Length of currently processed Code Block
1086 * @param seg_total_left
1087 * It indicates how many bytes are still left in the segment (mbuf) for further processing.
1090 * Store information about device capabilities
1091 * @param next_triplet
1092 * Index for ACC100 DMA Descriptor triplet
1095 * Returns index of next triplet on success, other value if lengths of
1096 * pkt and processed cb do not match.
1100 acc100_dma_fill_blk_type_in(struct acc100_dma_req_desc *desc,
1101 struct rte_mbuf **input, uint32_t *offset, uint32_t cb_len,
1102 uint32_t *seg_total_left, int next_triplet)
1105 struct rte_mbuf *m = *input;
1107 part_len = (*seg_total_left < cb_len) ? *seg_total_left : cb_len;
1109 *seg_total_left -= part_len;
1111 desc->data_ptrs[next_triplet].address =
1112 rte_pktmbuf_iova_offset(m, *offset);
1113 desc->data_ptrs[next_triplet].blen = part_len;
1114 desc->data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_IN;
1115 desc->data_ptrs[next_triplet].last = 0;
1116 desc->data_ptrs[next_triplet].dma_ext = 0;
1117 *offset += part_len;
1120 while (cb_len > 0) {
1121 if (next_triplet < ACC100_DMA_MAX_NUM_POINTERS &&
1125 *seg_total_left = rte_pktmbuf_data_len(m);
1126 part_len = (*seg_total_left < cb_len) ?
1129 desc->data_ptrs[next_triplet].address =
1130 rte_pktmbuf_iova_offset(m, 0);
1131 desc->data_ptrs[next_triplet].blen = part_len;
1132 desc->data_ptrs[next_triplet].blkid =
1133 ACC100_DMA_BLKID_IN;
1134 desc->data_ptrs[next_triplet].last = 0;
1135 desc->data_ptrs[next_triplet].dma_ext = 0;
1137 *seg_total_left -= part_len;
1138 /* Initializing offset for next segment (mbuf) */
1143 "Some data still left for processing: "
1144 "data_left: %u, next_triplet: %u, next_mbuf: %p",
1145 cb_len, next_triplet, m->next);
1149 /* Storing new mbuf as it could be changed in scatter-gather case */
1152 return next_triplet;
1155 /* Fills descriptor with data pointers of one block type.
1156 * Returns index of next triplet on success, other value if lengths of
1157 * output data and processed mbuf do not match.
1160 acc100_dma_fill_blk_type_out(struct acc100_dma_req_desc *desc,
1161 struct rte_mbuf *output, uint32_t out_offset,
1162 uint32_t output_len, int next_triplet, int blk_id)
1164 desc->data_ptrs[next_triplet].address =
1165 rte_pktmbuf_iova_offset(output, out_offset);
1166 desc->data_ptrs[next_triplet].blen = output_len;
1167 desc->data_ptrs[next_triplet].blkid = blk_id;
1168 desc->data_ptrs[next_triplet].last = 0;
1169 desc->data_ptrs[next_triplet].dma_ext = 0;
1172 return next_triplet;
1176 acc100_header_init(struct acc100_dma_req_desc *desc)
1178 desc->word0 = ACC100_DMA_DESC_TYPE;
1179 desc->word1 = 0; /**< Timestamp could be disabled */
1185 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1186 /* Check if any input data is unexpectedly left for processing */
1188 check_mbuf_total_left(uint32_t mbuf_total_left)
1190 if (mbuf_total_left == 0)
1193 "Some date still left for processing: mbuf_total_left = %u",
1200 acc100_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
1201 struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
1202 struct rte_mbuf *output, uint32_t *in_offset,
1203 uint32_t *out_offset, uint32_t *out_length,
1204 uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t r)
1206 int next_triplet = 1; /* FCW already done */
1207 uint32_t e, ea, eb, length;
1208 uint16_t k, k_neg, k_pos;
1211 desc->word0 = ACC100_DMA_DESC_TYPE;
1212 desc->word1 = 0; /**< Timestamp could be disabled */
1217 if (op->turbo_enc.code_block_mode == 0) {
1218 ea = op->turbo_enc.tb_params.ea;
1219 eb = op->turbo_enc.tb_params.eb;
1220 cab = op->turbo_enc.tb_params.cab;
1221 k_neg = op->turbo_enc.tb_params.k_neg;
1222 k_pos = op->turbo_enc.tb_params.k_pos;
1223 c_neg = op->turbo_enc.tb_params.c_neg;
1224 e = (r < cab) ? ea : eb;
1225 k = (r < c_neg) ? k_neg : k_pos;
1227 e = op->turbo_enc.cb_params.e;
1228 k = op->turbo_enc.cb_params.k;
1231 if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
1232 length = (k - 24) >> 3;
1236 if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < length))) {
1238 "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
1239 *mbuf_total_left, length);
1243 next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
1244 length, seg_total_left, next_triplet);
1245 if (unlikely(next_triplet < 0)) {
1247 "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1251 desc->data_ptrs[next_triplet - 1].last = 1;
1252 desc->m2dlen = next_triplet;
1253 *mbuf_total_left -= length;
1255 /* Set output length */
1256 if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_RATE_MATCH))
1257 /* Integer round up division by 8 */
1258 *out_length = (e + 7) >> 3;
1260 *out_length = (k >> 3) * 3 + 2;
1262 next_triplet = acc100_dma_fill_blk_type_out(desc, output, *out_offset,
1263 *out_length, next_triplet, ACC100_DMA_BLKID_OUT_ENC);
1264 if (unlikely(next_triplet < 0)) {
1266 "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1270 op->turbo_enc.output.length += *out_length;
1271 *out_offset += *out_length;
1272 desc->data_ptrs[next_triplet - 1].last = 1;
1273 desc->d2mlen = next_triplet - desc->m2dlen;
1281 acc100_dma_desc_le_fill(struct rte_bbdev_enc_op *op,
1282 struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
1283 struct rte_mbuf *output, uint32_t *in_offset,
1284 uint32_t *out_offset, uint32_t *out_length,
1285 uint32_t *mbuf_total_left, uint32_t *seg_total_left)
1287 int next_triplet = 1; /* FCW already done */
1288 uint16_t K, in_length_in_bits, in_length_in_bytes;
1289 struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
1291 acc100_header_init(desc);
1293 K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
1294 in_length_in_bits = K - enc->n_filler;
1295 if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) ||
1296 (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH))
1297 in_length_in_bits -= 24;
1298 in_length_in_bytes = in_length_in_bits >> 3;
1300 if (unlikely((*mbuf_total_left == 0) ||
1301 (*mbuf_total_left < in_length_in_bytes))) {
1303 "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
1304 *mbuf_total_left, in_length_in_bytes);
1308 next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
1310 seg_total_left, next_triplet);
1311 if (unlikely(next_triplet < 0)) {
1313 "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1317 desc->data_ptrs[next_triplet - 1].last = 1;
1318 desc->m2dlen = next_triplet;
1319 *mbuf_total_left -= in_length_in_bytes;
1321 /* Set output length */
1322 /* Integer round up division by 8 */
1323 *out_length = (enc->cb_params.e + 7) >> 3;
1325 next_triplet = acc100_dma_fill_blk_type_out(desc, output, *out_offset,
1326 *out_length, next_triplet, ACC100_DMA_BLKID_OUT_ENC);
1327 op->ldpc_enc.output.length += *out_length;
1328 *out_offset += *out_length;
1329 desc->data_ptrs[next_triplet - 1].last = 1;
1330 desc->data_ptrs[next_triplet - 1].dma_ext = 0;
1331 desc->d2mlen = next_triplet - desc->m2dlen;
1339 acc100_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
1340 struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
1341 struct rte_mbuf *h_output, struct rte_mbuf *s_output,
1342 uint32_t *in_offset, uint32_t *h_out_offset,
1343 uint32_t *s_out_offset, uint32_t *h_out_length,
1344 uint32_t *s_out_length, uint32_t *mbuf_total_left,
1345 uint32_t *seg_total_left, uint8_t r)
1347 int next_triplet = 1; /* FCW already done */
1349 uint16_t crc24_overlap = 0;
1352 desc->word0 = ACC100_DMA_DESC_TYPE;
1353 desc->word1 = 0; /**< Timestamp could be disabled */
1358 if (op->turbo_dec.code_block_mode == 0) {
1359 k = (r < op->turbo_dec.tb_params.c_neg)
1360 ? op->turbo_dec.tb_params.k_neg
1361 : op->turbo_dec.tb_params.k_pos;
1362 e = (r < op->turbo_dec.tb_params.cab)
1363 ? op->turbo_dec.tb_params.ea
1364 : op->turbo_dec.tb_params.eb;
1366 k = op->turbo_dec.cb_params.k;
1367 e = op->turbo_dec.cb_params.e;
1370 if ((op->turbo_dec.code_block_mode == 0)
1371 && !check_bit(op->turbo_dec.op_flags,
1372 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
1375 /* Calculates circular buffer size.
1376 * According to 3gpp 36.212 section 5.1.4.2:
1377 * Kw = 3 * Kpi, where Kpi = nCol * nRow,
1380 * where nCol is 32 and nRow can be calculated from:
1381 * nRow = ceil(D / nCol),
1382 * where D is the size of each output from turbo encoder block (k + 4).
1383 */
1384 kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
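/* Worked example: for k = 6144, D = k + 4 = 6148, which rounds up to
 * 193 * 32 = 6176, giving kw = 3 * 6176 = 18528 input LLR bytes.
 */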
1386 if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
1388 "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
1389 *mbuf_total_left, kw);
1393 next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset, kw,
1394 seg_total_left, next_triplet);
1395 if (unlikely(next_triplet < 0)) {
1397 "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1401 desc->data_ptrs[next_triplet - 1].last = 1;
1402 desc->m2dlen = next_triplet;
1403 *mbuf_total_left -= kw;
1405 next_triplet = acc100_dma_fill_blk_type_out(
1406 desc, h_output, *h_out_offset,
1407 k >> 3, next_triplet, ACC100_DMA_BLKID_OUT_HARD);
1408 if (unlikely(next_triplet < 0)) {
1410 "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1415 *h_out_length = ((k - crc24_overlap) >> 3);
1416 op->turbo_dec.hard_output.length += *h_out_length;
1417 *h_out_offset += *h_out_length;
1420 if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
1421 if (check_bit(op->turbo_dec.op_flags,
1422 RTE_BBDEV_TURBO_EQUALIZER))
1425 *s_out_length = (k * 3) + 12;
1427 next_triplet = acc100_dma_fill_blk_type_out(desc, s_output,
1428 *s_out_offset, *s_out_length, next_triplet,
1429 ACC100_DMA_BLKID_OUT_SOFT);
1430 if (unlikely(next_triplet < 0)) {
1432 "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1437 op->turbo_dec.soft_output.length += *s_out_length;
1438 *s_out_offset += *s_out_length;
1441 desc->data_ptrs[next_triplet - 1].last = 1;
1442 desc->d2mlen = next_triplet - desc->m2dlen;
1450 acc100_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
1451 struct acc100_dma_req_desc *desc,
1452 struct rte_mbuf **input, struct rte_mbuf *h_output,
1453 uint32_t *in_offset, uint32_t *h_out_offset,
1454 uint32_t *h_out_length, uint32_t *mbuf_total_left,
1455 uint32_t *seg_total_left,
1456 struct acc100_fcw_ld *fcw)
1458 struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
1459 int next_triplet = 1; /* FCW already done */
1460 uint32_t input_length;
1461 uint16_t output_length, crc24_overlap = 0;
1462 uint16_t sys_cols, K, h_p_size, h_np_size;
1463 bool h_comp = check_bit(dec->op_flags,
1464 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION);
1466 acc100_header_init(desc);
1468 if (check_bit(op->ldpc_dec.op_flags,
1469 RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
1472 /* Compute some LDPC BG lengths */
1473 input_length = dec->cb_params.e;
1474 if (check_bit(op->ldpc_dec.op_flags,
1475 RTE_BBDEV_LDPC_LLR_COMPRESSION))
1476 input_length = (input_length * 3 + 3) / 4;
1477 sys_cols = (dec->basegraph == 1) ? 22 : 10;
1478 K = sys_cols * dec->z_c;
1479 output_length = K - dec->n_filler - crc24_overlap;
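/* Worked example: for BG1 with z_c = 384, K = 22 * 384 = 8448 bits;
 * with n_filler = 0 and the CRC24B dropped (crc24_overlap = 24) the
 * hard output is 8424 bits, i.e. 1053 bytes after the >> 3 applied
 * below.
 */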
1481 if (unlikely((*mbuf_total_left == 0) ||
1482 (*mbuf_total_left < input_length))) {
1484 "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
1485 *mbuf_total_left, input_length);
1489 next_triplet = acc100_dma_fill_blk_type_in(desc, input,
1490 in_offset, input_length,
1491 seg_total_left, next_triplet);
1493 if (unlikely(next_triplet < 0)) {
1495 "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1500 if (check_bit(op->ldpc_dec.op_flags,
1501 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
1502 h_p_size = fcw->hcin_size0 + fcw->hcin_size1;
1504 h_p_size = (h_p_size * 3 + 3) / 4;
1505 desc->data_ptrs[next_triplet].address =
1506 dec->harq_combined_input.offset;
1507 desc->data_ptrs[next_triplet].blen = h_p_size;
1508 desc->data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_IN_HARQ;
1509 desc->data_ptrs[next_triplet].dma_ext = 1;
1510 #ifndef ACC100_EXT_MEM
1511 acc100_dma_fill_blk_type_out(
1513 op->ldpc_dec.harq_combined_input.data,
1514 op->ldpc_dec.harq_combined_input.offset,
1517 ACC100_DMA_BLKID_IN_HARQ);
1522 desc->data_ptrs[next_triplet - 1].last = 1;
1523 desc->m2dlen = next_triplet;
1524 *mbuf_total_left -= input_length;
1526 next_triplet = acc100_dma_fill_blk_type_out(desc, h_output,
1527 *h_out_offset, output_length >> 3, next_triplet,
1528 ACC100_DMA_BLKID_OUT_HARD);
1530 if (check_bit(op->ldpc_dec.op_flags,
1531 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
1532 /* Pruned size of the HARQ */
1533 h_p_size = fcw->hcout_size0 + fcw->hcout_size1;
1534 /* Non-Pruned size of the HARQ */
1535 h_np_size = fcw->hcout_offset > 0 ?
1536 fcw->hcout_offset + fcw->hcout_size1 :
1539 h_np_size = (h_np_size * 3 + 3) / 4;
1540 h_p_size = (h_p_size * 3 + 3) / 4;
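/* As on the input side, 6-bit compression packs eight LLR bytes into
 * six, hence the * 3 / 4 scaling of both the pruned and non-pruned
 * HARQ output sizes when h_comp is set.
 */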
1542 dec->harq_combined_output.length = h_np_size;
1543 desc->data_ptrs[next_triplet].address =
1544 dec->harq_combined_output.offset;
1545 desc->data_ptrs[next_triplet].blen = h_p_size;
1546 desc->data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_OUT_HARQ;
1547 desc->data_ptrs[next_triplet].dma_ext = 1;
1548 #ifndef ACC100_EXT_MEM
1549 acc100_dma_fill_blk_type_out(
1551 dec->harq_combined_output.data,
1552 dec->harq_combined_output.offset,
1555 ACC100_DMA_BLKID_OUT_HARQ);
1560 *h_out_length = output_length >> 3;
1561 dec->hard_output.length += *h_out_length;
1562 *h_out_offset += *h_out_length;
1563 desc->data_ptrs[next_triplet - 1].last = 1;
1564 desc->d2mlen = next_triplet - desc->m2dlen;
1572 acc100_dma_desc_ld_update(struct rte_bbdev_dec_op *op,
1573 struct acc100_dma_req_desc *desc,
1574 struct rte_mbuf *input, struct rte_mbuf *h_output,
1575 uint32_t *in_offset, uint32_t *h_out_offset,
1576 uint32_t *h_out_length,
1577 union acc100_harq_layout_data *harq_layout)
1579 int next_triplet = 1; /* FCW already done */
1580 desc->data_ptrs[next_triplet].address =
1581 rte_pktmbuf_iova_offset(input, *in_offset);
1584 if (check_bit(op->ldpc_dec.op_flags,
1585 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
1586 struct rte_bbdev_op_data hi = op->ldpc_dec.harq_combined_input;
1587 desc->data_ptrs[next_triplet].address = hi.offset;
1588 #ifndef ACC100_EXT_MEM
1589 desc->data_ptrs[next_triplet].address =
1590 rte_pktmbuf_iova_offset(hi.data, hi.offset);
1595 desc->data_ptrs[next_triplet].address =
1596 rte_pktmbuf_iova_offset(h_output, *h_out_offset);
1597 *h_out_length = desc->data_ptrs[next_triplet].blen;
1600 if (check_bit(op->ldpc_dec.op_flags,
1601 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
1602 desc->data_ptrs[next_triplet].address =
1603 op->ldpc_dec.harq_combined_output.offset;
1604 /* Adjust based on previous operation */
1605 struct rte_bbdev_dec_op *prev_op = desc->op_addr;
1606 op->ldpc_dec.harq_combined_output.length =
1607 prev_op->ldpc_dec.harq_combined_output.length;
1608 int16_t hq_idx = op->ldpc_dec.harq_combined_output.offset /
1610 int16_t prev_hq_idx =
1611 prev_op->ldpc_dec.harq_combined_output.offset
1612 / ACC100_HARQ_OFFSET;
1613 harq_layout[hq_idx].val = harq_layout[prev_hq_idx].val;
1614 #ifndef ACC100_EXT_MEM
1615 struct rte_bbdev_op_data ho =
1616 op->ldpc_dec.harq_combined_output;
1617 desc->data_ptrs[next_triplet].address =
1618 rte_pktmbuf_iova_offset(ho.data, ho.offset);
1623 op->ldpc_dec.hard_output.length += *h_out_length;
1628 /* Enqueue a number of operations to HW and update software rings */
1630 acc100_dma_enqueue(struct acc100_queue *q, uint16_t n,
1631 struct rte_bbdev_stats *queue_stats)
1633 union acc100_enqueue_reg_fmt enq_req;
1634 #ifdef RTE_BBDEV_OFFLOAD_COST
1635 uint64_t start_time = 0;
1636 queue_stats->acc_offload_cycles = 0;
1638 RTE_SET_USED(queue_stats);
1642 /* Set the address offset field: 0b100 (4) for 256-byte DMA descriptors */
1643 enq_req.addr_offset = ACC100_DESC_OFFSET;
1645 /* Split ops into batches */
1647 union acc100_dma_desc *desc;
1648 uint16_t enq_batch_size;
1650 rte_iova_t req_elem_addr;
1652 enq_batch_size = RTE_MIN(n, MAX_ENQ_BATCH_SIZE);
1654 /* Set flag on last descriptor in a batch */
1655 desc = q->ring_addr + ((q->sw_ring_head + enq_batch_size - 1) &
1656 q->sw_ring_wrap_mask);
1657 desc->req.last_desc_in_batch = 1;
1659 /* Calculate the 1st descriptor's address */
1660 offset = ((q->sw_ring_head & q->sw_ring_wrap_mask) *
1661 sizeof(union acc100_dma_desc));
1662 req_elem_addr = q->ring_addr_iova + offset;
1664 /* Fill enqueue struct */
1665 enq_req.num_elem = enq_batch_size;
1666 /* low 6 bits are not needed */
1667 enq_req.req_elem_addr = (uint32_t)(req_elem_addr >> 6);
1669 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1670 rte_memdump(stderr, "Req sdone", desc, sizeof(*desc));
1672 rte_bbdev_log_debug(
1673 "Enqueue %u reqs (phys %#"PRIx64") to reg %p",
1676 (void *)q->mmio_reg_enqueue);
1680 #ifdef RTE_BBDEV_OFFLOAD_COST
1681 /* Start time measurement for enqueue function offload. */
1682 start_time = rte_rdtsc_precise();
1684 rte_bbdev_log(DEBUG, "Debug : MMIO Enqueue");
1685 mmio_write(q->mmio_reg_enqueue, enq_req.val);
1687 #ifdef RTE_BBDEV_OFFLOAD_COST
1688 queue_stats->acc_offload_cycles +=
1689 rte_rdtsc_precise() - start_time;
1693 q->sw_ring_head += enq_batch_size;
1694 n -= enq_batch_size;
1701 /* Enqueue one encode operation for ACC100 device in CB mode */
1703 enqueue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
1704 uint16_t total_enqueued_cbs)
1706 union acc100_dma_desc *desc = NULL;
1708 uint32_t in_offset, out_offset, out_length, mbuf_total_left,
1710 struct rte_mbuf *input, *output_head, *output;
1712 uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
1713 & q->sw_ring_wrap_mask);
1714 desc = q->ring_addr + desc_idx;
1715 acc100_fcw_te_fill(op, &desc->req.fcw_te);
1717 input = op->turbo_enc.input.data;
1718 output_head = output = op->turbo_enc.output.data;
1719 in_offset = op->turbo_enc.input.offset;
1720 out_offset = op->turbo_enc.output.offset;
1722 mbuf_total_left = op->turbo_enc.input.length;
1723 seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
1726 ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
1727 &in_offset, &out_offset, &out_length, &mbuf_total_left,
1728 &seg_total_left, 0);
1730 if (unlikely(ret < 0))
1733 mbuf_append(output_head, output, out_length);
1735 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1736 rte_memdump(stderr, "FCW", &desc->req.fcw_te,
1737 sizeof(desc->req.fcw_te) - 8);
1738 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
1739 if (check_mbuf_total_left(mbuf_total_left) != 0)
1742 /* One CB (one op) was successfully prepared to enqueue */
1746 /* Enqueue multiple LDPC encode operations for ACC100 device in CB mode */
1748 enqueue_ldpc_enc_n_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ops,
1749 uint16_t total_enqueued_cbs, int16_t num)
1751 union acc100_dma_desc *desc = NULL;
1752 uint32_t out_length;
1753 struct rte_mbuf *output_head, *output;
1754 int i, next_triplet;
1755 uint16_t in_length_in_bytes;
1756 struct rte_bbdev_op_ldpc_enc *enc = &ops[0]->ldpc_enc;
1758 uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
1759 & q->sw_ring_wrap_mask);
1760 desc = q->ring_addr + desc_idx;
1761 acc100_fcw_le_fill(ops[0], &desc->req.fcw_le, num);
1763 /* This could be done at polling time */
1764 acc100_header_init(&desc->req);
1765 desc->req.numCBs = num;
1767 in_length_in_bytes = ops[0]->ldpc_enc.input.data->data_len;
1768 out_length = (enc->cb_params.e + 7) >> 3;
1769 desc->req.m2dlen = 1 + num;
1770 desc->req.d2mlen = num;
1773 for (i = 0; i < num; i++) {
1774 desc->req.data_ptrs[next_triplet].address =
1775 rte_pktmbuf_iova_offset(ops[i]->ldpc_enc.input.data, 0);
1776 desc->req.data_ptrs[next_triplet].blen = in_length_in_bytes;
1778 desc->req.data_ptrs[next_triplet].address =
1779 rte_pktmbuf_iova_offset(
1780 ops[i]->ldpc_enc.output.data, 0);
1781 desc->req.data_ptrs[next_triplet].blen = out_length;
1783 ops[i]->ldpc_enc.output.length = out_length;
1784 output_head = output = ops[i]->ldpc_enc.output.data;
1785 mbuf_append(output_head, output, out_length);
1786 output->data_len = out_length;
1789 desc->req.op_addr = ops[0];
1791 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1792 rte_memdump(stderr, "FCW", &desc->req.fcw_le,
1793 sizeof(desc->req.fcw_le) - 8);
1794 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
1797 /* Multiple CBs (ops) were successfully prepared to enqueue */
1801 /* Enqueue one LDPC encode operation for ACC100 device in CB mode */
1803 enqueue_ldpc_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
1804 uint16_t total_enqueued_cbs)
1806 union acc100_dma_desc *desc = NULL;
1808 uint32_t in_offset, out_offset, out_length, mbuf_total_left,
1810 struct rte_mbuf *input, *output_head, *output;
1812 uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
1813 & q->sw_ring_wrap_mask);
1814 desc = q->ring_addr + desc_idx;
1815 acc100_fcw_le_fill(op, &desc->req.fcw_le, 1);
1817 input = op->ldpc_enc.input.data;
1818 output_head = output = op->ldpc_enc.output.data;
1819 in_offset = op->ldpc_enc.input.offset;
1820 out_offset = op->ldpc_enc.output.offset;
1822 mbuf_total_left = op->ldpc_enc.input.length;
1823 seg_total_left = rte_pktmbuf_data_len(op->ldpc_enc.input.data)
1826 ret = acc100_dma_desc_le_fill(op, &desc->req, &input, output,
1827 &in_offset, &out_offset, &out_length, &mbuf_total_left,
1830 if (unlikely(ret < 0))
1833 mbuf_append(output_head, output, out_length);
1835 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1836 rte_memdump(stderr, "FCW", &desc->req.fcw_le,
1837 sizeof(desc->req.fcw_le) - 8);
1838 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
1840 if (check_mbuf_total_left(mbuf_total_left) != 0)
1843 /* One CB (one op) was successfully prepared to enqueue */
1848 /* Enqueue one encode operation for ACC100 device in TB mode. */
1850 enqueue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
1851 uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
1853 union acc100_dma_desc *desc = NULL;
1856 uint32_t in_offset, out_offset, out_length, mbuf_total_left,
1858 struct rte_mbuf *input, *output_head, *output;
1859 uint16_t current_enqueued_cbs = 0;
1861 uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
1862 & q->sw_ring_wrap_mask);
1863 desc = q->ring_addr + desc_idx;
1864 uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
1865 acc100_fcw_te_fill(op, &desc->req.fcw_te);
1867 input = op->turbo_enc.input.data;
1868 output_head = output = op->turbo_enc.output.data;
1869 in_offset = op->turbo_enc.input.offset;
1870 out_offset = op->turbo_enc.output.offset;
1872 mbuf_total_left = op->turbo_enc.input.length;
1874 c = op->turbo_enc.tb_params.c;
1875 r = op->turbo_enc.tb_params.r;
1877 while (mbuf_total_left > 0 && r < c) {
1878 seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
1879 /* Set up DMA descriptor */
1880 desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
1881 & q->sw_ring_wrap_mask);
1882 desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
1883 desc->req.data_ptrs[0].blen = ACC100_FCW_TE_BLEN;
1885 ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
1886 &in_offset, &out_offset, &out_length,
1887 &mbuf_total_left, &seg_total_left, r);
1888 if (unlikely(ret < 0))
1890 mbuf_append(output_head, output, out_length);
1892 /* Set total number of CBs in TB */
1893 desc->req.cbs_in_tb = cbs_in_tb;
1894 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1895 rte_memdump(stderr, "FCW", &desc->req.fcw_te,
1896 sizeof(desc->req.fcw_te) - 8);
1897 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
1900 if (seg_total_left == 0) {
1901 /* Go to the next mbuf */
1902 input = input->next;
1904 output = output->next;
1908 total_enqueued_cbs++;
1909 current_enqueued_cbs++;
1913 if (unlikely(desc == NULL))
1914 return current_enqueued_cbs;
1916 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1917 if (check_mbuf_total_left(mbuf_total_left) != 0)
1921 /* Set SDone on last CB descriptor for TB mode. */
1922 desc->req.sdone_enable = 1;
1923 desc->req.irq_enable = q->irq_enable;
1925 return current_enqueued_cbs;
1928 /* Enqueue one decode operation for ACC100 device in CB mode */
1930 enqueue_dec_one_op_cb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
1931 uint16_t total_enqueued_cbs)
1933 union acc100_dma_desc *desc = NULL;
1935 uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
1936 h_out_length, mbuf_total_left, seg_total_left;
1937 struct rte_mbuf *input, *h_output_head, *h_output,
1938 *s_output_head, *s_output;
1940 uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
1941 & q->sw_ring_wrap_mask);
1942 desc = q->ring_addr + desc_idx;
1943 acc100_fcw_td_fill(op, &desc->req.fcw_td);
1945 input = op->turbo_dec.input.data;
1946 h_output_head = h_output = op->turbo_dec.hard_output.data;
1947 s_output_head = s_output = op->turbo_dec.soft_output.data;
1948 in_offset = op->turbo_dec.input.offset;
1949 h_out_offset = op->turbo_dec.hard_output.offset;
1950 s_out_offset = op->turbo_dec.soft_output.offset;
1951 h_out_length = s_out_length = 0;
1952 mbuf_total_left = op->turbo_dec.input.length;
1953 seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
1955 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1956 if (unlikely(input == NULL)) {
1957 rte_bbdev_log(ERR, "Invalid mbuf pointer");
1962 /* Set up DMA descriptor */
1963 desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
1964 & q->sw_ring_wrap_mask);
1966 ret = acc100_dma_desc_td_fill(op, &desc->req, &input, h_output,
1967 s_output, &in_offset, &h_out_offset, &s_out_offset,
1968 &h_out_length, &s_out_length, &mbuf_total_left,
1969 &seg_total_left, 0);
1971 if (unlikely(ret < 0))
1975 mbuf_append(h_output_head, h_output, h_out_length);
1978 if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT))
1979 mbuf_append(s_output_head, s_output, s_out_length);
1981 #ifdef RTE_LIBRTE_BBDEV_DEBUG
1982 rte_memdump(stderr, "FCW", &desc->req.fcw_td,
1983 sizeof(desc->req.fcw_td) - 8);
1984 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
1985 if (check_mbuf_total_left(mbuf_total_left) != 0)
1989 /* One CB (one op) was successfully prepared to enqueue */
static inline int
harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
		uint16_t total_enqueued_cbs) {
	struct acc100_fcw_ld *fcw;
	union acc100_dma_desc *desc;
	int next_triplet = 1;
	struct rte_mbuf *hq_output_head, *hq_output;
	uint16_t harq_dma_length_in, harq_dma_length_out;
	uint16_t harq_in_length = op->ldpc_dec.harq_combined_input.length;
	if (harq_in_length == 0) {
		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
		return -EINVAL;
	}

	int h_comp = check_bit(op->ldpc_dec.op_flags,
			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
			) ? 1 : 0;
	if (h_comp == 1) {
		harq_in_length = harq_in_length * 8 / 6;
		harq_in_length = RTE_ALIGN(harq_in_length, 64);
		harq_dma_length_in = harq_in_length * 6 / 8;
	} else {
		harq_in_length = RTE_ALIGN(harq_in_length, 64);
		harq_dma_length_in = harq_in_length;
	}
	harq_dma_length_out = harq_dma_length_in;

	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
	uint16_t harq_index = (ddr_mem_in ?
			op->ldpc_dec.harq_combined_input.offset :
			op->ldpc_dec.harq_combined_output.offset)
			/ ACC100_HARQ_OFFSET;

	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
			& q->sw_ring_wrap_mask);
	desc = q->ring_addr + desc_idx;
	fcw = &desc->req.fcw_ld;
	/* Set the FCW from loopback into DDR */
	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
	fcw->FCWversion = ACC100_FCW_VER;
	fcw->qm = 2;
	fcw->Zc = 384;
	if (harq_in_length < 16 * ACC100_N_ZC_1)
		fcw->Zc = 16;
	fcw->ncb = fcw->Zc * ACC100_N_ZC_1;
	fcw->rm_e = 2;
	fcw->hcin_en = 1;
	fcw->hcout_en = 1;

	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length %d %d\n",
			ddr_mem_in, harq_index,
			harq_layout[harq_index].offset, harq_in_length,
			harq_dma_length_in);

	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
		fcw->hcin_size0 = harq_layout[harq_index].size0;
		fcw->hcin_offset = harq_layout[harq_index].offset;
		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
		if (h_comp == 1)
			harq_dma_length_in = harq_dma_length_in * 6 / 8;
	} else {
		fcw->hcin_size0 = harq_in_length;
	}
	harq_layout[harq_index].val = 0;
	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
	fcw->hcout_size0 = harq_in_length;
	fcw->hcin_decomp_mode = h_comp;
	fcw->hcout_comp_mode = h_comp;
	fcw->gain_i = 1;
	fcw->gain_h = 1;

	/* Set the prefix of descriptor. This could be done at polling */
	acc100_header_init(&desc->req);

	/* Null LLR input for Decoder */
	desc->req.data_ptrs[next_triplet].address =
			q->lb_in_addr_iova;
	desc->req.data_ptrs[next_triplet].blen = 2;
	desc->req.data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_IN;
	desc->req.data_ptrs[next_triplet].last = 0;
	desc->req.data_ptrs[next_triplet].dma_ext = 0;
	next_triplet++;

	/* HARQ Combine input from either Memory interface */
	if (!ddr_mem_in) {
		next_triplet = acc100_dma_fill_blk_type_out(&desc->req,
				op->ldpc_dec.harq_combined_input.data,
				op->ldpc_dec.harq_combined_input.offset,
				harq_dma_length_in,
				next_triplet,
				ACC100_DMA_BLKID_IN_HARQ);
	} else {
		desc->req.data_ptrs[next_triplet].address =
				op->ldpc_dec.harq_combined_input.offset;
		desc->req.data_ptrs[next_triplet].blen =
				harq_dma_length_in;
		desc->req.data_ptrs[next_triplet].blkid =
				ACC100_DMA_BLKID_IN_HARQ;
		desc->req.data_ptrs[next_triplet].dma_ext = 1;
		next_triplet++;
	}
	desc->req.data_ptrs[next_triplet - 1].last = 1;
	desc->req.m2dlen = next_triplet;

	/* Dropped decoder hard output */
	desc->req.data_ptrs[next_triplet].address =
			q->lb_out_addr_iova;
	desc->req.data_ptrs[next_triplet].blen = ACC100_BYTES_IN_WORD;
	desc->req.data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_OUT_HARD;
	desc->req.data_ptrs[next_triplet].last = 0;
	desc->req.data_ptrs[next_triplet].dma_ext = 0;
	next_triplet++;

	/* HARQ Combine output to either Memory interface */
	if (check_bit(op->ldpc_dec.op_flags,
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE
			)) {
		desc->req.data_ptrs[next_triplet].address =
				op->ldpc_dec.harq_combined_output.offset;
		desc->req.data_ptrs[next_triplet].blen =
				harq_dma_length_out;
		desc->req.data_ptrs[next_triplet].blkid =
				ACC100_DMA_BLKID_OUT_HARQ;
		desc->req.data_ptrs[next_triplet].dma_ext = 1;
		next_triplet++;
	} else {
		hq_output_head = op->ldpc_dec.harq_combined_output.data;
		hq_output = op->ldpc_dec.harq_combined_output.data;
		next_triplet = acc100_dma_fill_blk_type_out(
				&desc->req,
				op->ldpc_dec.harq_combined_output.data,
				op->ldpc_dec.harq_combined_output.offset,
				harq_dma_length_out,
				next_triplet,
				ACC100_DMA_BLKID_OUT_HARQ);
		/* HARQ output */
		mbuf_append(hq_output_head, hq_output, harq_dma_length_out);
		op->ldpc_dec.harq_combined_output.length =
				harq_dma_length_out;
	}
	desc->req.data_ptrs[next_triplet - 1].last = 1;
	desc->req.d2mlen = next_triplet - desc->req.m2dlen;
	desc->req.op_addr = op;

	/* One CB (one op) was successfully prepared to enqueue */
	return 1;
}
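
/*
 * Worked sketch of the 6-bit compression sizing above (values are
 * assumptions for illustration): a combined HARQ input of 900 bytes is
 * first scaled to its uncompressed LLR footprint, 900 * 8 / 6 = 1200,
 * aligned up to a 64-byte boundary giving harq_in_length = 1216, while the
 * actual DMA transfer remains compressed: 1216 * 6 / 8 = 912 bytes.
 */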
/* Enqueue one LDPC decode operation for ACC100 device in CB mode */
static inline int
enqueue_ldpc_dec_one_op_cb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
		uint16_t total_enqueued_cbs, bool same_op)
{
	int ret;

	if (unlikely(check_bit(op->ldpc_dec.op_flags,
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK))) {
		ret = harq_loopback(q, op, total_enqueued_cbs);
		return ret;
	}

	union acc100_dma_desc *desc;
	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
			& q->sw_ring_wrap_mask);
	desc = q->ring_addr + desc_idx;
	struct rte_mbuf *input, *h_output_head, *h_output;
	uint32_t in_offset, h_out_offset, mbuf_total_left, h_out_length = 0;
	input = op->ldpc_dec.input.data;
	h_output_head = h_output = op->ldpc_dec.hard_output.data;
	in_offset = op->ldpc_dec.input.offset;
	h_out_offset = op->ldpc_dec.hard_output.offset;
	mbuf_total_left = op->ldpc_dec.input.length;
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	if (unlikely(input == NULL)) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		return -EFAULT;
	}
#endif
	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;

	if (same_op) {
		union acc100_dma_desc *prev_desc;
		desc_idx = ((q->sw_ring_head + total_enqueued_cbs - 1)
				& q->sw_ring_wrap_mask);
		prev_desc = q->ring_addr + desc_idx;
		uint8_t *prev_ptr = (uint8_t *) prev_desc;
		uint8_t *new_ptr = (uint8_t *) desc;
		/* Copy first 4 words and BDESCs */
		rte_memcpy(new_ptr, prev_ptr, ACC100_5GUL_SIZE_0);
		rte_memcpy(new_ptr + ACC100_5GUL_OFFSET_0,
				prev_ptr + ACC100_5GUL_OFFSET_0,
				ACC100_5GUL_SIZE_1);
		desc->req.op_addr = prev_desc->req.op_addr;
		/* Copy FCW */
		rte_memcpy(new_ptr + ACC100_DESC_FCW_OFFSET,
				prev_ptr + ACC100_DESC_FCW_OFFSET,
				ACC100_FCW_LD_BLEN);
		acc100_dma_desc_ld_update(op, &desc->req, input, h_output,
				&in_offset, &h_out_offset,
				&h_out_length, harq_layout);
	} else {
		struct acc100_fcw_ld *fcw;
		uint32_t seg_total_left;
		fcw = &desc->req.fcw_ld;
		acc100_fcw_ld_fill(op, fcw, harq_layout);

		/* Special handling when overusing mbuf */
		if (fcw->rm_e < ACC100_MAX_E_MBUF)
			seg_total_left = rte_pktmbuf_data_len(input)
					- in_offset;
		else
			seg_total_left = fcw->rm_e;

		ret = acc100_dma_desc_ld_fill(op, &desc->req, &input, h_output,
				&in_offset, &h_out_offset,
				&h_out_length, &mbuf_total_left,
				&seg_total_left, fcw);
		if (unlikely(ret < 0))
			return ret;
	}

	/* Hard output */
	mbuf_append(h_output_head, h_output, h_out_length);
#ifndef ACC100_EXT_MEM
	if (op->ldpc_dec.harq_combined_output.length > 0) {
		/* Push the HARQ output into host memory */
		struct rte_mbuf *hq_output_head, *hq_output;
		hq_output_head = op->ldpc_dec.harq_combined_output.data;
		hq_output = op->ldpc_dec.harq_combined_output.data;
		mbuf_append(hq_output_head, hq_output,
				op->ldpc_dec.harq_combined_output.length);
	}
#endif

#ifdef RTE_LIBRTE_BBDEV_DEBUG
	rte_memdump(stderr, "FCW", &desc->req.fcw_ld,
			sizeof(desc->req.fcw_ld) - 8);
	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
#endif

	/* One CB (one op) was successfully prepared to enqueue */
	return 1;
}
/* Enqueue one LDPC decode operation for ACC100 device in TB mode */
static inline int
enqueue_ldpc_dec_one_op_tb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
{
	union acc100_dma_desc *desc = NULL;
	int ret;
	uint8_t r, c;
	uint32_t in_offset, h_out_offset,
		h_out_length, mbuf_total_left, seg_total_left;
	struct rte_mbuf *input, *h_output_head, *h_output;
	uint16_t current_enqueued_cbs = 0;

	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
			& q->sw_ring_wrap_mask);
	desc = q->ring_addr + desc_idx;
	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
	acc100_fcw_ld_fill(op, &desc->req.fcw_ld, harq_layout);

	input = op->ldpc_dec.input.data;
	h_output_head = h_output = op->ldpc_dec.hard_output.data;
	in_offset = op->ldpc_dec.input.offset;
	h_out_offset = op->ldpc_dec.hard_output.offset;
	h_out_length = 0;
	mbuf_total_left = op->ldpc_dec.input.length;
	c = op->ldpc_dec.tb_params.c;
	r = op->ldpc_dec.tb_params.r;

	while (mbuf_total_left > 0 && r < c) {

		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;

		/* Set up DMA descriptor */
		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
				& q->sw_ring_wrap_mask);
		desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
		desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN;
		ret = acc100_dma_desc_ld_fill(op, &desc->req, &input,
				h_output, &in_offset, &h_out_offset,
				&h_out_length,
				&mbuf_total_left, &seg_total_left,
				&desc->req.fcw_ld);

		if (unlikely(ret < 0))
			return ret;

		/* Hard output */
		mbuf_append(h_output_head, h_output, h_out_length);

		/* Set total number of CBs in TB */
		desc->req.cbs_in_tb = cbs_in_tb;
#ifdef RTE_LIBRTE_BBDEV_DEBUG
		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
				sizeof(desc->req.fcw_td) - 8);
		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
#endif

		if (seg_total_left == 0) {
			/* Go to the next mbuf */
			input = input->next;
			in_offset = 0;
			h_output = h_output->next;
			h_out_offset = 0;
		}
		total_enqueued_cbs++;
		current_enqueued_cbs++;
		r++;
	}

	if (unlikely(desc == NULL))
		return current_enqueued_cbs;

#ifdef RTE_LIBRTE_BBDEV_DEBUG
	if (check_mbuf_total_left(mbuf_total_left) != 0)
		return -EINVAL;
#endif

	/* Set SDone on last CB descriptor for TB mode */
	desc->req.sdone_enable = 1;
	desc->req.irq_enable = q->irq_enable;

	return current_enqueued_cbs;
}
/* Enqueue one decode operation for ACC100 device in TB mode */
static inline int
enqueue_dec_one_op_tb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
{
	union acc100_dma_desc *desc = NULL;
	int ret;
	uint8_t r, c;
	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
		h_out_length, mbuf_total_left, seg_total_left;
	struct rte_mbuf *input, *h_output_head, *h_output,
		*s_output_head, *s_output;
	uint16_t current_enqueued_cbs = 0;

	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
			& q->sw_ring_wrap_mask);
	desc = q->ring_addr + desc_idx;
	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
	acc100_fcw_td_fill(op, &desc->req.fcw_td);

	input = op->turbo_dec.input.data;
	h_output_head = h_output = op->turbo_dec.hard_output.data;
	s_output_head = s_output = op->turbo_dec.soft_output.data;
	in_offset = op->turbo_dec.input.offset;
	h_out_offset = op->turbo_dec.hard_output.offset;
	s_out_offset = op->turbo_dec.soft_output.offset;
	h_out_length = s_out_length = 0;
	mbuf_total_left = op->turbo_dec.input.length;
	c = op->turbo_dec.tb_params.c;
	r = op->turbo_dec.tb_params.r;

	while (mbuf_total_left > 0 && r < c) {

		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;

		/* Set up DMA descriptor */
		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
				& q->sw_ring_wrap_mask);
		desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
		desc->req.data_ptrs[0].blen = ACC100_FCW_TD_BLEN;
		ret = acc100_dma_desc_td_fill(op, &desc->req, &input,
				h_output, s_output, &in_offset, &h_out_offset,
				&s_out_offset, &h_out_length, &s_out_length,
				&mbuf_total_left, &seg_total_left, r);

		if (unlikely(ret < 0))
			return ret;

		/* Hard output */
		mbuf_append(h_output_head, h_output, h_out_length);

		/* Soft output */
		if (check_bit(op->turbo_dec.op_flags,
				RTE_BBDEV_TURBO_SOFT_OUTPUT))
			mbuf_append(s_output_head, s_output, s_out_length);

		/* Set total number of CBs in TB */
		desc->req.cbs_in_tb = cbs_in_tb;
#ifdef RTE_LIBRTE_BBDEV_DEBUG
		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
				sizeof(desc->req.fcw_td) - 8);
		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
#endif

		if (seg_total_left == 0) {
			/* Go to the next mbuf */
			input = input->next;
			in_offset = 0;
			h_output = h_output->next;
			h_out_offset = 0;

			if (check_bit(op->turbo_dec.op_flags,
					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
				s_output = s_output->next;
				s_out_offset = 0;
			}
		}

		total_enqueued_cbs++;
		current_enqueued_cbs++;
		r++;
	}

	if (unlikely(desc == NULL))
		return current_enqueued_cbs;

#ifdef RTE_LIBRTE_BBDEV_DEBUG
	if (check_mbuf_total_left(mbuf_total_left) != 0)
		return -EINVAL;
#endif

	/* Set SDone on last CB descriptor for TB mode */
	desc->req.sdone_enable = 1;
	desc->req.irq_enable = q->irq_enable;

	return current_enqueued_cbs;
}
/* Calculates number of CBs in processed encoder TB based on 'r' and input
 * length.
 */
static inline uint8_t
get_num_cbs_in_tb_enc(struct rte_bbdev_op_turbo_enc *turbo_enc)
{
	uint8_t c, c_neg, r, crc24_bits = 0;
	uint16_t k, k_neg, k_pos;
	uint8_t cbs_in_tb = 0;
	int32_t length;

	length = turbo_enc->input.length;
	r = turbo_enc->tb_params.r;
	c = turbo_enc->tb_params.c;
	c_neg = turbo_enc->tb_params.c_neg;
	k_neg = turbo_enc->tb_params.k_neg;
	k_pos = turbo_enc->tb_params.k_pos;
	if (check_bit(turbo_enc->op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
		crc24_bits = 24;
	while (length > 0 && r < c) {
		k = (r < c_neg) ? k_neg : k_pos;
		length -= (k - crc24_bits) >> 3;
		r++;
		cbs_in_tb++;
	}

	return cbs_in_tb;
}
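
/*
 * Worked sketch (parameter values are assumptions for illustration): for a
 * TB with c = 2, c_neg = 1, k_neg = 512, k_pos = 528 and CRC24B attach set
 * (crc24_bits = 24), each CB consumes (k - 24) >> 3 bytes of input:
 * 61 bytes for the negative CB and 63 for the positive one, so an input
 * length of 124 bytes yields cbs_in_tb = 2.
 */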
/* Calculates number of CBs in processed decoder TB based on 'r' and input
 * length.
 */
static inline uint16_t
get_num_cbs_in_tb_dec(struct rte_bbdev_op_turbo_dec *turbo_dec)
{
	uint8_t c, c_neg, r = 0;
	uint16_t kw, k, k_neg, k_pos, cbs_in_tb = 0;
	int32_t length;

	length = turbo_dec->input.length;
	r = turbo_dec->tb_params.r;
	c = turbo_dec->tb_params.c;
	c_neg = turbo_dec->tb_params.c_neg;
	k_neg = turbo_dec->tb_params.k_neg;
	k_pos = turbo_dec->tb_params.k_pos;
	while (length > 0 && r < c) {
		k = (r < c_neg) ? k_neg : k_pos;
		kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
		length -= kw;
		r++;
		cbs_in_tb++;
	}

	return cbs_in_tb;
}
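
/*
 * Worked sketch (k value is an assumption for illustration): a decoder CB
 * with k = 512 occupies a circular buffer of
 * kw = RTE_ALIGN_CEIL(512 + 4, 32) * 3 = 544 * 3 = 1632 LLR bytes, so each
 * such CB consumes 1632 bytes of TB input in the count above.
 */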
/* Calculates number of CBs in processed decoder TB based on 'r' and input
 * length.
 */
static inline uint16_t
get_num_cbs_in_tb_ldpc_dec(struct rte_bbdev_op_ldpc_dec *ldpc_dec)
{
	uint16_t r, cbs_in_tb = 0;
	int32_t length = ldpc_dec->input.length;
	r = ldpc_dec->tb_params.r;
	while (length > 0 && r < ldpc_dec->tb_params.c) {
		length -= (r < ldpc_dec->tb_params.cab) ?
				ldpc_dec->tb_params.ea :
				ldpc_dec->tb_params.eb;
		r++;
		cbs_in_tb++;
	}
	return cbs_in_tb;
}
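
/*
 * Worked sketch (values are assumptions for illustration): with c = 3 code
 * blocks of which cab = 1 uses ea = 1000 bytes and the remaining two use
 * eb = 900 bytes, an input length of 2800 bytes is consumed as
 * 1000 + 900 + 900, giving cbs_in_tb = 3.
 */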
/* Enqueue encode operations for ACC100 device in CB mode. */
static uint16_t
acc100_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t num)
{
	struct acc100_queue *q = q_data->queue_private;
	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
	uint16_t i;
	union acc100_dma_desc *desc;
	int ret;

	for (i = 0; i < num; ++i) {
		/* Check if there is available space for further processing */
		if (unlikely(avail - 1 < 0))
			break;
		avail -= 1;

		ret = enqueue_enc_one_op_cb(q, ops[i], i);
		if (ret < 0)
			break;
	}

	if (unlikely(i == 0))
		return 0; /* Nothing to enqueue */

	/* Set SDone in last CB in enqueued ops for CB mode */
	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
			& q->sw_ring_wrap_mask);
	desc->req.sdone_enable = 1;
	desc->req.irq_enable = q->irq_enable;

	acc100_dma_enqueue(q, i, &q_data->queue_stats);

	/* Update stats */
	q_data->queue_stats.enqueued_count += i;
	q_data->queue_stats.enqueue_err_count += num - i;
	return i;
}
/* Check whether we can mux encode operations with a common FCW */
static inline bool
check_mux(struct rte_bbdev_enc_op **ops, uint16_t num) {
	uint16_t i;
	if (num <= 1)
		return false;
	for (i = 1; i < num; ++i) {
		/* Only mux compatible code blocks */
		if (memcmp((uint8_t *)(&ops[i]->ldpc_enc) + ACC100_ENC_OFFSET,
				(uint8_t *)(&ops[0]->ldpc_enc) +
				ACC100_ENC_OFFSET, ACC100_CMP_ENC_SIZE) != 0)
			return false;
	}
	return true;
}
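
/*
 * Illustrative note (an assumption based on the offsets above, not a
 * statement of the exact struct layout): the memcmp skips the leading
 * ACC100_ENC_OFFSET bytes of rte_bbdev_op_ldpc_enc, i.e. the per-op
 * input/output mbuf references, and compares only the encoding
 * configuration that feeds the FCW. Ops that match can then share one
 * descriptor and one FCW (see enqueue_ldpc_enc_n_op_cb).
 */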
/* Enqueue LDPC encode operations for ACC100 device in CB mode. */
static inline uint16_t
acc100_enqueue_ldpc_enc_cb(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t num)
{
	struct acc100_queue *q = q_data->queue_private;
	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
	uint16_t i = 0;
	union acc100_dma_desc *desc;
	int ret, desc_idx = 0;
	int16_t enq, left = num;

	while (left > 0) {
		if (unlikely(avail < 1))
			break;
		avail--;
		enq = RTE_MIN(left, ACC100_MUX_5GDL_DESC);
		if (check_mux(&ops[i], enq)) {
			ret = enqueue_ldpc_enc_n_op_cb(q, &ops[i],
					desc_idx, enq);
			if (ret < 0)
				break;
			i += enq;
		} else {
			ret = enqueue_ldpc_enc_one_op_cb(q, ops[i], desc_idx);
			if (ret < 0)
				break;
			i++;
		}
		desc_idx++;
		left = num - i;
	}

	if (unlikely(i == 0))
		return 0; /* Nothing to enqueue */

	/* Set SDone in last CB in enqueued ops for CB mode */
	desc = q->ring_addr + ((q->sw_ring_head + desc_idx - 1)
			& q->sw_ring_wrap_mask);
	desc->req.sdone_enable = 1;
	desc->req.irq_enable = q->irq_enable;

	acc100_dma_enqueue(q, desc_idx, &q_data->queue_stats);

	/* Update stats */
	q_data->queue_stats.enqueued_count += i;
	q_data->queue_stats.enqueue_err_count += num - i;

	return i;
}
/* Enqueue encode operations for ACC100 device in TB mode. */
static uint16_t
acc100_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t num)
{
	struct acc100_queue *q = q_data->queue_private;
	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
	uint16_t i, enqueued_cbs = 0;
	uint8_t cbs_in_tb;
	int ret;

	for (i = 0; i < num; ++i) {
		cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
		/* Check if there is available space for further processing */
		if (unlikely(avail - cbs_in_tb < 0))
			break;
		avail -= cbs_in_tb;

		ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
		if (ret < 0)
			break;
		enqueued_cbs += ret;
	}
	if (unlikely(enqueued_cbs == 0))
		return 0; /* Nothing to enqueue */

	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);

	/* Update stats */
	q_data->queue_stats.enqueued_count += i;
	q_data->queue_stats.enqueue_err_count += num - i;

	return i;
}
/* Enqueue encode operations for ACC100 device. */
static uint16_t
acc100_enqueue_enc(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t num)
{
	if (unlikely(num == 0))
		return 0;
	if (ops[0]->turbo_enc.code_block_mode == 0)
		return acc100_enqueue_enc_tb(q_data, ops, num);
	else
		return acc100_enqueue_enc_cb(q_data, ops, num);
}
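
/*
 * Illustrative usage sketch (not part of the driver; dev_id and queue_id
 * are assumptions): applications reach this entry point through the public
 * bbdev burst API, which dispatches to dev->enqueue_enc_ops:
 *
 *	struct rte_bbdev_enc_op *ops[8];
 *	uint16_t enq;
 *	enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, ops, 8);
 *
 * enq may be smaller than 8 when the SW ring runs out of space.
 */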
/* Enqueue LDPC encode operations for ACC100 device. */
static uint16_t
acc100_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t num)
{
	if (unlikely(num == 0))
		return 0;
	if (ops[0]->ldpc_enc.code_block_mode == 0)
		return acc100_enqueue_enc_tb(q_data, ops, num);
	else
		return acc100_enqueue_ldpc_enc_cb(q_data, ops, num);
}
/* Enqueue decode operations for ACC100 device in CB mode */
static uint16_t
acc100_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t num)
{
	struct acc100_queue *q = q_data->queue_private;
	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
	uint16_t i;
	union acc100_dma_desc *desc;
	int ret;

	for (i = 0; i < num; ++i) {
		/* Check if there is available space for further processing */
		if (unlikely(avail - 1 < 0))
			break;
		avail -= 1;

		ret = enqueue_dec_one_op_cb(q, ops[i], i);
		if (ret < 0)
			break;
	}

	if (unlikely(i == 0))
		return 0; /* Nothing to enqueue */

	/* Set SDone in last CB in enqueued ops for CB mode */
	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
			& q->sw_ring_wrap_mask);
	desc->req.sdone_enable = 1;
	desc->req.irq_enable = q->irq_enable;

	acc100_dma_enqueue(q, i, &q_data->queue_stats);

	/* Update stats */
	q_data->queue_stats.enqueued_count += i;
	q_data->queue_stats.enqueue_err_count += num - i;

	return i;
}
/* Check whether we can mux decode operations with a common FCW */
static inline bool
cmp_ldpc_dec_op(struct rte_bbdev_dec_op **ops) {
	/* Only mux compatible code blocks */
	if (memcmp((uint8_t *)(&ops[0]->ldpc_dec) + ACC100_DEC_OFFSET,
			(uint8_t *)(&ops[1]->ldpc_dec) +
			ACC100_DEC_OFFSET, ACC100_CMP_DEC_SIZE) != 0) {
		return false;
	} else
		return true;
}
/* Enqueue LDPC decode operations for ACC100 device in TB mode */
static uint16_t
acc100_enqueue_ldpc_dec_tb(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t num)
{
	struct acc100_queue *q = q_data->queue_private;
	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
	uint16_t i, enqueued_cbs = 0;
	uint8_t cbs_in_tb;
	int ret;

	for (i = 0; i < num; ++i) {
		cbs_in_tb = get_num_cbs_in_tb_ldpc_dec(&ops[i]->ldpc_dec);
		/* Check if there is available space for further processing */
		if (unlikely(avail - cbs_in_tb < 0))
			break;
		avail -= cbs_in_tb;

		ret = enqueue_ldpc_dec_one_op_tb(q, ops[i],
				enqueued_cbs, cbs_in_tb);
		if (ret < 0)
			break;
		enqueued_cbs += ret;
	}

	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);

	/* Update stats */
	q_data->queue_stats.enqueued_count += i;
	q_data->queue_stats.enqueue_err_count += num - i;
	return i;
}
/* Enqueue LDPC decode operations for ACC100 device in CB mode */
static uint16_t
acc100_enqueue_ldpc_dec_cb(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t num)
{
	struct acc100_queue *q = q_data->queue_private;
	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
	uint16_t i;
	union acc100_dma_desc *desc;
	int ret;
	bool same_op = false;
	for (i = 0; i < num; ++i) {
		/* Check if there is available space for further processing */
		if (unlikely(avail < 1))
			break;
		avail -= 1;

		if (i > 0)
			same_op = cmp_ldpc_dec_op(&ops[i-1]);
		rte_bbdev_log(INFO, "Op %d %d %d %d %d %d %d %d %d %d %d %d\n",
			i, ops[i]->ldpc_dec.op_flags, ops[i]->ldpc_dec.rv_index,
			ops[i]->ldpc_dec.iter_max, ops[i]->ldpc_dec.iter_count,
			ops[i]->ldpc_dec.basegraph, ops[i]->ldpc_dec.z_c,
			ops[i]->ldpc_dec.n_cb, ops[i]->ldpc_dec.q_m,
			ops[i]->ldpc_dec.n_filler, ops[i]->ldpc_dec.cb_params.e,
			same_op);
		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op);
		if (ret < 0)
			break;
	}

	if (unlikely(i == 0))
		return 0; /* Nothing to enqueue */

	/* Set SDone in last CB in enqueued ops for CB mode */
	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
			& q->sw_ring_wrap_mask);

	desc->req.sdone_enable = 1;
	desc->req.irq_enable = q->irq_enable;

	acc100_dma_enqueue(q, i, &q_data->queue_stats);

	/* Update stats */
	q_data->queue_stats.enqueued_count += i;
	q_data->queue_stats.enqueue_err_count += num - i;
	return i;
}
/* Enqueue decode operations for ACC100 device in TB mode */
static uint16_t
acc100_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t num)
{
	struct acc100_queue *q = q_data->queue_private;
	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
	uint16_t i, enqueued_cbs = 0;
	uint8_t cbs_in_tb;
	int ret;

	for (i = 0; i < num; ++i) {
		cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
		/* Check if there is available space for further processing */
		if (unlikely(avail - cbs_in_tb < 0))
			break;
		avail -= cbs_in_tb;

		ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
		if (ret < 0)
			break;
		enqueued_cbs += ret;
	}

	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);

	/* Update stats */
	q_data->queue_stats.enqueued_count += i;
	q_data->queue_stats.enqueue_err_count += num - i;

	return i;
}
/* Enqueue decode operations for ACC100 device. */
static uint16_t
acc100_enqueue_dec(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t num)
{
	if (unlikely(num == 0))
		return 0;
	if (ops[0]->turbo_dec.code_block_mode == 0)
		return acc100_enqueue_dec_tb(q_data, ops, num);
	else
		return acc100_enqueue_dec_cb(q_data, ops, num);
}
/* Enqueue LDPC decode operations for ACC100 device. */
static uint16_t
acc100_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t num)
{
	struct acc100_queue *q = q_data->queue_private;
	int32_t aq_avail = q->aq_depth +
			(q->aq_dequeued - q->aq_enqueued) / 128;

	if (unlikely((aq_avail == 0) || (num == 0)))
		return 0;

	if (ops[0]->ldpc_dec.code_block_mode == 0)
		return acc100_enqueue_ldpc_dec_tb(q_data, ops, num);
	else
		return acc100_enqueue_ldpc_dec_cb(q_data, ops, num);
}
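
/*
 * Worked sketch of the atomic-queue headroom check above (values are
 * assumptions): the backlog is estimated in batches of 128 ops, so with
 * aq_depth = 32, aq_enqueued = 4096 and aq_dequeued = 3968 the term
 * (3968 - 4096) / 128 evaluates to -1 and aq_avail = 32 - 1 = 31, hence
 * the burst is accepted.
 */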
/* Dequeue one encode operation from ACC100 device in CB mode */
static inline int
dequeue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
		uint16_t total_dequeued_cbs, uint32_t *aq_dequeued)
{
	union acc100_dma_desc *desc, atom_desc;
	union acc100_dma_rsp_desc rsp;
	struct rte_bbdev_enc_op *op;
	int i;

	desc = q->ring_addr + ((q->sw_ring_tail + total_dequeued_cbs)
			& q->sw_ring_wrap_mask);
	atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
			__ATOMIC_RELAXED);

	/* Check fdone bit */
	if (!(atom_desc.rsp.val & ACC100_FDONE))
		return -1;

	rsp.val = atom_desc.rsp.val;
	rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val);

	/* Dequeue */
	op = desc->req.op_addr;

	/* Clearing status, it will be set based on response */
	op->status = 0;

	op->status |= ((rsp.input_err)
			? (1 << RTE_BBDEV_DATA_ERROR) : 0);
	op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
	op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);

	if (desc->req.last_desc_in_batch) {
		(*aq_dequeued)++;
		desc->req.last_desc_in_batch = 0;
	}
	desc->rsp.val = ACC100_DMA_DESC_TYPE;
	desc->rsp.add_info_0 = 0; /* Reserved bits */
	desc->rsp.add_info_1 = 0; /* Reserved bits */

	/* Flag that the muxing causes loss of opaque data */
	op->opaque_data = (void *)-1;
	for (i = 0 ; i < desc->req.numCBs; i++)
		ref_op[i] = op;

	/* One CB (op) was successfully dequeued */
	return desc->req.numCBs;
}
/* Dequeue one encode operation from ACC100 device in TB mode */
static inline int
dequeue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
		uint16_t total_dequeued_cbs, uint32_t *aq_dequeued)
{
	union acc100_dma_desc *desc, *last_desc, atom_desc;
	union acc100_dma_rsp_desc rsp;
	struct rte_bbdev_enc_op *op;
	uint8_t i = 0;
	uint16_t current_dequeued_cbs = 0, cbs_in_tb;

	desc = q->ring_addr + ((q->sw_ring_tail + total_dequeued_cbs)
			& q->sw_ring_wrap_mask);
	atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
			__ATOMIC_RELAXED);

	/* Check fdone bit */
	if (!(atom_desc.rsp.val & ACC100_FDONE))
		return -1;

	/* Get number of CBs in dequeued TB */
	cbs_in_tb = desc->req.cbs_in_tb;
	/* Get last CB */
	last_desc = q->ring_addr + ((q->sw_ring_tail
			+ total_dequeued_cbs + cbs_in_tb - 1)
			& q->sw_ring_wrap_mask);
	/* Check if last CB in TB is ready to dequeue (and thus
	 * the whole TB) - checking sdone bit. If not return.
	 */
	atom_desc.atom_hdr = __atomic_load_n((uint64_t *)last_desc,
			__ATOMIC_RELAXED);
	if (!(atom_desc.rsp.val & ACC100_SDONE))
		return -1;

	/* Dequeue */
	op = desc->req.op_addr;

	/* Clearing status, it will be set based on response */
	op->status = 0;

	while (i < cbs_in_tb) {
		desc = q->ring_addr + ((q->sw_ring_tail
				+ total_dequeued_cbs)
				& q->sw_ring_wrap_mask);
		atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
				__ATOMIC_RELAXED);
		rsp.val = atom_desc.rsp.val;
		rte_bbdev_log_debug("Resp. desc %p: %x", desc,
				rsp.val);

		op->status |= ((rsp.input_err)
				? (1 << RTE_BBDEV_DATA_ERROR) : 0);
		op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
		op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);

		if (desc->req.last_desc_in_batch) {
			(*aq_dequeued)++;
			desc->req.last_desc_in_batch = 0;
		}
		desc->rsp.val = ACC100_DMA_DESC_TYPE;
		desc->rsp.add_info_0 = 0;
		desc->rsp.add_info_1 = 0;
		total_dequeued_cbs++;
		current_dequeued_cbs++;
		i++;
	}

	*ref_op = op;

	return current_dequeued_cbs;
}
/* Dequeue one decode operation from ACC100 device in CB mode */
static inline int
dequeue_dec_one_op_cb(struct rte_bbdev_queue_data *q_data,
		struct acc100_queue *q, struct rte_bbdev_dec_op **ref_op,
		uint16_t dequeued_cbs, uint32_t *aq_dequeued)
{
	union acc100_dma_desc *desc, atom_desc;
	union acc100_dma_rsp_desc rsp;
	struct rte_bbdev_dec_op *op;

	desc = q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
			& q->sw_ring_wrap_mask);
	atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
			__ATOMIC_RELAXED);

	/* Check fdone bit */
	if (!(atom_desc.rsp.val & ACC100_FDONE))
		return -1;

	rsp.val = atom_desc.rsp.val;
	rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val);

	/* Dequeue */
	op = desc->req.op_addr;

	/* Clearing status, it will be set based on response */
	op->status = 0;
	op->status |= ((rsp.input_err)
			? (1 << RTE_BBDEV_DATA_ERROR) : 0);
	op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
	op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
	if (op->status != 0)
		q_data->queue_stats.dequeue_err_count++;

	/* CRC invalid if error exists */
	if (!op->status)
		op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR;
	op->turbo_dec.iter_count = (uint8_t) rsp.iter_cnt / 2;
	/* Check if this is the last desc in batch (Atomic Queue) */
	if (desc->req.last_desc_in_batch) {
		(*aq_dequeued)++;
		desc->req.last_desc_in_batch = 0;
	}
	desc->rsp.val = ACC100_DMA_DESC_TYPE;
	desc->rsp.add_info_0 = 0;
	desc->rsp.add_info_1 = 0;
	*ref_op = op;

	/* One CB (op) was successfully dequeued */
	return 1;
}
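
/*
 * Illustrative note on the status mapping above: a response with
 * dma_err = 1 and crc_status = 1 yields
 * op->status = 1 << RTE_BBDEV_DRV_ERROR only, since the CRC bit is merged
 * solely when no other error was raised on the op.
 */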
/* Dequeue one LDPC decode operation from ACC100 device in CB mode */
static inline int
dequeue_ldpc_dec_one_op_cb(struct rte_bbdev_queue_data *q_data,
		struct acc100_queue *q, struct rte_bbdev_dec_op **ref_op,
		uint16_t dequeued_cbs, uint32_t *aq_dequeued)
{
	union acc100_dma_desc *desc, atom_desc;
	union acc100_dma_rsp_desc rsp;
	struct rte_bbdev_dec_op *op;

	desc = q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
			& q->sw_ring_wrap_mask);
	atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
			__ATOMIC_RELAXED);

	/* Check fdone bit */
	if (!(atom_desc.rsp.val & ACC100_FDONE))
		return -1;

	rsp.val = atom_desc.rsp.val;

	/* Dequeue */
	op = desc->req.op_addr;

	/* Clearing status, it will be set based on response */
	op->status = 0;
	op->status |= rsp.input_err << RTE_BBDEV_DATA_ERROR;
	op->status |= rsp.dma_err << RTE_BBDEV_DRV_ERROR;
	op->status |= rsp.fcw_err << RTE_BBDEV_DRV_ERROR;
	if (op->status != 0)
		q_data->queue_stats.dequeue_err_count++;

	op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR;
	if (op->ldpc_dec.hard_output.length > 0 && !rsp.synd_ok)
		op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR;
	op->ldpc_dec.iter_count = (uint8_t) rsp.iter_cnt;

	/* Check if this is the last desc in batch (Atomic Queue) */
	if (desc->req.last_desc_in_batch) {
		(*aq_dequeued)++;
		desc->req.last_desc_in_batch = 0;
	}

	desc->rsp.val = ACC100_DMA_DESC_TYPE;
	desc->rsp.add_info_0 = 0;
	desc->rsp.add_info_1 = 0;

	*ref_op = op;

	/* One CB (op) was successfully dequeued */
	return 1;
}
/* Dequeue one decode operation from ACC100 device in TB mode. */
static inline int
dequeue_dec_one_op_tb(struct acc100_queue *q, struct rte_bbdev_dec_op **ref_op,
		uint16_t dequeued_cbs, uint32_t *aq_dequeued)
{
	union acc100_dma_desc *desc, *last_desc, atom_desc;
	union acc100_dma_rsp_desc rsp;
	struct rte_bbdev_dec_op *op;
	uint8_t cbs_in_tb = 1, cb_idx = 0;

	desc = q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
			& q->sw_ring_wrap_mask);
	atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
			__ATOMIC_RELAXED);

	/* Check fdone bit */
	if (!(atom_desc.rsp.val & ACC100_FDONE))
		return -1;

	/* Dequeue */
	op = desc->req.op_addr;

	/* Get number of CBs in dequeued TB */
	cbs_in_tb = desc->req.cbs_in_tb;
	/* Get last CB */
	last_desc = q->ring_addr + ((q->sw_ring_tail
			+ dequeued_cbs + cbs_in_tb - 1)
			& q->sw_ring_wrap_mask);
	/* Check if last CB in TB is ready to dequeue (and thus
	 * the whole TB) - checking sdone bit. If not return.
	 */
	atom_desc.atom_hdr = __atomic_load_n((uint64_t *)last_desc,
			__ATOMIC_RELAXED);
	if (!(atom_desc.rsp.val & ACC100_SDONE))
		return -1;

	/* Clearing status, it will be set based on response */
	op->status = 0;

	/* Read remaining CBs if any */
	while (cb_idx < cbs_in_tb) {
		desc = q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
				& q->sw_ring_wrap_mask);
		atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
				__ATOMIC_RELAXED);
		rsp.val = atom_desc.rsp.val;
		rte_bbdev_log_debug("Resp. desc %p: %x", desc,
				rsp.val);

		op->status |= ((rsp.input_err)
				? (1 << RTE_BBDEV_DATA_ERROR) : 0);
		op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
		op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);

		/* CRC invalid if error exists */
		if (!op->status)
			op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR;
		op->turbo_dec.iter_count = RTE_MAX((uint8_t) rsp.iter_cnt,
				op->turbo_dec.iter_count);

		/* Check if this is the last desc in batch (Atomic Queue) */
		if (desc->req.last_desc_in_batch) {
			(*aq_dequeued)++;
			desc->req.last_desc_in_batch = 0;
		}
		desc->rsp.val = ACC100_DMA_DESC_TYPE;
		desc->rsp.add_info_0 = 0;
		desc->rsp.add_info_1 = 0;
		dequeued_cbs++;
		cb_idx++;
	}

	*ref_op = op;

	return cb_idx;
}
/* Dequeue encode operations from ACC100 device. */
static uint16_t
acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t num)
{
	struct acc100_queue *q = q_data->queue_private;
	uint16_t dequeue_num;
	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
	uint32_t aq_dequeued = 0;
	uint16_t i, dequeued_cbs = 0;
	struct rte_bbdev_enc_op *op;
	int ret;

#ifdef RTE_LIBRTE_BBDEV_DEBUG
	if (unlikely(ops == NULL || q == NULL)) {
		rte_bbdev_log_debug("Unexpected undefined pointer");
		return 0;
	}
#endif

	dequeue_num = (avail < num) ? avail : num;

	for (i = 0; i < dequeue_num; ++i) {
		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
			& q->sw_ring_wrap_mask))->req.op_addr;
		if (op->turbo_enc.code_block_mode == 0)
			ret = dequeue_enc_one_op_tb(q, &ops[i], dequeued_cbs,
					&aq_dequeued);
		else
			ret = dequeue_enc_one_op_cb(q, &ops[i], dequeued_cbs,
					&aq_dequeued);

		if (ret < 0)
			break;
		dequeued_cbs += ret;
	}

	q->aq_dequeued += aq_dequeued;
	q->sw_ring_tail += dequeued_cbs;

	/* Update dequeue stats */
	q_data->queue_stats.dequeued_count += i;

	return i;
}
/* Dequeue LDPC encode operations from ACC100 device. */
static uint16_t
acc100_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t num)
{
	struct acc100_queue *q = q_data->queue_private;
	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
	uint32_t aq_dequeued = 0;
	uint16_t dequeue_num, i, dequeued_cbs = 0, dequeued_descs = 0;
	int ret;

#ifdef RTE_LIBRTE_BBDEV_DEBUG
	if (unlikely(ops == NULL || q == NULL))
		return 0;
#endif

	dequeue_num = RTE_MIN(avail, num);

	for (i = 0; i < dequeue_num; i++) {
		ret = dequeue_enc_one_op_cb(q, &ops[dequeued_cbs],
				dequeued_descs, &aq_dequeued);
		if (ret < 0)
			break;
		dequeued_cbs += ret;
		dequeued_descs++;
		if (dequeued_cbs >= num)
			break;
	}

	q->aq_dequeued += aq_dequeued;
	q->sw_ring_tail += dequeued_descs;

	/* Update dequeue stats */
	q_data->queue_stats.dequeued_count += dequeued_cbs;

	return dequeued_cbs;
}
/* Dequeue decode operations from ACC100 device. */
static uint16_t
acc100_dequeue_dec(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t num)
{
	struct acc100_queue *q = q_data->queue_private;
	uint16_t dequeue_num;
	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
	uint32_t aq_dequeued = 0;
	uint16_t i;
	uint16_t dequeued_cbs = 0;
	struct rte_bbdev_dec_op *op;
	int ret;

#ifdef RTE_LIBRTE_BBDEV_DEBUG
	if (unlikely(ops == NULL || q == NULL))
		return 0;
#endif

	dequeue_num = (avail < num) ? avail : num;

	for (i = 0; i < dequeue_num; ++i) {
		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
			& q->sw_ring_wrap_mask))->req.op_addr;
		if (op->turbo_dec.code_block_mode == 0)
			ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs,
					&aq_dequeued);
		else
			ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
					dequeued_cbs, &aq_dequeued);

		if (ret < 0)
			break;
		dequeued_cbs += ret;
	}

	q->aq_dequeued += aq_dequeued;
	q->sw_ring_tail += dequeued_cbs;

	/* Update dequeue stats */
	q_data->queue_stats.dequeued_count += i;

	return i;
}
/* Dequeue LDPC decode operations from ACC100 device. */
static uint16_t
acc100_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t num)
{
	struct acc100_queue *q = q_data->queue_private;
	uint16_t dequeue_num;
	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
	uint32_t aq_dequeued = 0;
	uint16_t i;
	uint16_t dequeued_cbs = 0;
	struct rte_bbdev_dec_op *op;
	int ret;

#ifdef RTE_LIBRTE_BBDEV_DEBUG
	if (unlikely(ops == NULL || q == NULL))
		return 0;
#endif

	dequeue_num = RTE_MIN(avail, num);

	for (i = 0; i < dequeue_num; ++i) {
		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
			& q->sw_ring_wrap_mask))->req.op_addr;
		if (op->ldpc_dec.code_block_mode == 0)
			ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs,
					&aq_dequeued);
		else
			ret = dequeue_ldpc_dec_one_op_cb(
					q_data, q, &ops[i], dequeued_cbs,
					&aq_dequeued);

		if (ret < 0)
			break;
		dequeued_cbs += ret;
	}

	q->aq_dequeued += aq_dequeued;
	q->sw_ring_tail += dequeued_cbs;

	/* Update dequeue stats */
	q_data->queue_stats.dequeued_count += i;

	return i;
}
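
/*
 * Illustrative usage sketch (not part of the driver; dev_id, queue_id and
 * nb_enqueued are assumptions): applications typically poll the public
 * burst API until all outstanding ops are returned:
 *
 *	struct rte_bbdev_dec_op *deq_ops[8];
 *	uint16_t deq = 0;
 *	while (deq < nb_enqueued)
 *		deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
 *				&deq_ops[deq], nb_enqueued - deq);
 */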
/* Initialization Function */
static void
acc100_bbdev_init(struct rte_bbdev *dev, struct rte_pci_driver *drv)
{
	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);

	dev->dev_ops = &acc100_bbdev_ops;
	dev->enqueue_enc_ops = acc100_enqueue_enc;
	dev->enqueue_dec_ops = acc100_enqueue_dec;
	dev->dequeue_enc_ops = acc100_dequeue_enc;
	dev->dequeue_dec_ops = acc100_dequeue_dec;
	dev->enqueue_ldpc_enc_ops = acc100_enqueue_ldpc_enc;
	dev->enqueue_ldpc_dec_ops = acc100_enqueue_ldpc_dec;
	dev->dequeue_ldpc_enc_ops = acc100_dequeue_ldpc_enc;
	dev->dequeue_ldpc_dec_ops = acc100_dequeue_ldpc_dec;

	((struct acc100_device *) dev->data->dev_private)->pf_device =
			!strcmp(drv->driver.name,
			RTE_STR(ACC100PF_DRIVER_NAME));
	((struct acc100_device *) dev->data->dev_private)->mmio_base =
			pci_dev->mem_resource[0].addr;

	rte_bbdev_log_debug("Init device %s [%s] @ vaddr %p paddr %#"PRIx64"",
			drv->driver.name, dev->data->name,
			(void *)pci_dev->mem_resource[0].addr,
			pci_dev->mem_resource[0].phys_addr);
}
static int acc100_pci_probe(struct rte_pci_driver *pci_drv,
	struct rte_pci_device *pci_dev)
{
	struct rte_bbdev *bbdev = NULL;
	char dev_name[RTE_BBDEV_NAME_MAX_LEN];

	if (pci_dev == NULL) {
		rte_bbdev_log(ERR, "NULL PCI device");
		return -EINVAL;
	}

	rte_pci_device_name(&pci_dev->addr, dev_name, sizeof(dev_name));

	/* Allocate memory to be used privately by drivers */
	bbdev = rte_bbdev_allocate(pci_dev->device.name);
	if (bbdev == NULL)
		return -ENODEV;

	/* allocate device private memory */
	bbdev->data->dev_private = rte_zmalloc_socket(dev_name,
			sizeof(struct acc100_device), RTE_CACHE_LINE_SIZE,
			pci_dev->device.numa_node);

	if (bbdev->data->dev_private == NULL) {
		rte_bbdev_log(CRIT,
				"Allocate of %zu bytes for device \"%s\" failed",
				sizeof(struct acc100_device), dev_name);
		rte_bbdev_release(bbdev);
		return -ENOMEM;
	}

	/* Fill HW specific part of device structure */
	bbdev->device = &pci_dev->device;
	bbdev->intr_handle = &pci_dev->intr_handle;
	bbdev->data->socket_id = pci_dev->device.numa_node;

	/* Invoke ACC100 device initialization function */
	acc100_bbdev_init(bbdev, pci_drv);

	rte_bbdev_log_debug("Initialised bbdev %s (id = %u)",
			dev_name, bbdev->data->dev_id);
	return 0;
}
static int acc100_pci_remove(struct rte_pci_device *pci_dev)
{
	struct rte_bbdev *bbdev;
	int ret;
	uint8_t dev_id;

	if (pci_dev == NULL)
		return -EINVAL;

	/* Find device */
	bbdev = rte_bbdev_get_named_dev(pci_dev->device.name);
	if (bbdev == NULL) {
		rte_bbdev_log(CRIT,
				"Couldn't find HW dev \"%s\" to uninitialise it",
				pci_dev->device.name);
		return -ENODEV;
	}
	dev_id = bbdev->data->dev_id;

	/* free device private memory before close */
	rte_free(bbdev->data->dev_private);

	/* Close device */
	ret = rte_bbdev_close(dev_id);
	if (ret < 0)
		rte_bbdev_log(ERR,
				"Device %i failed to close during uninit: %i",
				dev_id, ret);

	/* release bbdev from library */
	rte_bbdev_release(bbdev);

	rte_bbdev_log_debug("Destroyed bbdev = %u", dev_id);

	return 0;
}
static struct rte_pci_driver acc100_pci_pf_driver = {
		.probe = acc100_pci_probe,
		.remove = acc100_pci_remove,
		.id_table = pci_id_acc100_pf_map,
		.drv_flags = RTE_PCI_DRV_NEED_MAPPING
};

static struct rte_pci_driver acc100_pci_vf_driver = {
		.probe = acc100_pci_probe,
		.remove = acc100_pci_remove,
		.id_table = pci_id_acc100_vf_map,
		.drv_flags = RTE_PCI_DRV_NEED_MAPPING
};

RTE_PMD_REGISTER_PCI(ACC100PF_DRIVER_NAME, acc100_pci_pf_driver);
RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME, pci_id_acc100_pf_map);
RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME, acc100_pci_vf_driver);
RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME, pci_id_acc100_vf_map);