drivers/baseband/acc100/rte_acc100_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020 Intel Corporation
 */

#include <unistd.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_dev.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_byteorder.h>
#include <rte_errno.h>
#include <rte_branch_prediction.h>
#include <rte_hexdump.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>

#include <rte_bbdev.h>
#include <rte_bbdev_pmd.h>
#include "rte_acc100_pmd.h"

#ifdef RTE_LIBRTE_BBDEV_DEBUG
RTE_LOG_REGISTER(acc100_logtype, pmd.bb.acc100, DEBUG);
#else
RTE_LOG_REGISTER(acc100_logtype, pmd.bb.acc100, NOTICE);
#endif

/* Write to MMIO register address */
static inline void
mmio_write(void *addr, uint32_t value)
{
        *((volatile uint32_t *)(addr)) = rte_cpu_to_le_32(value);
}

/* Write a register of an ACC100 device */
static inline void
acc100_reg_write(struct acc100_device *d, uint32_t offset, uint32_t payload)
{
        void *reg_addr = RTE_PTR_ADD(d->mmio_base, offset);
        mmio_write(reg_addr, payload);
        usleep(ACC100_LONG_WAIT);
}

/* Read a register of an ACC100 device */
static inline uint32_t
acc100_reg_read(struct acc100_device *d, uint32_t offset)
{
        void *reg_addr = RTE_PTR_ADD(d->mmio_base, offset);
        uint32_t ret = *((volatile uint32_t *)(reg_addr));
        return rte_le_to_cpu_32(ret);
}

/* Basic implementation of log2 for exact powers of two (2^N) */
static inline uint32_t
log2_basic(uint32_t value)
{
        return (value == 0) ? 0 : rte_bsf32(value);
}
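
/*
 * Illustrative note: rte_bsf32() returns the index of the least significant
 * set bit, so for exact powers of two log2_basic(1) == 0, log2_basic(64) == 6
 * and log2_basic(0x1000) == 12. For a non-power-of-two input the result is
 * only the lowest set bit position, hence callers must pass exact 2^N values.
 */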

/* Calculate memory alignment offset assuming alignment is 2^N */
static inline uint32_t
calc_mem_alignment_offset(void *unaligned_virt_mem, uint32_t alignment)
{
        rte_iova_t unaligned_phy_mem = rte_malloc_virt2iova(unaligned_virt_mem);
        return (uint32_t)(alignment -
                        (unaligned_phy_mem & (alignment-1)));
}
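
/*
 * Worked example (illustrative): with alignment = 64MB (0x4000000) and an
 * IOVA of 0x5234000, the remainder is 0x1234000 and the returned offset is
 * 0x4000000 - 0x1234000 = 0x2dcc000, which moves the address to the next
 * 64MB boundary at 0x8000000. Note that an already-aligned address yields
 * an offset of one full alignment rather than zero.
 */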

/* Calculate the offset of the enqueue register */
static inline uint32_t
queue_offset(bool pf_device, uint8_t vf_id, uint8_t qgrp_id, uint16_t aq_id)
{
        if (pf_device)
                return ((vf_id << 12) + (qgrp_id << 7) + (aq_id << 3) +
                                HWPfQmgrIngressAq);
        else
                return ((qgrp_id << 7) + (aq_id << 3) +
                                HWVfQmgrIngressAq);
}
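
/*
 * Illustrative example: on the PF, vf_id = 1, qgrp_id = 2, aq_id = 3 maps to
 * (1 << 12) + (2 << 7) + (3 << 3) = 0x1000 + 0x100 + 0x18 bytes past
 * HWPfQmgrIngressAq; per the shifts, the atomic queue index occupies bits
 * 3-6, the queue group bits 7-11 and the VF bundle bits 12 and above.
 */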

enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC};

/* Return the queue topology for a Queue Group Index */
static inline void
qtopFromAcc(struct rte_acc100_queue_topology **qtop, int acc_enum,
                struct rte_acc100_conf *acc100_conf)
{
        struct rte_acc100_queue_topology *p_qtop;
        p_qtop = NULL;
        switch (acc_enum) {
        case UL_4G:
                p_qtop = &(acc100_conf->q_ul_4g);
                break;
        case UL_5G:
                p_qtop = &(acc100_conf->q_ul_5g);
                break;
        case DL_4G:
                p_qtop = &(acc100_conf->q_dl_4g);
                break;
        case DL_5G:
                p_qtop = &(acc100_conf->q_dl_5g);
                break;
        default:
                /* NOTREACHED */
                rte_bbdev_log(ERR, "Unexpected error evaluating qtopFromAcc");
                break;
        }
        *qtop = p_qtop;
}

static void
initQTop(struct rte_acc100_conf *acc100_conf)
{
        acc100_conf->q_ul_4g.num_aqs_per_groups = 0;
        acc100_conf->q_ul_4g.num_qgroups = 0;
        acc100_conf->q_ul_4g.first_qgroup_index = -1;
        acc100_conf->q_ul_5g.num_aqs_per_groups = 0;
        acc100_conf->q_ul_5g.num_qgroups = 0;
        acc100_conf->q_ul_5g.first_qgroup_index = -1;
        acc100_conf->q_dl_4g.num_aqs_per_groups = 0;
        acc100_conf->q_dl_4g.num_qgroups = 0;
        acc100_conf->q_dl_4g.first_qgroup_index = -1;
        acc100_conf->q_dl_5g.num_aqs_per_groups = 0;
        acc100_conf->q_dl_5g.num_qgroups = 0;
        acc100_conf->q_dl_5g.first_qgroup_index = -1;
}

static inline void
updateQtop(uint8_t acc, uint8_t qg, struct rte_acc100_conf *acc100_conf,
                struct acc100_device *d) {
        uint32_t reg;
        struct rte_acc100_queue_topology *q_top = NULL;
        qtopFromAcc(&q_top, acc, acc100_conf);
        if (unlikely(q_top == NULL))
                return;
        uint16_t aq;
        q_top->num_qgroups++;
        if (q_top->first_qgroup_index == -1) {
                q_top->first_qgroup_index = qg;
                /* Can be optimized to assume all are enabled by default */
                reg = acc100_reg_read(d, queue_offset(d->pf_device,
                                0, qg, ACC100_NUM_AQS - 1));
                if (reg & ACC100_QUEUE_ENABLE) {
                        q_top->num_aqs_per_groups = ACC100_NUM_AQS;
                        return;
                }
                q_top->num_aqs_per_groups = 0;
                for (aq = 0; aq < ACC100_NUM_AQS; aq++) {
                        reg = acc100_reg_read(d, queue_offset(d->pf_device,
                                        0, qg, aq));
                        if (reg & ACC100_QUEUE_ENABLE)
                                q_top->num_aqs_per_groups++;
                }
        }
}
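
/*
 * Note on the probe above: updateQtop() first reads the enable bit of the
 * last AQ in the group (ACC100_NUM_AQS - 1); if that queue is enabled it
 * assumes the full complement of ACC100_NUM_AQS queues is enabled, and only
 * otherwise falls back to counting the enabled AQs one by one.
 */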

/* Fetch configuration enabled for the PF/VF using MMIO Read (slow) */
static inline void
fetch_acc100_config(struct rte_bbdev *dev)
{
        struct acc100_device *d = dev->data->dev_private;
        struct rte_acc100_conf *acc100_conf = &d->acc100_conf;
        const struct acc100_registry_addr *reg_addr;
        uint8_t acc, qg;
        uint32_t reg, reg_aq, reg_len0, reg_len1;
        uint32_t reg_mode;

        /* Nothing to do if the configuration has already been fetched */
        if (d->configured)
                return;

        /* Choose correct registry addresses for the device type */
        if (d->pf_device)
                reg_addr = &pf_reg_addr;
        else
                reg_addr = &vf_reg_addr;

        d->ddr_size = (1 + acc100_reg_read(d, reg_addr->ddr_range)) << 10;

        /* Single VF bundle per VF */
        acc100_conf->num_vf_bundles = 1;
        initQTop(acc100_conf);

        struct rte_acc100_queue_topology *q_top = NULL;
        int qman_func_id[ACC100_NUM_ACCS] = {ACC100_ACCMAP_0, ACC100_ACCMAP_1,
                        ACC100_ACCMAP_2, ACC100_ACCMAP_3, ACC100_ACCMAP_4};
        reg = acc100_reg_read(d, reg_addr->qman_group_func);
        for (qg = 0; qg < ACC100_NUM_QGRPS_PER_WORD; qg++) {
                reg_aq = acc100_reg_read(d,
                                queue_offset(d->pf_device, 0, qg, 0));
                if (reg_aq & ACC100_QUEUE_ENABLE) {
                        uint32_t idx = (reg >> (qg * 4)) & 0x7;
                        if (idx < ACC100_NUM_ACCS) {
                                acc = qman_func_id[idx];
                                updateQtop(acc, qg, acc100_conf, d);
                        }
                }
        }

        /* Check the depth of the AQs */
        reg_len0 = acc100_reg_read(d, reg_addr->depth_log0_offset);
        reg_len1 = acc100_reg_read(d, reg_addr->depth_log1_offset);
        for (acc = 0; acc < NUM_ACC; acc++) {
                qtopFromAcc(&q_top, acc, acc100_conf);
                if (q_top->first_qgroup_index < ACC100_NUM_QGRPS_PER_WORD)
                        q_top->aq_depth_log2 = (reg_len0 >>
                                        (q_top->first_qgroup_index * 4))
                                        & 0xF;
                else
                        q_top->aq_depth_log2 = (reg_len1 >>
                                        ((q_top->first_qgroup_index -
                                        ACC100_NUM_QGRPS_PER_WORD) * 4))
                                        & 0xF;
        }

        /* Read PF mode */
        if (d->pf_device) {
                reg_mode = acc100_reg_read(d, HWPfHiPfMode);
                acc100_conf->pf_mode_en = (reg_mode == ACC100_PF_VAL) ? 1 : 0;
        }

        rte_bbdev_log_debug(
                        "%s Config LLR SIGN IN/OUT %s %s QG %u %u %u %u AQ %u %u %u %u Len %u %u %u %u\n",
                        (d->pf_device) ? "PF" : "VF",
                        (acc100_conf->input_pos_llr_1_bit) ? "POS" : "NEG",
                        (acc100_conf->output_pos_llr_1_bit) ? "POS" : "NEG",
                        acc100_conf->q_ul_4g.num_qgroups,
                        acc100_conf->q_dl_4g.num_qgroups,
                        acc100_conf->q_ul_5g.num_qgroups,
                        acc100_conf->q_dl_5g.num_qgroups,
                        acc100_conf->q_ul_4g.num_aqs_per_groups,
                        acc100_conf->q_dl_4g.num_aqs_per_groups,
                        acc100_conf->q_ul_5g.num_aqs_per_groups,
                        acc100_conf->q_dl_5g.num_aqs_per_groups,
                        acc100_conf->q_ul_4g.aq_depth_log2,
                        acc100_conf->q_dl_4g.aq_depth_log2,
                        acc100_conf->q_ul_5g.aq_depth_log2,
                        acc100_conf->q_dl_5g.aq_depth_log2);
}
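
/*
 * Decoding note for the loop above: each queue group's AQ depth is held as a
 * 4-bit log2 field, packed ACC100_NUM_QGRPS_PER_WORD groups per 32-bit
 * register. depth_log0_offset covers the first word's worth of groups and
 * depth_log1_offset the remainder, hence the shift by (index * 4) and the
 * 0xF mask.
 */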

static void
free_base_addresses(void **base_addrs, int size)
{
        int i;
        for (i = 0; i < size; i++)
                rte_free(base_addrs[i]);
}

static inline uint32_t
get_desc_len(void)
{
        return sizeof(union acc100_dma_desc);
}

/* Allocate the 2 * 64MB block for the sw rings */
static int
alloc_2x64mb_sw_rings_mem(struct rte_bbdev *dev, struct acc100_device *d,
                int socket)
{
        uint32_t sw_ring_size = ACC100_SIZE_64MBYTE;
        d->sw_rings_base = rte_zmalloc_socket(dev->device->driver->name,
                        2 * sw_ring_size, RTE_CACHE_LINE_SIZE, socket);
        if (d->sw_rings_base == NULL) {
                rte_bbdev_log(ERR, "Failed to allocate memory for %s:%u",
                                dev->device->driver->name,
                                dev->data->dev_id);
                return -ENOMEM;
        }
        uint32_t next_64mb_align_offset = calc_mem_alignment_offset(
                        d->sw_rings_base, ACC100_SIZE_64MBYTE);
        d->sw_rings = RTE_PTR_ADD(d->sw_rings_base, next_64mb_align_offset);
        d->sw_rings_iova = rte_malloc_virt2iova(d->sw_rings_base) +
                        next_64mb_align_offset;
        d->sw_ring_size = ACC100_MAX_QUEUE_DEPTH * get_desc_len();
        d->sw_ring_max_depth = ACC100_MAX_QUEUE_DEPTH;

        return 0;
}
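
/*
 * Rationale: allocating twice the 64MB target guarantees that a fully
 * 64MB-aligned 64MB window fits somewhere inside the block no matter where
 * the allocator places it; calc_mem_alignment_offset() then locates the
 * start of that window.
 */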

/* Attempt to allocate minimised memory space for sw rings */
static void
alloc_sw_rings_min_mem(struct rte_bbdev *dev, struct acc100_device *d,
                uint16_t num_queues, int socket)
{
        rte_iova_t sw_rings_base_iova, next_64mb_align_addr_iova;
        uint32_t next_64mb_align_offset;
        rte_iova_t sw_ring_iova_end_addr;
        void *base_addrs[ACC100_SW_RING_MEM_ALLOC_ATTEMPTS];
        void *sw_rings_base;
        int i = 0;
        uint32_t q_sw_ring_size = ACC100_MAX_QUEUE_DEPTH * get_desc_len();
        uint32_t dev_sw_ring_size = q_sw_ring_size * num_queues;

        /* Find an aligned block of memory to store sw rings */
        while (i < ACC100_SW_RING_MEM_ALLOC_ATTEMPTS) {
                /*
                 * sw_ring allocated memory is guaranteed to be aligned to
                 * q_sw_ring_size, provided that the requested size is less
                 * than the page size
                 */
                sw_rings_base = rte_zmalloc_socket(
                                dev->device->driver->name,
                                dev_sw_ring_size, q_sw_ring_size, socket);

                if (sw_rings_base == NULL) {
                        rte_bbdev_log(ERR,
                                        "Failed to allocate memory for %s:%u",
                                        dev->device->driver->name,
                                        dev->data->dev_id);
                        break;
                }

                sw_rings_base_iova = rte_malloc_virt2iova(sw_rings_base);
                next_64mb_align_offset = calc_mem_alignment_offset(
                                sw_rings_base, ACC100_SIZE_64MBYTE);
                next_64mb_align_addr_iova = sw_rings_base_iova +
                                next_64mb_align_offset;
                sw_ring_iova_end_addr = sw_rings_base_iova + dev_sw_ring_size;

                /* Check if the end of the sw ring memory block is before the
                 * start of next 64MB aligned mem address
                 */
                if (sw_ring_iova_end_addr < next_64mb_align_addr_iova) {
                        d->sw_rings_iova = sw_rings_base_iova;
                        d->sw_rings = sw_rings_base;
                        d->sw_rings_base = sw_rings_base;
                        d->sw_ring_size = q_sw_ring_size;
                        d->sw_ring_max_depth = ACC100_MAX_QUEUE_DEPTH;
                        break;
                }
                /* Store the address of the unaligned mem block */
                base_addrs[i] = sw_rings_base;
                i++;
        }

        /* Free all unaligned blocks of mem allocated in the loop */
        free_base_addresses(base_addrs, i);
}
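
/*
 * The retry loop above keeps an attempt only when the entire ring area sits
 * below the next 64MB boundary. This matters because acc100_setup_queues()
 * programs the ring base into the device with the IOVA masked down to a
 * 64MB-aligned value, so a ring block straddling that boundary would not be
 * reachable from a single base address.
 */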


/* Allocate the software ring memory and configure the device with it */
static int
acc100_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
{
        uint32_t phys_low, phys_high, payload;
        struct acc100_device *d = dev->data->dev_private;
        const struct acc100_registry_addr *reg_addr;

        if (d->pf_device && !d->acc100_conf.pf_mode_en) {
                rte_bbdev_log(NOTICE,
                                "%s has PF mode disabled. This PF can't be used.",
                                dev->data->name);
                return -ENODEV;
        }

        alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);

        /* If minimal memory space approach failed, then allocate
         * the 2 * 64MB block for the sw rings
         */
        if (d->sw_rings == NULL)
                alloc_2x64mb_sw_rings_mem(dev, d, socket_id);

        if (d->sw_rings == NULL) {
                rte_bbdev_log(NOTICE,
                                "Failure allocating sw_rings memory");
                return -ENODEV;
        }

        /* Configure ACC100 with the base address for DMA descriptor rings
         * Same descriptor rings used for UL and DL DMA Engines
         * Note : Assuming only VF0 bundle is used for PF mode
         */
        phys_high = (uint32_t)(d->sw_rings_iova >> 32);
        phys_low  = (uint32_t)(d->sw_rings_iova & ~(ACC100_SIZE_64MBYTE-1));

        /* Choose correct registry addresses for the device type */
        if (d->pf_device)
                reg_addr = &pf_reg_addr;
        else
                reg_addr = &vf_reg_addr;

        /* Read the populated cfg from ACC100 registers */
        fetch_acc100_config(dev);

        /* Release AXI from PF */
        if (d->pf_device)
                acc100_reg_write(d, HWPfDmaAxiControl, 1);

        acc100_reg_write(d, reg_addr->dma_ring_ul5g_hi, phys_high);
        acc100_reg_write(d, reg_addr->dma_ring_ul5g_lo, phys_low);
        acc100_reg_write(d, reg_addr->dma_ring_dl5g_hi, phys_high);
        acc100_reg_write(d, reg_addr->dma_ring_dl5g_lo, phys_low);
        acc100_reg_write(d, reg_addr->dma_ring_ul4g_hi, phys_high);
        acc100_reg_write(d, reg_addr->dma_ring_ul4g_lo, phys_low);
        acc100_reg_write(d, reg_addr->dma_ring_dl4g_hi, phys_high);
        acc100_reg_write(d, reg_addr->dma_ring_dl4g_lo, phys_low);

        /*
         * Configure Ring Size to the max queue ring size
         * (used for wrapping purpose)
         */
        payload = log2_basic(d->sw_ring_size / 64);
        acc100_reg_write(d, reg_addr->ring_size, payload);

        /* Configure tail pointer for use when SDONE enabled */
        d->tail_ptrs = rte_zmalloc_socket(
                        dev->device->driver->name,
                        ACC100_NUM_QGRPS * ACC100_NUM_AQS * sizeof(uint32_t),
                        RTE_CACHE_LINE_SIZE, socket_id);
        if (d->tail_ptrs == NULL) {
                rte_bbdev_log(ERR, "Failed to allocate tail ptr for %s:%u",
                                dev->device->driver->name,
                                dev->data->dev_id);
                rte_free(d->sw_rings_base);
                return -ENOMEM;
        }
        d->tail_ptr_iova = rte_malloc_virt2iova(d->tail_ptrs);

        phys_high = (uint32_t)(d->tail_ptr_iova >> 32);
        phys_low  = (uint32_t)(d->tail_ptr_iova);
        acc100_reg_write(d, reg_addr->tail_ptrs_ul5g_hi, phys_high);
        acc100_reg_write(d, reg_addr->tail_ptrs_ul5g_lo, phys_low);
        acc100_reg_write(d, reg_addr->tail_ptrs_dl5g_hi, phys_high);
        acc100_reg_write(d, reg_addr->tail_ptrs_dl5g_lo, phys_low);
        acc100_reg_write(d, reg_addr->tail_ptrs_ul4g_hi, phys_high);
        acc100_reg_write(d, reg_addr->tail_ptrs_ul4g_lo, phys_low);
        acc100_reg_write(d, reg_addr->tail_ptrs_dl4g_hi, phys_high);
        acc100_reg_write(d, reg_addr->tail_ptrs_dl4g_lo, phys_low);

        d->harq_layout = rte_zmalloc_socket("HARQ Layout",
                        ACC100_HARQ_LAYOUT * sizeof(*d->harq_layout),
                        RTE_CACHE_LINE_SIZE, dev->data->socket_id);
        if (d->harq_layout == NULL) {
                rte_bbdev_log(ERR, "Failed to allocate harq_layout for %s:%u",
                                dev->device->driver->name,
                                dev->data->dev_id);
                rte_free(d->tail_ptrs);
                rte_free(d->sw_rings_base);
                return -ENOMEM;
        }

        /* Mark as configured properly */
        d->configured = true;

        rte_bbdev_log_debug(
                        "ACC100 (%s) configured sw_rings = %p, sw_rings_iova = %#"
                        PRIx64, dev->data->name, d->sw_rings, d->sw_rings_iova);

        return 0;
}
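
/*
 * Sizing example (illustrative only, assuming the 256-byte descriptor stride
 * implied by the (desc_idx << 8) FCW offset computation in
 * acc100_queue_setup() and a hypothetical ACC100_MAX_QUEUE_DEPTH of 1024):
 * sw_ring_size would be 256KB and the programmed ring_size payload
 * log2(262144 / 64) = 12.
 */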

/* Free memory used for software rings */
static int
acc100_dev_close(struct rte_bbdev *dev)
{
        struct acc100_device *d = dev->data->dev_private;
        if (d->sw_rings_base != NULL) {
                rte_free(d->tail_ptrs);
                rte_free(d->sw_rings_base);
                d->sw_rings_base = NULL;
        }
        /* Ensure all in flight HW transactions are completed */
        usleep(ACC100_LONG_WAIT);
        return 0;
}


/**
 * Report an ACC100 queue index which is free
 * Return 0 to 16k for a valid queue_idx or -1 when no queue is available
 * Note : Only supporting VF0 Bundle for PF mode
 */
static int
acc100_find_free_queue_idx(struct rte_bbdev *dev,
                const struct rte_bbdev_queue_conf *conf)
{
        struct acc100_device *d = dev->data->dev_private;
        int op_2_acc[5] = {0, UL_4G, DL_4G, UL_5G, DL_5G};
        int acc = op_2_acc[conf->op_type];
        struct rte_acc100_queue_topology *qtop = NULL;

        qtopFromAcc(&qtop, acc, &(d->acc100_conf));
        if (qtop == NULL)
                return -1;
        /* Identify the matching QGroup index; groups are sorted by priority */
        uint16_t group_idx = qtop->first_qgroup_index;
        group_idx += conf->priority;
        if (group_idx >= ACC100_NUM_QGRPS ||
                        conf->priority >= qtop->num_qgroups) {
                rte_bbdev_log(INFO, "Invalid Priority on %s, priority %u",
                                dev->data->name, conf->priority);
                return -1;
        }
        /* Find a free AQ_idx */
        uint16_t aq_idx;
        for (aq_idx = 0; aq_idx < qtop->num_aqs_per_groups; aq_idx++) {
                if (((d->q_assigned_bit_map[group_idx] >> aq_idx) & 0x1) == 0) {
                        /* Mark the Queue as assigned */
                        d->q_assigned_bit_map[group_idx] |= (1 << aq_idx);
                        /* Report the AQ Index */
                        return (group_idx << ACC100_GRP_ID_SHIFT) + aq_idx;
                }
        }
        rte_bbdev_log(INFO, "Failed to find free queue on %s, priority %u",
                        dev->data->name, conf->priority);
        return -1;
}
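
/*
 * The returned index packs the queue group number into the upper bits
 * (shifted by ACC100_GRP_ID_SHIFT) and the atomic queue index into the
 * lower bits; acc100_queue_setup() below recovers qgrp_id, vf_id and aq_id
 * with the matching shifts and masks.
 */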

/* Setup ACC100 queue */
static int
acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
                const struct rte_bbdev_queue_conf *conf)
{
        struct acc100_device *d = dev->data->dev_private;
        struct acc100_queue *q;
        int16_t q_idx;

        if (d == NULL) {
                rte_bbdev_log(ERR, "Undefined device");
                return -ENODEV;
        }

        /* Allocate the queue data structure. */
        q = rte_zmalloc_socket(dev->device->driver->name, sizeof(*q),
                        RTE_CACHE_LINE_SIZE, conf->socket);
        if (q == NULL) {
                rte_bbdev_log(ERR, "Failed to allocate queue memory");
                return -ENOMEM;
        }

        q->d = d;
        q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id));
        q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id);

        /* Prepare the Ring with default descriptor format */
        union acc100_dma_desc *desc = NULL;
        unsigned int desc_idx, b_idx;
        int fcw_len = (conf->op_type == RTE_BBDEV_OP_LDPC_ENC ?
                ACC100_FCW_LE_BLEN : (conf->op_type == RTE_BBDEV_OP_TURBO_DEC ?
                ACC100_FCW_TD_BLEN : ACC100_FCW_LD_BLEN));

        for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) {
                desc = q->ring_addr + desc_idx;
                desc->req.word0 = ACC100_DMA_DESC_TYPE;
                desc->req.word1 = 0; /**< Timestamp */
                desc->req.word2 = 0;
                desc->req.word3 = 0;
                uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
                desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
                desc->req.data_ptrs[0].blen = fcw_len;
                desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
                desc->req.data_ptrs[0].last = 0;
                desc->req.data_ptrs[0].dma_ext = 0;
                for (b_idx = 1; b_idx < ACC100_DMA_MAX_NUM_POINTERS - 1;
                                b_idx++) {
                        desc->req.data_ptrs[b_idx].blkid = ACC100_DMA_BLKID_IN;
                        desc->req.data_ptrs[b_idx].last = 1;
                        desc->req.data_ptrs[b_idx].dma_ext = 0;
                        b_idx++;
                        desc->req.data_ptrs[b_idx].blkid =
                                        ACC100_DMA_BLKID_OUT_ENC;
                        desc->req.data_ptrs[b_idx].last = 1;
                        desc->req.data_ptrs[b_idx].dma_ext = 0;
                }
                /* Preset some fields of LDPC FCW */
                desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
                desc->req.fcw_ld.gain_i = 1;
                desc->req.fcw_ld.gain_h = 1;
        }

        q->lb_in = rte_zmalloc_socket(dev->device->driver->name,
                        RTE_CACHE_LINE_SIZE,
                        RTE_CACHE_LINE_SIZE, conf->socket);
        if (q->lb_in == NULL) {
                rte_bbdev_log(ERR, "Failed to allocate lb_in memory");
                rte_free(q);
                return -ENOMEM;
        }
        q->lb_in_addr_iova = rte_malloc_virt2iova(q->lb_in);
        q->lb_out = rte_zmalloc_socket(dev->device->driver->name,
                        RTE_CACHE_LINE_SIZE,
                        RTE_CACHE_LINE_SIZE, conf->socket);
        if (q->lb_out == NULL) {
                rte_bbdev_log(ERR, "Failed to allocate lb_out memory");
                rte_free(q->lb_in);
                rte_free(q);
                return -ENOMEM;
        }
        q->lb_out_addr_iova = rte_malloc_virt2iova(q->lb_out);

        /*
         * Software queue ring wraps synchronously with the HW when it reaches
         * the boundary of the maximum allocated queue size, no matter what the
         * sw queue size is. This wrapping is guarded by setting the wrap_mask
         * to represent the maximum queue size as allocated at the time when
         * the device has been setup (in configure()).
         *
         * The queue depth is set to the queue size value (conf->queue_size).
         * This limits the occupancy of the queue at any point of time, so that
         * the queue does not get swamped with enqueue requests.
         */
        q->sw_ring_depth = conf->queue_size;
        q->sw_ring_wrap_mask = d->sw_ring_max_depth - 1;

        q->op_type = conf->op_type;

        q_idx = acc100_find_free_queue_idx(dev, conf);
        if (q_idx == -1) {
                rte_free(q->lb_in);
                rte_free(q->lb_out);
                rte_free(q);
                return -1;
        }

        q->qgrp_id = (q_idx >> ACC100_GRP_ID_SHIFT) & 0xF;
        q->vf_id = (q_idx >> ACC100_VF_ID_SHIFT) & 0x3F;
        q->aq_id = q_idx & 0xF;
        q->aq_depth = (conf->op_type == RTE_BBDEV_OP_TURBO_DEC) ?
                        (1 << d->acc100_conf.q_ul_4g.aq_depth_log2) :
                        (1 << d->acc100_conf.q_dl_4g.aq_depth_log2);

        q->mmio_reg_enqueue = RTE_PTR_ADD(d->mmio_base,
                        queue_offset(d->pf_device,
                                        q->vf_id, q->qgrp_id, q->aq_id));

        rte_bbdev_log_debug(
                        "Setup dev%u q%u: qgrp_id=%u, vf_id=%u, aq_id=%u, aq_depth=%u, mmio_reg_enqueue=%p",
                        dev->data->dev_id, queue_id, q->qgrp_id, q->vf_id,
                        q->aq_id, q->aq_depth, q->mmio_reg_enqueue);

        dev->data->queues[queue_id].queue_private = q;
        return 0;
}
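
/*
 * Wrap example (illustrative, with a hypothetical sw_ring_max_depth of
 * 1024): sw_ring_wrap_mask is 0x3FF, so ring slot arithmetic of the form
 * (head + i) & q->sw_ring_wrap_mask wraps at the allocated ring boundary
 * even when conf->queue_size is configured smaller than the allocation.
 */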

/* Release ACC100 queue */
static int
acc100_queue_release(struct rte_bbdev *dev, uint16_t q_id)
{
        struct acc100_device *d = dev->data->dev_private;
        struct acc100_queue *q = dev->data->queues[q_id].queue_private;

        if (q != NULL) {
                /* Mark the Queue as un-assigned */
                d->q_assigned_bit_map[q->qgrp_id] &= ~(1ULL << q->aq_id);
                rte_free(q->lb_in);
                rte_free(q->lb_out);
                rte_free(q);
                dev->data->queues[q_id].queue_private = NULL;
        }

        return 0;
}

/* Get ACC100 device info */
static void
acc100_dev_info_get(struct rte_bbdev *dev,
                struct rte_bbdev_driver_info *dev_info)
{
        struct acc100_device *d = dev->data->dev_private;

        static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
                RTE_BBDEV_END_OF_CAPABILITIES_LIST()
        };

        static struct rte_bbdev_queue_conf default_queue_conf;
        default_queue_conf.socket = dev->data->socket_id;
        default_queue_conf.queue_size = ACC100_MAX_QUEUE_DEPTH;

        dev_info->driver_name = dev->device->driver->name;

        /* Read and save the populated config from ACC100 registers */
        fetch_acc100_config(dev);

        /* This isn't ideal because it reports the maximum number of queues,
         * but gives no breakdown of how many are uplink vs. downlink or at
         * which priorities
         */
        dev_info->max_num_queues =
                        d->acc100_conf.q_dl_5g.num_aqs_per_groups *
                        d->acc100_conf.q_dl_5g.num_qgroups +
                        d->acc100_conf.q_ul_5g.num_aqs_per_groups *
                        d->acc100_conf.q_ul_5g.num_qgroups +
                        d->acc100_conf.q_dl_4g.num_aqs_per_groups *
                        d->acc100_conf.q_dl_4g.num_qgroups +
                        d->acc100_conf.q_ul_4g.num_aqs_per_groups *
                        d->acc100_conf.q_ul_4g.num_qgroups;
        dev_info->queue_size_lim = ACC100_MAX_QUEUE_DEPTH;
        dev_info->hardware_accelerated = true;
        dev_info->max_dl_queue_priority =
                        d->acc100_conf.q_dl_4g.num_qgroups - 1;
        dev_info->max_ul_queue_priority =
                        d->acc100_conf.q_ul_4g.num_qgroups - 1;
        dev_info->default_queue_conf = default_queue_conf;
        dev_info->cpu_flag_reqs = NULL;
        dev_info->min_alignment = 64;
        dev_info->capabilities = bbdev_capabilities;
        dev_info->harq_buffer_size = d->ddr_size;
}

static const struct rte_bbdev_ops acc100_bbdev_ops = {
        .setup_queues = acc100_setup_queues,
        .close = acc100_dev_close,
        .info_get = acc100_dev_info_get,
        .queue_setup = acc100_queue_setup,
        .queue_release = acc100_queue_release,
};

/* ACC100 PCI PF address map */
static struct rte_pci_id pci_id_acc100_pf_map[] = {
        {
                RTE_PCI_DEVICE(RTE_ACC100_VENDOR_ID, RTE_ACC100_PF_DEVICE_ID)
        },
        {.device_id = 0},
};

/* ACC100 PCI VF address map */
static struct rte_pci_id pci_id_acc100_vf_map[] = {
        {
                RTE_PCI_DEVICE(RTE_ACC100_VENDOR_ID, RTE_ACC100_VF_DEVICE_ID)
        },
        {.device_id = 0},
};

/* Initialization Function */
static void
acc100_bbdev_init(struct rte_bbdev *dev, struct rte_pci_driver *drv)
{
        struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);

        dev->dev_ops = &acc100_bbdev_ops;

        ((struct acc100_device *) dev->data->dev_private)->pf_device =
                        !strcmp(drv->driver.name,
                                        RTE_STR(ACC100PF_DRIVER_NAME));
        ((struct acc100_device *) dev->data->dev_private)->mmio_base =
                        pci_dev->mem_resource[0].addr;

        rte_bbdev_log_debug("Init device %s [%s] @ vaddr %p paddr %#"PRIx64"",
                        drv->driver.name, dev->data->name,
                        (void *)pci_dev->mem_resource[0].addr,
                        pci_dev->mem_resource[0].phys_addr);
}

static int acc100_pci_probe(struct rte_pci_driver *pci_drv,
        struct rte_pci_device *pci_dev)
{
        struct rte_bbdev *bbdev = NULL;
        char dev_name[RTE_BBDEV_NAME_MAX_LEN];

        if (pci_dev == NULL) {
                rte_bbdev_log(ERR, "NULL PCI device");
                return -EINVAL;
        }

        rte_pci_device_name(&pci_dev->addr, dev_name, sizeof(dev_name));

        /* Allocate memory to be used privately by drivers */
        bbdev = rte_bbdev_allocate(pci_dev->device.name);
        if (bbdev == NULL)
                return -ENODEV;

        /* allocate device private memory */
        bbdev->data->dev_private = rte_zmalloc_socket(dev_name,
                        sizeof(struct acc100_device), RTE_CACHE_LINE_SIZE,
                        pci_dev->device.numa_node);

        if (bbdev->data->dev_private == NULL) {
                rte_bbdev_log(CRIT,
                                "Allocation of %zu bytes for device \"%s\" failed",
                                sizeof(struct acc100_device), dev_name);
                rte_bbdev_release(bbdev);
                return -ENOMEM;
        }

        /* Fill HW specific part of device structure */
        bbdev->device = &pci_dev->device;
        bbdev->intr_handle = &pci_dev->intr_handle;
        bbdev->data->socket_id = pci_dev->device.numa_node;

        /* Invoke ACC100 device initialization function */
        acc100_bbdev_init(bbdev, pci_drv);

        rte_bbdev_log_debug("Initialised bbdev %s (id = %u)",
                        dev_name, bbdev->data->dev_id);
        return 0;
}

static int acc100_pci_remove(struct rte_pci_device *pci_dev)
{
        struct rte_bbdev *bbdev;
        int ret;
        uint8_t dev_id;

        if (pci_dev == NULL)
                return -EINVAL;

        /* Find device */
        bbdev = rte_bbdev_get_named_dev(pci_dev->device.name);
        if (bbdev == NULL) {
                rte_bbdev_log(CRIT,
                                "Couldn't find HW dev \"%s\" to uninitialise it",
                                pci_dev->device.name);
                return -ENODEV;
        }
        dev_id = bbdev->data->dev_id;

        /* free device private memory before close */
        rte_free(bbdev->data->dev_private);

        /* Close device */
        ret = rte_bbdev_close(dev_id);
        if (ret < 0)
                rte_bbdev_log(ERR,
                                "Device %i failed to close during uninit: %i",
                                dev_id, ret);

        /* release bbdev from library */
        rte_bbdev_release(bbdev);

        rte_bbdev_log_debug("Destroyed bbdev = %u", dev_id);

        return 0;
}

static struct rte_pci_driver acc100_pci_pf_driver = {
                .probe = acc100_pci_probe,
                .remove = acc100_pci_remove,
                .id_table = pci_id_acc100_pf_map,
                .drv_flags = RTE_PCI_DRV_NEED_MAPPING
};

static struct rte_pci_driver acc100_pci_vf_driver = {
                .probe = acc100_pci_probe,
                .remove = acc100_pci_remove,
                .id_table = pci_id_acc100_vf_map,
                .drv_flags = RTE_PCI_DRV_NEED_MAPPING
};

RTE_PMD_REGISTER_PCI(ACC100PF_DRIVER_NAME, acc100_pci_pf_driver);
RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME, pci_id_acc100_pf_map);
RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME, acc100_pci_vf_driver);
RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME, pci_id_acc100_vf_map);