dma/cnxk: add scatter-gather copy
[dpdk.git] / drivers / dma / cnxk / cnxk_dmadev.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (C) 2021 Marvell International Ltd.
3  */
4
5 #include <string.h>
6 #include <unistd.h>
7
8 #include <rte_bus.h>
9 #include <rte_bus_pci.h>
10 #include <rte_common.h>
11 #include <rte_eal.h>
12 #include <rte_lcore.h>
13 #include <rte_mempool.h>
14 #include <rte_pci.h>
15 #include <rte_dmadev.h>
16 #include <rte_dmadev_pmd.h>
17
18 #include <roc_api.h>
19 #include <cnxk_dmadev.h>
20
21 static int
22 cnxk_dmadev_info_get(const struct rte_dma_dev *dev,
23                      struct rte_dma_info *dev_info, uint32_t size)
24 {
25         RTE_SET_USED(dev);
26         RTE_SET_USED(size);
27
28         dev_info->max_vchans = 1;
29         dev_info->nb_vchans = 1;
30         dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
31                 RTE_DMA_CAPA_MEM_TO_DEV | RTE_DMA_CAPA_DEV_TO_MEM |
32                 RTE_DMA_CAPA_DEV_TO_DEV | RTE_DMA_CAPA_OPS_COPY |
33                 RTE_DMA_CAPA_OPS_COPY_SG;
34         dev_info->max_desc = DPI_MAX_DESC;
35         dev_info->min_desc = 1;
36         dev_info->max_sges = DPI_MAX_POINTER;
37
38         return 0;
39 }
40
41 static int
42 cnxk_dmadev_configure(struct rte_dma_dev *dev,
43                       const struct rte_dma_conf *conf, uint32_t conf_sz)
44 {
45         struct cnxk_dpi_vf_s *dpivf = NULL;
46         int rc = 0;
47
48         RTE_SET_USED(conf);
49         RTE_SET_USED(conf);
50         RTE_SET_USED(conf_sz);
51         RTE_SET_USED(conf_sz);
52         dpivf = dev->fp_obj->dev_private;
53         rc = roc_dpi_configure(&dpivf->rdpi);
54         if (rc < 0)
55                 plt_err("DMA configure failed err = %d", rc);
56
57         return rc;
58 }
59
60 static int
61 cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
62                         const struct rte_dma_vchan_conf *conf,
63                         uint32_t conf_sz)
64 {
65         struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
66         struct cnxk_dpi_compl_s *comp_data;
67         union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
68         int i;
69
70         RTE_SET_USED(vchan);
71         RTE_SET_USED(conf_sz);
72
73         header->s.pt = DPI_HDR_PT_ZBW_CA;
74
75         switch (conf->direction) {
76         case RTE_DMA_DIR_DEV_TO_MEM:
77                 header->s.xtype = DPI_XTYPE_INBOUND;
78                 header->s.lport = conf->src_port.pcie.coreid;
79                 header->s.fport = 0;
80                 header->s.pvfe = 1;
81                 break;
82         case RTE_DMA_DIR_MEM_TO_DEV:
83                 header->s.xtype = DPI_XTYPE_OUTBOUND;
84                 header->s.lport = 0;
85                 header->s.fport = conf->dst_port.pcie.coreid;
86                 header->s.pvfe = 1;
87                 break;
88         case RTE_DMA_DIR_MEM_TO_MEM:
89                 header->s.xtype = DPI_XTYPE_INTERNAL_ONLY;
90                 header->s.lport = 0;
91                 header->s.fport = 0;
92                 header->s.pvfe = 0;
93                 break;
94         case RTE_DMA_DIR_DEV_TO_DEV:
95                 header->s.xtype = DPI_XTYPE_EXTERNAL_ONLY;
96                 header->s.lport = conf->src_port.pcie.coreid;
97                 header->s.fport = conf->dst_port.pcie.coreid;
98         };
99
100         for (i = 0; i < conf->nb_desc; i++) {
101                 comp_data = rte_zmalloc(NULL, sizeof(*comp_data), 0);
102                 if (comp_data == NULL) {
103                         plt_err("Failed to allocate for comp_data");
104                         return -ENOMEM;
105                 }
106                 dpivf->conf.c_desc.compl_ptr[i] = comp_data;
107         };
108         dpivf->conf.c_desc.max_cnt = DPI_MAX_DESC;
109         dpivf->conf.c_desc.head = 0;
110         dpivf->conf.c_desc.tail = 0;
111
112         return 0;
113 }
114
115 static int
116 cnxk_dmadev_start(struct rte_dma_dev *dev)
117 {
118         struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
119
120         dpivf->desc_idx = 0;
121         dpivf->num_words = 0;
122         roc_dpi_enable(&dpivf->rdpi);
123
124         return 0;
125 }
126
127 static int
128 cnxk_dmadev_stop(struct rte_dma_dev *dev)
129 {
130         struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
131
132         roc_dpi_disable(&dpivf->rdpi);
133
134         return 0;
135 }
136
137 static int
138 cnxk_dmadev_close(struct rte_dma_dev *dev)
139 {
140         struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
141
142         roc_dpi_disable(&dpivf->rdpi);
143         roc_dpi_dev_fini(&dpivf->rdpi);
144
145         return 0;
146 }
147
148 static inline int
149 __dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, int cmd_count)
150 {
151         uint64_t *ptr = dpi->chunk_base;
152
153         if ((cmd_count < DPI_MIN_CMD_SIZE) || (cmd_count > DPI_MAX_CMD_SIZE) ||
154             cmds == NULL)
155                 return -EINVAL;
156
157         /*
158          * Normally there is plenty of room in the current buffer for the
159          * command
160          */
161         if (dpi->chunk_head + cmd_count < dpi->pool_size_m1) {
162                 ptr += dpi->chunk_head;
163                 dpi->chunk_head += cmd_count;
164                 while (cmd_count--)
165                         *ptr++ = *cmds++;
166         } else {
167                 int count;
168                 uint64_t *new_buff = dpi->chunk_next;
169
170                 dpi->chunk_next =
171                         (void *)roc_npa_aura_op_alloc(dpi->aura_handle, 0);
172                 if (!dpi->chunk_next) {
173                         plt_err("Failed to alloc next buffer from NPA");
174                         return -ENOMEM;
175                 }
176
177                 /*
178                  * Figure out how many cmd words will fit in this buffer.
179                  * One location will be needed for the next buffer pointer.
180                  */
181                 count = dpi->pool_size_m1 - dpi->chunk_head;
182                 ptr += dpi->chunk_head;
183                 cmd_count -= count;
184                 while (count--)
185                         *ptr++ = *cmds++;
186
187                 /*
188                  * chunk next ptr is 2 DWORDS
189                  * second DWORD is reserved.
190                  */
191                 *ptr++ = (uint64_t)new_buff;
192                 *ptr = 0;
193
194                 /*
195                  * The current buffer is full and has a link to the next
196                  * buffers. Time to write the rest of the commands into the new
197                  * buffer.
198                  */
199                 dpi->chunk_base = new_buff;
200                 dpi->chunk_head = cmd_count;
201                 ptr = new_buff;
202                 while (cmd_count--)
203                         *ptr++ = *cmds++;
204
205                 /* queue index may be greater than pool size */
206                 if (dpi->chunk_head >= dpi->pool_size_m1) {
207                         new_buff = dpi->chunk_next;
208                         dpi->chunk_next =
209                                 (void *)roc_npa_aura_op_alloc(dpi->aura_handle,
210                                                               0);
211                         if (!dpi->chunk_next) {
212                                 plt_err("Failed to alloc next buffer from NPA");
213                                 return -ENOMEM;
214                         }
215                         /* Write next buffer address */
216                         *ptr = (uint64_t)new_buff;
217                         dpi->chunk_base = new_buff;
218                         dpi->chunk_head = 0;
219                 }
220         }
221
222         return 0;
223 }
224
225 static int
226 cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
227                  rte_iova_t dst, uint32_t length, uint64_t flags)
228 {
229         struct cnxk_dpi_vf_s *dpivf = dev_private;
230         union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
231         struct cnxk_dpi_compl_s *comp_ptr;
232         rte_iova_t fptr, lptr;
233         int num_words = 0;
234         int rc;
235
236         RTE_SET_USED(vchan);
237
238         comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
239         comp_ptr->cdata = DPI_REQ_CDATA;
240         header->s.ptr = (uint64_t)comp_ptr;
241         STRM_INC(dpivf->conf.c_desc);
242
243         header->s.nfst = 1;
244         header->s.nlst = 1;
245
246         /*
247          * For inbound case, src pointers are last pointers.
248          * For all other cases, src pointers are first pointers.
249          */
250         if (header->s.xtype == DPI_XTYPE_INBOUND) {
251                 fptr = dst;
252                 lptr = src;
253         } else {
254                 fptr = src;
255                 lptr = dst;
256         }
257
258         dpivf->cmd[0] = header->u[0];
259         dpivf->cmd[1] = header->u[1];
260         dpivf->cmd[2] = header->u[2];
261         /* word3 is always 0 */
262         num_words += 4;
263         dpivf->cmd[num_words++] = length;
264         dpivf->cmd[num_words++] = fptr;
265         dpivf->cmd[num_words++] = length;
266         dpivf->cmd[num_words++] = lptr;
267
268         rc = __dpi_queue_write(&dpivf->rdpi, dpivf->cmd, num_words);
269         if (!rc) {
270                 if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
271                         rte_wmb();
272                         plt_write64(num_words,
273                                     dpivf->rdpi.rbase + DPI_VDMA_DBELL);
274                 }
275                 dpivf->num_words += num_words;
276         }
277
278         return dpivf->desc_idx++;
279 }
280
281 static int
282 cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan,
283                     const struct rte_dma_sge *src,
284                     const struct rte_dma_sge *dst,
285                     uint16_t nb_src, uint16_t nb_dst, uint64_t flags)
286 {
287         struct cnxk_dpi_vf_s *dpivf = dev_private;
288         union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
289         const struct rte_dma_sge *fptr, *lptr;
290         struct cnxk_dpi_compl_s *comp_ptr;
291         int num_words = 0;
292         int i, rc;
293
294         RTE_SET_USED(vchan);
295
296         comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
297         comp_ptr->cdata = DPI_REQ_CDATA;
298         header->s.ptr = (uint64_t)comp_ptr;
299         STRM_INC(dpivf->conf.c_desc);
300
301         /*
302          * For inbound case, src pointers are last pointers.
303          * For all other cases, src pointers are first pointers.
304          */
305         if (header->s.xtype == DPI_XTYPE_INBOUND) {
306                 header->s.nfst = nb_dst & 0xf;
307                 header->s.nlst = nb_src & 0xf;
308                 fptr = &dst[0];
309                 lptr = &src[0];
310         } else {
311                 header->s.nfst = nb_src & 0xf;
312                 header->s.nlst = nb_dst & 0xf;
313                 fptr = &src[0];
314                 lptr = &dst[0];
315         }
316
317         dpivf->cmd[0] = header->u[0];
318         dpivf->cmd[1] = header->u[1];
319         dpivf->cmd[2] = header->u[2];
320         num_words += 4;
321         for (i = 0; i < header->s.nfst; i++) {
322                 dpivf->cmd[num_words++] = (uint64_t)fptr->length;
323                 dpivf->cmd[num_words++] = fptr->addr;
324                 fptr++;
325         }
326
327         for (i = 0; i < header->s.nlst; i++) {
328                 dpivf->cmd[num_words++] = (uint64_t)lptr->length;
329                 dpivf->cmd[num_words++] = lptr->addr;
330                 lptr++;
331         }
332
333         rc = __dpi_queue_write(&dpivf->rdpi, dpivf->cmd, num_words);
334         if (!rc) {
335                 if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
336                         rte_wmb();
337                         plt_write64(num_words,
338                                     dpivf->rdpi.rbase + DPI_VDMA_DBELL);
339                 }
340                 dpivf->num_words += num_words;
341         }
342
343         return dpivf->desc_idx++;
344 }
345
346 static uint16_t
347 cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls,
348                       uint16_t *last_idx, bool *has_error)
349 {
350         struct cnxk_dpi_vf_s *dpivf = dev_private;
351         int cnt;
352
353         RTE_SET_USED(vchan);
354         for (cnt = 0; cnt < nb_cpls; cnt++) {
355                 struct cnxk_dpi_compl_s *comp_ptr =
356                         dpivf->conf.c_desc.compl_ptr[cnt];
357
358                 if (comp_ptr->cdata) {
359                         *has_error = 1;
360                         break;
361                 }
362         }
363
364         *last_idx = cnt - 1;
365         dpivf->conf.c_desc.tail = cnt;
366
367         return cnt;
368 }
369
370 static uint16_t
371 cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan,
372                              const uint16_t nb_cpls, uint16_t *last_idx,
373                              enum rte_dma_status_code *status)
374 {
375         struct cnxk_dpi_vf_s *dpivf = dev_private;
376         int cnt;
377
378         RTE_SET_USED(vchan);
379         RTE_SET_USED(last_idx);
380         for (cnt = 0; cnt < nb_cpls; cnt++) {
381                 struct cnxk_dpi_compl_s *comp_ptr =
382                         dpivf->conf.c_desc.compl_ptr[cnt];
383                 status[cnt] = comp_ptr->cdata;
384         }
385
386         *last_idx = cnt - 1;
387         dpivf->conf.c_desc.tail = 0;
388
389         return cnt;
390 }
391
392 static int
393 cnxk_dmadev_submit(void *dev_private, uint16_t vchan __rte_unused)
394 {
395         struct cnxk_dpi_vf_s *dpivf = dev_private;
396
397         rte_wmb();
398         plt_write64(dpivf->num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
399
400         return 0;
401 }
402
403 static const struct rte_dma_dev_ops cnxk_dmadev_ops = {
404         .dev_close = cnxk_dmadev_close,
405         .dev_configure = cnxk_dmadev_configure,
406         .dev_info_get = cnxk_dmadev_info_get,
407         .dev_start = cnxk_dmadev_start,
408         .dev_stop = cnxk_dmadev_stop,
409         .vchan_setup = cnxk_dmadev_vchan_setup,
410 };
411
412 static int
413 cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused,
414                   struct rte_pci_device *pci_dev)
415 {
416         struct cnxk_dpi_vf_s *dpivf = NULL;
417         char name[RTE_DEV_NAME_MAX_LEN];
418         struct rte_dma_dev *dmadev;
419         struct roc_dpi *rdpi = NULL;
420         int rc;
421
422         if (!pci_dev->mem_resource[0].addr)
423                 return -ENODEV;
424
425         rc = roc_plt_init();
426         if (rc) {
427                 plt_err("Failed to initialize platform model, rc=%d", rc);
428                 return rc;
429         }
430         memset(name, 0, sizeof(name));
431         rte_pci_device_name(&pci_dev->addr, name, sizeof(name));
432
433         dmadev = rte_dma_pmd_allocate(name, pci_dev->device.numa_node,
434                                       sizeof(*dpivf));
435         if (dmadev == NULL) {
436                 plt_err("dma device allocation failed for %s", name);
437                 return -ENOMEM;
438         }
439
440         dpivf = dmadev->data->dev_private;
441
442         dmadev->device = &pci_dev->device;
443         dmadev->fp_obj->dev_private = dpivf;
444         dmadev->dev_ops = &cnxk_dmadev_ops;
445
446         dmadev->fp_obj->copy = cnxk_dmadev_copy;
447         dmadev->fp_obj->copy_sg = cnxk_dmadev_copy_sg;
448         dmadev->fp_obj->submit = cnxk_dmadev_submit;
449         dmadev->fp_obj->completed = cnxk_dmadev_completed;
450         dmadev->fp_obj->completed_status = cnxk_dmadev_completed_status;
451
452         rdpi = &dpivf->rdpi;
453
454         rdpi->pci_dev = pci_dev;
455         rc = roc_dpi_dev_init(rdpi);
456         if (rc < 0)
457                 goto err_out_free;
458
459         return 0;
460
461 err_out_free:
462         if (dmadev)
463                 rte_dma_pmd_release(name);
464
465         return rc;
466 }
467
468 static int
469 cnxk_dmadev_remove(struct rte_pci_device *pci_dev)
470 {
471         char name[RTE_DEV_NAME_MAX_LEN];
472
473         memset(name, 0, sizeof(name));
474         rte_pci_device_name(&pci_dev->addr, name, sizeof(name));
475
476         return rte_dma_pmd_release(name);
477 }
478
479 static const struct rte_pci_id cnxk_dma_pci_map[] = {
480         {
481                 RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM,
482                                PCI_DEVID_CNXK_DPI_VF)
483         },
484         {
485                 .vendor_id = 0,
486         },
487 };
488
489 static struct rte_pci_driver cnxk_dmadev = {
490         .id_table  = cnxk_dma_pci_map,
491         .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_NEED_IOVA_AS_VA,
492         .probe     = cnxk_dmadev_probe,
493         .remove    = cnxk_dmadev_remove,
494 };
495
496 RTE_PMD_REGISTER_PCI(cnxk_dmadev_pci_driver, cnxk_dmadev);
497 RTE_PMD_REGISTER_PCI_TABLE(cnxk_dmadev_pci_driver, cnxk_dma_pci_map);
498 RTE_PMD_REGISTER_KMOD_DEP(cnxk_dmadev_pci_driver, "vfio-pci");