dma/ioat: add data path job submission
drivers/dma/ioat/ioat_dmadev.c (dpdk.git)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2021 Intel Corporation
 */

#include <rte_bus_pci.h>
#include <rte_dmadev_pmd.h>
#include <rte_malloc.h>
#include <rte_prefetch.h>

#include "ioat_internal.h"

static struct rte_pci_driver ioat_pmd_drv;

RTE_LOG_REGISTER_DEFAULT(ioat_pmd_logtype, INFO);

#define DESC_SZ sizeof(struct ioat_dma_hw_desc)

#define IOAT_PMD_NAME dmadev_ioat
#define IOAT_PMD_NAME_STR RTE_STR(IOAT_PMD_NAME)

/* IOAT operations. */
enum rte_ioat_ops {
        ioat_op_copy = 0,       /* Standard DMA Operation */
        ioat_op_fill            /* Block Fill */
};

/* Configure a device. */
static int
ioat_dev_configure(struct rte_dma_dev *dev __rte_unused, const struct rte_dma_conf *dev_conf,
                uint32_t conf_sz)
{
        if (sizeof(struct rte_dma_conf) != conf_sz)
                return -EINVAL;

        if (dev_conf->nb_vchans != 1)
                return -EINVAL;

        return 0;
}

/* Setup a virtual channel for IOAT, only 1 vchan is supported. */
static int
ioat_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
                const struct rte_dma_vchan_conf *qconf, uint32_t qconf_sz)
{
        struct ioat_dmadev *ioat = dev->fp_obj->dev_private;
        uint16_t max_desc = qconf->nb_desc;
        int i;

        if (sizeof(struct rte_dma_vchan_conf) != qconf_sz)
                return -EINVAL;

        ioat->qcfg = *qconf;

        if (!rte_is_power_of_2(max_desc)) {
                max_desc = rte_align32pow2(max_desc);
                IOAT_PMD_DEBUG("DMA dev %u using %u descriptors", dev->data->dev_id, max_desc);
                ioat->qcfg.nb_desc = max_desc;
        }

        /* In case we are reconfiguring a device, free any existing memory. */
        rte_free(ioat->desc_ring);

        ioat->desc_ring = rte_zmalloc(NULL, sizeof(*ioat->desc_ring) * max_desc, 0);
        if (ioat->desc_ring == NULL)
                return -ENOMEM;

        ioat->ring_addr = rte_mem_virt2iova(ioat->desc_ring);

        ioat->status_addr = rte_mem_virt2iova(ioat) + offsetof(struct ioat_dmadev, status);

        /* Ensure all counters are reset, if reconfiguring/restarting device. */
        ioat->next_read = 0;
        ioat->next_write = 0;
        ioat->last_write = 0;
        ioat->offset = 0;
        ioat->failure = 0;

        /* Configure descriptor ring - each one points to next. */
        for (i = 0; i < ioat->qcfg.nb_desc; i++) {
                ioat->desc_ring[i].next = ioat->ring_addr +
                                (((i + 1) % ioat->qcfg.nb_desc) * DESC_SZ);
        }

        return 0;
}
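
/* Illustrative sketch (not part of the driver): how an application might
 * reach the configure/vchan-setup path above through the generic dmadev API.
 * The device id (0) and ring size (512) are assumptions for the example;
 * only one vchan is supported, and a non-power-of-2 nb_desc is rounded up
 * by ioat_vchan_setup() as shown above.
 *
 *	#include <rte_dmadev.h>
 *
 *	int16_t dev_id = 0;                     // assumed dmadev id
 *	struct rte_dma_conf cfg = { .nb_vchans = 1 };
 *	struct rte_dma_vchan_conf qcfg = {
 *		.direction = RTE_DMA_DIR_MEM_TO_MEM,
 *		.nb_desc = 512,                 // assumed ring size
 *	};
 *
 *	if (rte_dma_configure(dev_id, &cfg) < 0 ||
 *			rte_dma_vchan_setup(dev_id, 0, &qcfg) < 0 ||
 *			rte_dma_start(dev_id) < 0)
 *		rte_exit(EXIT_FAILURE, "dmadev %d setup failed\n", dev_id);
 */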

/* Recover IOAT device. */
static inline int
__ioat_recover(struct ioat_dmadev *ioat)
{
        uint32_t chanerr, retry = 0;
        uint16_t mask = ioat->qcfg.nb_desc - 1;

        /* Clear any channel errors. Reading and writing to chanerr does this. */
        chanerr = ioat->regs->chanerr;
        ioat->regs->chanerr = chanerr;

        /* Reset Channel. */
        ioat->regs->chancmd = IOAT_CHANCMD_RESET;

        /* Write new chain address to trigger state change. */
        ioat->regs->chainaddr = ioat->desc_ring[(ioat->next_read - 1) & mask].next;
        /* Ensure channel control and status addr are correct. */
        ioat->regs->chanctrl = IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
                        IOAT_CHANCTRL_ERR_COMPLETION_EN;
        ioat->regs->chancmp = ioat->status_addr;

        /* Allow HW time to move to the ARMED state. */
        do {
                rte_pause();
                retry++;
        } while (ioat->regs->chansts != IOAT_CHANSTS_ARMED && retry < 200);

        /* Exit as failure if device is still HALTED. */
        if (ioat->regs->chansts != IOAT_CHANSTS_ARMED)
                return -1;

        /* Store next write as offset as recover will move HW and SW ring out of sync. */
        ioat->offset = ioat->next_read;

        /* Prime status register with previous address. */
        ioat->status = ioat->desc_ring[(ioat->next_read - 2) & mask].next;

        return 0;
}

/* Start a configured device. */
static int
ioat_dev_start(struct rte_dma_dev *dev)
{
        struct ioat_dmadev *ioat = dev->fp_obj->dev_private;

        if (ioat->qcfg.nb_desc == 0 || ioat->desc_ring == NULL)
                return -EBUSY;

        /* Inform hardware of where the descriptor ring is. */
        ioat->regs->chainaddr = ioat->ring_addr;
        /* Inform hardware of where to write the status/completions. */
        ioat->regs->chancmp = ioat->status_addr;

        /* Prime the status register to be set to the last element. */
        ioat->status = ioat->ring_addr + ((ioat->qcfg.nb_desc - 1) * DESC_SZ);

        IOAT_PMD_DEBUG("IOAT.status: %s [0x%"PRIx64"]",
                        chansts_readable[ioat->status & IOAT_CHANSTS_STATUS],
                        ioat->status);

        if ((ioat->regs->chansts & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED) {
                IOAT_PMD_WARN("Device HALTED on start, attempting to recover\n");
                if (__ioat_recover(ioat) != 0) {
                        IOAT_PMD_ERR("Device couldn't be recovered");
                        return -1;
                }
        }

        return 0;
}

/* Stop a configured device. */
static int
ioat_dev_stop(struct rte_dma_dev *dev)
{
        struct ioat_dmadev *ioat = dev->fp_obj->dev_private;
        uint32_t retry = 0;

        ioat->regs->chancmd = IOAT_CHANCMD_SUSPEND;

        do {
                rte_pause();
                retry++;
        } while ((ioat->regs->chansts & IOAT_CHANSTS_STATUS) != IOAT_CHANSTS_SUSPENDED
                        && retry < 200);

        return ((ioat->regs->chansts & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED) ? 0 : -1;
}

/* Get device information of a device. */
static int
ioat_dev_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *info, uint32_t size)
{
        struct ioat_dmadev *ioat = dev->fp_obj->dev_private;

        if (size < sizeof(*info))
                return -EINVAL;

        info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
                        RTE_DMA_CAPA_OPS_COPY |
                        RTE_DMA_CAPA_OPS_FILL;
        if (ioat->version >= IOAT_VER_3_4)
                info->dev_capa |= RTE_DMA_CAPA_HANDLES_ERRORS;
        info->max_vchans = 1;
        info->min_desc = 32;
        info->max_desc = 4096;

        return 0;
}
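
/* Illustrative sketch (not part of the driver): an application can use
 * rte_dma_info_get() to keep its requested ring size inside the limits
 * reported above before calling rte_dma_vchan_setup(). The dev_id and
 * requested size are assumptions for the example.
 *
 *	struct rte_dma_info info;
 *	uint16_t nb_desc = 1024;                // assumed application request
 *
 *	if (rte_dma_info_get(dev_id, &info) == 0)
 *		nb_desc = RTE_MAX(info.min_desc, RTE_MIN(nb_desc, info.max_desc));
 */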

/* Close a configured device. */
static int
ioat_dev_close(struct rte_dma_dev *dev)
{
        struct ioat_dmadev *ioat;

        if (!dev) {
                IOAT_PMD_ERR("Invalid device");
                return -EINVAL;
        }

        ioat = dev->fp_obj->dev_private;
        if (!ioat) {
                IOAT_PMD_ERR("Error getting dev_private");
                return -EINVAL;
        }

        rte_free(ioat->desc_ring);

        return 0;
}

/* Trigger hardware to begin performing enqueued operations. */
static inline void
__submit(struct ioat_dmadev *ioat)
{
        *ioat->doorbell = ioat->next_write - ioat->offset;

        ioat->last_write = ioat->next_write;
}

/* External submit function wrapper. */
static int
ioat_submit(void *dev_private, uint16_t qid __rte_unused)
{
        struct ioat_dmadev *ioat = dev_private;

        __submit(ioat);

        return 0;
}
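
/* Illustrative sketch (not part of the driver): the doorbell write in
 * __submit() is what an application triggers either per-burst with
 * rte_dma_submit(), or per-operation by passing RTE_DMA_OP_FLAG_SUBMIT
 * on the last enqueue. Batching the doorbell is usually cheaper. The
 * dev_id, address arrays, burst size and length are assumptions here.
 *
 *	for (i = 0; i < burst_size; i++)
 *		rte_dma_copy(dev_id, 0, srcs[i], dsts[i], COPY_LEN, 0);
 *	rte_dma_submit(dev_id, 0);              // one doorbell for the burst
 */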

/* Write descriptor for enqueue. */
static inline int
__write_desc(void *dev_private, uint32_t op, uint64_t src, phys_addr_t dst,
                unsigned int length, uint64_t flags)
{
        struct ioat_dmadev *ioat = dev_private;
        uint16_t ret;
        const unsigned short mask = ioat->qcfg.nb_desc - 1;
        const unsigned short read = ioat->next_read;
        unsigned short write = ioat->next_write;
        const unsigned short space = mask + read - write;
        struct ioat_dma_hw_desc *desc;

        if (space == 0)
                return -ENOSPC;

        ioat->next_write = write + 1;
        write &= mask;

        desc = &ioat->desc_ring[write];
        desc->size = length;
        desc->u.control_raw = (uint32_t)((op << IOAT_CMD_OP_SHIFT) |
                        (1 << IOAT_COMP_UPDATE_SHIFT));

        /* In IOAT the fence ensures that all operations, including the current
         * one, are completed before moving on. DMAdev assumes that the fence
         * only ensures that all operations before the current one are completed
         * before the current one starts, so in IOAT we set the fence on the
         * previous descriptor.
         */
        if (flags & RTE_DMA_OP_FLAG_FENCE)
                ioat->desc_ring[(write - 1) & mask].u.control.fence = 1;

        desc->src_addr = src;
        desc->dest_addr = dst;

        rte_prefetch0(&ioat->desc_ring[ioat->next_write & mask]);

        ret = (uint16_t)(ioat->next_write - 1);

        if (flags & RTE_DMA_OP_FLAG_SUBMIT)
                __submit(ioat);

        return ret;
}
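
/* Worked example of the free-space check above (an illustration, assuming a
 * 4-descriptor ring, so mask = 3). next_read and next_write are free-running
 * 16-bit counters, so "mask + read - write" wraps correctly:
 *
 *	read = 0, write = 0  ->  space = 3 + 0 - 0 = 3   (empty ring, 3 usable slots)
 *	read = 0, write = 3  ->  space = 3 + 0 - 3 = 0   (ring full, -ENOSPC)
 *	read = 5, write = 7  ->  space = 3 + 5 - 7 = 1   (one slot left)
 *
 * Note that at most nb_desc - 1 descriptors can ever be outstanding: one ring
 * slot is always left unused by this formula.
 */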

/* Enqueue a fill operation onto the ioat device. */
static int
ioat_enqueue_fill(void *dev_private, uint16_t qid __rte_unused, uint64_t pattern,
                rte_iova_t dst, unsigned int length, uint64_t flags)
{
        return __write_desc(dev_private, ioat_op_fill, pattern, dst, length, flags);
}

/* Enqueue a copy operation onto the ioat device. */
static int
ioat_enqueue_copy(void *dev_private, uint16_t qid __rte_unused, rte_iova_t src,
                rte_iova_t dst, unsigned int length, uint64_t flags)
{
        return __write_desc(dev_private, ioat_op_copy, src, dst, length, flags);
}
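
/* Illustrative sketch (not part of the driver): enqueueing a fill followed by
 * a fenced copy through the public API. RTE_DMA_OP_FLAG_FENCE guarantees the
 * fill has completed before the copy starts (mapped onto the previous IOAT
 * descriptor as described in __write_desc() above), and RTE_DMA_OP_FLAG_SUBMIT
 * rings the doorbell immediately. The dev_id, IOVAs and length are assumptions.
 *
 *	int ret;
 *
 *	ret = rte_dma_fill(dev_id, 0, 0x0, dst_iova, len, 0);
 *	if (ret >= 0)
 *		ret = rte_dma_copy(dev_id, 0, dst_iova, copy_iova, len,
 *				RTE_DMA_OP_FLAG_FENCE | RTE_DMA_OP_FLAG_SUBMIT);
 *	if (ret < 0)
 *		// ring full (-ENOSPC) or other error: submit what is queued, retry later
 *		rte_dma_submit(dev_id, 0);
 */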

/* Dump DMA device info. */
static int
__dev_dump(void *dev_private, FILE *f)
{
        struct ioat_dmadev *ioat = dev_private;
        uint64_t chansts_masked = ioat->regs->chansts & IOAT_CHANSTS_STATUS;
        uint32_t chanerr = ioat->regs->chanerr;
        uint64_t mask = (ioat->qcfg.nb_desc - 1);
        char ver = ioat->version;

        fprintf(f, "========= IOAT =========\n");
        fprintf(f, "  IOAT version: %d.%d\n", ver >> 4, ver & 0xF);
        fprintf(f, "  Channel status: %s [0x%"PRIx64"]\n",
                        chansts_readable[chansts_masked], chansts_masked);
        fprintf(f, "  ChainADDR: 0x%"PRIx64"\n", ioat->regs->chainaddr);
        if (chanerr == 0) {
                fprintf(f, "  No Channel Errors\n");
        } else {
                fprintf(f, "  ChanERR: 0x%"PRIx32"\n", chanerr);
                if (chanerr & IOAT_CHANERR_INVALID_SRC_ADDR_MASK)
                        fprintf(f, "    Invalid Source Address\n");
                if (chanerr & IOAT_CHANERR_INVALID_DST_ADDR_MASK)
                        fprintf(f, "    Invalid Destination Address\n");
                if (chanerr & IOAT_CHANERR_INVALID_LENGTH_MASK)
                        fprintf(f, "    Invalid Descriptor Length\n");
                if (chanerr & IOAT_CHANERR_DESCRIPTOR_READ_ERROR_MASK)
                        fprintf(f, "    Descriptor Read Error\n");
                if ((chanerr & ~(IOAT_CHANERR_INVALID_SRC_ADDR_MASK |
                                IOAT_CHANERR_INVALID_DST_ADDR_MASK |
                                IOAT_CHANERR_INVALID_LENGTH_MASK |
                                IOAT_CHANERR_DESCRIPTOR_READ_ERROR_MASK)) != 0)
                        fprintf(f, "    Unknown Error(s)\n");
        }
        fprintf(f, "== Private Data ==\n");
        fprintf(f, "  Config: { ring_size: %u }\n", ioat->qcfg.nb_desc);
        fprintf(f, "  Status: 0x%"PRIx64"\n", ioat->status);
        fprintf(f, "  Status IOVA: 0x%"PRIx64"\n", ioat->status_addr);
        fprintf(f, "  Status ADDR: %p\n", &ioat->status);
        fprintf(f, "  Ring IOVA: 0x%"PRIx64"\n", ioat->ring_addr);
        fprintf(f, "  Ring ADDR: 0x%"PRIx64"\n", ioat->desc_ring[0].next - DESC_SZ);
        fprintf(f, "  Next write: %"PRIu16"\n", ioat->next_write);
        fprintf(f, "  Next read: %"PRIu16"\n", ioat->next_read);
        struct ioat_dma_hw_desc *desc_ring = &ioat->desc_ring[(ioat->next_write - 1) & mask];
        fprintf(f, "  Last Descriptor Written {\n");
        fprintf(f, "    Size: %"PRIu32"\n", desc_ring->size);
        fprintf(f, "    Control: 0x%"PRIx32"\n", desc_ring->u.control_raw);
        fprintf(f, "    Src: 0x%"PRIx64"\n", desc_ring->src_addr);
        fprintf(f, "    Dest: 0x%"PRIx64"\n", desc_ring->dest_addr);
        fprintf(f, "    Next: 0x%"PRIx64"\n", desc_ring->next);
        fprintf(f, "  }\n");
        fprintf(f, "  Next Descriptor {\n");
        fprintf(f, "    Size: %"PRIu32"\n", ioat->desc_ring[ioat->next_read & mask].size);
        fprintf(f, "    Src: 0x%"PRIx64"\n", ioat->desc_ring[ioat->next_read & mask].src_addr);
        fprintf(f, "    Dest: 0x%"PRIx64"\n", ioat->desc_ring[ioat->next_read & mask].dest_addr);
        fprintf(f, "    Next: 0x%"PRIx64"\n", ioat->desc_ring[ioat->next_read & mask].next);
        fprintf(f, "  }\n");

        return 0;
}

/* Public wrapper for dump. */
static int
ioat_dev_dump(const struct rte_dma_dev *dev, FILE *f)
{
        return __dev_dump(dev->fp_obj->dev_private, f);
}
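
/* Illustrative sketch (not part of the driver): the dump above is reached
 * through rte_dma_dump(), which is handy when debugging a stuck channel.
 * The dev_id is an assumption for the example.
 *
 *	rte_dma_dump(dev_id, stdout);
 */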

/* Create a DMA device. */
static int
ioat_dmadev_create(const char *name, struct rte_pci_device *dev)
{
        static const struct rte_dma_dev_ops ioat_dmadev_ops = {
                .dev_close = ioat_dev_close,
                .dev_configure = ioat_dev_configure,
                .dev_dump = ioat_dev_dump,
                .dev_info_get = ioat_dev_info_get,
                .dev_start = ioat_dev_start,
                .dev_stop = ioat_dev_stop,
                .vchan_setup = ioat_vchan_setup,
        };

        struct rte_dma_dev *dmadev = NULL;
        struct ioat_dmadev *ioat = NULL;
        int retry = 0;

        if (!name) {
                IOAT_PMD_ERR("Invalid name of the device!");
                return -EINVAL;
        }

        /* Allocate device structure. */
        dmadev = rte_dma_pmd_allocate(name, dev->device.numa_node, sizeof(struct ioat_dmadev));
        if (dmadev == NULL) {
                IOAT_PMD_ERR("Unable to allocate dma device");
                return -ENOMEM;
        }

        dmadev->device = &dev->device;

        dmadev->fp_obj->dev_private = dmadev->data->dev_private;

        dmadev->dev_ops = &ioat_dmadev_ops;

        dmadev->fp_obj->copy = ioat_enqueue_copy;
        dmadev->fp_obj->fill = ioat_enqueue_fill;
        dmadev->fp_obj->submit = ioat_submit;

        ioat = dmadev->data->dev_private;
        ioat->dmadev = dmadev;
        ioat->regs = dev->mem_resource[0].addr;
        ioat->doorbell = &ioat->regs->dmacount;
        ioat->qcfg.nb_desc = 0;
        ioat->desc_ring = NULL;
        ioat->version = ioat->regs->cbver;

        /* Do device initialization - reset and set error behaviour. */
        if (ioat->regs->chancnt != 1)
                IOAT_PMD_WARN("%s: Channel count == %d\n", __func__,
                                ioat->regs->chancnt);

        /* Locked by someone else. */
        if (ioat->regs->chanctrl & IOAT_CHANCTRL_CHANNEL_IN_USE) {
                IOAT_PMD_WARN("%s: Channel appears locked\n", __func__);
                ioat->regs->chanctrl = 0;
        }

        /* Clear any previous errors. */
        if (ioat->regs->chanerr != 0) {
                uint32_t val = ioat->regs->chanerr;
                ioat->regs->chanerr = val;
        }

        ioat->regs->chancmd = IOAT_CHANCMD_SUSPEND;
        rte_delay_ms(1);
        ioat->regs->chancmd = IOAT_CHANCMD_RESET;
        rte_delay_ms(1);
        while (ioat->regs->chancmd & IOAT_CHANCMD_RESET) {
                ioat->regs->chainaddr = 0;
                rte_delay_ms(1);
                if (++retry >= 200) {
                        IOAT_PMD_ERR("%s: cannot reset device. CHANCMD=%#"PRIx8
                                        ", CHANSTS=%#"PRIx64", CHANERR=%#"PRIx32"\n",
                                        __func__,
                                        ioat->regs->chancmd,
                                        ioat->regs->chansts,
                                        ioat->regs->chanerr);
                        rte_dma_pmd_release(name);
                        return -EIO;
                }
        }
        ioat->regs->chanctrl = IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
                        IOAT_CHANCTRL_ERR_COMPLETION_EN;

        dmadev->fp_obj->dev_private = ioat;

        dmadev->state = RTE_DMA_DEV_READY;

        return 0;
}

/* Destroy a DMA device. */
static int
ioat_dmadev_destroy(const char *name)
{
        int ret;

        if (!name) {
                IOAT_PMD_ERR("Invalid device name");
                return -EINVAL;
        }

        ret = rte_dma_pmd_release(name);
        if (ret)
                IOAT_PMD_DEBUG("Device cleanup failed");

        return 0;
}

/* Probe DMA device. */
static int
ioat_dmadev_probe(struct rte_pci_driver *drv, struct rte_pci_device *dev)
{
        char name[32];

        rte_pci_device_name(&dev->addr, name, sizeof(name));
        IOAT_PMD_INFO("Init %s on NUMA node %d", name, dev->device.numa_node);

        dev->device.driver = &drv->driver;
        return ioat_dmadev_create(name, dev);
}

/* Remove DMA device. */
static int
ioat_dmadev_remove(struct rte_pci_device *dev)
{
        char name[32];

        rte_pci_device_name(&dev->addr, name, sizeof(name));

        IOAT_PMD_INFO("Closing %s on NUMA node %d",
                        name, dev->device.numa_node);

        return ioat_dmadev_destroy(name);
}

static const struct rte_pci_id pci_id_ioat_map[] = {
        { RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_SKX) },
        { RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX0) },
        { RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX1) },
        { RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX2) },
        { RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX3) },
        { RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX4) },
        { RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX5) },
        { RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX6) },
        { RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX7) },
        { RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDXE) },
        { RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDXF) },
        { RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_ICX) },
        { .vendor_id = 0, /* sentinel */ },
};

static struct rte_pci_driver ioat_pmd_drv = {
        .id_table = pci_id_ioat_map,
        .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
        .probe = ioat_dmadev_probe,
        .remove = ioat_dmadev_remove,
};

RTE_PMD_REGISTER_PCI(IOAT_PMD_NAME, ioat_pmd_drv);
RTE_PMD_REGISTER_PCI_TABLE(IOAT_PMD_NAME, pci_id_ioat_map);
RTE_PMD_REGISTER_KMOD_DEP(IOAT_PMD_NAME, "* igb_uio | uio_pci_generic | vfio-pci");
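
/* Illustrative sketch (not part of the driver): once the PCI bus has probed
 * the device (bound to vfio-pci or one of the other kernel drivers listed in
 * the KMOD_DEP string above), an application can look the dmadev up by its
 * PCI address. The BDF used here is an assumption for the example.
 *
 *	int dev_id = rte_dma_get_dev_id_by_name("0000:00:04.0");
 *	if (dev_id < 0)
 *		rte_exit(EXIT_FAILURE, "IOAT device not found\n");
 */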