/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2021 Intel Corporation
 */

#include <rte_bus_pci.h>
#include <rte_dmadev_pmd.h>
#include <rte_malloc.h>
#include <rte_prefetch.h>

#include "ioat_internal.h"

static struct rte_pci_driver ioat_pmd_drv;

RTE_LOG_REGISTER_DEFAULT(ioat_pmd_logtype, INFO);

#define DESC_SZ sizeof(struct ioat_dma_hw_desc)

#define IOAT_PMD_NAME dmadev_ioat
#define IOAT_PMD_NAME_STR RTE_STR(IOAT_PMD_NAME)

/* IOAT operations. */
enum rte_ioat_ops {
	ioat_op_copy = 0,	/* Standard DMA Operation */
	ioat_op_fill		/* Block Fill */
};

/* Configure a device. */
static int
ioat_dev_configure(struct rte_dma_dev *dev __rte_unused, const struct rte_dma_conf *dev_conf,
		uint32_t conf_sz)
{
	if (sizeof(struct rte_dma_conf) != conf_sz)
		return -EINVAL;

	if (dev_conf->nb_vchans != 1)
		return -EINVAL;

	return 0;
}

/* Setup a virtual channel for IOAT, only 1 vchan is supported. */
static int
ioat_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
		const struct rte_dma_vchan_conf *qconf, uint32_t qconf_sz)
{
	struct ioat_dmadev *ioat = dev->fp_obj->dev_private;
	uint16_t max_desc = qconf->nb_desc;
	int i;

	if (sizeof(struct rte_dma_vchan_conf) != qconf_sz)
		return -EINVAL;

	ioat->qcfg = *qconf;

	/* Round the ring size up to a power of two for mask-based indexing. */
	if (!rte_is_power_of_2(max_desc)) {
		max_desc = rte_align32pow2(max_desc);
		IOAT_PMD_DEBUG("DMA dev %u using %u descriptors", dev->data->dev_id, max_desc);
		ioat->qcfg.nb_desc = max_desc;
	}

	/* In case we are reconfiguring a device, free any existing memory. */
	rte_free(ioat->desc_ring);

	ioat->desc_ring = rte_zmalloc(NULL, sizeof(*ioat->desc_ring) * max_desc, 0);
	if (ioat->desc_ring == NULL)
		return -ENOMEM;

	ioat->ring_addr = rte_mem_virt2iova(ioat->desc_ring);

	/* Hardware writes completion status back to this IOVA. */
	ioat->status_addr = rte_mem_virt2iova(ioat) + offsetof(struct ioat_dmadev, status);

	/* Ensure all counters are reset, if reconfiguring/restarting device. */
	ioat->next_read = 0;
	ioat->next_write = 0;
	ioat->last_write = 0;
	ioat->offset = 0;
	ioat->failure = 0;

	/* Configure descriptor ring - each one points to next. */
	for (i = 0; i < ioat->qcfg.nb_desc; i++) {
		ioat->desc_ring[i].next = ioat->ring_addr +
				(((i + 1) % ioat->qcfg.nb_desc) * DESC_SZ);
	}

	return 0;
}
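
/* The ring is chained through the descriptors' "next" IOVA pointers, so the
 * hardware walks it without any wrap register; the power-of-two size set
 * above lets the driver index it with a simple (nb_desc - 1) mask.
 */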

/* Recover IOAT device. */
static int
__ioat_recover(struct ioat_dmadev *ioat)
{
	uint32_t chanerr, retry = 0;
	uint16_t mask = ioat->qcfg.nb_desc - 1;

	/* Clear any channel errors. Reading and writing to chanerr does this. */
	chanerr = ioat->regs->chanerr;
	ioat->regs->chanerr = chanerr;

	/* Reset Channel. */
	ioat->regs->chancmd = IOAT_CHANCMD_RESET;

	/* Write new chain address to trigger state change. */
	ioat->regs->chainaddr = ioat->desc_ring[(ioat->next_read - 1) & mask].next;
	/* Ensure channel control and status addr are correct. */
	ioat->regs->chanctrl = IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
			IOAT_CHANCTRL_ERR_COMPLETION_EN;
	ioat->regs->chancmp = ioat->status_addr;

	/* Allow HW time to move to the ARMED state. */
	do {
		rte_pause();
		retry++;
	} while (ioat->regs->chansts != IOAT_CHANSTS_ARMED && retry < 200);

	/* Exit as failure if device is still HALTED. */
	if (ioat->regs->chansts != IOAT_CHANSTS_ARMED)
		return -1;

	/* Store next write as offset as recover will move HW and SW ring out of sync. */
	ioat->offset = ioat->next_read;

	/* Prime status register with previous address. */
	ioat->status = ioat->desc_ring[(ioat->next_read - 2) & mask].next;

	return 0;
}
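
/* Note on the priming above: hardware reports progress by writing the IOVA of
 * the last completed descriptor to the status address. Seeding it with the
 * "next" field of the entry two behind next_read (i.e. the IOVA of descriptor
 * next_read - 1) marks everything before next_read as already completed.
 */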

/* Start a configured device. */
static int
ioat_dev_start(struct rte_dma_dev *dev)
{
	struct ioat_dmadev *ioat = dev->fp_obj->dev_private;

	if (ioat->qcfg.nb_desc == 0 || ioat->desc_ring == NULL)
		return -EBUSY;

	/* Inform hardware of where the descriptor ring is. */
	ioat->regs->chainaddr = ioat->ring_addr;
	/* Inform hardware of where to write the status/completions. */
	ioat->regs->chancmp = ioat->status_addr;

	/* Prime the status register to be set to the last element. */
	ioat->status = ioat->ring_addr + ((ioat->qcfg.nb_desc - 1) * DESC_SZ);

	IOAT_PMD_DEBUG("IOAT.status: %s [0x%"PRIx64"]",
			chansts_readable[ioat->status & IOAT_CHANSTS_STATUS],
			ioat->status);

	if ((ioat->regs->chansts & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED) {
		IOAT_PMD_WARN("Device HALTED on start, attempting to recover");
		if (__ioat_recover(ioat) != 0) {
			IOAT_PMD_ERR("Device couldn't be recovered");
			return -1;
		}
	}

	return 0;
}

/* Stop a configured device. */
static int
ioat_dev_stop(struct rte_dma_dev *dev)
{
	struct ioat_dmadev *ioat = dev->fp_obj->dev_private;
	uint32_t retry = 0;

	ioat->regs->chancmd = IOAT_CHANCMD_SUSPEND;

	/* Allow HW time to move to the SUSPENDED state. */
	do {
		rte_pause();
		retry++;
	} while ((ioat->regs->chansts & IOAT_CHANSTS_STATUS) != IOAT_CHANSTS_SUSPENDED
			&& retry < 200);

	return ((ioat->regs->chansts & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED) ? 0 : -1;
}

/* Get device information of a device. */
static int
ioat_dev_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *info, uint32_t size)
{
	struct ioat_dmadev *ioat = dev->fp_obj->dev_private;
	if (size < sizeof(*info))
		return -EINVAL;
	info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
			RTE_DMA_CAPA_OPS_COPY |
			RTE_DMA_CAPA_OPS_FILL;
	if (ioat->version >= IOAT_VER_3_4)
		info->dev_capa |= RTE_DMA_CAPA_HANDLES_ERRORS;
	info->max_vchans = 1;
	info->min_desc = 32;
	info->max_desc = 4096;
	return 0;
}

/* Close a configured device. */
static int
ioat_dev_close(struct rte_dma_dev *dev)
{
	struct ioat_dmadev *ioat;

	if (dev == NULL) {
		IOAT_PMD_ERR("Invalid device");
		return -EINVAL;
	}

	ioat = dev->fp_obj->dev_private;
	if (ioat == NULL) {
		IOAT_PMD_ERR("Error getting dev_private");
		return -EINVAL;
	}

	rte_free(ioat->desc_ring);

	return 0;
}

/* Trigger hardware to begin performing enqueued operations. */
static inline void
__submit(struct ioat_dmadev *ioat)
{
	*ioat->doorbell = ioat->next_write - ioat->offset;

	ioat->last_write = ioat->next_write;
}
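
/* The doorbell above is the dmacount register: it takes a cumulative count of
 * descriptors written, not a ring index. Subtracting ioat->offset keeps the
 * software count aligned with the hardware after __ioat_recover() has moved
 * the HW and SW rings out of sync.
 */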

/* External submit function wrapper. */
static int
ioat_submit(void *dev_private, uint16_t qid __rte_unused)
{
	struct ioat_dmadev *ioat = dev_private;

	__submit(ioat);

	return 0;
}
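
/* Ring accounting for the enqueue path below: with nb_desc a power of two,
 * next_read/next_write are free-running unsigned counters and (nb_desc - 1)
 * is the index mask. "space = mask + read - write" deliberately leaves one
 * slot unused, so a full ring is never mistaken for an empty one.
 */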

/* Write descriptor for enqueue. */
static inline int
__write_desc(void *dev_private, uint32_t op, uint64_t src, phys_addr_t dst,
		unsigned int length, uint64_t flags)
{
	struct ioat_dmadev *ioat = dev_private;
	uint16_t ret;
	const unsigned short mask = ioat->qcfg.nb_desc - 1;
	const unsigned short read = ioat->next_read;
	unsigned short write = ioat->next_write;
	const unsigned short space = mask + read - write;
	struct ioat_dma_hw_desc *desc;

	if (space == 0)
		return -ENOSPC;

	ioat->next_write = write + 1;
	write &= mask;

	desc = &ioat->desc_ring[write];
	desc->size = length;
	desc->u.control_raw = (uint32_t)((op << IOAT_CMD_OP_SHIFT) |
			(1 << IOAT_COMP_UPDATE_SHIFT));

	/* In IOAT the fence ensures that all operations including the current one
	 * are completed before moving on, DMAdev assumes that the fence ensures
	 * all operations before the current one are completed before starting
	 * the current one, so in IOAT we set the fence for the previous descriptor.
	 */
	if (flags & RTE_DMA_OP_FLAG_FENCE)
		ioat->desc_ring[(write - 1) & mask].u.control.fence = 1;

	desc->src_addr = src;
	desc->dest_addr = dst;

	rte_prefetch0(&ioat->desc_ring[ioat->next_write & mask]);

	/* The returned job handle is the 16-bit free-running write counter. */
	ret = (uint16_t)(ioat->next_write - 1);

	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
		__submit(ioat);

	return ret;
}

/* Enqueue a fill operation onto the ioat device. */
static int
ioat_enqueue_fill(void *dev_private, uint16_t qid __rte_unused, uint64_t pattern,
		rte_iova_t dst, unsigned int length, uint64_t flags)
{
	return __write_desc(dev_private, ioat_op_fill, pattern, dst, length, flags);
}

/* Enqueue a copy operation onto the ioat device. */
static int
ioat_enqueue_copy(void *dev_private, uint16_t qid __rte_unused, rte_iova_t src,
		rte_iova_t dst, unsigned int length, uint64_t flags)
{
	return __write_desc(dev_private, ioat_op_copy, src, dst, length, flags);
}
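
/* Application-side sketch (not part of this driver): after the usual
 * rte_dma_configure()/rte_dma_vchan_setup()/rte_dma_start() sequence, the
 * hooks above are reached through the public dmadev API, e.g.:
 *
 *	int job = rte_dma_copy(dev_id, 0, src_iova, dst_iova, len,
 *			RTE_DMA_OP_FLAG_SUBMIT);
 *
 * where dev_id, src_iova, dst_iova and len are placeholders supplied by the
 * application.
 */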

/* Dump DMA device info. */
static int
__dev_dump(void *dev_private, FILE *f)
{
	struct ioat_dmadev *ioat = dev_private;
	uint64_t chansts_masked = ioat->regs->chansts & IOAT_CHANSTS_STATUS;
	uint32_t chanerr = ioat->regs->chanerr;
	uint64_t mask = (ioat->qcfg.nb_desc - 1);
	char ver = ioat->version;
	fprintf(f, "========= IOAT =========\n");
	fprintf(f, " IOAT version: %d.%d\n", ver >> 4, ver & 0xF);
	fprintf(f, " Channel status: %s [0x%"PRIx64"]\n",
			chansts_readable[chansts_masked], chansts_masked);
	fprintf(f, " ChainADDR: 0x%"PRIx64"\n", ioat->regs->chainaddr);
	if (chanerr == 0)
		fprintf(f, " No Channel Errors\n");
	else {
		fprintf(f, " ChanERR: 0x%"PRIx32"\n", chanerr);
		if (chanerr & IOAT_CHANERR_INVALID_SRC_ADDR_MASK)
			fprintf(f, " Invalid Source Address\n");
		if (chanerr & IOAT_CHANERR_INVALID_DST_ADDR_MASK)
			fprintf(f, " Invalid Destination Address\n");
		if (chanerr & IOAT_CHANERR_INVALID_LENGTH_MASK)
			fprintf(f, " Invalid Descriptor Length\n");
		if (chanerr & IOAT_CHANERR_DESCRIPTOR_READ_ERROR_MASK)
			fprintf(f, " Descriptor Read Error\n");
		if ((chanerr & ~(IOAT_CHANERR_INVALID_SRC_ADDR_MASK |
				IOAT_CHANERR_INVALID_DST_ADDR_MASK |
				IOAT_CHANERR_INVALID_LENGTH_MASK |
				IOAT_CHANERR_DESCRIPTOR_READ_ERROR_MASK)) != 0)
			fprintf(f, " Unknown Error(s)\n");
	}
	fprintf(f, "== Private Data ==\n");
	fprintf(f, " Config: { ring_size: %u }\n", ioat->qcfg.nb_desc);
	fprintf(f, " Status: 0x%"PRIx64"\n", ioat->status);
	fprintf(f, " Status IOVA: 0x%"PRIx64"\n", ioat->status_addr);
	fprintf(f, " Status ADDR: %p\n", &ioat->status);
	fprintf(f, " Ring IOVA: 0x%"PRIx64"\n", ioat->ring_addr);
	/* desc_ring[0].next holds the IOVA of entry 1; step back one descriptor. */
	fprintf(f, " Ring ADDR: 0x%"PRIx64"\n", ioat->desc_ring[0].next - DESC_SZ);
	fprintf(f, " Next write: %"PRIu16"\n", ioat->next_write);
	fprintf(f, " Next read: %"PRIu16"\n", ioat->next_read);
	struct ioat_dma_hw_desc *desc_ring = &ioat->desc_ring[(ioat->next_write - 1) & mask];
	fprintf(f, " Last Descriptor Written {\n");
	fprintf(f, " Size: %"PRIu32"\n", desc_ring->size);
	fprintf(f, " Control: 0x%"PRIx32"\n", desc_ring->u.control_raw);
	fprintf(f, " Src: 0x%"PRIx64"\n", desc_ring->src_addr);
	fprintf(f, " Dest: 0x%"PRIx64"\n", desc_ring->dest_addr);
	fprintf(f, " Next: 0x%"PRIx64"\n", desc_ring->next);
	fprintf(f, " }\n");
	fprintf(f, " Next Descriptor {\n");
	fprintf(f, " Size: %"PRIu32"\n", ioat->desc_ring[ioat->next_read & mask].size);
	fprintf(f, " Src: 0x%"PRIx64"\n", ioat->desc_ring[ioat->next_read & mask].src_addr);
	fprintf(f, " Dest: 0x%"PRIx64"\n", ioat->desc_ring[ioat->next_read & mask].dest_addr);
	fprintf(f, " Next: 0x%"PRIx64"\n", ioat->desc_ring[ioat->next_read & mask].next);
	fprintf(f, " }\n");

	return 0;
}

/* Public wrapper for dump. */
static int
ioat_dev_dump(const struct rte_dma_dev *dev, FILE *f)
{
	return __dev_dump(dev->fp_obj->dev_private, f);
}
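
/* Applications can trigger this dump through the public API, e.g.
 * rte_dma_dump(dev_id, stdout), where dev_id is the application's device id.
 */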

/* Create a DMA device. */
static int
ioat_dmadev_create(const char *name, struct rte_pci_device *dev)
{
	static const struct rte_dma_dev_ops ioat_dmadev_ops = {
		.dev_close = ioat_dev_close,
		.dev_configure = ioat_dev_configure,
		.dev_dump = ioat_dev_dump,
		.dev_info_get = ioat_dev_info_get,
		.dev_start = ioat_dev_start,
		.dev_stop = ioat_dev_stop,
		.vchan_setup = ioat_vchan_setup,
	};

	struct rte_dma_dev *dmadev = NULL;
	struct ioat_dmadev *ioat = NULL;
	int retry = 0;

	if (name == NULL) {
		IOAT_PMD_ERR("Invalid name of the device!");
		return -EINVAL;
	}

	/* Allocate device structure. */
	dmadev = rte_dma_pmd_allocate(name, dev->device.numa_node, sizeof(struct ioat_dmadev));
	if (dmadev == NULL) {
		IOAT_PMD_ERR("Unable to allocate dma device");
		return -ENOMEM;
	}

	dmadev->device = &dev->device;

	dmadev->fp_obj->dev_private = dmadev->data->dev_private;

	dmadev->dev_ops = &ioat_dmadev_ops;

	dmadev->fp_obj->copy = ioat_enqueue_copy;
	dmadev->fp_obj->fill = ioat_enqueue_fill;
	dmadev->fp_obj->submit = ioat_submit;

	ioat = dmadev->data->dev_private;
	ioat->dmadev = dmadev;
	ioat->regs = dev->mem_resource[0].addr;
	ioat->doorbell = &ioat->regs->dmacount;
	ioat->qcfg.nb_desc = 0;
	ioat->desc_ring = NULL;
	ioat->version = ioat->regs->cbver;

	/* Do device initialization - reset and set error behaviour. */
	if (ioat->regs->chancnt != 1)
		IOAT_PMD_WARN("%s: Channel count == %d", __func__,
				ioat->regs->chancnt);

	/* Locked by someone else. */
	if (ioat->regs->chanctrl & IOAT_CHANCTRL_CHANNEL_IN_USE) {
		IOAT_PMD_WARN("%s: Channel appears locked", __func__);
		ioat->regs->chanctrl = 0;
	}

	/* Clear any previous errors. */
	if (ioat->regs->chanerr != 0) {
		uint32_t val = ioat->regs->chanerr;
		ioat->regs->chanerr = val;
	}

	ioat->regs->chancmd = IOAT_CHANCMD_SUSPEND;
	rte_delay_ms(1);
	ioat->regs->chancmd = IOAT_CHANCMD_RESET;
	rte_delay_ms(1);
	while (ioat->regs->chancmd & IOAT_CHANCMD_RESET) {
		ioat->regs->chainaddr = 0;
		rte_delay_ms(1);
		if (++retry >= 200) {
			IOAT_PMD_ERR("%s: cannot reset device. CHANCMD=%#"PRIx8
					", CHANSTS=%#"PRIx64", CHANERR=%#"PRIx32,
					__func__,
					ioat->regs->chancmd,
					ioat->regs->chansts,
					ioat->regs->chanerr);
			rte_dma_pmd_release(name);
			return -EIO;
		}
	}

	ioat->regs->chanctrl = IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
			IOAT_CHANCTRL_ERR_COMPLETION_EN;

	dmadev->fp_obj->dev_private = ioat;

	dmadev->state = RTE_DMA_DEV_READY;

	return 0;
}
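
/* The channel is configured to abort and flag an error completion on any
 * error (IOAT_CHANCTRL_ANY_ERR_ABORT_EN | IOAT_CHANCTRL_ERR_COMPLETION_EN),
 * so a HALTED channel can be detected and brought back via __ioat_recover().
 */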

/* Destroy a DMA device. */
static int
ioat_dmadev_destroy(const char *name)
{
	int ret;

	if (!name) {
		IOAT_PMD_ERR("Invalid device name");
		return -EINVAL;
	}

	ret = rte_dma_pmd_release(name);
	if (ret)
		IOAT_PMD_DEBUG("Device cleanup failed");

	return ret;
}

/* Probe DMA device. */
static int
ioat_dmadev_probe(struct rte_pci_driver *drv, struct rte_pci_device *dev)
{
	char name[32];

	rte_pci_device_name(&dev->addr, name, sizeof(name));
	IOAT_PMD_INFO("Init %s on NUMA node %d", name, dev->device.numa_node);

	dev->device.driver = &drv->driver;
	return ioat_dmadev_create(name, dev);
}

/* Remove DMA device. */
static int
ioat_dmadev_remove(struct rte_pci_device *dev)
{
	char name[32];

	rte_pci_device_name(&dev->addr, name, sizeof(name));

	IOAT_PMD_INFO("Closing %s on NUMA node %d",
			name, dev->device.numa_node);

	return ioat_dmadev_destroy(name);
}

static const struct rte_pci_id pci_id_ioat_map[] = {
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_SKX) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX0) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX1) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX2) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX3) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX4) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX5) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX6) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX7) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDXE) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDXF) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_ICX) },
	{ .vendor_id = 0, /* sentinel */ },
};

static struct rte_pci_driver ioat_pmd_drv = {
	.id_table = pci_id_ioat_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
	.probe = ioat_dmadev_probe,
	.remove = ioat_dmadev_remove,
};

RTE_PMD_REGISTER_PCI(IOAT_PMD_NAME, ioat_pmd_drv);
RTE_PMD_REGISTER_PCI_TABLE(IOAT_PMD_NAME, pci_id_ioat_map);
RTE_PMD_REGISTER_KMOD_DEP(IOAT_PMD_NAME, "* igb_uio | uio_pci_generic | vfio-pci");