+/* IOAT operations. */
+enum rte_ioat_ops {
+ ioat_op_copy = 0, /* Standard DMA Operation */
+ ioat_op_fill /* Block Fill */
+};
+
+/* Configure a device. */
+static int
+ioat_dev_configure(struct rte_dma_dev *dev __rte_unused, const struct rte_dma_conf *dev_conf,
+ uint32_t conf_sz)
+{
+ if (sizeof(struct rte_dma_conf) != conf_sz)
+ return -EINVAL;
+
+ if (dev_conf->nb_vchans != 1)
+ return -EINVAL;
+
+ return 0;
+}
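+
+/* Illustrative note (a sketch, not driver code): this callback is reached
+ * through the generic dmadev API, which supplies conf_sz itself. Assuming a
+ * valid dev_id:
+ *
+ *	struct rte_dma_conf conf = { .nb_vchans = 1 };
+ *
+ *	rte_dma_configure(dev_id, &conf);
+ *
+ * rte_dma_configure() forwards sizeof(conf) as conf_sz, so a size mismatch
+ * here indicates an ABI mismatch between application and driver.
+ */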
+
+/* Set up a virtual channel for IOAT; only one vchan is supported. */
+static int
+ioat_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
+ const struct rte_dma_vchan_conf *qconf, uint32_t qconf_sz)
+{
+ struct ioat_dmadev *ioat = dev->fp_obj->dev_private;
+ uint16_t max_desc = qconf->nb_desc;
+ int i;
+
+ if (sizeof(struct rte_dma_vchan_conf) != qconf_sz)
+ return -EINVAL;
+
+ ioat->qcfg = *qconf;
+
+ if (!rte_is_power_of_2(max_desc)) {
+ max_desc = rte_align32pow2(max_desc);
+ IOAT_PMD_DEBUG("DMA dev %u using %u descriptors", dev->data->dev_id, max_desc);
+ ioat->qcfg.nb_desc = max_desc;
+ }
+
+ /* In case we are reconfiguring a device, free any existing memory. */
+ rte_free(ioat->desc_ring);
+
+ ioat->desc_ring = rte_zmalloc(NULL, sizeof(*ioat->desc_ring) * max_desc, 0);
+ if (ioat->desc_ring == NULL)
+ return -ENOMEM;
+
+ ioat->ring_addr = rte_mem_virt2iova(ioat->desc_ring);
+
+ ioat->status_addr = rte_mem_virt2iova(ioat) + offsetof(struct ioat_dmadev, status);
+
+	/* Ensure all counters are reset if reconfiguring/restarting the device. */
+ ioat->next_read = 0;
+ ioat->next_write = 0;
+ ioat->last_write = 0;
+ ioat->offset = 0;
+ ioat->failure = 0;
+
+ /* Reset Stats. */
+ ioat->stats = (struct rte_dma_stats){0};
+
+ /* Configure descriptor ring - each one points to next. */
+ for (i = 0; i < ioat->qcfg.nb_desc; i++) {
+ ioat->desc_ring[i].next = ioat->ring_addr +
+ (((i + 1) % ioat->qcfg.nb_desc) * DESC_SZ);
+ }
+
+ return 0;
+}
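+
+/* Example (illustrative only): a request for, say, 500 descriptors is rounded
+ * up to 512 by rte_align32pow2() above, so ring indexes can wrap with a simple
+ * mask of (nb_desc - 1). A minimal application-side sketch, assuming a valid
+ * dev_id and vchan 0:
+ *
+ *	struct rte_dma_vchan_conf qconf = {
+ *		.direction = RTE_DMA_DIR_MEM_TO_MEM,
+ *		.nb_desc = 500,
+ *	};
+ *
+ *	rte_dma_vchan_setup(dev_id, 0, &qconf);
+ */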
+
+/* Recover IOAT device. */
+static inline int
+__ioat_recover(struct ioat_dmadev *ioat)
+{
+ uint32_t chanerr, retry = 0;
+ uint16_t mask = ioat->qcfg.nb_desc - 1;
+
+	/* Clear any channel errors: reading chanerr and writing the value back
+	 * clears the error bits.
+	 */
+ chanerr = ioat->regs->chanerr;
+ ioat->regs->chanerr = chanerr;
+
+ /* Reset Channel. */
+ ioat->regs->chancmd = IOAT_CHANCMD_RESET;
+
+ /* Write new chain address to trigger state change. */
+ ioat->regs->chainaddr = ioat->desc_ring[(ioat->next_read - 1) & mask].next;
+ /* Ensure channel control and status addr are correct. */
+ ioat->regs->chanctrl = IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
+ IOAT_CHANCTRL_ERR_COMPLETION_EN;
+ ioat->regs->chancmp = ioat->status_addr;
+
+ /* Allow HW time to move to the ARMED state. */
+ do {
+ rte_pause();
+ retry++;
+ } while (ioat->regs->chansts != IOAT_CHANSTS_ARMED && retry < 200);
+
+	/* Fail if the device has not reached the ARMED state. */
+ if (ioat->regs->chansts != IOAT_CHANSTS_ARMED)
+ return -1;
+
+	/* Store the current read index as the offset: recovery moves the HW and
+	 * SW rings out of sync, so doorbell writes must be made relative to
+	 * this point.
+	 */
+ ioat->offset = ioat->next_read;
+
+	/* Prime the status word with the previous descriptor address. */
+ ioat->status = ioat->desc_ring[(ioat->next_read - 2) & mask].next;
+
+ return 0;
+}
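+
+/* Note on the masked index arithmetic above: ring indexes are free-running
+ * unsigned counters, so (ioat->next_read - 1) & mask is safe even when
+ * next_read == 0. For example, with nb_desc = 512 (mask = 511):
+ *
+ *	(0 - 1) & 511 == 511
+ *
+ * i.e. the wrap lands on the last ring element, as intended.
+ */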
+
+/* Start a configured device. */
+static int
+ioat_dev_start(struct rte_dma_dev *dev)
+{
+ struct ioat_dmadev *ioat = dev->fp_obj->dev_private;
+
+ if (ioat->qcfg.nb_desc == 0 || ioat->desc_ring == NULL)
+ return -EBUSY;
+
+ /* Inform hardware of where the descriptor ring is. */
+ ioat->regs->chainaddr = ioat->ring_addr;
+ /* Inform hardware of where to write the status/completions. */
+ ioat->regs->chancmp = ioat->status_addr;
+
+	/* Prime the status word with the address of the last ring element. */
+ ioat->status = ioat->ring_addr + ((ioat->qcfg.nb_desc - 1) * DESC_SZ);
+
+	IOAT_PMD_DEBUG("IOAT.status: %s [0x%"PRIx64"]",
+			chansts_readable[ioat->status & IOAT_CHANSTS_STATUS],
+			ioat->status);
+
+ if ((ioat->regs->chansts & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED) {
+		IOAT_PMD_WARN("Device HALTED on start, attempting to recover");
+ if (__ioat_recover(ioat) != 0) {
+ IOAT_PMD_ERR("Device couldn't be recovered");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/* Stop a configured device. */
+static int
+ioat_dev_stop(struct rte_dma_dev *dev)
+{
+ struct ioat_dmadev *ioat = dev->fp_obj->dev_private;
+ uint32_t retry = 0;
+
+ ioat->regs->chancmd = IOAT_CHANCMD_SUSPEND;
+
+ do {
+ rte_pause();
+ retry++;
+ } while ((ioat->regs->chansts & IOAT_CHANSTS_STATUS) != IOAT_CHANSTS_SUSPENDED
+ && retry < 200);
+
+ return ((ioat->regs->chansts & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED) ? 0 : -1;
+}
+
+/* Get device information. */
+static int
+ioat_dev_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *info,
+		uint32_t size)
+{
+	struct ioat_dmadev *ioat = dev->fp_obj->dev_private;
+
+	if (size < sizeof(*info))
+		return -EINVAL;
+
+ info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
+ RTE_DMA_CAPA_OPS_COPY |
+ RTE_DMA_CAPA_OPS_FILL;
+ if (ioat->version >= IOAT_VER_3_4)
+ info->dev_capa |= RTE_DMA_CAPA_HANDLES_ERRORS;
+ info->max_vchans = 1;
+ info->min_desc = 32;
+ info->max_desc = 4096;
+ return 0;
+}
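+
+/* Sketch (not driver code): an application can check the error-handling
+ * capability reported above before relying on recovery. Assumes a valid
+ * dev_id:
+ *
+ *	struct rte_dma_info info;
+ *
+ *	if (rte_dma_info_get(dev_id, &info) == 0 &&
+ *			(info.dev_capa & RTE_DMA_CAPA_HANDLES_ERRORS))
+ *		printf("device can recover from failed operations\n");
+ */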
+
+/* Close a configured device. */
+static int
+ioat_dev_close(struct rte_dma_dev *dev)
+{
+ struct ioat_dmadev *ioat;
+
+	if (dev == NULL) {
+ IOAT_PMD_ERR("Invalid device");
+ return -EINVAL;
+ }
+
+ ioat = dev->fp_obj->dev_private;
+	if (ioat == NULL) {
+ IOAT_PMD_ERR("Error getting dev_private");
+ return -EINVAL;
+ }
+
+ rte_free(ioat->desc_ring);
+
+ return 0;
+}
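+
+/* Illustrative device lifecycle from the application side (a sketch; assumes
+ * a valid dev_id). Each generic call lands in the corresponding op above:
+ *
+ *	rte_dma_configure(dev_id, &conf);	(ioat_dev_configure)
+ *	rte_dma_vchan_setup(dev_id, 0, &qconf);	(ioat_vchan_setup)
+ *	rte_dma_start(dev_id);			(ioat_dev_start)
+ *	...
+ *	rte_dma_stop(dev_id);			(ioat_dev_stop)
+ *	rte_dma_close(dev_id);			(ioat_dev_close)
+ */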
+
+/* Trigger hardware to begin performing enqueued operations. */
+static inline void
+__submit(struct ioat_dmadev *ioat)
+{
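+	/* Write the doorbell as a running descriptor count: the software write
+	 * index minus any offset stored by __ioat_recover().
+	 */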
+ *ioat->doorbell = ioat->next_write - ioat->offset;
+
+ ioat->stats.submitted += (uint16_t)(ioat->next_write - ioat->last_write);
+
+ ioat->last_write = ioat->next_write;
+}
+
+/* External submit function wrapper. */
+static int
+ioat_submit(void *dev_private, uint16_t qid __rte_unused)
+{
+ struct ioat_dmadev *ioat = dev_private;
+
+ __submit(ioat);
+
+ return 0;
+}
+
+/* Write a hardware descriptor for enqueue; for fill operations, "src" carries
+ * the fill pattern rather than an address.
+ */
+static inline int
+__write_desc(void *dev_private, uint32_t op, uint64_t src, rte_iova_t dst,
+		unsigned int length, uint64_t flags)
+{
+ struct ioat_dmadev *ioat = dev_private;
+ uint16_t ret;
+ const unsigned short mask = ioat->qcfg.nb_desc - 1;
+ const unsigned short read = ioat->next_read;
+ unsigned short write = ioat->next_write;
+ const unsigned short space = mask + read - write;
+ struct ioat_dma_hw_desc *desc;
+
+ if (space == 0)
+ return -ENOSPC;
+
+ ioat->next_write = write + 1;
+ write &= mask;
+
+ desc = &ioat->desc_ring[write];
+ desc->size = length;
+ desc->u.control_raw = (uint32_t)((op << IOAT_CMD_OP_SHIFT) |
+ (1 << IOAT_COMP_UPDATE_SHIFT));
+
+	/* In IOAT, a fence ensures that all operations, including the current
+	 * one, complete before the channel moves on. The dmadev API instead
+	 * defines a fence as guaranteeing that all operations before the
+	 * current one complete before it starts, so for IOAT the fence is set
+	 * on the previous descriptor.
+	 */
+ if (flags & RTE_DMA_OP_FLAG_FENCE)
+ ioat->desc_ring[(write - 1) & mask].u.control.fence = 1;
+
+ desc->src_addr = src;
+ desc->dest_addr = dst;
+
+ rte_prefetch0(&ioat->desc_ring[ioat->next_write & mask]);
+
+ ret = (uint16_t)(ioat->next_write - 1);
+
+ if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+ __submit(ioat);
+
+ return ret;
+}
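+
+/* Worked example of the ring-space check above: with nb_desc = 512
+ * (mask = 511), read = 0 and write = 511 give
+ *
+ *	space = 511 + 0 - 511 = 0
+ *
+ * so the ring reports full while one slot is still physically empty. That
+ * slot is deliberately sacrificed so that a full ring and an empty ring
+ * (read == write) are distinguishable without a separate count.
+ */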
+
+/* Enqueue a fill operation onto the IOAT device. */
+static int
+ioat_enqueue_fill(void *dev_private, uint16_t qid __rte_unused, uint64_t pattern,
+ rte_iova_t dst, unsigned int length, uint64_t flags)
+{
+ return __write_desc(dev_private, ioat_op_fill, pattern, dst, length, flags);
+}
+
+/* Enqueue a copy operation onto the IOAT device. */
+static int
+ioat_enqueue_copy(void *dev_private, uint16_t qid __rte_unused, rte_iova_t src,
+ rte_iova_t dst, unsigned int length, uint64_t flags)
+{
+ return __write_desc(dev_private, ioat_op_copy, src, dst, length, flags);
+}
+
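+/* Application-side sketch of the enqueue path (assumes a started device,
+ * vchan 0 and IOVA-addressable buffers); both calls land in __write_desc():
+ *
+ *	int idx;
+ *
+ *	idx = rte_dma_copy(dev_id, 0, src_iova, dst_iova, len,
+ *			RTE_DMA_OP_FLAG_SUBMIT);
+ *	if (idx < 0)
+ *		return idx;
+ *
+ *	idx = rte_dma_fill(dev_id, 0, pattern, dst_iova, len, 0);
+ *	rte_dma_submit(dev_id, 0);
+ */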