dma/idxd: add data path job submission
drivers/dma/idxd/idxd_common.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 Intel Corporation
 */

#include <x86intrin.h>

#include <rte_malloc.h>
#include <rte_common.h>
#include <rte_log.h>
#include <rte_prefetch.h>

#include "idxd_internal.h"

#define IDXD_PMD_NAME_STR "dmadev_idxd"

static __rte_always_inline rte_iova_t
__desc_idx_to_iova(struct idxd_dmadev *idxd, uint16_t n)
{
        return idxd->desc_iova + (n * sizeof(struct idxd_hw_desc));
}

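/*
 * Write a 64-byte descriptor to the device portal. The .byte sequence below is
 * the encoding of the MOVDIR64B instruction, which moves the whole 64-byte
 * descriptor to the portal as a single atomic write: with ModRM 0x02 the
 * destination address is taken from rax (the "a" constraint, dst) and the
 * source from memory at rdx (the "d" constraint, src). Raw bytes are presumably
 * used so the file builds even with toolchains that lack the mnemonic.
 */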
static __rte_always_inline void
__idxd_movdir64b(volatile void *dst, const struct idxd_hw_desc *src)
{
        asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
                        :
                        : "a" (dst), "d" (src)
                        : "memory");
}

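/*
 * Push any descriptors accumulated since the last submission to the hardware.
 * A batch of one is written to the portal directly, with a completion request
 * added to the lone descriptor; larger batches are submitted as a single batch
 * descriptor pointing at the first queued descriptor in the ring. Either way
 * the completion is directed at the current batch completion ring slot, and the
 * batch bookkeeping is then advanced and the next completion slot cleared.
 */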
static __rte_always_inline void
__submit(struct idxd_dmadev *idxd)
{
        rte_prefetch1(&idxd->batch_comp_ring[idxd->batch_idx_read]);

        if (idxd->batch_size == 0)
                return;

        /* write completion to batch comp ring */
        rte_iova_t comp_addr = idxd->batch_iova +
                        (idxd->batch_idx_write * sizeof(struct idxd_completion));

        if (idxd->batch_size == 1) {
                /* submit batch directly */
                struct idxd_hw_desc desc =
                                idxd->desc_ring[idxd->batch_start & idxd->desc_ring_mask];
                desc.completion = comp_addr;
                desc.op_flags |= IDXD_FLAG_REQUEST_COMPLETION;
                _mm_sfence(); /* fence before writing desc to device */
                __idxd_movdir64b(idxd->portal, &desc);
        } else {
                const struct idxd_hw_desc batch_desc = {
                                .op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) |
                                IDXD_FLAG_COMPLETION_ADDR_VALID |
                                IDXD_FLAG_REQUEST_COMPLETION,
                                .desc_addr = __desc_idx_to_iova(idxd,
                                                idxd->batch_start & idxd->desc_ring_mask),
                                .completion = comp_addr,
                                .size = idxd->batch_size,
                };
                _mm_sfence(); /* fence before writing desc to device */
                __idxd_movdir64b(idxd->portal, &batch_desc);
        }

        if (++idxd->batch_idx_write > idxd->max_batches)
                idxd->batch_idx_write = 0;

        idxd->batch_start += idxd->batch_size;
        idxd->batch_size = 0;
        idxd->batch_idx_ring[idxd->batch_idx_write] = idxd->batch_start;
        _mm256_store_si256((void *)&idxd->batch_comp_ring[idxd->batch_idx_write],
                        _mm256_setzero_si256());
}

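/*
 * Common enqueue path for copy and fill operations. Space is checked in both
 * the batch index ring and the descriptor ring, the 64-byte descriptor is
 * written as two 32-byte stores, and the job index used later for completion
 * tracking is returned. The work is only pushed to hardware here if the caller
 * passed RTE_DMA_OP_FLAG_SUBMIT; otherwise it waits for a submit call.
 */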
static __rte_always_inline int
__idxd_write_desc(struct idxd_dmadev *idxd,
                const uint32_t op_flags,
                const rte_iova_t src,
                const rte_iova_t dst,
                const uint32_t size,
                const uint32_t flags)
{
        uint16_t mask = idxd->desc_ring_mask;
        uint16_t job_id = idxd->batch_start + idxd->batch_size;
        /* we never wrap batches, so we only mask the start and allow start+size to overflow */
        uint16_t write_idx = (idxd->batch_start & mask) + idxd->batch_size;

        /* first check batch ring space then desc ring space */
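        /* note: the batch index ring has max_batches + 1 slots but one is always
         * left unused, so the ring is full when advancing the write index would
         * make it equal the read index (see idxd_dmadev_create below)
         */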
        if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
                        idxd->batch_idx_write + 1 == idxd->batch_idx_read)
                return -ENOSPC;
        if (((write_idx + 1) & mask) == (idxd->ids_returned & mask))
                return -ENOSPC;

        /* write desc. Note: descriptors don't wrap, but the completion address does */
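        /* _mm256_set_epi64x() takes its arguments from the highest 64-bit lane
         * down, so the first store lays out pasid/op_flags, completion, src and
         * dst in ascending address order; the 32-bit shift puts op_flags in the
         * upper half of the first qword, leaving the pasid field as zero
         */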
        const uint64_t op_flags64 = (uint64_t)(op_flags | IDXD_FLAG_COMPLETION_ADDR_VALID) << 32;
        const uint64_t comp_addr = __desc_idx_to_iova(idxd, write_idx & mask);
        _mm256_store_si256((void *)&idxd->desc_ring[write_idx],
                        _mm256_set_epi64x(dst, src, comp_addr, op_flags64));
        _mm256_store_si256((void *)&idxd->desc_ring[write_idx].size,
                        _mm256_set_epi64x(0, 0, 0, size));

        idxd->batch_size++;

        rte_prefetch0_write(&idxd->desc_ring[write_idx + 1]);

        if (flags & RTE_DMA_OP_FLAG_SUBMIT)
                __submit(idxd);

        return job_id;
}

int
idxd_enqueue_copy(void *dev_private, uint16_t qid __rte_unused, rte_iova_t src,
                rte_iova_t dst, unsigned int length, uint64_t flags)
{
        /* we can take advantage of the fact that the fence flag in dmadev and DSA is the same,
         * but check it at compile time to be sure.
         */
        RTE_BUILD_BUG_ON(RTE_DMA_OP_FLAG_FENCE != IDXD_FLAG_FENCE);
        uint32_t memmove = (idxd_op_memmove << IDXD_CMD_OP_SHIFT) |
                        IDXD_FLAG_CACHE_CONTROL | (flags & IDXD_FLAG_FENCE);
        return __idxd_write_desc(dev_private, memmove, src, dst, length,
                        flags);
}

int
idxd_enqueue_fill(void *dev_private, uint16_t qid __rte_unused, uint64_t pattern,
                rte_iova_t dst, unsigned int length, uint64_t flags)
{
        uint32_t fill = (idxd_op_fill << IDXD_CMD_OP_SHIFT) |
                        IDXD_FLAG_CACHE_CONTROL | (flags & IDXD_FLAG_FENCE);
        return __idxd_write_desc(dev_private, fill, pattern, dst, length,
                        flags);
}

int
idxd_submit(void *dev_private, uint16_t qid __rte_unused)
{
        __submit(dev_private);
        return 0;
}
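
/*
 * Illustrative application-level usage (a rough sketch; dev_id and vchan are
 * assumed to refer to an already configured and started idxd dmadev, and the
 * IOVAs and length to the application's own buffers). rte_dma_copy() lands in
 * idxd_enqueue_copy() above and returns the job index or -ENOSPC, while
 * rte_dma_submit(), or RTE_DMA_OP_FLAG_SUBMIT on the enqueue itself, triggers
 * __submit():
 *
 *      int job = rte_dma_copy(dev_id, vchan, src_iova, dst_iova, len, 0);
 *      if (job >= 0)
 *              rte_dma_submit(dev_id, vchan);
 */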

int
idxd_dump(const struct rte_dma_dev *dev, FILE *f)
{
        struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
        unsigned int i;

        fprintf(f, "== IDXD Private Data ==\n");
        fprintf(f, "  Portal: %p\n", idxd->portal);
        fprintf(f, "  Config: { ring_size: %u }\n",
                        idxd->qcfg.nb_desc);
        fprintf(f, "  Batch ring (sz = %u, max_batches = %u):\n\t",
                        idxd->max_batches + 1, idxd->max_batches);
        for (i = 0; i <= idxd->max_batches; i++) {
                fprintf(f, " %u ", idxd->batch_idx_ring[i]);
                if (i == idxd->batch_idx_read && i == idxd->batch_idx_write)
                        fprintf(f, "[rd ptr, wr ptr] ");
                else if (i == idxd->batch_idx_read)
                        fprintf(f, "[rd ptr] ");
                else if (i == idxd->batch_idx_write)
                        fprintf(f, "[wr ptr] ");
                if (i == idxd->max_batches)
                        fprintf(f, "\n");
        }

        fprintf(f, "  Curr batch: start = %u, size = %u\n", idxd->batch_start, idxd->batch_size);
        fprintf(f, "  IDS: avail = %u, returned: %u\n", idxd->ids_avail, idxd->ids_returned);
        return 0;
}

int
idxd_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *info, uint32_t size)
{
        struct idxd_dmadev *idxd = dev->fp_obj->dev_private;

        if (size < sizeof(*info))
                return -EINVAL;

        *info = (struct rte_dma_info) {
                        .dev_capa = RTE_DMA_CAPA_MEM_TO_MEM | RTE_DMA_CAPA_HANDLES_ERRORS |
                                RTE_DMA_CAPA_OPS_COPY | RTE_DMA_CAPA_OPS_FILL,
                        .max_vchans = 1,
                        .max_desc = 4096,
                        .min_desc = 64,
        };
        if (idxd->sva_support)
                info->dev_capa |= RTE_DMA_CAPA_SVA;
        return 0;
}

int
idxd_configure(struct rte_dma_dev *dev __rte_unused, const struct rte_dma_conf *dev_conf,
                uint32_t conf_sz)
{
        if (sizeof(struct rte_dma_conf) != conf_sz)
                return -EINVAL;

        if (dev_conf->nb_vchans != 1)
                return -EINVAL;
        return 0;
}

int
idxd_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
                const struct rte_dma_vchan_conf *qconf, uint32_t qconf_sz)
{
        struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
        uint16_t max_desc = qconf->nb_desc;

        if (sizeof(struct rte_dma_vchan_conf) != qconf_sz)
                return -EINVAL;

        idxd->qcfg = *qconf;

        if (!rte_is_power_of_2(max_desc))
                max_desc = rte_align32pow2(max_desc);
        IDXD_PMD_DEBUG("DMA dev %u using %u descriptors", dev->data->dev_id, max_desc);
        idxd->desc_ring_mask = max_desc - 1;
        idxd->qcfg.nb_desc = max_desc;

        /* in case we are reconfiguring a device, free any existing memory */
        rte_free(idxd->desc_ring);

        /* allocate the descriptor ring at 2x size as batches can't wrap */
        idxd->desc_ring = rte_zmalloc(NULL, sizeof(*idxd->desc_ring) * max_desc * 2, 0);
        if (idxd->desc_ring == NULL)
                return -ENOMEM;
        idxd->desc_iova = rte_mem_virt2iova(idxd->desc_ring);

        idxd->batch_idx_read = 0;
        idxd->batch_idx_write = 0;
        idxd->batch_start = 0;
        idxd->batch_size = 0;
        idxd->ids_returned = 0;
        idxd->ids_avail = 0;

        memset(idxd->batch_comp_ring, 0, sizeof(*idxd->batch_comp_ring) *
                        (idxd->max_batches + 1));
        return 0;
}
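
/*
 * Illustrative application-side setup reaching idxd_configure() and
 * idxd_vchan_setup() above through the generic dmadev API (a rough sketch;
 * dev_id stands for an already probed idxd device and the descriptor count is
 * an arbitrary value within the min_desc/max_desc limits reported above):
 *
 *      struct rte_dma_conf cfg = { .nb_vchans = 1 };
 *      struct rte_dma_vchan_conf qcfg = {
 *              .direction = RTE_DMA_DIR_MEM_TO_MEM,
 *              .nb_desc = 1024,
 *      };
 *      rte_dma_configure(dev_id, &cfg);
 *      rte_dma_vchan_setup(dev_id, 0, &qcfg);
 *      rte_dma_start(dev_id);
 */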

int
idxd_dmadev_create(const char *name, struct rte_device *dev,
                   const struct idxd_dmadev *base_idxd,
                   const struct rte_dma_dev_ops *ops)
{
        struct idxd_dmadev *idxd = NULL;
        struct rte_dma_dev *dmadev = NULL;
        int ret = 0;

        RTE_BUILD_BUG_ON(sizeof(struct idxd_hw_desc) != 64);
        RTE_BUILD_BUG_ON(offsetof(struct idxd_hw_desc, size) != 32);
        RTE_BUILD_BUG_ON(sizeof(struct idxd_completion) != 32);

        if (!name) {
                IDXD_PMD_ERR("Invalid name of the device!");
                ret = -EINVAL;
                goto cleanup;
        }

        /* Allocate device structure */
        dmadev = rte_dma_pmd_allocate(name, dev->numa_node, sizeof(struct idxd_dmadev));
        if (dmadev == NULL) {
                IDXD_PMD_ERR("Unable to allocate dma device");
                ret = -ENOMEM;
                goto cleanup;
        }
        dmadev->dev_ops = ops;
        dmadev->device = dev;

        dmadev->fp_obj->copy = idxd_enqueue_copy;
        dmadev->fp_obj->fill = idxd_enqueue_fill;
        dmadev->fp_obj->submit = idxd_submit;

        idxd = dmadev->data->dev_private;
        *idxd = *base_idxd; /* copy over the main fields already passed in */
        idxd->dmadev = dmadev;

        /* allocate batch index ring and completion ring.
         * The +1 is because we can never fully use
         * the ring, otherwise read == write means both full and empty.
         */
        idxd->batch_comp_ring = rte_zmalloc_socket(NULL, (sizeof(idxd->batch_idx_ring[0]) +
                        sizeof(idxd->batch_comp_ring[0])) * (idxd->max_batches + 1),
                        sizeof(idxd->batch_comp_ring[0]), dev->numa_node);
        if (idxd->batch_comp_ring == NULL) {
                IDXD_PMD_ERR("Unable to reserve memory for batch data");
                ret = -ENOMEM;
                goto cleanup;
        }
        idxd->batch_idx_ring = (void *)&idxd->batch_comp_ring[idxd->max_batches + 1];
        idxd->batch_iova = rte_mem_virt2iova(idxd->batch_comp_ring);

        dmadev->fp_obj->dev_private = idxd;

        idxd->dmadev->state = RTE_DMA_DEV_READY;

        return 0;

cleanup:
        if (dmadev)
                rte_dma_pmd_release(name);

        return ret;
}

int idxd_pmd_logtype;

RTE_LOG_REGISTER_DEFAULT(idxd_pmd_logtype, WARNING);