raw/ioat: fix ring space checks
drivers/raw/ioat/rte_idxd_rawdev_fns.h (dpdk.git)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2021 Intel Corporation
 */
#ifndef _RTE_IDXD_RAWDEV_FNS_H_
#define _RTE_IDXD_RAWDEV_FNS_H_

/**
 * @file
 * This header file contains the implementation of the various ioat
 * rawdev functions for DSA hardware. The API specification and key
 * public structures are defined in "rte_ioat_rawdev.h".
 *
 * This file should not be included directly, but instead applications should
 * include "rte_ioat_rawdev.h", which then includes this file - and the
 * IOAT/CBDMA equivalent header - in turn.
 */
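
/* Editor's note (not part of the upstream header): per the comment above, an
 * application is expected to pull this file in only indirectly, e.g.
 *
 *   #include <rte_ioat_rawdev.h>   // public API, includes this header in turn
 *
 * rather than including rte_idxd_rawdev_fns.h itself.
 */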

#include <stdint.h>

/*
 * Defines used in the data path for interacting with IDXD hardware.
 */
#define IDXD_CMD_OP_SHIFT 24
enum rte_idxd_ops {
        idxd_op_nop = 0,
        idxd_op_batch,
        idxd_op_drain,
        idxd_op_memmove,
        idxd_op_fill
};

#define IDXD_FLAG_FENCE                 (1 << 0)
#define IDXD_FLAG_COMPLETION_ADDR_VALID (1 << 2)
#define IDXD_FLAG_REQUEST_COMPLETION    (1 << 3)
#define IDXD_FLAG_CACHE_CONTROL         (1 << 8)

#define IOAT_COMP_UPDATE_SHIFT  3
#define IOAT_CMD_OP_SHIFT       24
enum rte_ioat_ops {
        ioat_op_copy = 0,       /* Standard DMA Operation */
        ioat_op_fill            /* Block Fill */
};

/**
 * Hardware descriptor used by DSA hardware, for both bursts and
 * for individual operations.
 */
struct rte_idxd_hw_desc {
        uint32_t pasid;
        uint32_t op_flags;
        rte_iova_t completion;

        RTE_STD_C11
        union {
                rte_iova_t src;      /* source address for copy ops etc. */
                rte_iova_t desc_addr; /* descriptor pointer for batch */
        };
        rte_iova_t dst;

        uint32_t size;    /* length of data for op, or batch size */

        uint16_t intr_handle; /* completion interrupt handle */

        /* remaining 26 bytes are reserved */
        uint16_t __reserved[13];
} __rte_aligned(64);
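
/* Editor's sketch (not in the upstream header): the fields above total 38
 * bytes and, with the 26 reserved bytes, fill exactly one 64-byte line, which
 * is what a single movdir64b write to the device portal transfers. A
 * compile-time check along these lines would document that assumption:
 *
 *   _Static_assert(sizeof(struct rte_idxd_hw_desc) == 64,
 *                  "IDXD hardware descriptor must be exactly 64 bytes");
 */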

/**
 * Completion record structure written back by DSA
 */
struct rte_idxd_completion {
        uint8_t status;
        uint8_t result;
        /* 16 bits of padding here */
        uint32_t completed_size; /* data length, or descriptors for batch */

        rte_iova_t fault_address;
        uint32_t invalid_flags;
} __rte_aligned(32);

/**
 * structure used to save the "handles" provided by the user to be
 * returned to the user on job completion.
 */
struct rte_idxd_user_hdl {
        uint64_t src;
        uint64_t dst;
};

/**
 * @internal
 * Structure representing an IDXD device instance
 */
struct rte_idxd_rawdev {
        enum rte_ioat_dev_type type;
        struct rte_ioat_xstats xstats;

        void *portal; /* address to write the batch descriptor */

        struct rte_ioat_rawdev_config cfg;
        rte_iova_t desc_iova; /* base address of desc ring, needed for completions */

        /* counters to track the batches */
        unsigned short max_batches;
        unsigned short batch_idx_read;
        unsigned short batch_idx_write;
        unsigned short *batch_idx_ring; /* store where each batch ends */

        /* track descriptors and handles */
        unsigned short desc_ring_mask;
        unsigned short hdls_avail; /* handles for ops completed */
        unsigned short hdls_read; /* the read pointer for hdls/desc rings */
        unsigned short batch_start; /* start+size == write pointer for hdls/desc */
        unsigned short batch_size;

        struct rte_idxd_hw_desc *desc_ring;
        struct rte_idxd_user_hdl *hdl_ring;
        /* flags to indicate handle validity. Kept separate from the ring, to
         * avoid using 8 bytes per flag. The upper 8 bits hold the error code,
         * if any.
         */
        uint16_t *hdl_ring_flags;
};

#define RTE_IDXD_HDL_NORMAL     0
#define RTE_IDXD_HDL_INVALID    (1 << 0) /* no handle stored for this element */
#define RTE_IDXD_HDL_OP_FAILED  (1 << 1) /* return failure for this one */
#define RTE_IDXD_HDL_OP_SKIPPED (1 << 2) /* this op was skipped */
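
/* Editor's sketch of how these flags are combined: the low bits carry one of
 * the RTE_IDXD_HDL_* states and, on failure, the hardware status byte is
 * packed into the upper 8 bits (see __idxd_completed_ops() below), e.g.
 *
 *   uint16_t flags = RTE_IDXD_HDL_OP_FAILED | ((uint16_t)hw_status << 8);
 *   uint8_t err = flags >> 8;   // recovered later as the per-op status code
 *
 * where hw_status stands for the status byte written back by the device.
 */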

static __rte_always_inline uint16_t
__idxd_burst_capacity(int dev_id)
{
        struct rte_idxd_rawdev *idxd =
                        (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
        uint16_t write_idx = idxd->batch_start + idxd->batch_size;
        uint16_t used_space, free_space;

        /* Check for space in the batch ring */
        if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
                        idxd->batch_idx_write + 1 == idxd->batch_idx_read)
                return 0;

        /* for descriptors, check for wrap-around on write but not read */
        if (idxd->hdls_read > write_idx)
                write_idx += idxd->desc_ring_mask + 1;
        used_space = write_idx - idxd->hdls_read;
        /* Return the amount of free space in the descriptor ring, holding back
         * 1 slot for the batch descriptor and 1 for a possible null descriptor
         */
        free_space = idxd->desc_ring_mask - used_space;
        if (free_space < 2)
                return 0;
        return free_space - 2;
}
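
/* Worked example (editor's illustration): with a 4096-entry descriptor ring
 * (desc_ring_mask == 4095), hdls_read == 100 and write_idx == 612, used_space
 * is 512 and free_space is 4095 - 512 == 3583, so the function reports 3581
 * usable slots, holding back one slot for the batch descriptor and one for a
 * possible null/fence descriptor.
 */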

static __rte_always_inline rte_iova_t
__desc_idx_to_iova(struct rte_idxd_rawdev *idxd, uint16_t n)
{
        return idxd->desc_iova + (n * sizeof(struct rte_idxd_hw_desc));
}

static __rte_always_inline int
__idxd_write_desc(int dev_id,
                const uint32_t op_flags,
                const rte_iova_t src,
                const rte_iova_t dst,
                const uint32_t size,
                const struct rte_idxd_user_hdl *hdl)
{
        struct rte_idxd_rawdev *idxd =
                        (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
        uint16_t write_idx = idxd->batch_start + idxd->batch_size;
        uint16_t mask = idxd->desc_ring_mask;

        /* first check batch ring space then desc ring space */
        if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
                        idxd->batch_idx_write + 1 == idxd->batch_idx_read)
                goto failed;
        /* for descriptor ring, we always need a slot for batch completion */
        if (((write_idx + 2) & mask) == idxd->hdls_read ||
                        ((write_idx + 1) & mask) == idxd->hdls_read)
                goto failed;

        /* write desc and handle. Note, descriptors don't wrap */
        idxd->desc_ring[write_idx].pasid = 0;
        idxd->desc_ring[write_idx].op_flags = op_flags | IDXD_FLAG_COMPLETION_ADDR_VALID;
        idxd->desc_ring[write_idx].completion = __desc_idx_to_iova(idxd, write_idx & mask);
        idxd->desc_ring[write_idx].src = src;
        idxd->desc_ring[write_idx].dst = dst;
        idxd->desc_ring[write_idx].size = size;

        if (hdl == NULL)
                idxd->hdl_ring_flags[write_idx & mask] = RTE_IDXD_HDL_INVALID;
        else
                idxd->hdl_ring[write_idx & mask] = *hdl;
        idxd->batch_size++;

        idxd->xstats.enqueued++;

        rte_prefetch0_write(&idxd->desc_ring[write_idx + 1]);
        return 1;

failed:
        idxd->xstats.enqueue_failed++;
        rte_errno = ENOSPC;
        return 0;
}
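
/* Worked example (editor's illustration) of the descriptor-ring check above:
 * with desc_ring_mask == 4095 and hdls_read == 100, an enqueue attempt with
 * write_idx == 98 is refused because (write_idx + 2) & mask == 100, i.e. it
 * would collide with hdls_read. The slot after the last descriptor must stay
 * free for the batch-completion record written by __idxd_perform_ops(), and
 * a refused enqueue returns 0 with rte_errno set to ENOSPC.
 */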

static __rte_always_inline int
__idxd_enqueue_fill(int dev_id, uint64_t pattern, rte_iova_t dst,
                unsigned int length, uintptr_t dst_hdl)
{
        const struct rte_idxd_user_hdl hdl = {
                        .dst = dst_hdl
        };
        return __idxd_write_desc(dev_id,
                        (idxd_op_fill << IDXD_CMD_OP_SHIFT) | IDXD_FLAG_CACHE_CONTROL,
                        pattern, dst, length, &hdl);
}

static __rte_always_inline int
__idxd_enqueue_copy(int dev_id, rte_iova_t src, rte_iova_t dst,
                unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl)
{
        const struct rte_idxd_user_hdl hdl = {
                        .src = src_hdl,
                        .dst = dst_hdl
        };
        return __idxd_write_desc(dev_id,
                        (idxd_op_memmove << IDXD_CMD_OP_SHIFT) | IDXD_FLAG_CACHE_CONTROL,
                        src, dst, length, &hdl);
}

static __rte_always_inline int
__idxd_fence(int dev_id)
{
        /* only op field needs filling - zero src, dst and length */
        return __idxd_write_desc(dev_id, IDXD_FLAG_FENCE, 0, 0, 0, NULL);
}

static __rte_always_inline void
__idxd_movdir64b(volatile void *dst, const struct rte_idxd_hw_desc *src)
{
        asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
                        :
                        : "a" (dst), "d" (src)
                        : "memory");
}
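
/* Editor's note: the byte sequence above encodes the MOVDIR64B instruction
 * (opcode 66 0F 38 F8) with a ModRM byte of 0x02, i.e. the destination portal
 * address in RAX (the "a" constraint) and the 64-byte source descriptor
 * referenced through RDX (the "d" constraint). On toolchains that support it,
 * this should match the effect of the _movdir64b() intrinsic from
 * <immintrin.h>; that equivalence is an assumption, not something verified
 * here for every supported compiler.
 */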

static __rte_always_inline int
__idxd_perform_ops(int dev_id)
{
        struct rte_idxd_rawdev *idxd =
                        (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;

        if (!idxd->cfg.no_prefetch_completions)
                rte_prefetch1(&idxd->desc_ring[idxd->batch_idx_ring[idxd->batch_idx_read]]);

        if (idxd->batch_size == 0)
                return 0;

        if (idxd->batch_size == 1)
                /* use a fence as a null descriptor, so batch_size >= 2 */
                if (__idxd_fence(dev_id) != 1)
                        return -1;

        /* write completion beyond last desc in the batch */
        uint16_t comp_idx = (idxd->batch_start + idxd->batch_size) & idxd->desc_ring_mask;
        *((uint64_t *)&idxd->desc_ring[comp_idx]) = 0; /* zero start of desc */
        idxd->hdl_ring_flags[comp_idx] = RTE_IDXD_HDL_INVALID;

        const struct rte_idxd_hw_desc batch_desc = {
                        .op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) |
                                IDXD_FLAG_COMPLETION_ADDR_VALID |
                                IDXD_FLAG_REQUEST_COMPLETION,
                        .desc_addr = __desc_idx_to_iova(idxd, idxd->batch_start),
                        .completion = __desc_idx_to_iova(idxd, comp_idx),
                        .size = idxd->batch_size,
        };

        _mm_sfence(); /* fence before writing desc to device */
        __idxd_movdir64b(idxd->portal, &batch_desc);
        idxd->xstats.started += idxd->batch_size;

        idxd->batch_start += idxd->batch_size + 1;
        idxd->batch_start &= idxd->desc_ring_mask;
        idxd->batch_size = 0;

        idxd->batch_idx_ring[idxd->batch_idx_write++] = comp_idx;
        if (idxd->batch_idx_write > idxd->max_batches)
                idxd->batch_idx_write = 0;

        return 0;
}
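
/* Worked example (editor's illustration) of the batch-index bookkeeping: with
 * max_batches == 32, batch_idx_write takes the values 0..32 and wraps back to
 * 0 after writing slot 32, so batch_idx_ring behaves as a ring of 33 slots.
 * Combined with the "(read == 0 && write == max_batches) || write + 1 == read"
 * test in __idxd_write_desc() and __idxd_burst_capacity(), one slot is always
 * left unused, so at most 32 batches can be outstanding at any time.
 */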

static __rte_always_inline int
__idxd_completed_ops(int dev_id, uint8_t max_ops, uint32_t *status, uint8_t *num_unsuccessful,
                uintptr_t *src_hdls, uintptr_t *dst_hdls)
{
        struct rte_idxd_rawdev *idxd =
                        (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
        unsigned short n, h_idx;

        while (idxd->batch_idx_read != idxd->batch_idx_write) {
                uint16_t idx_to_chk = idxd->batch_idx_ring[idxd->batch_idx_read];
                volatile struct rte_idxd_completion *comp_to_chk =
                                (struct rte_idxd_completion *)&idxd->desc_ring[idx_to_chk];
                uint8_t batch_status = comp_to_chk->status;
                if (batch_status == 0)
                        break;
                comp_to_chk->status = 0;
                if (unlikely(batch_status > 1)) {
                        /* error occurred somewhere in batch, start where last checked */
                        uint16_t desc_count = comp_to_chk->completed_size;
                        uint16_t batch_start = idxd->hdls_avail;
                        uint16_t batch_end = idx_to_chk;

                        if (batch_start > batch_end)
                                batch_end += idxd->desc_ring_mask + 1;
                        /* go through each batch entry and see status */
                        for (n = 0; n < desc_count; n++) {
                                uint16_t idx = (batch_start + n) & idxd->desc_ring_mask;
                                volatile struct rte_idxd_completion *comp =
                                        (struct rte_idxd_completion *)&idxd->desc_ring[idx];
                                if (comp->status != 0 &&
                                                idxd->hdl_ring_flags[idx] == RTE_IDXD_HDL_NORMAL) {
                                        idxd->hdl_ring_flags[idx] = RTE_IDXD_HDL_OP_FAILED;
                                        idxd->hdl_ring_flags[idx] |= (comp->status << 8);
                                        comp->status = 0; /* clear error for next time */
                                }
                        }
                        /* if batch is incomplete, mark rest as skipped */
                        for ( ; n < batch_end - batch_start; n++) {
                                uint16_t idx = (batch_start + n) & idxd->desc_ring_mask;
                                if (idxd->hdl_ring_flags[idx] == RTE_IDXD_HDL_NORMAL)
                                        idxd->hdl_ring_flags[idx] = RTE_IDXD_HDL_OP_SKIPPED;
                        }
                }
                /* avail points to one after the last one written */
                idxd->hdls_avail = (idx_to_chk + 1) & idxd->desc_ring_mask;
                idxd->batch_idx_read++;
                if (idxd->batch_idx_read > idxd->max_batches)
                        idxd->batch_idx_read = 0;
        }

        if (idxd->cfg.hdls_disable && status == NULL) {
                n = (idxd->hdls_avail < idxd->hdls_read) ?
                                (idxd->hdls_avail + idxd->desc_ring_mask + 1 - idxd->hdls_read) :
                                (idxd->hdls_avail - idxd->hdls_read);
                idxd->hdls_read = idxd->hdls_avail;
                goto out;
        }

        n = 0;
        h_idx = idxd->hdls_read;
        while (h_idx != idxd->hdls_avail) {
                uint16_t flag = idxd->hdl_ring_flags[h_idx];
                if (flag != RTE_IDXD_HDL_INVALID) {
                        if (!idxd->cfg.hdls_disable) {
                                src_hdls[n] = idxd->hdl_ring[h_idx].src;
                                dst_hdls[n] = idxd->hdl_ring[h_idx].dst;
                        }
                        if (unlikely(flag != RTE_IDXD_HDL_NORMAL)) {
                                if (status != NULL)
                                        status[n] = flag == RTE_IDXD_HDL_OP_SKIPPED ?
                                                        RTE_IOAT_OP_SKIPPED :
                                                        /* failure case, return err code */
                                                        idxd->hdl_ring_flags[h_idx] >> 8;
                                if (num_unsuccessful != NULL)
                                        *num_unsuccessful += 1;
                        }
                        n++;
                }
                idxd->hdl_ring_flags[h_idx] = RTE_IDXD_HDL_NORMAL;
                if (++h_idx > idxd->desc_ring_mask)
                        h_idx = 0;
                if (n >= max_ops)
                        break;
        }

        /* skip over any remaining blank elements, e.g. batch completion */
        while (idxd->hdl_ring_flags[h_idx] == RTE_IDXD_HDL_INVALID && h_idx != idxd->hdls_avail) {
                idxd->hdl_ring_flags[h_idx] = RTE_IDXD_HDL_NORMAL;
                if (++h_idx > idxd->desc_ring_mask)
                        h_idx = 0;
        }
        idxd->hdls_read = h_idx;

out:
        idxd->xstats.completed += n;
        return n;
}
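
/* Editor's usage sketch. Assumptions: the public wrappers declared via
 * rte_ioat_rawdev.h (rte_ioat_enqueue_copy(), rte_ioat_perform_ops() and
 * rte_ioat_completed_ops()) dispatch to the __idxd_* functions above on DSA
 * devices; exact signatures should be checked against that header, and BURST
 * is a placeholder burst size.
 *
 *   uint32_t status[BURST];
 *   uint8_t errors = 0;
 *   uintptr_t src_hdls[BURST], dst_hdls[BURST];
 *
 *   if (rte_ioat_enqueue_copy(dev_id, src_iova, dst_iova, len,
 *                   (uintptr_t)src_buf, (uintptr_t)dst_buf) == 0) {
 *           // ring full: rte_errno == ENOSPC; submit the pending batch and
 *           // gather completions before retrying
 *   }
 *   rte_ioat_perform_ops(dev_id);        // push the current batch to the device
 *   ...
 *   int done = rte_ioat_completed_ops(dev_id, BURST, status, &errors,
 *                   src_hdls, dst_hdls); // reap handles; status[] carries
 *                                        // RTE_IOAT_OP_SKIPPED or the DSA
 *                                        // status code for failed ops
 */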

#endif